From c6a31e656283ce85cf47e539c767d312b6e84d50 Mon Sep 17 00:00:00 2001
From: s123188
Date: Thu, 15 May 2014 15:55:45 +0200
Subject: wordcloud implemented, still needs output tho

---
Review notes (this section is not part of the commit message):

* Analyzor.makeWordCloud: the frequency update used `value++` as the
  argument to put(). A postfix increment evaluates to the value before
  the increment, so the map kept storing the old count and no word ever
  got past 1. The update now stores `value + 1`.
* Analyzor.removePunct: inside the character class, `)-:` was read as a
  range from ')' to ':' and therefore also blanked the digits 0-9 and
  the characters '*', '+' and '/'. The hyphen now sits at the end of the
  class, where it is a literal.
* Analyzor.makeWordCloud: the map is declared as HashMap<String, Integer>;
  the Integer assignment from wordcloud.get(word) needs those type
  arguments to compile.
* NOTE(review): FarmShell no longer assigns `analyzor` in `case query`.
  Confirm it is initialised somewhere outside these hunks before the
  sentiment / wordcloud commands dereference it.
* NOTE(review): split("\\s+") yields a leading empty token when the text
  starts with whitespace (for example after a leading URL was removed);
  that empty string is currently counted as a word.
* The three fixes are in-line edits, so hunk sizes and the diffstat are
  unchanged; the blob ids on the index lines are those of the original
  commit.

 src/main/Analyzor.java  | 42 ++++++++++++++++++++++++++++++++++++------
 src/main/FarmShell.java | 14 ++++++++------
 2 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 1d0e8aa..51e080e 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -61,7 +61,7 @@ public class Analyzor {
 
     //query the database
     //fills the ResultSet data
-    void Query(String query) throws SQLException {
+    void query(String query) throws SQLException {
         PreparedStatement statement;
 
         //make a connection to the database and execute the query
@@ -72,7 +72,10 @@ public class Analyzor {
 
     //analyzes the tweet on their positivity
     //this is just a base version
-    void sentimentAnalysis() throws SQLException, IOException {
+    void sentimentAnalysis(String query) throws SQLException, IOException {
+
+        query(query);
+
         //read the lexicons
         readLexicon();
 
@@ -121,7 +124,9 @@ public class Analyzor {
     }
 
     //makes a wordcloud of the tweets in the ResultSet data
-    void makeWordCloud() throws SQLException {
+    void makeWordCloud(String query) throws SQLException {
+
+        query(query);
         //go to the start of the ResultSet data
         if (data == null) {
             System.err.println("data is empty, try querying first");
@@ -130,17 +135,34 @@ public class Analyzor {
 
         //make the hashmap with the words and their frequency
         HashMap<String, Integer> wordcloud = new HashMap<>();
-
-        //set the pointer at the start of the ResultSet
-        data.beforeFirst();
+
+        String text;
+        String[] words;
+        Integer value;
 
         while(data.next()){
             //get the text
+            text = data.getString("text");
 
+            //remove punctuation, convert to lowercase and split on words
+            text = removePunct(text);
+            text = text.toLowerCase();
+            words = text.split("\\s+");
+            //count the words
+            for(String word : words){
+                value = wordcloud.get(word);
+                if(value == null){
+                    wordcloud.put(word, 1);
+                }
+                else{
+                    wordcloud.put(word, value + 1);
+                }
+            }
         }
     }
 
     //replaces punctuation so it will be splitted
+    //also removes urls
     private String replacePunct(String text) {
         text = text.replaceAll("https?://\\S*", "");
         text = text.replaceAll("[!?):;\"']", " $0");
@@ -148,4 +170,12 @@ public class Analyzor {
         text = text.replaceAll("\\s[(\"']", "$0 ");
         return text;
     }
+
+    //removes punctuation
+    //also removes urls
+    private String removePunct(String text){
+        text = text.replaceAll("https?://\\S*", "");
+        text = text.replaceAll("[.,!?():;\"'-]", " ");
+        return text;
+    }
 }
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
index d56c72a..93cb928 100644
--- a/src/main/FarmShell.java
+++ b/src/main/FarmShell.java
@@ -92,8 +92,9 @@ public class FarmShell {
 
     enum Command {
         query("make a query to the database; needed to do analysis", 1),
-        filterbots("marks all users as bot or not"),
-        sentiment("analyzes all tweets on positivity (about a brand)"),
+        filterbots("marks all users as bot or not", 1),
+        sentiment("analyzes all tweets on positivity (about a brand)", 1),
+        wordcloud("makes a wordcloud of the text of the tweets", 1),
         exit("Returns to shell"),
         help("Get help");
 
@@ -142,15 +143,16 @@ public class FarmShell {
         }
         switch (command) {
             case query:
-                //make a new Analyzor
-                analyzor = new Analyzor();
-                analyzor.Query(params[0]);
+                System.err.println("isn't supported anymore, now enter query after analysis type");
                 break;
             case filterbots:
                 System.out.println("not yet implemented");
                 break;
             case sentiment:
-                analyzor.sentimentAnalysis();
+                analyzor.sentimentAnalysis(params[0]);
+                break;
+            case wordcloud:
+                analyzor.makeWordCloud(params[0]);
                 break;
             case help:
                 for (String line : HELP) {
-- 
cgit v1.2.1