summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author s123188 <s123188@S123188.campus.tue.nl> 2014-05-15 15:55:45 +0200
committer s123188 <s123188@S123188.campus.tue.nl> 2014-05-15 15:55:45 +0200
commit c6a31e656283ce85cf47e539c767d312b6e84d50 (patch)
tree 06eeab24e7cc67471a98bf9858b15602144236cb /src
parent 87908edbd832f8e8d63b11159a6ec331592521ed (diff)
download Goldfarmer-c6a31e656283ce85cf47e539c767d312b6e84d50.tar.gz
wordcloud implemented, still needs output tho
Diffstat (limited to 'src')
-rw-r--r-- src/main/Analyzor.java 42
-rw-r--r-- src/main/FarmShell.java 14
2 files changed, 44 insertions, 12 deletions
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 1d0e8aa..51e080e 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -61,7 +61,7 @@ public class Analyzor {
//query the database
//fills the ResultSet data
- void Query(String query) throws SQLException {
+ void query(String query) throws SQLException {
PreparedStatement statement;
//make a connection to the database and execute the query
@@ -72,7 +72,10 @@ public class Analyzor {
//analyzes the tweet on their positivity
//this is just a base version
- void sentimentAnalysis() throws SQLException, IOException {
+ void sentimentAnalysis(String query) throws SQLException, IOException {
+
+ query(query);
+
//read the lexicons
readLexicon();
@@ -121,7 +124,9 @@ public class Analyzor {
}
//makes a wordcloud of the tweets in the ResultSet data
- void makeWordCloud() throws SQLException {
+ void makeWordCloud(String query) throws SQLException {
+
+ query(query);
//go to the start of the ResultSet data
if (data == null) {
System.err.println("data is empty, try querying first");
@@ -130,17 +135,34 @@ public class Analyzor {
//make the hashmap with the words and their frequency
HashMap<String, Integer> wordcloud = new HashMap<>();
-
- //set the pointer at the start of the ResultSet
- data.beforeFirst();
+
+ String text;
+ String[] words;
+ Integer value;
while(data.next()){
//get the text
+ text = data.getString("text");
+ //remove punctuation, convert to lowercase and split on words
+ text = removePunct(text);
+ text = text.toLowerCase();
+ words = text.split("\\s+");
+            //count the words; store value + 1 because value++ would put the old count back
+            for(String word : words){
+                value = wordcloud.get(word);
+                if(value == null){
+                    wordcloud.put(word, 1);
+                }
+                else{
+                    wordcloud.put(word, value + 1);
+                }
+            }
}
}
//replaces punctuation so it will be splitted
+ //also removes urls
private String replacePunct(String text) {
text = text.replaceAll("https?://\\S*", "");
text = text.replaceAll("[!?):;\"']", " $0");
@@ -148,4 +170,12 @@ public class Analyzor {
text = text.replaceAll("\\s[(\"']", "$0 ");
return text;
}
+
+    //strips urls first, then replaces each punctuation character with a space
+    //'-' is placed last in the class so it is a literal and not the range ')'..':' (which also hit digits)
+    private String removePunct(String text){
+        text = text.replaceAll("https?://\\S*", "");
+        text = text.replaceAll("[.,!?():;\"'-]", " ");
+        return text;
+    }
}
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
index d56c72a..93cb928 100644
--- a/src/main/FarmShell.java
+++ b/src/main/FarmShell.java
@@ -92,8 +92,9 @@ public class FarmShell {
enum Command {
query("make a query to the database; needed to do analysis", 1),
- filterbots("marks all users as bot or not"),
- sentiment("analyzes all tweets on positivity (about a brand)"),
+ filterbots("marks all users as bot or not", 1),
+ sentiment("analyzes all tweets on positivity (about a brand)", 1),
+ wordcloud("makes a wordcloud of the text of the tweets", 1),
exit("Returns to shell"),
help("Get help");
@@ -142,15 +143,16 @@ public class FarmShell {
}
switch (command) {
case query:
- //make a new Analyzor
- analyzor = new Analyzor();
- analyzor.Query(params[0]);
+ System.err.println("isn't supported anymore, now enter query after analysis type");
break;
case filterbots:
System.out.println("not yet implemented");
break;
case sentiment:
- analyzor.sentimentAnalysis();
+ analyzor.sentimentAnalysis(params[0]);
+ break;
+ case wordcloud:
+ analyzor.makeWordCloud(params[0]);
break;
case help:
for (String line : HELP) {