summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authors123188 <s123188@S123188.campus.tue.nl>2014-05-18 21:37:04 +0200
committers123188 <s123188@S123188.campus.tue.nl>2014-05-18 21:37:04 +0200
commit1cffe65168ca81848ce31ac747b145c38f382edf (patch)
tree906340e13669b5a56634a8507c5d287a8f3a8796 /src
parent2485a30d1ffb69b6ff2c033f55cd13b9a7d05826 (diff)
downloadGoldfarmer-1cffe65168ca81848ce31ac747b145c38f382edf.tar.gz
Wordcloud now outputs a CSV file with "tweetid, word" to be used in Disco. Once the brandchecker works, wordcloud can use it to divide the tweets into brands.
Diffstat (limited to 'src')
-rw-r--r--src/main/Analyzor.java53
1 file changed, 30 insertions, 23 deletions
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 4793b38..3457136 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -12,6 +12,7 @@ import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
+import java.util.Map.Entry;
import java.util.Scanner;
/**
@@ -139,7 +140,7 @@ public class Analyzor {
}
//makes a wordcloud of the tweets in the ResultSet data
- void makeWordCloud(String query) throws SQLException {
+ void makeWordCloud(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
query(query);
//go to the start of the ResultSet data
@@ -148,12 +149,14 @@ public class Analyzor {
return;
}
- //make the hashmap with the words and their frequency
- HashMap<String, Integer> wordcloud = new HashMap<>();
-
String text;
String[] words;
Integer value;
+ String tweetid;
+
+ PrintWriter writer = new PrintWriter("wordcloud.csv", "UTF-8");
+ //print the first row
+ writer.println("tweetid, word");
while (data.next()) {
//get the text
@@ -162,36 +165,41 @@ public class Analyzor {
text = removePunct(text);
text = text.toLowerCase();
words = text.split("\\s+");
-
- //count the words
+ //we use the tweetid as case id
+ tweetid = Long.toString(data.getLong("tweetid"));
+
for (String word : words) {
- value = wordcloud.get(word);
- if (value == null) {
- wordcloud.put(word, 1);
- } else {
- wordcloud.put(word, value++);
- }
+ writer.println(tweetid + ", " + word);
}
}
+ //print it in a csv file to put in disco
+
+ //print the first row
+
+ //print the values
+ writer.close();
}
-
+
//generate csv for disco from the query
- public void disco(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+ public void disco(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
+ //do the query
query(query);
PrintWriter writer = new PrintWriter("output.csv", "UTF-8");
- for(int i = 1; i<data.getMetaData().getColumnCount();i++){
- writer.print(data.getMetaData().getColumnLabel(i)+", ");
+ //print the first row
+ for (int i = 1; i < data.getMetaData().getColumnCount(); i++) {
+ writer.print(data.getMetaData().getColumnLabel(i) + ", ");
}
writer.println(data.getMetaData().getColumnLabel(data.getMetaData().getColumnCount()));
- while(data.next()){
- for(int i = 1; i<data.getMetaData().getColumnCount();i++){
- if(data.getObject(i)==null){
+ //print the values
+ while (data.next()) {
+ for (int i = 1; i < data.getMetaData().getColumnCount(); i++) {
+ if (data.getObject(i) == null) {
writer.print(", ");
} else {
- writer.print(data.getObject(i).toString().replaceAll("[,\n]", " ")+", ");
+ writer.print(data.getObject(i).toString().replaceAll("[,\n]", " ") + ", ");
}
}
- if(data.getObject(data.getMetaData().getColumnCount())==null){
+ if (data.getObject(data.getMetaData().getColumnCount()) == null) {
writer.println(" ");
} else {
writer.println(data.getObject(data.getMetaData().getColumnCount()).toString().replace(",", " "));
@@ -199,8 +207,7 @@ public class Analyzor {
}
writer.close();
}
-
-
+
//replaces punctuation so it will be splitted
//also removes urls
private String splitPunctToWords(String text) {