diff options
author | s123188 <s123188@S123188.campus.tue.nl> | 2014-05-18 21:37:04 +0200 |
---|---|---|
committer | s123188 <s123188@S123188.campus.tue.nl> | 2014-05-18 21:37:04 +0200 |
commit | 1cffe65168ca81848ce31ac747b145c38f382edf (patch) | |
tree | 906340e13669b5a56634a8507c5d287a8f3a8796 /src | |
parent | 2485a30d1ffb69b6ff2c033f55cd13b9a7d05826 (diff) | |
download | Goldfarmer-1cffe65168ca81848ce31ac747b145c38f382edf.tar.gz |
Wordcloud now outputs a CSV file with "tweetid, word" rows to be used in Disco. Once the brand checker works, wordcloud can use it to split the output by brand.
Diffstat (limited to 'src')
-rw-r--r-- | src/main/Analyzor.java | 53 |
1 files changed, 30 insertions, 23 deletions
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index 4793b38..3457136 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -12,6 +12,7 @@ import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.HashMap; +import java.util.Map.Entry; import java.util.Scanner; /** @@ -139,7 +140,7 @@ public class Analyzor { } //makes a wordcloud of the tweets in the ResultSet data - void makeWordCloud(String query) throws SQLException { + void makeWordCloud(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException { query(query); //go to the start of the ResultSet data @@ -148,12 +149,14 @@ public class Analyzor { return; } - //make the hashmap with the words and their frequency - HashMap<String, Integer> wordcloud = new HashMap<>(); - String text; String[] words; Integer value; + String tweetid; + + PrintWriter writer = new PrintWriter("wordcloud.csv", "UTF-8"); + //print the first row + writer.println("tweetid, word"); while (data.next()) { //get the text @@ -162,36 +165,41 @@ public class Analyzor { text = removePunct(text); text = text.toLowerCase(); words = text.split("\\s+"); - - //count the words + //we use the tweetid as case id + tweetid = Long.toString(data.getLong("tweetid")); + for (String word : words) { - value = wordcloud.get(word); - if (value == null) { - wordcloud.put(word, 1); - } else { - wordcloud.put(word, value++); - } + writer.println(tweetid + ", " + word); } } + //print it in a csv file to put in disco + + //print the first row + + //print the values + writer.close(); } - + //generate csv for disco from the query - public void disco(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{ + public void disco(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException { + //do the query query(query); PrintWriter writer = new PrintWriter("output.csv", "UTF-8"); - for(int i = 1; 
i<data.getMetaData().getColumnCount();i++){ - writer.print(data.getMetaData().getColumnLabel(i)+", "); + //print the first row + for (int i = 1; i < data.getMetaData().getColumnCount(); i++) { + writer.print(data.getMetaData().getColumnLabel(i) + ", "); } writer.println(data.getMetaData().getColumnLabel(data.getMetaData().getColumnCount())); - while(data.next()){ - for(int i = 1; i<data.getMetaData().getColumnCount();i++){ - if(data.getObject(i)==null){ + //print the values + while (data.next()) { + for (int i = 1; i < data.getMetaData().getColumnCount(); i++) { + if (data.getObject(i) == null) { writer.print(", "); } else { - writer.print(data.getObject(i).toString().replaceAll("[,\n]", " ")+", "); + writer.print(data.getObject(i).toString().replaceAll("[,\n]", " ") + ", "); } } - if(data.getObject(data.getMetaData().getColumnCount())==null){ + if (data.getObject(data.getMetaData().getColumnCount()) == null) { writer.println(" "); } else { writer.println(data.getObject(data.getMetaData().getColumnCount()).toString().replace(",", " ")); @@ -199,8 +207,7 @@ public class Analyzor { } writer.close(); } - - + //replaces punctuation so it will be splitted //also removes urls private String splitPunctToWords(String text) { |