summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Wu <peter@lekensteyn.nl> 2014-06-04 16:16:41 +0200
committer Peter Wu <peter@lekensteyn.nl> 2014-06-04 16:16:41 +0200
commit 6ed880b928ceaee3935562c2eb975ddaa49a8530 (patch)
tree 391a20427ca47e2952ff115aeeba5e69ea764de3
parent 554f33b510656d5de46eecd5fef11f237fd38043 (diff)
download Goldfarmer-6ed880b928ceaee3935562c2eb975ddaa49a8530.tar.gz
Formatting, remove unused imports
-rw-r--r-- src/main/Analyzor.java 225
-rw-r--r-- src/main/FarmShell.java 14
2 files changed, 116 insertions, 123 deletions
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 40ec38a..5a201be 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -18,7 +18,6 @@ import java.sql.Timestamp;
import java.util.List;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Locale;
import java.util.Map.Entry;
import java.util.Scanner;
@@ -186,14 +185,14 @@ public class Analyzor {
String text;
String brand;
String[] words;
- HashMap<String,HashMap<String, Integer>> wordcloud = new HashMap<>();
+ HashMap<String, HashMap<String, Integer>> wordcloud = new HashMap<>();
while (data.next()) {
//get brand
brand = data.getString("brand");
//make hashmap for each brand
- if(!wordcloud.containsKey(brand)){
- wordcloud.put(brand, new HashMap<String,Integer>());
+ if (!wordcloud.containsKey(brand)) {
+ wordcloud.put(brand, new HashMap<String, Integer>());
}
//get the text
text = data.getString("text");
@@ -204,15 +203,14 @@ public class Analyzor {
//for all words
for (String word : words) {
//if it is empty, a space or a stripe, skip it
- if(word.equals("") || word.equals(" ") || word.equals("-")){
+ if (word.equals("") || word.equals(" ") || word.equals("-")) {
continue;
}
//if the word is already in the map, increment the amount
- if(wordcloud.get(brand).containsKey(word)){
+ if (wordcloud.get(brand).containsKey(word)) {
wordcloud.get(brand).put(word, wordcloud.get(brand).get(word) + 1);
- }
- //if the word is not already in the map, make an entry with amount = 1
- else{
+ } //if the word is not already in the map, make an entry with amount = 1
+ else {
wordcloud.get(brand).put(word, 1);
}
}
@@ -251,14 +249,14 @@ public class Analyzor {
/**
* Obtain the brands of select tweet texts.
- *
+ *
* @param queryText The rows to select.
* @param reset Whether to reset mentionsbrand.
* @throws SQLException If the query is unsuccesfull.
*/
public void getBrands(String queryText, boolean reset) throws SQLException {
BrandChecker checker = new BrandChecker("brandonlyrules.txt");
-
+
PreparedStatement statement;
// make a connection to the database and execute the query
if (reset) {
@@ -266,7 +264,7 @@ public class Analyzor {
statement = connection.prepareStatement("delete from mentionsbrand");
statement.executeUpdate();
}
-
+
System.out.println("Obtaining all selected entries in tweet.");
if (queryText.isEmpty()) {
query("select * from tweet");
@@ -274,13 +272,13 @@ public class Analyzor {
query(queryText);
}
System.out.println("Query finished.");
-
+
NamedPreparedStatement insertBrand = new NamedPreparedStatement(connection, QueryUtils.insertBrand);
-
+
int brandCount = 0;
int count = 0;
long timestamp = System.currentTimeMillis();
-
+
while (data.next()) {
List<String> brands = checker.getBrands(data.getString("text"));
if (brands.isEmpty()) {
@@ -294,112 +292,108 @@ public class Analyzor {
insertBrand.executeUpdate();
}
}
-
+
count++;
if (count % 10000 == 0) {
System.out.println("Processed " + count + " tweets in " + (System.currentTimeMillis() - timestamp) + " ms");
}
}
-
- System.out.println("Processed " + count + " tweets in " + (System.currentTimeMillis() - timestamp) + " ms");
+
+ System.out.println("Processed " + count + " tweets in " + (System.currentTimeMillis() - timestamp) + " ms");
System.out.println("Finished getBrands, processed " + count + " number of tweets, added " + brandCount + " brands or no.");
}
-
+
//gets the amount of users that tweet about a brand in a timezone
//makes a csv file timezone, brand, amount
- public void timezone(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+ public void timezone(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
query(query);
-
+
InputStream inFile = new FileInputStream("timezone.txt");
Scanner readFile = new Scanner(inFile);
- HashMap<String,String> toTimezone = new HashMap<>();
+ HashMap<String, String> toTimezone = new HashMap<>();
while (readFile.hasNextLine()) {
String line = readFile.nextLine();
- if(line.split(",").length>1){
+ if (line.split(",").length > 1) {
toTimezone.put(line.split(",")[0], line.split(",")[1]);
}
}
-
+
//hashmap timezone, brand, amount
HashMap<String, HashMap<String, Integer>> timeMap = new HashMap<>();
String timezone;
String brand;
-
- while(data.next()){
+
+ while (data.next()) {
timezone = data.getString("timezone");
- if (toTimezone.containsKey(timezone)){
- timezone=toTimezone.get(timezone);
+ if (toTimezone.containsKey(timezone)) {
+ timezone = toTimezone.get(timezone);
} else {
- timezone="other";
+ timezone = "other";
}
brand = data.getString("brand");
-
+
//if the timezone is already in the map
- if(timeMap.containsKey(timezone)){
+ if (timeMap.containsKey(timezone)) {
//if the brand for that timezone is already in the map
- if(timeMap.get(timezone).containsKey(brand)){
+ if (timeMap.get(timezone).containsKey(brand)) {
//increment the amount
timeMap.get(timezone).put(brand, timeMap.get(timezone).get(brand) + 1);
- }
- //if the brand for that timezone is not yet in the map
- else{
+ } //if the brand for that timezone is not yet in the map
+ else {
//make a new entry for that brand with amount = 1
timeMap.get(timezone).put(brand, 1);
}
- }
- //if the timezone is not yet in the map
- else{
+ } //if the timezone is not yet in the map
+ else {
//make a new hashmap for this map and fill it with the brand and the amount
timeMap.put(timezone, new HashMap<String, Integer>());
timeMap.get(timezone).put(brand, 1);
}
}
-
+
//make the CSV out of the map
ssiMapToCSV(timeMap, "timezone.csv", "timezone,brand,count");
}
-
+
//gets the positivity of the tweets about a brand
//makes a csv file for posnegVisualizer
- void posNeg(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+ void posNeg(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
query(query);
-
+
String brand;
int rating;
int ratingInterval;
-
+
int intervalSize = 10;
//brand, ratingInterval, amount
HashMap<String, HashMap<Integer, Integer>> posnegMap = new HashMap<>();
/*
- the rating interval is given by an integer, which is the result of the
- tweets sentiment value divided by interval size rounded down.
- This puts all data in boxes for the histogram.
- */
-
- while(data.next()){
-
+ the rating interval is given by an integer, which is the result of the
+ tweets sentiment value divided by interval size rounded down.
+ This puts all data in boxes for the histogram.
+ */
+
+ while (data.next()) {
+
brand = data.getString("brand");
rating = data.getInt("rating");
//ratingInterval is an integer divisible by intervalSize
//if a rating is between a ratingInterval+-0.5*intervalSize, it belongs in that interval
- ratingInterval = (rating + (int)(0.5 * intervalSize))/intervalSize*intervalSize;
-
+ ratingInterval = (rating + (int) (0.5 * intervalSize)) / intervalSize * intervalSize;
+
//if the brand is already in the map
- if(posnegMap.containsKey(brand)){
+ if (posnegMap.containsKey(brand)) {
//if the brand for that brand is already in the map
- if(posnegMap.get(brand).containsKey(ratingInterval)){
+ if (posnegMap.get(brand).containsKey(ratingInterval)) {
//increment the amount
posnegMap.get(brand).put(ratingInterval, posnegMap.get(brand).get(ratingInterval) + 1);
- }
- //if the brand for that brand is not yet in the map
- else{
+ } //if the brand for that brand is not yet in the map
+ else {
//make a new entry for that brand with amount = 1
posnegMap.get(brand).put(ratingInterval, 1);
}
- }
- //if the brand is not yet in the map
- else{
+ } //if the brand is not yet in the map
+ else {
//make a new hashmap for this map and fill it with the brand and the amount
posnegMap.put(brand, new HashMap<Integer, Integer>());
posnegMap.get(brand).put(ratingInterval, 1);
@@ -407,39 +401,39 @@ public class Analyzor {
}
siiMapToCSV(posnegMap, "posneg.csv", "brand,ratingInterval,count");
}
-
+
/*
- makes a csv for disco of a process of news spreading
+ makes a csv for disco of a process of news spreading
- the query should be as follows:
- - it should be a union of the following query twice, once with TYPE = retweet, once with TYPE = reply
- - pick two tables of tweet (t1 and t2) and one of TYPEof
- - t1.tweetid = TYPEof.TYPEonid and t2.tweetid = TYPEof.TYPEid
- - t1.tweetid should be named maintweetid
- - t2.tweetid should be named TYPEid
- - t1.timestamp should be names maintime
- - t2.timestamp should be named othertime
- - t1.userid should be named mainuserid
- - t2.userid should be named otheruserid
+ the query should be as follows:
+ - it should be a union of the following query twice, once with TYPE = retweet, once with TYPE = reply
+ - pick two tables of tweet (t1 and t2) and one of TYPEof
+ - t1.tweetid = TYPEof.TYPEonid and t2.tweetid = TYPEof.TYPEid
+ - t1.tweetid should be named maintweetid
+ - t2.tweetid should be named TYPEid
+ - t1.timestamp should be names maintime
+ - t2.timestamp should be named othertime
+ - t1.userid should be named mainuserid
+ - t2.userid should be named otheruserid
- so the resulting tables should be:
- maintweetid, maintime, mainuserid, replyid, retweetid, othertime, otheruserid
+ so the resulting tables should be:
+ maintweetid, maintime, mainuserid, replyid, retweetid, othertime, otheruserid
- note that one of replyid and retweetid has to be null and the other a long for each row
- how to do this: http://stackoverflow.com/questions/2309943/unioning-two-tables-with-different-number-of-columns
+ note that one of replyid and retweetid has to be null and the other a long for each row
+ how to do this: http://stackoverflow.com/questions/2309943/unioning-two-tables-with-different-number-of-columns
- the csv will contain: tweetID of the replied/retweeted on, reply/retweet, timestamp, tweetid of the reply/retweet, userid
- which corresponds to: caseID , activity , timestamp, resource , rescource
- */
- void newsSpread(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+ the csv will contain: tweetID of the replied/retweeted on, reply/retweet, timestamp, tweetid of the reply/retweet, userid
+ which corresponds to: caseID , activity , timestamp, resource , rescource
+ */
+ void newsSpread(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
query(query);
-
+
long maintweetID;
long replyID;
long retweetID;
-
+
//tweetID, set of replyID's
HashMap<Long, HashSet<Long>> hasReplies = new HashMap<>();
//tweetID, set of retweetID's
@@ -448,20 +442,19 @@ public class Analyzor {
HashMap<Long, Timestamp> timestamp = new HashMap<>();
//tweetID, its userID
HashMap<Long, Long> user = new HashMap<>();
-
- while(data.next()){
-
+
+ while (data.next()) {
+
maintweetID = data.getLong("thetweetid");
replyID = data.getLong("replyid");
retweetID = data.getLong("retweetid");
-
+
//put these in the corresponding maps
//note that exact one of the two if statements below will hold
-
//if the replyID is not null
- if(replyID != 0){
+ if (replyID != 0) {
//if this tweetID has no set yet, make one
- if(hasReplies.get(maintweetID) == null){
+ if (hasReplies.get(maintweetID) == null) {
hasReplies.put(maintweetID, new HashSet<Long>());
}
//add the replyID to the tweetID
@@ -472,9 +465,9 @@ public class Analyzor {
user.put(replyID, data.getLong("otheruser"));
}
//if the retweetID is not null
- if(retweetID != 0){
+ if (retweetID != 0) {
//if this tweetID has no set yet, make one
- if(hasRetweets.get(maintweetID) == null){
+ if (hasRetweets.get(maintweetID) == null) {
hasRetweets.put(maintweetID, new HashSet<Long>());
}
//add the retweetID to the tweetID
@@ -485,27 +478,27 @@ public class Analyzor {
user.put(retweetID, data.getLong("otheruser"));
}
}
-
+
//now use this data to make a csv for disco
PrintWriter writer = new PrintWriter("newsSpread.csv", "UTF-8");
//print the first line
writer.println("caseID,activity,timestamp,tweet,user");
-
+
//print all replies
- for(Long tweetid : hasReplies.keySet()){
- for(Long replyid : hasReplies.get(tweetid)){
+ for (Long tweetid : hasReplies.keySet()) {
+ for (Long replyid : hasReplies.get(tweetid)) {
writer.println(tweetid + ", reply, " + timestamp.get(replyid) + ", " + replyid + ", " + user.get(replyid));
}
}
//print all retweets
- for(Long tweetid : hasRetweets.keySet()){
- for(Long retweetid : hasRetweets.get(tweetid)){
+ for (Long tweetid : hasRetweets.keySet()) {
+ for (Long retweetid : hasRetweets.get(tweetid)) {
writer.println(tweetid + ", retweet, " + timestamp.get(retweetid) + ", " + retweetid + ", " + user.get(retweetid));
}
}
writer.close();
}
-
+
//replaces punctuation so it will be splitted
//also removes urls
private String splitPunctToWords(String text) {
@@ -524,44 +517,44 @@ public class Analyzor {
text = text.replaceAll("[^a-zA-Z0-9#_-]", " ");
return text;
}
-
+
//prints a hashmap into a csv for a html application
//Hashmap<key1, HashMap<key2, value>> becomes key1, key2, value
//only for String, String, Integer
- void ssiMapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine)
- throws FileNotFoundException, UnsupportedEncodingException{
-
+ void ssiMapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine)
+ throws FileNotFoundException, UnsupportedEncodingException {
+
PrintWriter writer = new PrintWriter(fileName, "UTF-8");
-
+
writer.println(firstLine);
-
+
//loop over brands
- for(Entry en : map.entrySet()){
+ for (Entry en : map.entrySet()) {
//loop over words
- for(Entry e : map.get(en.getKey()).entrySet()){
+ for (Entry e : map.get(en.getKey()).entrySet()) {
writer.println(en.getKey() + "," + e.getKey() + "," + e.getValue());
}
}
-
+
writer.close();
System.out.println("csv file made, please put it next to html file and run this");
}
-
- void siiMapToCSV(HashMap<String, HashMap<Integer, Integer>> map, String fileName, String firstLine)
- throws FileNotFoundException, UnsupportedEncodingException{
-
+
+ void siiMapToCSV(HashMap<String, HashMap<Integer, Integer>> map, String fileName, String firstLine)
+ throws FileNotFoundException, UnsupportedEncodingException {
+
PrintWriter writer = new PrintWriter(fileName, "UTF-8");
-
+
writer.println(firstLine);
-
+
//loop over brands
- for(Entry en : map.entrySet()){
+ for (Entry en : map.entrySet()) {
//loop over words
- for(Entry e : map.get(en.getKey()).entrySet()){
+ for (Entry e : map.get(en.getKey()).entrySet()) {
writer.println(en.getKey() + "," + e.getKey() + "," + e.getValue());
}
}
-
+
writer.close();
System.out.println("csv file made, please put it next to html file and run this");
}
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
index 9e47a74..766e652 100644
--- a/src/main/FarmShell.java
+++ b/src/main/FarmShell.java
@@ -147,26 +147,26 @@ public class FarmShell {
getAnalyzor().newsSpread(params[0]);
break;
case getBrands:
- String trimmed = params[0].trim();
+ String trimmed = params[0].trim();
String bool = trimmed;
String query = null;
-
+
int index = trimmed.indexOf(" ");
-
+
if (index > -1) {
- bool = trimmed.substring(0, index);
+ bool = trimmed.substring(0, index);
query = trimmed.substring(index + 1, trimmed.length());
}
-
+
boolean reset = false;
if (bool.equals("true")) {
reset = true;
- } else if (bool.equals("false")){
+ } else if (bool.equals("false")) {
reset = false;
} else {
throw new IllegalArgumentException("getBrands: expected boolean, got " + params[0]);
}
-
+
if (query != null) {
getAnalyzor().getBrands(query, reset);
} else {