From 6ed880b928ceaee3935562c2eb975ddaa49a8530 Mon Sep 17 00:00:00 2001
From: Peter Wu
Date: Wed, 4 Jun 2014 16:16:41 +0200
Subject: Formatting, remove unused imports

---
 src/main/Analyzor.java  | 225 +++++++++++++++++++++++-------------------------
 src/main/FarmShell.java |  14 +--
 2 files changed, 116 insertions(+), 123 deletions(-)

diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 40ec38a..5a201be 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -18,7 +18,6 @@ import java.sql.Timestamp;
 import java.util.List;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Locale;
 import java.util.Map.Entry;
 import java.util.Scanner;
 
@@ -186,14 +185,14 @@ public class Analyzor {
         String text;
         String brand;
         String[] words;
-        HashMap<String, HashMap<String, Integer>> wordcloud = new HashMap<>(); 
+        HashMap<String, HashMap<String, Integer>> wordcloud = new HashMap<>();
 
         while (data.next()) {
             //get brand
             brand = data.getString("brand");
             //make hashmap for each brand
-            if(!wordcloud.containsKey(brand)){
-                wordcloud.put(brand, new HashMap());
+            if (!wordcloud.containsKey(brand)) {
+                wordcloud.put(brand, new HashMap());
             }
             //get the text
             text = data.getString("text");
@@ -204,15 +203,14 @@ public class Analyzor {
             //for all words
             for (String word : words) {
                 //if it is empty, a space or a stripe, skip it
-                if(word.equals("") || word.equals(" ") || word.equals("-")){
+                if (word.equals("") || word.equals(" ") || word.equals("-")) {
                     continue;
                 }
                 //if the word is already in the map, increment the amount
-                if(wordcloud.get(brand).containsKey(word)){
+                if (wordcloud.get(brand).containsKey(word)) {
                     wordcloud.get(brand).put(word, wordcloud.get(brand).get(word) + 1);
-                }
-                //if the word is not already in the map, make an entry with amount = 1
-                else{
+                } //if the word is not already in the map, make an entry with amount = 1
+                else {
                     wordcloud.get(brand).put(word, 1);
                 }
             }
@@ -251,14 +249,14 @@ public class Analyzor {
 
     /**
      * Obtain the brands of select tweet texts.
-     * 
+     *
      * @param queryText The rows to select.
     * @param reset Whether to reset mentionsbrand.
     * @throws SQLException If the query is unsuccesfull.
      */
     public void getBrands(String queryText, boolean reset) throws SQLException {
         BrandChecker checker = new BrandChecker("brandonlyrules.txt");
-        
+
         PreparedStatement statement;
         // make a connection to the database and execute the query
         if (reset) {
@@ -266,7 +264,7 @@
             statement = connection.prepareStatement("delete from mentionsbrand");
             statement.executeUpdate();
         }
-        
+
         System.out.println("Obtaining all selected entries in tweet.");
         if (queryText.isEmpty()) {
             query("select * from tweet");
@@ -274,13 +272,13 @@ public class Analyzor {
             query(queryText);
         }
         System.out.println("Query finished.");
-        
+
         NamedPreparedStatement insertBrand = new NamedPreparedStatement(connection, QueryUtils.insertBrand);
-        
+
         int brandCount = 0;
         int count = 0;
         long timestamp = System.currentTimeMillis();
-        
+
         while (data.next()) {
             List<String> brands = checker.getBrands(data.getString("text"));
             if (brands.isEmpty()) {
@@ -294,112 +292,108 @@ public class Analyzor {
                     insertBrand.executeUpdate();
                 }
             }
-            
+
             count++;
             if (count % 10000 == 0) {
                 System.out.println("Processed " + count + " tweets in " + (System.currentTimeMillis() - timestamp) + " ms");
             }
         }
-        
-        System.out.println("Processed " + count + " tweets in " + (System.currentTimeMillis() - timestamp) + " ms"); 
+
+        System.out.println("Processed " + count + " tweets in " + (System.currentTimeMillis() - timestamp) + " ms");
         System.out.println("Finished getBrands, processed " + count + " number of tweets, added " + brandCount + " brands or no.");
     }
-    
+
     //gets the amount of users that tweet about a brand in a timezone
     //makes a csv file timezone, brand, amount
-    public void timezone(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+    public void timezone(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
         query(query);
-        
+
         InputStream inFile = new FileInputStream("timezone.txt");
         Scanner readFile = new Scanner(inFile);
-        HashMap<String, String> toTimezone = new HashMap<>(); 
+        HashMap<String, String> toTimezone = new HashMap<>();
         while (readFile.hasNextLine()) {
             String line = readFile.nextLine();
-            if(line.split(",").length>1){
+            if (line.split(",").length > 1) {
                 toTimezone.put(line.split(",")[0], line.split(",")[1]);
             }
         }
-        
+
         //hashmap timezone, brand, amount
         HashMap<String, HashMap<String, Integer>> timeMap = new HashMap<>();
         String timezone;
         String brand;
-        
-        while(data.next()){
+
+        while (data.next()) {
             timezone = data.getString("timezone");
-            if (toTimezone.containsKey(timezone)){
-                timezone=toTimezone.get(timezone);
+            if (toTimezone.containsKey(timezone)) {
+                timezone = toTimezone.get(timezone);
             } else {
-                timezone="other";
+                timezone = "other";
             }
             brand = data.getString("brand");
-            
+
             //if the timezone is already in the map
-            if(timeMap.containsKey(timezone)){
+            if (timeMap.containsKey(timezone)) {
                 //if the brand for that timezone is already in the map
-                if(timeMap.get(timezone).containsKey(brand)){
+                if (timeMap.get(timezone).containsKey(brand)) {
                     //increment the amount
                     timeMap.get(timezone).put(brand, timeMap.get(timezone).get(brand) + 1);
-                }
-                //if the brand for that timezone is not yet in the map
-                else{
+                } //if the brand for that timezone is not yet in the map
+                else {
                     //make a new entry for that brand with amount = 1
                     timeMap.get(timezone).put(brand, 1);
                 }
-            }
-            //if the timezone is not yet in the map
-            else{
+            } //if the timezone is not yet in the map
+            else {
                 //make a new hashmap for this map and fill it with the brand and the amount
                 timeMap.put(timezone, new HashMap());
                 timeMap.get(timezone).put(brand, 1);
            }
         }
-        
+
         //make the CSV out of the map
         ssiMapToCSV(timeMap, "timezone.csv", "timezone,brand,count");
     }
-    
+
     //gets the positivity of the tweets about a brand
     //makes a csv file for posnegVisualizer
-    void posNeg(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+    void posNeg(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
         query(query);
-        
+
         String brand;
         int rating;
         int ratingInterval;
-        
+
         int intervalSize = 10;
         //brand, ratingInterval, amount
         HashMap<String, HashMap<Integer, Integer>> posnegMap = new HashMap<>();
         /*
-        the rating interval is given by an integer, which is the result of the
-        tweets sentiment value divided by interval size rounded down.
-        This puts all data in boxes for the histogram.
-        */
-        
-        while(data.next()){
-            
+         the rating interval is given by an integer, which is the result of the
+         tweets sentiment value divided by interval size rounded down.
+         This puts all data in boxes for the histogram.
+         */
+
+        while (data.next()) {
+
             brand = data.getString("brand");
             rating = data.getInt("rating");
             //ratingInterval is an integer divisible by intervalSize
             //if a rating is between a ratingInterval+-0.5*intervalSize, it belongs in that interval
-            ratingInterval = (rating + (int)(0.5 * intervalSize))/intervalSize*intervalSize;
-            
+            ratingInterval = (rating + (int) (0.5 * intervalSize)) / intervalSize * intervalSize;
+
             //if the brand is already in the map
-            if(posnegMap.containsKey(brand)){
+            if (posnegMap.containsKey(brand)) {
                 //if the brand for that brand is already in the map
-                if(posnegMap.get(brand).containsKey(ratingInterval)){
+                if (posnegMap.get(brand).containsKey(ratingInterval)) {
                     //increment the amount
                     posnegMap.get(brand).put(ratingInterval, posnegMap.get(brand).get(ratingInterval) + 1);
-                }
-                //if the brand for that brand is not yet in the map
-                else{
+                } //if the brand for that brand is not yet in the map
+                else {
                     //make a new entry for that brand with amount = 1
                     posnegMap.get(brand).put(ratingInterval, 1);
                 }
-            }
-            //if the brand is not yet in the map
-            else{
+            } //if the brand is not yet in the map
+            else {
                 //make a new hashmap for this map and fill it with the brand and the amount
                 posnegMap.put(brand, new HashMap());
                 posnegMap.get(brand).put(ratingInterval, 1);
@@ -407,39 +401,39 @@ public class Analyzor {
         }
         siiMapToCSV(posnegMap, "posneg.csv", "brand,ratingInterval,count");
     }
-    
+
     /*
-    makes a csv for disco of a process of news spreading
+     makes a csv for disco of a process of news spreading
 
-    the query should be as follows:
-    - it should be a union of the following query twice, once with TYPE = retweet, once with TYPE = reply
-    - pick two tables of tweet (t1 and t2) and one of TYPEof
-    - t1.tweetid = TYPEof.TYPEonid and t2.tweetid = TYPEof.TYPEid
-    - t1.tweetid should be named maintweetid
-    - t2.tweetid should be named TYPEid
-    - t1.timestamp should be names maintime
-    - t2.timestamp should be named othertime
-    - t1.userid should be named mainuserid
-    - t2.userid should be named otheruserid
+     the query should be as follows:
+     - it should be a union of the following query twice, once with TYPE = retweet, once with TYPE = reply
+     - pick two tables of tweet (t1 and t2) and one of TYPEof
+     - t1.tweetid = TYPEof.TYPEonid and t2.tweetid = TYPEof.TYPEid
+     - t1.tweetid should be named maintweetid
+     - t2.tweetid should be named TYPEid
+     - t1.timestamp should be names maintime
+     - t2.timestamp should be named othertime
+     - t1.userid should be named mainuserid
+     - t2.userid should be named otheruserid
 
-    so the resulting tables should be:
-    maintweetid, maintime, mainuserid, replyid, retweetid, othertime, otheruserid
+     so the resulting tables should be:
+     maintweetid, maintime, mainuserid, replyid, retweetid, othertime, otheruserid
 
-    note that one of replyid and retweetid has to be null and the other a long for each row
-    how to do this: http://stackoverflow.com/questions/2309943/unioning-two-tables-with-different-number-of-columns
+     note that one of replyid and retweetid has to be null and the other a long for each row
+     how to do this: http://stackoverflow.com/questions/2309943/unioning-two-tables-with-different-number-of-columns
 
-    the csv will contain: tweetID of the replied/retweeted on, reply/retweet, timestamp, tweetid of the reply/retweet, userid
-    which corresponds to: caseID , activity , timestamp, resource , rescource
-    */
-    void newsSpread(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+     the csv will contain: tweetID of the replied/retweeted on, reply/retweet, timestamp, tweetid of the reply/retweet, userid
+     which corresponds to: caseID , activity , timestamp, resource , rescource
+     */
+    void newsSpread(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
         query(query);
-        
+
         long maintweetID;
         long replyID;
         long retweetID;
-        
+
         //tweetID, set of replyID's
         HashMap<Long, HashSet<Long>> hasReplies = new HashMap<>();
         //tweetID, set of retweetID's
@@ -448,20 +442,19 @@ public class Analyzor {
         HashMap<Long, Timestamp> timestamp = new HashMap<>();
         //tweetID, its userID
         HashMap<Long, Long> user = new HashMap<>();
-        
-        while(data.next()){
-            
+
+        while (data.next()) {
+
             maintweetID = data.getLong("thetweetid");
             replyID = data.getLong("replyid");
             retweetID = data.getLong("retweetid");
-            
+
             //put these in the corresponding maps
             //note that exact one of the two if statements below will hold
-            //if the replyID is not null
-            if(replyID != 0){
+            if (replyID != 0) {
                 //if this tweetID has no set yet, make one
-                if(hasReplies.get(maintweetID) == null){
+                if (hasReplies.get(maintweetID) == null) {
                     hasReplies.put(maintweetID, new HashSet());
                 }
                 //add the replyID to the tweetID
@@ -472,9 +465,9 @@ public class Analyzor {
                 user.put(replyID, data.getLong("otheruser"));
             }
             //if the retweetID is not null
-            if(retweetID != 0){
+            if (retweetID != 0) {
                 //if this tweetID has no set yet, make one
-                if(hasRetweets.get(maintweetID) == null){
+                if (hasRetweets.get(maintweetID) == null) {
                     hasRetweets.put(maintweetID, new HashSet());
                 }
                 //add the retweetID to the tweetID
@@ -485,27 +478,27 @@ public class Analyzor {
                 user.put(retweetID, data.getLong("otheruser"));
             }
         }
-        
+
         //now use this data to make a csv for disco
         PrintWriter writer = new PrintWriter("newsSpread.csv", "UTF-8");
         //print the first line
         writer.println("caseID,activity,timestamp,tweet,user");
-        
+
         //print all replies
-        for(Long tweetid : hasReplies.keySet()){
-            for(Long replyid : hasReplies.get(tweetid)){
+        for (Long tweetid : hasReplies.keySet()) {
+            for (Long replyid : hasReplies.get(tweetid)) {
                 writer.println(tweetid + ", reply, " + timestamp.get(replyid) + ", " + replyid + ", " + user.get(replyid));
             }
         }
         //print all retweets
-        for(Long tweetid : hasRetweets.keySet()){
-            for(Long retweetid : hasRetweets.get(tweetid)){
+        for (Long tweetid : hasRetweets.keySet()) {
+            for (Long retweetid : hasRetweets.get(tweetid)) {
                 writer.println(tweetid + ", retweet, " + timestamp.get(retweetid) + ", " + retweetid + ", " + user.get(retweetid));
            }
         }
         writer.close();
     }
-    
+
     //replaces punctuation so it will be splitted
     //also removes urls
     private String splitPunctToWords(String text) {
@@ -524,44 +517,44 @@ public class Analyzor {
         text = text.replaceAll("[^a-zA-Z0-9#_-]", " ");
         return text;
     }
-    
+
     //prints a hashmap into a csv for a html application
     //Hashmap<key1, Hashmap<key2, value>> becomes key1, key2, value
     //only for String, String, Integer
-    void ssiMapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine)
-            throws FileNotFoundException, UnsupportedEncodingException{
-        
+    void ssiMapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine)
+            throws FileNotFoundException, UnsupportedEncodingException {
+
         PrintWriter writer = new PrintWriter(fileName, "UTF-8");
-        
+
         writer.println(firstLine);
-        
+
         //loop over brands
-        for(Entry<String, HashMap<String, Integer>> en : map.entrySet()){
+        for (Entry<String, HashMap<String, Integer>> en : map.entrySet()) {
             //loop over words
-            for(Entry<String, Integer> e : map.get(en.getKey()).entrySet()){
+            for (Entry<String, Integer> e : map.get(en.getKey()).entrySet()) {
                 writer.println(en.getKey() + "," + e.getKey() + "," + e.getValue());
             }
         }
-        
+
         writer.close();
         System.out.println("csv file made, please put it next to html file and run this");
     }
-    
-    void siiMapToCSV(HashMap<String, HashMap<Integer, Integer>> map, String fileName, String firstLine)
-            throws FileNotFoundException, UnsupportedEncodingException{
-        
+
+    void siiMapToCSV(HashMap<String, HashMap<Integer, Integer>> map, String fileName, String firstLine)
+            throws FileNotFoundException, UnsupportedEncodingException {
+
         PrintWriter writer = new PrintWriter(fileName, "UTF-8");
-        
+
         writer.println(firstLine);
-        
+
         //loop over brands
-        for(Entry<String, HashMap<Integer, Integer>> en : map.entrySet()){
+        for (Entry<String, HashMap<Integer, Integer>> en : map.entrySet()) {
             //loop over words
-            for(Entry<Integer, Integer> e : map.get(en.getKey()).entrySet()){
+            for (Entry<Integer, Integer> e : map.get(en.getKey()).entrySet()) {
                 writer.println(en.getKey() + "," + e.getKey() + "," + e.getValue());
             }
         }
-        
+
         writer.close();
         System.out.println("csv file made, please put it next to html file and run this");
     }
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
index 9e47a74..766e652 100644
--- a/src/main/FarmShell.java
+++ b/src/main/FarmShell.java
@@ -147,26 +147,26 @@ public class FarmShell {
                     getAnalyzor().newsSpread(params[0]);
                     break;
                 case getBrands:
-                    String trimmed = params[0].trim(); 
+                    String trimmed = params[0].trim();
                     String bool = trimmed;
                     String query = null;
-                    
+
                     int index = trimmed.indexOf(" ");
-                    
+
                     if (index > -1) {
-                        bool = trimmed.substring(0, index); 
+                        bool = trimmed.substring(0, index);
                         query = trimmed.substring(index + 1, trimmed.length());
                     }
-                    
+
                     boolean reset = false;
                     if (bool.equals("true")) {
                         reset = true;
-                    } else if (bool.equals("false")){
+                    } else if (bool.equals("false")) {
                         reset = false;
                     } else {
                         throw new IllegalArgumentException("getBrands: expected boolean, got " + params[0]);
                     }
-                    
+
                     if (query != null) {
                         getAnalyzor().getBrands(query, reset);
                     } else {
-- 
cgit v1.2.1
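
Illustrative notes, not part of the commit. The word-cloud code in Analyzor builds a nested brand -> word -> count map that ssiMapToCSV() later flattens into key1,key2,value rows. Below is a minimal, self-contained sketch of that build-then-flatten pattern; the class name, the addWord helper and the sample brand/word strings are invented for the example.

import java.util.HashMap;

public class WordCloudSketch {

    // mirror of the wordcloud update logic: create the per-brand map on first
    // sight of a brand, then increment (or create) the word counter
    static void addWord(HashMap<String, HashMap<String, Integer>> cloud, String brand, String word) {
        if (!cloud.containsKey(brand)) {
            cloud.put(brand, new HashMap<String, Integer>());
        }
        HashMap<String, Integer> counts = cloud.get(brand);
        if (counts.containsKey(word)) {
            counts.put(word, counts.get(word) + 1);
        } else {
            counts.put(word, 1);
        }
    }

    public static void main(String[] args) {
        HashMap<String, HashMap<String, Integer>> cloud = new HashMap<>();
        addWord(cloud, "sony", "camera");
        addWord(cloud, "sony", "camera");
        addWord(cloud, "htc", "battery");

        // ssiMapToCSV() would print one "key1,key2,value" line per inner entry,
        // after a header line such as "brand,word,count":
        //   sony,camera,2
        //   htc,battery,1
        for (String brand : cloud.keySet()) {
            for (String word : cloud.get(brand).keySet()) {
                System.out.println(brand + "," + word + "," + cloud.get(brand).get(word));
            }
        }
    }
}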
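
The timezone() method normalizes each tweet's raw timezone string through a lookup loaded from timezone.txt and falls back to "other" for anything unknown. A small sketch of that lookup follows, assuming each file line is a rawTimezone,normalizedTimezone pair; the sample lines and the class name are invented, and the real file contents are not part of this patch.

import java.util.HashMap;
import java.util.Scanner;

public class TimezoneLookupSketch {

    public static void main(String[] args) {
        // stand-in for new Scanner(new FileInputStream("timezone.txt"));
        // the two sample lines are invented
        Scanner readFile = new Scanner("Amsterdam,Europe\nEastern Time (US & Canada),America\n");

        HashMap<String, String> toTimezone = new HashMap<>();
        while (readFile.hasNextLine()) {
            String line = readFile.nextLine();
            // same guard as in the patch: skip lines without a comma
            if (line.split(",").length > 1) {
                toTimezone.put(line.split(",")[0], line.split(",")[1]);
            }
        }

        // normalization as done per result row in timezone()
        String timezone = "Tokyo";
        if (toTimezone.containsKey(timezone)) {
            timezone = toTimezone.get(timezone);
        } else {
            timezone = "other";
        }
        System.out.println(timezone); // prints "other": Tokyo is not in the sample lookup
    }
}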
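
posNeg() groups sentiment ratings into buckets that are multiples of intervalSize before counting them per brand. Here is a worked sketch of that arithmetic (class and method names are invented; only the expression inside ratingInterval mirrors the patched line). One caveat: Java integer division truncates toward zero, so negative ratings are pulled toward the 0 bucket rather than rounded to the nearest interval as the in-code comment suggests.

public class RatingIntervalSketch {

    // same expression as in posNeg(): shift by half an interval, then truncate
    // to a multiple of intervalSize
    static int ratingInterval(int rating, int intervalSize) {
        return (rating + (int) (0.5 * intervalSize)) / intervalSize * intervalSize;
    }

    public static void main(String[] args) {
        System.out.println(ratingInterval(17, 10));  // 20  (22 / 10 = 2, times 10)
        System.out.println(ratingInterval(14, 10));  // 10  (19 / 10 = 1, times 10)
        System.out.println(ratingInterval(-12, 10)); // 0, not -10: -7 / 10 truncates to 0
    }
}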