summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaurice Laveaux <m.laveaux@student.tue.nl>2014-06-04 13:36:34 +0200
committerMaurice Laveaux <m.laveaux@student.tue.nl>2014-06-04 13:36:34 +0200
commit8c274c0bcb451cb90c9870fcfb765e4d5a2b8b3b (patch)
tree5dd7d953faed3568b337329088aa7787dcb880ad
parent7dd0cc0d27e6b741eb29089ef836f98e47cc9329 (diff)
parent7b476a73d999a9be4e49247873add1c66ef49824 (diff)
downloadGoldfarmer-8c274c0bcb451cb90c9870fcfb765e4d5a2b8b3b.tar.gz
Merge remote-tracking branch 'origin/master'
Conflicts: src/main/Analyzor.java src/main/FarmShell.java
-rw-r--r--nbproject/project.properties162
-rw-r--r--src/main/Analyzor.java180
-rw-r--r--src/main/FarmShell.java11
3 files changed, 264 insertions, 89 deletions
diff --git a/nbproject/project.properties b/nbproject/project.properties
index ab8ae05..b262ab6 100644
--- a/nbproject/project.properties
+++ b/nbproject/project.properties
@@ -1,81 +1,81 @@
-annotation.processing.enabled=true
-annotation.processing.enabled.in.editor=false
-annotation.processing.processors.list=
-annotation.processing.run.all.processors=true
-annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
-application.title=Goldfarmer
-application.vendor=maurice
-build.classes.dir=${build.dir}/classes
-build.classes.excludes=**/*.java,**/*.form
-# This directory is removed when the project is cleaned:
-build.dir=build
-build.generated.dir=${build.dir}/generated
-build.generated.sources.dir=${build.dir}/generated-sources
-# Only compile against the classpath explicitly listed here:
-build.sysclasspath=ignore
-build.test.classes.dir=${build.dir}/test/classes
-build.test.results.dir=${build.dir}/test/results
-# Uncomment to specify the preferred debugger connection transport:
-#debug.transport=dt_socket
-debug.classpath=\
- ${run.classpath}
-debug.test.classpath=\
- ${run.test.classpath}
-# Files in build.classes.dir which should be excluded from distribution jar
-dist.archive.excludes=
-# This directory is removed when the project is cleaned:
-dist.dir=dist
-dist.jar=${dist.dir}/Goldfarmer.jar
-dist.javadoc.dir=${dist.dir}/javadoc
-endorsed.classpath=
-excludes=
-file.reference.joda-time-2.3.jar=lib/joda-time-2.3.jar
-file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar
-includes=**
-jar.compress=false
-javac.classpath=\
- ${file.reference.joda-time-2.3.jar}:\
- ${file.reference.postgresql-9.3-1101.jdbc41.jar}
-# Space-separated list of extra javac options
-javac.compilerargs=
-javac.deprecation=false
-javac.processorpath=\
- ${javac.classpath}
-javac.source=1.7
-javac.target=1.7
-javac.test.classpath=\
- ${javac.classpath}:\
- ${build.classes.dir}:\
- ${libs.junit_4.classpath}
-javac.test.processorpath=\
- ${javac.test.classpath}
-javadoc.additionalparam=
-javadoc.author=false
-javadoc.encoding=${source.encoding}
-javadoc.noindex=false
-javadoc.nonavbar=false
-javadoc.notree=false
-javadoc.private=false
-javadoc.splitindex=true
-javadoc.use=true
-javadoc.version=false
-javadoc.windowtitle=
-main.class=main.Main
-manifest.file=manifest.mf
-meta.inf.dir=${src.dir}/META-INF
-mkdist.disabled=false
-platform.active=default_platform
-project.licensePath=./nbproject/licenseheader.txt
-run.classpath=\
- ${javac.classpath}:\
- ${build.classes.dir}
-# Space-separated list of JVM arguments used when running the project.
-# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value.
-# To set system properties for unit tests define test-sys-prop.name=value:
-run.jvmargs=
-run.test.classpath=\
- ${javac.test.classpath}:\
- ${build.test.classes.dir}
-source.encoding=UTF-8
-src.dir=src
-test.src.dir=test
+annotation.processing.enabled=true
+annotation.processing.enabled.in.editor=false
+annotation.processing.processors.list=
+annotation.processing.run.all.processors=true
+annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
+application.title=Goldfarmer
+application.vendor=maurice
+build.classes.dir=${build.dir}/classes
+build.classes.excludes=**/*.java,**/*.form
+# This directory is removed when the project is cleaned:
+build.dir=build
+build.generated.dir=${build.dir}/generated
+build.generated.sources.dir=${build.dir}/generated-sources
+# Only compile against the classpath explicitly listed here:
+build.sysclasspath=ignore
+build.test.classes.dir=${build.dir}/test/classes
+build.test.results.dir=${build.dir}/test/results
+# Uncomment to specify the preferred debugger connection transport:
+#debug.transport=dt_socket
+debug.classpath=\
+ ${run.classpath}
+debug.test.classpath=\
+ ${run.test.classpath}
+# Files in build.classes.dir which should be excluded from distribution jar
+dist.archive.excludes=
+# This directory is removed when the project is cleaned:
+dist.dir=dist
+dist.jar=${dist.dir}/Goldfarmer.jar
+dist.javadoc.dir=${dist.dir}/javadoc
+endorsed.classpath=
+excludes=
+file.reference.joda-time-2.3.jar=lib/joda-time-2.3.jar
+file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar
+includes=**
+jar.compress=false
+javac.classpath=\
+ ${file.reference.joda-time-2.3.jar}:\
+ ${file.reference.postgresql-9.3-1101.jdbc41.jar}
+# Space-separated list of extra javac options
+javac.compilerargs=
+javac.deprecation=false
+javac.processorpath=\
+ ${javac.classpath}
+javac.source=1.7
+javac.target=1.7
+javac.test.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}:\
+ ${libs.junit_4.classpath}
+javac.test.processorpath=\
+ ${javac.test.classpath}
+javadoc.additionalparam=
+javadoc.author=false
+javadoc.encoding=${source.encoding}
+javadoc.noindex=false
+javadoc.nonavbar=false
+javadoc.notree=false
+javadoc.private=false
+javadoc.splitindex=true
+javadoc.use=true
+javadoc.version=false
+javadoc.windowtitle=
+main.class=main.Main
+manifest.file=manifest.mf
+meta.inf.dir=${src.dir}/META-INF
+mkdist.disabled=false
+platform.active=default_platform
+project.licensePath=./nbproject/licenseheader.txt
+run.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}
+# Space-separated list of JVM arguments used when running the project.
+# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value.
+# To set system properties for unit tests define test-sys-prop.name=value:
+run.jvmargs=
+run.test.classpath=\
+ ${javac.test.classpath}:\
+ ${build.test.classes.dir}
+source.encoding=UTF-8
+src.dir=src
+test.src.dir=test
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 1560417..40ec38a 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -14,8 +14,10 @@ import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
+import java.sql.Timestamp;
import java.util.List;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Locale;
import java.util.Map.Entry;
import java.util.Scanner;
@@ -188,7 +190,7 @@ public class Analyzor {
while (data.next()) {
//get brand
- brand=data.getString("brand");
+ brand = data.getString("brand");
//make hashmap for each brand
if(!wordcloud.containsKey(brand)){
wordcloud.put(brand, new HashMap<String,Integer>());
@@ -216,7 +218,7 @@ public class Analyzor {
}
}
//print the words and their frequency in a csv file
- mapToCSV(wordcloud, "wordcloud.csv", "brand,word,count");
+ ssiMapToCSV(wordcloud, "wordcloud.csv", "brand,word,count");
}
//generate csv for disco from the query
@@ -318,8 +320,6 @@ public class Analyzor {
}
}
-
-
//hashmap timezone, brand, amount
HashMap<String, HashMap<String, Integer>> timeMap = new HashMap<>();
String timezone;
@@ -333,6 +333,7 @@ public class Analyzor {
timezone="other";
}
brand = data.getString("brand");
+
//if the timezone is already in the map
if(timeMap.containsKey(timezone)){
//if the brand for that timezone is already in the map
@@ -354,9 +355,155 @@ public class Analyzor {
}
}
-
//make the CSV out of the map
- mapToCSV(timeMap, "timezone.csv", "timezone,brand,count");
+ ssiMapToCSV(timeMap, "timezone.csv", "timezone,brand,count");
+ }
+
+    /**
+     * Builds the histogram data for the posneg visualizer: counts, per brand,
+     * how many tweets fall into each rating interval, and hands the result to
+     * siiMapToCSV with file name "posneg.csv" and header
+     * "brand,ratingInterval,count".
+     *
+     * A rating interval is identified by the multiple of the interval size
+     * nearest to the rating. With the interval size of 10 used here, interval 0
+     * holds the ratings -5..4 and interval -10 holds the ratings -15..-6.
+     *
+     * @param query query handed to query(String); each row must provide a
+     *              "brand" string column and a "rating" integer column
+     * @throws SQLException propagated from running the query or reading its rows
+     * @throws FileNotFoundException propagated from siiMapToCSV
+     * @throws UnsupportedEncodingException propagated from siiMapToCSV
+     */
+    void posNeg(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
+        query(query);
+
+        final int intervalSize = 10;
+        //brand, ratingInterval, amount
+        HashMap<String, HashMap<Integer, Integer>> posnegMap = new HashMap<>();
+
+        while (data.next()) {
+            String brand = data.getString("brand");
+            int rating = data.getInt("rating");
+
+            //shift by half an interval so the division rounds to the nearest multiple
+            int shifted = rating + intervalSize / 2;
+            int bucket = shifted / intervalSize;
+            //integer division truncates toward zero; step one bucket down on a
+            //negative remainder so negative ratings are not pulled into interval 0
+            if (shifted % intervalSize < 0) {
+                bucket--;
+            }
+            int ratingInterval = bucket * intervalSize;
+
+            //fetch the interval counts of this brand, creating them on first use
+            HashMap<Integer, Integer> intervals = posnegMap.get(brand);
+            if (intervals == null) {
+                intervals = new HashMap<Integer, Integer>();
+                posnegMap.put(brand, intervals);
+            }
+            //first tweet in this interval counts as 1, otherwise increment
+            Integer count = intervals.get(ratingInterval);
+            intervals.put(ratingInterval, count == null ? 1 : count + 1);
+        }
+        siiMapToCSV(posnegMap, "posneg.csv", "brand,ratingInterval,count");
+    }
+
+ /*
+ makes a csv for disco of a process of news spreading
+
+
+ the query should be as follows:
+ - it should be a union of the following query twice, once with TYPE = retweet, once with TYPE = reply
+ - pick two tables of tweet (t1 and t2) and one of TYPEof
+ - t1.tweetid = TYPEof.TYPEonid and t2.tweetid = TYPEof.TYPEid
+ - t1.tweetid should be named maintweetid
+ - t2.tweetid should be named TYPEid
+ - t1.timestamp should be named maintime
+ - t2.timestamp should be named othertime
+ - t1.userid should be named mainuserid
+ - t2.userid should be named otheruserid
+
+ so the resulting tables should be:
+ maintweetid, maintime, mainuserid, replyid, retweetid, othertime, otheruserid
+
+ note that one of replyid and retweetid has to be null and the other a long for each row
+ how to do this: http://stackoverflow.com/questions/2309943/unioning-two-tables-with-different-number-of-columns
+
+
+ the csv will contain: tweetID of the replied/retweeted on, reply/retweet, timestamp, tweetid of the reply/retweet, userid
+ which corresponds to: caseID , activity , timestamp, resource , resource
+ */
+    /**
+     * Reads the reply/retweet rows produced by the given query and writes them
+     * to "newsSpread.csv", one event line per reply or retweet, below the header
+     * "caseID,activity,timestamp,tweet,user".
+     *
+     * Columns read from each row: "thetweetid", "replyid", "retweetid",
+     * "othertime" and "otheruser".
+     * NOTE(review): the query description preceding this method names the
+     * columns maintweetid and otheruserid - confirm which spelling the queries
+     * actually use.
+     *
+     * @param query query handed to query(String)
+     * @throws SQLException propagated from running the query or reading its rows
+     * @throws FileNotFoundException propagated from opening newsSpread.csv
+     * @throws UnsupportedEncodingException propagated from opening newsSpread.csv
+     */
+    void newsSpread(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
+        query(query);
+
+        //tweetID, set of replyID's
+        HashMap<Long, HashSet<Long>> hasReplies = new HashMap<>();
+        //tweetID, set of retweetID's
+        HashMap<Long, HashSet<Long>> hasRetweets = new HashMap<>();
+        //tweetID, its timestamp
+        HashMap<Long, Timestamp> timestamp = new HashMap<>();
+        //tweetID, its userID
+        HashMap<Long, Long> user = new HashMap<>();
+
+        while (data.next()) {
+            long maintweetID = data.getLong("thetweetid");
+            long replyID = data.getLong("replyid");
+            long retweetID = data.getLong("retweetid");
+
+            //an id of 0 is treated as "column was null"
+            //(presumably data is a java.sql.ResultSet, whose getLong yields 0 for SQL NULL - confirm)
+            if (replyID != 0) {
+                recordSpread(hasReplies, maintweetID, replyID, timestamp, user);
+            }
+            if (retweetID != 0) {
+                recordSpread(hasRetweets, maintweetID, retweetID, timestamp, user);
+            }
+        }
+
+        //now use this data to make a csv for disco;
+        //try-with-resources closes the file on every exit path
+        try (PrintWriter writer = new PrintWriter("newsSpread.csv", "UTF-8")) {
+            writer.println("caseID,activity,timestamp,tweet,user");
+            printSpread(writer, hasReplies, "reply", timestamp, user);
+            printSpread(writer, hasRetweets, "retweet", timestamp, user);
+        }
+    }
+
+    //registers childID as a reply/retweet of parentID and stores the time and
+    //user of the child as found in the current row of data
+    private void recordSpread(HashMap<Long, HashSet<Long>> children, long parentID, long childID,
+            HashMap<Long, Timestamp> timestamp, HashMap<Long, Long> user) throws SQLException {
+        HashSet<Long> ids = children.get(parentID);
+        //if this tweetID has no set yet, make one
+        if (ids == null) {
+            ids = new HashSet<Long>();
+            children.put(parentID, ids);
+        }
+        ids.add(childID);
+        timestamp.put(childID, data.getTimestamp("othertime"));
+        user.put(childID, data.getLong("otheruser"));
+    }
+
+    //prints one csv line per (tweet, reply/retweet) pair using the given activity name
+    private void printSpread(PrintWriter writer, HashMap<Long, HashSet<Long>> children, String activity,
+            HashMap<Long, Timestamp> timestamp, HashMap<Long, Long> user) {
+        for (Entry<Long, HashSet<Long>> entry : children.entrySet()) {
+            for (Long childID : entry.getValue()) {
+                writer.println(entry.getKey() + ", " + activity + ", " + timestamp.get(childID)
+                        + ", " + childID + ", " + user.get(childID));
+            }
+        }
+    }
//replaces punctuation so it will be splitted
@@ -381,7 +528,26 @@ public class Analyzor {
//prints a hashmap into a csv for a html application
//Hashmap<key1, HashMap<key2, value>> becomes key1, key2, value
//only for String, String, Integer
- void mapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine)
+    /**
+     * Writes a two-level String/String/Integer map as csv: the given first line,
+     * then one "key1,key2,value" line per inner entry. Callers in this class use
+     * it for "brand,word,count" and "timezone,brand,count" files.
+     *
+     * @param map       outer key -> (inner key -> count)
+     * @param fileName  file to write, encoded as UTF-8
+     * @param firstLine header line printed before the data
+     * @throws FileNotFoundException propagated from opening the file
+     * @throws UnsupportedEncodingException propagated from opening the file
+     */
+    void ssiMapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine)
+            throws FileNotFoundException, UnsupportedEncodingException {
+
+        //try-with-resources closes the file on every exit path
+        try (PrintWriter writer = new PrintWriter(fileName, "UTF-8")) {
+            writer.println(firstLine);
+
+            //loop over the outer keys (first csv column)
+            for (Entry<String, HashMap<String, Integer>> outer : map.entrySet()) {
+                //loop over the inner keys and their counts (second and third column)
+                for (Entry<String, Integer> inner : outer.getValue().entrySet()) {
+                    writer.println(outer.getKey() + "," + inner.getKey() + "," + inner.getValue());
+                }
+            }
+        }
+
+        System.out.println("csv file made, please put it next to html file and run this");
+    }
+
+ void siiMapToCSV(HashMap<String, HashMap<Integer, Integer>> map, String fileName, String firstLine)
throws FileNotFoundException, UnsupportedEncodingException{
PrintWriter writer = new PrintWriter(fileName, "UTF-8");
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
index 1f2835c..5fa468e 100644
--- a/src/main/FarmShell.java
+++ b/src/main/FarmShell.java
@@ -140,6 +140,12 @@ public class FarmShell {
case disco:
getAnalyzor().disco(params[0]);
break;
+ case posneg:
+ getAnalyzor().posNeg(params[0]);
+ break;
+ case newsspread:
+ getAnalyzor().newsSpread(params[0]);
+ break;
case getBrands:
String trimmed = params[0].trim();
String bool = trimmed;
@@ -195,10 +201,13 @@ public class FarmShell {
filterbots("marks all users as bot or not", 1),
sentiment("analyzes all tweets on brand positivity (optional arg: tweet/brand selection query)"),
- wordcloud("makes a wordcloud of the text of the tweets", 1),
+ wordcloud("makes a csv for a wordcloud of the text of the tweets", 1),
getBrands("fills the database with the brands of a tweet, arg: bool indicating whether to reset mentionsbrand (optional arg: tweet selection query", 1),
+ timezone("makes a csv ", 1),
timezone("makes a map per brand for the users", 1),
disco("makes a outputfile for disco", 1),
+ posneg("makes a csv for a histogram for positive or negative tweets", 1),
+ newsspread("makes a csv for disco to show a news spread process", 1),
exit("Returns to shell"),
help("Get help");