diff options
author | Maurice Laveaux <m.laveaux@student.tue.nl> | 2014-06-04 13:36:34 +0200 |
---|---|---|
committer | Maurice Laveaux <m.laveaux@student.tue.nl> | 2014-06-04 13:36:34 +0200 |
commit | 8c274c0bcb451cb90c9870fcfb765e4d5a2b8b3b (patch) | |
tree | 5dd7d953faed3568b337329088aa7787dcb880ad | |
parent | 7dd0cc0d27e6b741eb29089ef836f98e47cc9329 (diff) | |
parent | 7b476a73d999a9be4e49247873add1c66ef49824 (diff) | |
download | Goldfarmer-8c274c0bcb451cb90c9870fcfb765e4d5a2b8b3b.tar.gz |
Merge remote-tracking branch 'origin/master'
Conflicts:
src/main/Analyzor.java
src/main/FarmShell.java
-rw-r--r-- | nbproject/project.properties | 162 | ||||
-rw-r--r-- | src/main/Analyzor.java | 180 | ||||
-rw-r--r-- | src/main/FarmShell.java | 11 |
3 files changed, 264 insertions, 89 deletions
diff --git a/nbproject/project.properties b/nbproject/project.properties index ab8ae05..b262ab6 100644 --- a/nbproject/project.properties +++ b/nbproject/project.properties @@ -1,81 +1,81 @@ -annotation.processing.enabled=true -annotation.processing.enabled.in.editor=false -annotation.processing.processors.list= -annotation.processing.run.all.processors=true -annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output -application.title=Goldfarmer -application.vendor=maurice -build.classes.dir=${build.dir}/classes -build.classes.excludes=**/*.java,**/*.form -# This directory is removed when the project is cleaned: -build.dir=build -build.generated.dir=${build.dir}/generated -build.generated.sources.dir=${build.dir}/generated-sources -# Only compile against the classpath explicitly listed here: -build.sysclasspath=ignore -build.test.classes.dir=${build.dir}/test/classes -build.test.results.dir=${build.dir}/test/results -# Uncomment to specify the preferred debugger connection transport: -#debug.transport=dt_socket -debug.classpath=\ - ${run.classpath} -debug.test.classpath=\ - ${run.test.classpath} -# Files in build.classes.dir which should be excluded from distribution jar -dist.archive.excludes= -# This directory is removed when the project is cleaned: -dist.dir=dist -dist.jar=${dist.dir}/Goldfarmer.jar -dist.javadoc.dir=${dist.dir}/javadoc -endorsed.classpath= -excludes= -file.reference.joda-time-2.3.jar=lib/joda-time-2.3.jar -file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar -includes=** -jar.compress=false -javac.classpath=\ - ${file.reference.joda-time-2.3.jar}:\ - ${file.reference.postgresql-9.3-1101.jdbc41.jar} -# Space-separated list of extra javac options -javac.compilerargs= -javac.deprecation=false -javac.processorpath=\ - ${javac.classpath} -javac.source=1.7 -javac.target=1.7 -javac.test.classpath=\ - ${javac.classpath}:\ - ${build.classes.dir}:\ - ${libs.junit_4.classpath} -javac.test.processorpath=\ - ${javac.test.classpath} -javadoc.additionalparam= -javadoc.author=false -javadoc.encoding=${source.encoding} -javadoc.noindex=false -javadoc.nonavbar=false -javadoc.notree=false -javadoc.private=false -javadoc.splitindex=true -javadoc.use=true -javadoc.version=false -javadoc.windowtitle= -main.class=main.Main -manifest.file=manifest.mf -meta.inf.dir=${src.dir}/META-INF -mkdist.disabled=false -platform.active=default_platform -project.licensePath=./nbproject/licenseheader.txt -run.classpath=\ - ${javac.classpath}:\ - ${build.classes.dir} -# Space-separated list of JVM arguments used when running the project. -# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value. -# To set system properties for unit tests define test-sys-prop.name=value: -run.jvmargs= -run.test.classpath=\ - ${javac.test.classpath}:\ - ${build.test.classes.dir} -source.encoding=UTF-8 -src.dir=src -test.src.dir=test +annotation.processing.enabled=true
+annotation.processing.enabled.in.editor=false
+annotation.processing.processors.list=
+annotation.processing.run.all.processors=true
+annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
+application.title=Goldfarmer
+application.vendor=maurice
+build.classes.dir=${build.dir}/classes
+build.classes.excludes=**/*.java,**/*.form
+# This directory is removed when the project is cleaned:
+build.dir=build
+build.generated.dir=${build.dir}/generated
+build.generated.sources.dir=${build.dir}/generated-sources
+# Only compile against the classpath explicitly listed here:
+build.sysclasspath=ignore
+build.test.classes.dir=${build.dir}/test/classes
+build.test.results.dir=${build.dir}/test/results
+# Uncomment to specify the preferred debugger connection transport:
+#debug.transport=dt_socket
+debug.classpath=\
+ ${run.classpath}
+debug.test.classpath=\
+ ${run.test.classpath}
+# Files in build.classes.dir which should be excluded from distribution jar
+dist.archive.excludes=
+# This directory is removed when the project is cleaned:
+dist.dir=dist
+dist.jar=${dist.dir}/Goldfarmer.jar
+dist.javadoc.dir=${dist.dir}/javadoc
+endorsed.classpath=
+excludes=
+file.reference.joda-time-2.3.jar=lib/joda-time-2.3.jar
+file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar
+includes=**
+jar.compress=false
+javac.classpath=\
+ ${file.reference.joda-time-2.3.jar}:\
+ ${file.reference.postgresql-9.3-1101.jdbc41.jar}
+# Space-separated list of extra javac options
+javac.compilerargs=
+javac.deprecation=false
+javac.processorpath=\
+ ${javac.classpath}
+javac.source=1.7
+javac.target=1.7
+javac.test.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}:\
+ ${libs.junit_4.classpath}
+javac.test.processorpath=\
+ ${javac.test.classpath}
+javadoc.additionalparam=
+javadoc.author=false
+javadoc.encoding=${source.encoding}
+javadoc.noindex=false
+javadoc.nonavbar=false
+javadoc.notree=false
+javadoc.private=false
+javadoc.splitindex=true
+javadoc.use=true
+javadoc.version=false
+javadoc.windowtitle=
+main.class=main.Main
+manifest.file=manifest.mf
+meta.inf.dir=${src.dir}/META-INF
+mkdist.disabled=false
+platform.active=default_platform
+project.licensePath=./nbproject/licenseheader.txt
+run.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}
+# Space-separated list of JVM arguments used when running the project.
+# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value.
+# To set system properties for unit tests define test-sys-prop.name=value:
+run.jvmargs=
+run.test.classpath=\
+ ${javac.test.classpath}:\
+ ${build.test.classes.dir}
+source.encoding=UTF-8
+src.dir=src
+test.src.dir=test
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index 1560417..40ec38a 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -14,8 +14,10 @@ import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; +import java.sql.Timestamp; import java.util.List; import java.util.HashMap; +import java.util.HashSet; import java.util.Locale; import java.util.Map.Entry; import java.util.Scanner; @@ -188,7 +190,7 @@ public class Analyzor { while (data.next()) { //get brand - brand=data.getString("brand"); + brand = data.getString("brand"); //make hashmap for each brand if(!wordcloud.containsKey(brand)){ wordcloud.put(brand, new HashMap<String,Integer>()); @@ -216,7 +218,7 @@ public class Analyzor { } } //print the words and their frequency in a csv file - mapToCSV(wordcloud, "wordcloud.csv", "brand,word,count"); + ssiMapToCSV(wordcloud, "wordcloud.csv", "brand,word,count"); } //generate csv for disco from the query @@ -318,8 +320,6 @@ public class Analyzor { } } - - //hashmap timezone, brand, amount HashMap<String, HashMap<String, Integer>> timeMap = new HashMap<>(); String timezone; @@ -333,6 +333,7 @@ public class Analyzor { timezone="other"; } brand = data.getString("brand"); + //if the timezone is already in the map if(timeMap.containsKey(timezone)){ //if the brand for that timezone is already in the map @@ -354,9 +355,155 @@ public class Analyzor { } } - //make the CSV out of the map - mapToCSV(timeMap, "timezone.csv", "timezone,brand,count"); + ssiMapToCSV(timeMap, "timezone.csv", "timezone,brand,count"); + } + + //gets the positivity of the tweets about a brand + //makes a csv file for posnegVisualizer + void posNeg(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{ + query(query); + + String brand; + int rating; + int ratingInterval; + + int intervalSize = 10; + //brand, ratingInterval, amount + HashMap<String, HashMap<Integer, Integer>> posnegMap = new HashMap<>(); + /* + the rating interval is given by an integer, which is the result of the + tweets sentiment value divided by interval size rounded down. + This puts all data in boxes for the histogram. + */ + + while(data.next()){ + + brand = data.getString("brand"); + rating = data.getInt("rating"); + //ratingInterval is an integer divisible by intervalSize + //if a rating is between a ratingInterval+-0.5*intervalSize, it belongs in that interval + ratingInterval = (rating + (int)(0.5 * intervalSize))/intervalSize*intervalSize; + + //if the brand is already in the map + if(posnegMap.containsKey(brand)){ + //if the brand for that brand is already in the map + if(posnegMap.get(brand).containsKey(ratingInterval)){ + //increment the amount + posnegMap.get(brand).put(ratingInterval, posnegMap.get(brand).get(ratingInterval) + 1); + } + //if the brand for that brand is not yet in the map + else{ + //make a new entry for that brand with amount = 1 + posnegMap.get(brand).put(ratingInterval, 1); + } + } + //if the brand is not yet in the map + else{ + //make a new hashmap for this map and fill it with the brand and the amount + posnegMap.put(brand, new HashMap<Integer, Integer>()); + posnegMap.get(brand).put(ratingInterval, 1); + } + } + siiMapToCSV(posnegMap, "posneg.csv", "brand,ratingInterval,count"); + } + + /* + makes a csv for disco of a process of news spreading + + + the query should be as follows: + - it should be a union of the following query twice, once with TYPE = retweet, once with TYPE = reply + - pick two tables of tweet (t1 and t2) and one of TYPEof + - t1.tweetid = TYPEof.TYPEonid and t2.tweetid = TYPEof.TYPEid + - t1.tweetid should be named maintweetid + - t2.tweetid should be named TYPEid + - t1.timestamp should be names maintime + - t2.timestamp should be named othertime + - t1.userid should be named mainuserid + - t2.userid should be named otheruserid + + so the resulting tables should be: + maintweetid, maintime, mainuserid, replyid, retweetid, othertime, otheruserid + + note that one of replyid and retweetid has to be null and the other a long for each row + how to do this: http://stackoverflow.com/questions/2309943/unioning-two-tables-with-different-number-of-columns + + + the csv will contain: tweetID of the replied/retweeted on, reply/retweet, timestamp, tweetid of the reply/retweet, userid + which corresponds to: caseID , activity , timestamp, resource , rescource + */ + void newsSpread(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{ + query(query); + + long maintweetID; + long replyID; + long retweetID; + + //tweetID, set of replyID's + HashMap<Long, HashSet<Long>> hasReplies = new HashMap<>(); + //tweetID, set of retweetID's + HashMap<Long, HashSet<Long>> hasRetweets = new HashMap<>(); + //tweetID, its timestamp + HashMap<Long, Timestamp> timestamp = new HashMap<>(); + //tweetID, its userID + HashMap<Long, Long> user = new HashMap<>(); + + while(data.next()){ + + maintweetID = data.getLong("thetweetid"); + replyID = data.getLong("replyid"); + retweetID = data.getLong("retweetid"); + + //put these in the corresponding maps + //note that exact one of the two if statements below will hold + + //if the replyID is not null + if(replyID != 0){ + //if this tweetID has no set yet, make one + if(hasReplies.get(maintweetID) == null){ + hasReplies.put(maintweetID, new HashSet<Long>()); + } + //add the replyID to the tweetID + hasReplies.get(maintweetID).add(replyID); + //store the time of the tweet + timestamp.put(replyID, data.getTimestamp("othertime")); + //store teh user of the tweet + user.put(replyID, data.getLong("otheruser")); + } + //if the retweetID is not null + if(retweetID != 0){ + //if this tweetID has no set yet, make one + if(hasRetweets.get(maintweetID) == null){ + hasRetweets.put(maintweetID, new HashSet<Long>()); + } + //add the retweetID to the tweetID + hasRetweets.get(maintweetID).add(retweetID); + //store the time of the tweet + timestamp.put(retweetID, data.getTimestamp("othertime")); + //store teh user of the tweet + user.put(retweetID, data.getLong("otheruser")); + } + } + + //now use this data to make a csv for disco + PrintWriter writer = new PrintWriter("newsSpread.csv", "UTF-8"); + //print the first line + writer.println("caseID,activity,timestamp,tweet,user"); + + //print all replies + for(Long tweetid : hasReplies.keySet()){ + for(Long replyid : hasReplies.get(tweetid)){ + writer.println(tweetid + ", reply, " + timestamp.get(replyid) + ", " + replyid + ", " + user.get(replyid)); + } + } + //print all retweets + for(Long tweetid : hasRetweets.keySet()){ + for(Long retweetid : hasRetweets.get(tweetid)){ + writer.println(tweetid + ", retweet, " + timestamp.get(retweetid) + ", " + retweetid + ", " + user.get(retweetid)); + } + } + writer.close(); } //replaces punctuation so it will be splitted @@ -381,7 +528,26 @@ public class Analyzor { //prints a hashmap into a csv for a html application //Hashmap<key1, HashMap<key2, value>> becomes key1, key2, value //only for String, String, Integer - void mapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine) + void ssiMapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine) + throws FileNotFoundException, UnsupportedEncodingException{ + + PrintWriter writer = new PrintWriter(fileName, "UTF-8"); + + writer.println(firstLine); + + //loop over brands + for(Entry en : map.entrySet()){ + //loop over words + for(Entry e : map.get(en.getKey()).entrySet()){ + writer.println(en.getKey() + "," + e.getKey() + "," + e.getValue()); + } + } + + writer.close(); + System.out.println("csv file made, please put it next to html file and run this"); + } + + void siiMapToCSV(HashMap<String, HashMap<Integer, Integer>> map, String fileName, String firstLine) throws FileNotFoundException, UnsupportedEncodingException{ PrintWriter writer = new PrintWriter(fileName, "UTF-8"); diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java index 1f2835c..5fa468e 100644 --- a/src/main/FarmShell.java +++ b/src/main/FarmShell.java @@ -140,6 +140,12 @@ public class FarmShell { case disco: getAnalyzor().disco(params[0]); break; + case posneg: + getAnalyzor().posNeg(params[0]); + break; + case newsspread: + getAnalyzor().newsSpread(params[0]); + break; case getBrands: String trimmed = params[0].trim(); String bool = trimmed; @@ -195,10 +201,13 @@ public class FarmShell { filterbots("marks all users as bot or not", 1), sentiment("analyzes all tweets on brand positivity (optional arg: tweet/brand selection query)"), - wordcloud("makes a wordcloud of the text of the tweets", 1), + wordcloud("makes a csv for a wordcloud of the text of the tweets", 1), getBrands("fills the database with the brands of a tweet, arg: bool indicating whether to reset mentionsbrand (optional arg: tweet selection query", 1), + timezone("makes a csv ", 1), timezone("makes a map per brand for the users", 1), disco("makes a outputfile for disco", 1), + posneg("makes a csv for a histogram for positive or negative tweets", 1), + newsspread("makes a csv for disco to show a news spread process", 1), exit("Returns to shell"), help("Get help"); |