summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authors123188 <s123188@S123188.campus.tue.nl>2014-05-31 18:07:22 +0200
committers123188 <s123188@S123188.campus.tue.nl>2014-05-31 18:07:22 +0200
commit89719fb5be745b6a6e71d12553ccf2946bffd6c4 (patch)
tree10eee696da7bd0e2a79242b66076d3a5ee657a8b
parenta39323f583e1878d3cdafed8de99b4614dab7682 (diff)
downloadGoldfarmer-89719fb5be745b6a6e71d12553ccf2946bffd6c4.tar.gz
made a method that makes a csv to show the newsspreading process in disco. Only looks at the replies/retweets of one tweet for a case, no tree by recursion (don't know how to show this in disco). Still needs testing.
-rw-r--r--src/main/Analyzor.java100
-rw-r--r--src/main/FarmShell.java12
2 files changed, 110 insertions, 2 deletions
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 89a8403..45e469f 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -14,8 +14,10 @@ import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
+import java.sql.Timestamp;
import java.util.List;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Locale;
import java.util.Map.Entry;
import java.util.Scanner;
@@ -370,6 +372,104 @@ public class Analyzor {
siiMapToCSV(posnegMap, "posneg.csv", "brand,ratingInterval,count");
}
+ /*
+ makes a csv for disco of a process of news spreading
+
+
+ the query should be as follows:
+ - it should be a union of the following query twice, once with TYPE = retweet, once with TYPE = reply
+ - pick two tables of tweet (t1 and t2) and one of TYPEof
+ - t1.tweetid = TYPEof.TYPEonid and t2.tweetid = TYPEof.TYPEid
+ - t1.tweetid should be named thetweetid
+ - t2.tweetid should be named TYPEid
+ - t1.timestamp should be named maintime
+ - t2.timestamp should be named othertime
+ - t1.userid should be named mainuserid
+ - t2.userid should be named otheruser
+
+ so the resulting table should have the columns:
+ thetweetid, maintime, mainuserid, replyid, retweetid, othertime, otheruser
+
+ note that one of replyid and retweetid has to be null and the other a long for each row
+ how to do this: http://stackoverflow.com/questions/2309943/unioning-two-tables-with-different-number-of-columns
+
+
+ the csv will contain: tweetID of the tweet replied to/retweeted, reply/retweet, timestamp, tweetid of the reply/retweet, userid
+ which corresponds to: caseID , activity , timestamp, resource , resource
+ */
+ /**
+  * Writes "newsSpread.csv" for Disco: one row per reply/retweet of a tweet.
+  * The result set must expose the columns thetweetid, replyid, retweetid,
+  * othertime and otheruser; a replyid/retweetid that reads as 0 is treated
+  * as absent (presumably SQL NULL, which getLong reports as 0).
+  *
+  * @param query the SQL query to run (handed to query(String))
+  * @throws SQLException if running the query or reading its result fails
+  * @throws FileNotFoundException if newsSpread.csv cannot be created
+  * @throws UnsupportedEncodingException if UTF-8 is not supported
+  */
+ void newsSpread(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+     query(query);
+
+     //tweetID, set of replyID's
+     HashMap<Long, HashSet<Long>> hasReplies = new HashMap<>();
+     //tweetID, set of retweetID's
+     HashMap<Long, HashSet<Long>> hasRetweets = new HashMap<>();
+     //tweetID, its timestamp
+     HashMap<Long, Timestamp> timestamp = new HashMap<>();
+     //tweetID, its userID
+     HashMap<Long, Long> user = new HashMap<>();
+
+     while(data.next()){
+         long mainTweetId = data.getLong("thetweetid");
+         long replyId = data.getLong("replyid");
+         long retweetId = data.getLong("retweetid");
+         //the query is expected to fill exactly one of the two ids per row
+         if(replyId != 0){
+             recordSpread(hasReplies, timestamp, user, mainTweetId, replyId);
+         }
+         if(retweetId != 0){
+             recordSpread(hasRetweets, timestamp, user, mainTweetId, retweetId);
+         }
+     }
+
+     //try-with-resources closes the file even if writing a row throws
+     try(PrintWriter writer = new PrintWriter("newsSpread.csv", "UTF-8")){
+         //print the first line
+         writer.println("caseID,activity,timestamp,tweet,user");
+         writeSpreadRows(writer, "reply", hasReplies, timestamp, user);
+         writeSpreadRows(writer, "retweet", hasRetweets, timestamp, user);
+     }
+ }
+
+ //stores childId under mainTweetId plus the time/user of the current row
+ private void recordSpread(HashMap<Long, HashSet<Long>> children,
+         HashMap<Long, Timestamp> timestamp, HashMap<Long, Long> user,
+         long mainTweetId, long childId) throws SQLException {
+     HashSet<Long> ids = children.get(mainTweetId);
+     //if this tweetID has no set yet, make one
+     if(ids == null){
+         ids = new HashSet<Long>();
+         children.put(mainTweetId, ids);
+     }
+     ids.add(childId);
+     timestamp.put(childId, data.getTimestamp("othertime"));
+     user.put(childId, data.getLong("otheruser"));
+ }
+
+ //prints one csv row per child: caseID, activity, timestamp, tweet, user
+ private void writeSpreadRows(PrintWriter writer, String activity,
+         HashMap<Long, HashSet<Long>> children,
+         HashMap<Long, Timestamp> timestamp, HashMap<Long, Long> user) {
+     //NOTE(review): rows use ", " but the header uses "," - confirm Disco copes
+     for(Entry<Long, HashSet<Long>> entry : children.entrySet()){
+         for(Long childId : entry.getValue()){
+             writer.println(entry.getKey() + ", " + activity + ", "
+                     + timestamp.get(childId) + ", " + childId + ", " + user.get(childId));
+         }
+     }
+ }
+
//replaces punctuation so it will be splitted
//also removes urls
private String splitPunctToWords(String text) {
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
index 6bf350e..9342d0b 100644
--- a/src/main/FarmShell.java
+++ b/src/main/FarmShell.java
@@ -140,6 +140,12 @@ public class FarmShell {
case disco:
getAnalyzor().disco(params[0]);
break;
+ case posneg:
+ getAnalyzor().posNeg(params[0]);
+ break;
+ case newsspread:
+ getAnalyzor().newsSpread(params[0]);
+ break;
case getBrands:
getAnalyzor().getBrands();
break;
@@ -171,10 +177,12 @@ public class FarmShell {
filterbots("marks all users as bot or not", 1),
sentiment("analyzes all tweets on brand positivity (optional arg: tweet/brand selection query)"),
- wordcloud("makes a wordcloud of the text of the tweets", 1),
+ wordcloud("makes a csv for a wordcloud of the text of the tweets", 1),
getBrands("fills the database with the brands of a tweet"),
- timezone("makes a map per brand for the users", 1),
+ timezone("makes a csv ", 1),
disco("makes a outputfile for disco", 1),
+ posneg("makes a csv for a histogram for positive or negative tweets", 1),
+ newsspread("makes a csv for disco to show a news spread process", 1),
exit("Returns to shell"),
help("Get help");