From 89719fb5be745b6a6e71d12553ccf2946bffd6c4 Mon Sep 17 00:00:00 2001 From: s123188 Date: Sat, 31 May 2014 18:07:22 +0200 Subject: made a method that makes a csv to show the newsspreading process in disco. Only looks at the replies/retweets of one tweet for a case, no tree by recursion (don't know how to show this in disco). Still needs testing. --- src/main/Analyzor.java | 100 ++++++++++++++++++++++++++++++++++++++++++++++++ src/main/FarmShell.java | 12 +++++- 2 files changed, 110 insertions(+), 2 deletions(-) diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index 89a8403..45e469f 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -14,8 +14,10 @@ import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; +import java.sql.Timestamp; import java.util.List; import java.util.HashMap; +import java.util.HashSet; import java.util.Locale; import java.util.Map.Entry; import java.util.Scanner; @@ -370,6 +372,104 @@ public class Analyzor { siiMapToCSV(posnegMap, "posneg.csv", "brand,ratingInterval,count"); } + /* + makes a csv for disco of a process of news spreading + + + the query should be as follows: + - it should be a union of the following query twice, once with TYPE = retweet, once with TYPE = reply + - pick two tables of tweet (t1 and t2) and one of TYPEof + - t1.tweetid = TYPEof.TYPEonid and t2.tweetid = TYPEof.TYPEid + - t1.tweetid should be named thetweetid + - t2.tweetid should be named TYPEid + - t1.timestamp should be named maintime + - t2.timestamp should be named othertime + - t1.userid should be named mainuserid + - t2.userid should be named otheruser + + so the resulting columns should be: + thetweetid, maintime, mainuserid, replyid, retweetid, othertime, otheruser + + note that one of replyid and retweetid has to be null and the other a long for each row + how to do this:
http://stackoverflow.com/questions/2309943/unioning-two-tables-with-different-number-of-columns + + + the csv will contain: tweetID of the replied/retweeted on, reply/retweet, timestamp, tweetid of the reply/retweet, userid + which corresponds to: caseID , activity , timestamp, resource , resource + */ + void newsSpread(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{ + query(query); + + long maintweetID; + long replyID; + long retweetID; + + //tweetID, set of replyID's + HashMap> hasReplies = new HashMap<>(); + //tweetID, set of retweetID's + HashMap> hasRetweets = new HashMap<>(); + //tweetID, its timestamp + HashMap timestamp = new HashMap<>(); + //tweetID, its userID + HashMap user = new HashMap<>(); + + while(data.next()){ + + maintweetID = data.getLong("thetweetid"); + replyID = data.getLong("replyid"); + retweetID = data.getLong("retweetid"); + + //put these in the corresponding maps + //note that exactly one of the two if statements below is expected to hold for each row + + //if the replyID is not null (ResultSet.getLong returns 0 for SQL NULL, hence the test against 0) + if(replyID != 0){ + //if this tweetID has no set yet, make one + if(hasReplies.get(maintweetID) == null){ + hasReplies.put(maintweetID, new HashSet()); + } + //add the replyID to the tweetID + hasReplies.get(maintweetID).add(replyID); + //store the time of the tweet + timestamp.put(replyID, data.getTimestamp("othertime")); + //store the user of the tweet + user.put(replyID, data.getLong("otheruser")); + } + //if the retweetID is not null (ResultSet.getLong returns 0 for SQL NULL, hence the test against 0) + if(retweetID != 0){ + //if this tweetID has no set yet, make one + if(hasRetweets.get(maintweetID) == null){ + hasRetweets.put(maintweetID, new HashSet()); + } + //add the retweetID to the tweetID + hasRetweets.get(maintweetID).add(retweetID); + //store the time of the tweet + timestamp.put(retweetID, data.getTimestamp("othertime")); + //store the user of the tweet + user.put(retweetID, data.getLong("otheruser")); + } + } + + //now use this data to make a csv for disco + PrintWriter writer = new 
PrintWriter("newsSpread.csv", "UTF-8"); + //print the header row of the csv + writer.println("caseID,activity,timestamp,tweet,user"); + + //print all replies, one row per (tweet, reply) pair + for(Long tweetid : hasReplies.keySet()){ + for(Long replyid : hasReplies.get(tweetid)){ + writer.println(tweetid + ", reply, " + timestamp.get(replyid) + ", " + replyid + ", " + user.get(replyid)); + } + } + //print all retweets, one row per (tweet, retweet) pair + for(Long tweetid : hasRetweets.keySet()){ + for(Long retweetid : hasRetweets.get(tweetid)){ + writer.println(tweetid + ", retweet, " + timestamp.get(retweetid) + ", " + retweetid + ", " + user.get(retweetid)); + } + } + writer.close(); + } + //replaces punctuation so it will be splitted //also removes urls private String splitPunctToWords(String text) { diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java index 6bf350e..9342d0b 100644 --- a/src/main/FarmShell.java +++ b/src/main/FarmShell.java @@ -140,6 +140,12 @@ public class FarmShell { case disco: getAnalyzor().disco(params[0]); break; + case posneg: + getAnalyzor().posNeg(params[0]); + break; + case newsspread: + getAnalyzor().newsSpread(params[0]); + break; case getBrands: getAnalyzor().getBrands(); break; @@ -171,10 +177,12 @@ public class FarmShell { filterbots("marks all users as bot or not", 1), sentiment("analyzes all tweets on brand positivity (optional arg: tweet/brand selection query)"), - wordcloud("makes a wordcloud of the text of the tweets", 1), + wordcloud("makes a csv for a wordcloud of the text of the tweets", 1), getBrands("fills the database with the brands of a tweet"), - timezone("makes a map per brand for the users", 1), + timezone("makes a csv ", 1), disco("makes a outputfile for disco", 1), + posneg("makes a csv for a histogram for positive or negative tweets", 1), + newsspread("makes a csv for disco to show a news spread process", 1), exit("Returns to shell"), help("Get help"); -- cgit v1.2.1