summaryrefslogtreecommitdiff
path: root/src/main/Analyzor.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/Analyzor.java')
-rw-r--r--src/main/Analyzor.java100
1 files changed, 100 insertions, 0 deletions
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 89a8403..45e469f 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -14,8 +14,10 @@ import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
+import java.sql.Timestamp;
import java.util.List;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Locale;
import java.util.Map.Entry;
import java.util.Scanner;
@@ -370,6 +372,104 @@ public class Analyzor {
siiMapToCSV(posnegMap, "posneg.csv", "brand,ratingInterval,count");
}
+ /*
+ makes a csv for disco of a process of news spreading
+
+
+ the query should be as follows:
+ - it should be a union of the following query twice, once with TYPE = retweet, once with TYPE = reply
+ - pick two tables of tweet (t1 and t2) and one of TYPEof
+ - t1.tweetid = TYPEof.TYPEonid and t2.tweetid = TYPEof.TYPEid
+ - t1.tweetid should be named maintweetid
+ - t2.tweetid should be named TYPEid
+ - t1.timestamp should be named maintime
+ - t2.timestamp should be named othertime
+ - t1.userid should be named mainuserid
+ - t2.userid should be named otheruserid
+
+ so the resulting tables should be:
+ maintweetid, maintime, mainuserid, replyid, retweetid, othertime, otheruserid
+
+ note that one of replyid and retweetid has to be null and the other a long for each row
+ how to do this: http://stackoverflow.com/questions/2309943/unioning-two-tables-with-different-number-of-columns
+
+
+ the csv will contain: tweetID of the replied/retweeted on, reply/retweet, timestamp, tweetid of the reply/retweet, userid
+ which corresponds to: caseID , activity , timestamp, resource , resource
+ */
+ void newsSpread(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+ query(query);
+
+ long maintweetID;
+ long replyID;
+ long retweetID;
+
+ //tweetID, set of replyID's
+ HashMap<Long, HashSet<Long>> hasReplies = new HashMap<>();
+ //tweetID, set of retweetID's
+ HashMap<Long, HashSet<Long>> hasRetweets = new HashMap<>();
+ //tweetID, its timestamp
+ HashMap<Long, Timestamp> timestamp = new HashMap<>();
+ //tweetID, its userID
+ HashMap<Long, Long> user = new HashMap<>();
+
+ while(data.next()){
+
+ maintweetID = data.getLong("thetweetid");
+ replyID = data.getLong("replyid");
+ retweetID = data.getLong("retweetid");
+
+ //put these in the corresponding maps
+ //note that exact one of the two if statements below will hold
+
+ //if the replyID is not null
+ if(replyID != 0){
+ //if this tweetID has no set yet, make one
+ if(hasReplies.get(maintweetID) == null){
+ hasReplies.put(maintweetID, new HashSet<Long>());
+ }
+ //add the replyID to the tweetID
+ hasReplies.get(maintweetID).add(replyID);
+ //store the time of the tweet
+ timestamp.put(replyID, data.getTimestamp("othertime"));
+ //store teh user of the tweet
+ user.put(replyID, data.getLong("otheruser"));
+ }
+ //if the retweetID is not null
+ if(retweetID != 0){
+ //if this tweetID has no set yet, make one
+ if(hasRetweets.get(maintweetID) == null){
+ hasRetweets.put(maintweetID, new HashSet<Long>());
+ }
+ //add the retweetID to the tweetID
+ hasRetweets.get(maintweetID).add(retweetID);
+ //store the time of the tweet
+ timestamp.put(retweetID, data.getTimestamp("othertime"));
+ //store teh user of the tweet
+ user.put(retweetID, data.getLong("otheruser"));
+ }
+ }
+
+ //now use this data to make a csv for disco
+ PrintWriter writer = new PrintWriter("newsSpread.csv", "UTF-8");
+ //print the first line
+ writer.println("caseID,activity,timestamp,tweet,user");
+
+ //print all replies
+ for(Long tweetid : hasReplies.keySet()){
+ for(Long replyid : hasReplies.get(tweetid)){
+ writer.println(tweetid + ", reply, " + timestamp.get(replyid) + ", " + replyid + ", " + user.get(replyid));
+ }
+ }
+ //print all retweets
+ for(Long tweetid : hasRetweets.keySet()){
+ for(Long retweetid : hasRetweets.get(tweetid)){
+ writer.println(tweetid + ", retweet, " + timestamp.get(retweetid) + ", " + retweetid + ", " + user.get(retweetid));
+ }
+ }
+ writer.close();
+ }
+
//replaces punctuation so it will be split
//also removes urls
private String splitPunctToWords(String text) {