From a39323f583e1878d3cdafed8de99b4614dab7682 Mon Sep 17 00:00:00 2001
From: s123188 <s123188@S123188.campus.tue.nl>
Date: Sat, 31 May 2014 15:44:18 +0200
Subject: made a method for the posnegVisualizer

---
 src/main/Analyzor.java | 78 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 71 insertions(+), 7 deletions(-)

(limited to 'src')

diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index b896f62..89a8403 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -188,7 +188,7 @@ public class Analyzor {
 
         while (data.next()) {
             //get brand
-            brand=data.getString("brand");
+            brand = data.getString("brand");
             //make hashmap for each brand
             if(!wordcloud.containsKey(brand)){
                 wordcloud.put(brand, new HashMap<String,Integer>());
@@ -216,7 +216,7 @@ public class Analyzor {
             }
         }
         //print the words and their frequency in a csv file
-        mapToCSV(wordcloud, "wordcloud.csv", "brand,word,count");
+        ssiMapToCSV(wordcloud, "wordcloud.csv", "brand,word,count");
     }
 
     //generate csv for disco from the query
@@ -284,8 +284,6 @@ public class Analyzor {
             }
         }
         
-        
-        
         //hashmap timezone, brand, amount
         HashMap<String, HashMap<String, Integer>> timeMap = new HashMap<>();
         String timezone;
@@ -299,6 +297,7 @@ public class Analyzor {
                 timezone="other";
             }
             brand = data.getString("brand");
+            
             //if the timezone is already in the map
             if(timeMap.containsKey(timezone)){
                 //if the brand for that timezone is already in the map
@@ -320,9 +319,55 @@ public class Analyzor {
             }
         }
         
-        
         //make the CSV out of the map
-        mapToCSV(timeMap, "timezone.csv", "timezone,brand,count");
+        ssiMapToCSV(timeMap, "timezone.csv", "timezone,brand,count");
+    }
+    
+    //gets the positivity of the tweets about a brand
+    //makes a csv file for posnegVisualizer
+    void posNeg(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+        query(query);
+        
+        String brand;
+        int rating;
+        int ratingInterval;
+        
+        int intervalSize = 10;
+        //brand, ratingInterval, amount
+        HashMap<String, HashMap<Integer, Integer>> posnegMap = new HashMap<>();
+        /*
+          the rating interval is given by an integer, which is the result of the
+          tweet's sentiment value divided by interval size, rounded down.
+          This puts all data in boxes for the histogram.
+        */
+        
+        while(data.next()){
+            
+            brand = data.getString("brand");
+            rating = data.getInt("rating");
+            ratingInterval = rating/intervalSize;
+            
+            //if the brand is already in the map
+            if(posnegMap.containsKey(brand)){
+                //if the rating interval for that brand is already in the map
+                if(posnegMap.get(brand).containsKey(ratingInterval)){
+                    //increment the amount
+                    posnegMap.get(brand).put(ratingInterval, posnegMap.get(brand).get(ratingInterval) + 1);
+                }
+                //if the rating interval for that brand is not yet in the map
+                else{
+                    //make a new entry for that rating interval with amount = 1
+                    posnegMap.get(brand).put(ratingInterval, 1);
+                }
+            }
+            //if the brand is not yet in the map
+            else{
+                //make a new hashmap for this brand and fill it with the rating interval and the amount
+                posnegMap.put(brand, new HashMap<Integer, Integer>());
+                posnegMap.get(brand).put(ratingInterval, 1);
+            }
+        }
+        siiMapToCSV(posnegMap, "posneg.csv", "brand,ratingInterval,count");
     }
     
     //replaces punctuation so it will be splitted
@@ -347,7 +392,26 @@ public class Analyzor {
     //prints a hashmap into a csv for a html application
     //Hashmap<key1, HashMap<key2, value>> becomes key1, key2, value
     //only for String, String, Integer
-    void mapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine) 
+    void ssiMapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine) 
+                throws FileNotFoundException, UnsupportedEncodingException{
+        
+        PrintWriter writer = new PrintWriter(fileName, "UTF-8");
+        
+        writer.println(firstLine);
+          
+        //loop over brands
+        for(Entry en : map.entrySet()){
+            //loop over words
+            for(Entry e : map.get(en.getKey()).entrySet()){
+                writer.println(en.getKey() + "," + e.getKey() + "," + e.getValue());
+            }
+        }
+        
+        writer.close();
+        System.out.println("csv file made, please put it next to html file and run this");
+    }
+    
+    void siiMapToCSV(HashMap<String, HashMap<Integer, Integer>> map, String fileName, String firstLine) 
                 throws FileNotFoundException, UnsupportedEncodingException{
         
         PrintWriter writer = new PrintWriter(fileName, "UTF-8");
-- 
cgit v1.2.1


From 89719fb5be745b6a6e71d12553ccf2946bffd6c4 Mon Sep 17 00:00:00 2001
From: s123188 <s123188@S123188.campus.tue.nl>
Date: Sat, 31 May 2014 18:07:22 +0200
Subject: made a method that makes a csv to show the newsspreading process in
 disco. Only looks at the replies/retweets of one tweet for a case, no tree by
 recursion (don't know how to show this in disco). Still needs testing.

---
 src/main/Analyzor.java  | 100 ++++++++++++++++++++++++++++++++++++++++++++++++
 src/main/FarmShell.java |  12 +++++-
 2 files changed, 110 insertions(+), 2 deletions(-)

(limited to 'src')

diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 89a8403..45e469f 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -14,8 +14,10 @@ import java.sql.Connection;
 import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.SQLException;
+import java.sql.Timestamp;
 import java.util.List;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Locale;
 import java.util.Map.Entry;
 import java.util.Scanner;
@@ -370,6 +372,104 @@ public class Analyzor {
         siiMapToCSV(posnegMap, "posneg.csv", "brand,ratingInterval,count");
     }
     
+    /*
+    makes a csv for disco of a process of news spreading
+    
+    
+    the query should be as follows:
+        - it should be a union of the following query twice, once with TYPE = retweet, once with TYPE = reply
+            - pick two tables of tweet (t1 and t2) and one of TYPEof
+            - t1.tweetid = TYPEof.TYPEonid and t2.tweetid = TYPEof.TYPEid
+            - t1.tweetid should be named thetweetid
+            - t2.tweetid should be named TYPEid
+            - t1.timestamp should be named maintime
+            - t2.timestamp should be named othertime
+            - t1.userid should be named mainuserid
+            - t2.userid should be named otheruser
+    
+    so the columns of the resulting table should be:
+    thetweetid, maintime, mainuserid, replyid, retweetid, othertime, otheruser
+    
+    note that one of replyid and retweetid has to be null and the other a long for each row
+    how to do this: http://stackoverflow.com/questions/2309943/unioning-two-tables-with-different-number-of-columns
+    
+    
+    the csv will contain: tweetID of the replied/retweeted on, reply/retweet, timestamp, tweetid of the reply/retweet, userid
+    which corresponds to: caseID                             , activity     , timestamp, resource                    , resource
+    */ 
+    void newsSpread(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+        query(query);
+        
+        long maintweetID;
+        long replyID;
+        long retweetID;
+        
+        //tweetID, set of replyID's
+        HashMap<Long, HashSet<Long>> hasReplies = new HashMap<>();
+        //tweetID, set of retweetID's
+        HashMap<Long, HashSet<Long>> hasRetweets = new HashMap<>();
+        //tweetID, its timestamp
+        HashMap<Long, Timestamp> timestamp = new HashMap<>();
+        //tweetID, its userID
+        HashMap<Long, Long> user = new HashMap<>();
+        
+        while(data.next()){
+            
+            maintweetID = data.getLong("thetweetid");
+            replyID = data.getLong("replyid");
+            retweetID = data.getLong("retweetid");
+            
+            //put these in the corresponding maps
+            //note that exactly one of the two if statements below will hold
+            
+            //if the replyID is not null (getLong returns 0 for SQL NULL)
+            if(replyID != 0){
+                //if this tweetID has no set yet, make one
+                if(hasReplies.get(maintweetID) == null){
+                    hasReplies.put(maintweetID, new HashSet<Long>());
+                }
+                //add the replyID to the tweetID
+                hasReplies.get(maintweetID).add(replyID);
+                //store the time of the tweet
+                timestamp.put(replyID, data.getTimestamp("othertime"));
+                //store the user of the tweet
+                user.put(replyID, data.getLong("otheruser"));
+            }
+            //if the retweetID is not null (getLong returns 0 for SQL NULL)
+            if(retweetID != 0){
+                //if this tweetID has no set yet, make one
+                if(hasRetweets.get(maintweetID) == null){
+                    hasRetweets.put(maintweetID, new HashSet<Long>());
+                }
+                //add the retweetID to the tweetID
+                hasRetweets.get(maintweetID).add(retweetID);
+                //store the time of the tweet
+                timestamp.put(retweetID, data.getTimestamp("othertime"));
+                //store the user of the tweet
+                user.put(retweetID, data.getLong("otheruser"));
+            }
+        }
+        
+        //now use this data to make a csv for disco
+        PrintWriter writer = new PrintWriter("newsSpread.csv", "UTF-8");
+        //print the first line
+        writer.println("caseID,activity,timestamp,tweet,user");
+        
+        //print all replies
+        for(Long tweetid : hasReplies.keySet()){
+            for(Long replyid : hasReplies.get(tweetid)){
+                writer.println(tweetid + ", reply, " + timestamp.get(replyid) + ", " + replyid + ", " + user.get(replyid));
+            }
+        }
+        //print all retweets
+        for(Long tweetid : hasRetweets.keySet()){
+            for(Long retweetid : hasRetweets.get(tweetid)){
+                writer.println(tweetid + ", retweet, " + timestamp.get(retweetid) + ", " + retweetid + ", " + user.get(retweetid));
+            }
+        }
+        writer.close();
+    }
+    
     //replaces punctuation so it will be splitted
     //also removes urls
     private String splitPunctToWords(String text) {
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
index 6bf350e..9342d0b 100644
--- a/src/main/FarmShell.java
+++ b/src/main/FarmShell.java
@@ -140,6 +140,12 @@ public class FarmShell {
             case disco:
                 getAnalyzor().disco(params[0]);
                 break;
+            case posneg:
+                getAnalyzor().posNeg(params[0]);
+                break;
+            case newsspread:
+                getAnalyzor().newsSpread(params[0]);
+                break;
             case getBrands:
                 getAnalyzor().getBrands();
                 break;
@@ -171,10 +177,12 @@ public class FarmShell {
 
         filterbots("marks all users as bot or not", 1),
         sentiment("analyzes all tweets on brand positivity (optional arg: tweet/brand selection query)"),
-        wordcloud("makes a wordcloud of the text of the tweets", 1),
+        wordcloud("makes a csv for a wordcloud of the text of the tweets", 1),
         getBrands("fills the database with the brands of a tweet"),
-        timezone("makes a map per brand for the users", 1),
+        timezone("makes a csv ", 1),
         disco("makes a outputfile for disco", 1),
+        posneg("makes a csv for a histogram for positive or negative tweets", 1),
+        newsspread("makes a csv for disco to show a news spread process", 1),
         exit("Returns to shell"),
         help("Get help");
 
-- 
cgit v1.2.1


From 04e575923c129cb099b6332a5e18801e617d9c43 Mon Sep 17 00:00:00 2001
From: s123188 <s123188@S123188.campus.tue.nl>
Date: Mon, 2 Jun 2014 11:21:34 +0200
Subject: +5

---
 src/main/Analyzor.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 45e469f..b01c118 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -347,7 +347,7 @@ public class Analyzor {
             
             brand = data.getString("brand");
             rating = data.getInt("rating");
-            ratingInterval = rating/intervalSize;
+            ratingInterval = (rating+ 5)/intervalSize;
             
             //if the brand is already in the map
             if(posnegMap.containsKey(brand)){
-- 
cgit v1.2.1


From 7b476a73d999a9be4e49247873add1c66ef49824 Mon Sep 17 00:00:00 2001
From: s123188 <s123188@S123188.campus.tue.nl>
Date: Mon, 2 Jun 2014 11:32:04 +0200
Subject: improved ratinginterval in posneg

---
 src/main/Analyzor.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index b01c118..7b9def5 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -347,7 +347,9 @@ public class Analyzor {
             
             brand = data.getString("brand");
             rating = data.getInt("rating");
-            ratingInterval = (rating+ 5)/intervalSize;
+            //ratingInterval is an integer divisible by intervalSize
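+            //if a rating is within ratingInterval +- 0.5*intervalSize, it belongs in that interval (NOTE: Java int division truncates toward zero, so when rating + 0.5*intervalSize is negative the result can land one interval too high)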
+            ratingInterval = (rating + (int)(0.5 * intervalSize))/intervalSize*intervalSize;
             
             //if the brand is already in the map
             if(posnegMap.containsKey(brand)){
-- 
cgit v1.2.1