fixed

author: s123188 <s123188@S123188.campus.tue.nl> 2014-06-11 12:43:54 +0200
committer: s123188 <s123188@S123188.campus.tue.nl> 2014-06-11 12:43:54 +0200
commit: 36e2bf88e62ebb2865c0744a9d8c85bd372fbb69 (patch)
tree: 5a9de093a142b2a78f81c64b227c000510bcb0b1
parent: a45b8b54685e565eda2a6f81eee10c39974f1175 (diff)
parent: e14c671f3cbb56a08d765bd992e4cf774a0d1353 (diff)
download: Goldfarmer-36e2bf88e62ebb2865c0744a9d8c85bd372fbb69.tar.gz
5 files changed, 355 insertions, 280 deletions
diff --git a/brandonlyrules.txt b/brandonlyrules.txt
index fe4557d..00e4dab 100755
--- a/brandonlyrules.txt
+++ b/brandonlyrules.txt
@@ -1,70 +1,13 @@
-samsung - samsung - galaxy
-samsung -  galaxy - s5,s4,s3,zoom,note
-samsung - galaxy,s5
-samsung - samsung,s5
-samsung - galaxy,s4
-samsung - samsung,s4
-samsung - galaxy,s3
-samsung - samsung,s3
-samsung - galaxy,k,zoom
-samsung - samsung,k,zoom
-samsung - galaxy,note
-samsung - samsung,note
+samsung - samsung
 
-food - apple - iphone
-apple - iphone - 4,4s,5,5s,5c
-apple - iphone,4
-apple - iphone4
-apple - iphone,4s
-apple - iphone4s
-apple - iphone,5
-apple - iphone5
-apple - iphone,5s
-apple - iphone5s
-apple - iphone,5c
-apple - iphone5c
+no - apple - iphone
+apple - iphone
 
-huawei- huawei - ascend,p6,p7,mini,y300,y530,mate,g700,g510,g6,g525
-huawei - huawei,ascend
-huawei - huawei,p6
-huawei - huawei,p7
-huawei - huawei,mini
-huawei - huawei,y300
-huawei - huawei,y530
-huawei - huawei,mate
-huawei - huawei,g700
-huawei - huawei,g510
-huawei - huawei,g6
-huawei - huawei,g525
+huawei- huawei
 
-sony - sony - xperia,e1,z,z1,z2,compact,ZR,M
-sony - sony,xperia
-sony - sony,L
-sony - sony,E1
-sony - sony,Z
-sony - sony,Z1
-sony - sony,Z2
-sony - sony,compact
-sony - sony,ZR
-sony - sony,M
+sony - sony
 
-HTC - htc - one,m8,mini,desire,dual,x,sv
-HTC - htc,one
-HTC - htc,m8
-HTC - htc,mini
-HTC - htc,desire
-HTC - htc,x,dual
-HTC - htc,sv
+HTC - htc
 
-LG - lg - nexus,g2,l70,l90,flex,mini,l9,l7,l5,l3
-LG - nexus,5
-LG - lg,g2
-LG - lg,l70
-LG - lg,l90
-LG - lg,l40
-LG - lg,g,flex
-LG - lg,mini
-LG - lg,l9
-LG - lg,l7
-LG - lg,l5
-LG - lg,l3
+LG - nexus
+LG - lg
diff --git a/nbproject/project.properties b/nbproject/project.properties
index b262ab6..ab8ae05 100644
--- a/nbproject/project.properties
+++ b/nbproject/project.properties
@@ -1,81 +1,81 @@
-annotation.processing.enabled=true
-annotation.processing.enabled.in.editor=false
-annotation.processing.processors.list=
-annotation.processing.run.all.processors=true
-annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
-application.title=Goldfarmer
-application.vendor=maurice
-build.classes.dir=${build.dir}/classes
-build.classes.excludes=**/*.java,**/*.form
-# This directory is removed when the project is cleaned:
-build.dir=build
-build.generated.dir=${build.dir}/generated
-build.generated.sources.dir=${build.dir}/generated-sources
-# Only compile against the classpath explicitly listed here:
-build.sysclasspath=ignore
-build.test.classes.dir=${build.dir}/test/classes
-build.test.results.dir=${build.dir}/test/results
-# Uncomment to specify the preferred debugger connection transport:
-#debug.transport=dt_socket
-debug.classpath=\
-    ${run.classpath}
-debug.test.classpath=\
-    ${run.test.classpath}
-# Files in build.classes.dir which should be excluded from distribution jar
-dist.archive.excludes=
-# This directory is removed when the project is cleaned:
-dist.dir=dist
-dist.jar=${dist.dir}/Goldfarmer.jar
-dist.javadoc.dir=${dist.dir}/javadoc
-endorsed.classpath=
-excludes=
-file.reference.joda-time-2.3.jar=lib/joda-time-2.3.jar
-file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar
-includes=**
-jar.compress=false
-javac.classpath=\
-    ${file.reference.joda-time-2.3.jar}:\
-    ${file.reference.postgresql-9.3-1101.jdbc41.jar}
-# Space-separated list of extra javac options
-javac.compilerargs=
-javac.deprecation=false
-javac.processorpath=\
-    ${javac.classpath}
-javac.source=1.7
-javac.target=1.7
-javac.test.classpath=\
-    ${javac.classpath}:\
-    ${build.classes.dir}:\
-    ${libs.junit_4.classpath}
-javac.test.processorpath=\
-    ${javac.test.classpath}
-javadoc.additionalparam=
-javadoc.author=false
-javadoc.encoding=${source.encoding}
-javadoc.noindex=false
-javadoc.nonavbar=false
-javadoc.notree=false
-javadoc.private=false
-javadoc.splitindex=true
-javadoc.use=true
-javadoc.version=false
-javadoc.windowtitle=
-main.class=main.Main
-manifest.file=manifest.mf
-meta.inf.dir=${src.dir}/META-INF
-mkdist.disabled=false
-platform.active=default_platform
-project.licensePath=./nbproject/licenseheader.txt
-run.classpath=\
-    ${javac.classpath}:\
-    ${build.classes.dir}
-# Space-separated list of JVM arguments used when running the project.
-# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value.
-# To set system properties for unit tests define test-sys-prop.name=value:
-run.jvmargs=
-run.test.classpath=\
-    ${javac.test.classpath}:\
-    ${build.test.classes.dir}
-source.encoding=UTF-8
-src.dir=src
-test.src.dir=test
+annotation.processing.enabled=true
+annotation.processing.enabled.in.editor=false
+annotation.processing.processors.list=
+annotation.processing.run.all.processors=true
+annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
+application.title=Goldfarmer
+application.vendor=maurice
+build.classes.dir=${build.dir}/classes
+build.classes.excludes=**/*.java,**/*.form
+# This directory is removed when the project is cleaned:
+build.dir=build
+build.generated.dir=${build.dir}/generated
+build.generated.sources.dir=${build.dir}/generated-sources
+# Only compile against the classpath explicitly listed here:
+build.sysclasspath=ignore
+build.test.classes.dir=${build.dir}/test/classes
+build.test.results.dir=${build.dir}/test/results
+# Uncomment to specify the preferred debugger connection transport:
+#debug.transport=dt_socket
+debug.classpath=\
+    ${run.classpath}
+debug.test.classpath=\
+    ${run.test.classpath}
+# Files in build.classes.dir which should be excluded from distribution jar
+dist.archive.excludes=
+# This directory is removed when the project is cleaned:
+dist.dir=dist
+dist.jar=${dist.dir}/Goldfarmer.jar
+dist.javadoc.dir=${dist.dir}/javadoc
+endorsed.classpath=
+excludes=
+file.reference.joda-time-2.3.jar=lib/joda-time-2.3.jar
+file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar
+includes=**
+jar.compress=false
+javac.classpath=\
+    ${file.reference.joda-time-2.3.jar}:\
+    ${file.reference.postgresql-9.3-1101.jdbc41.jar}
+# Space-separated list of extra javac options
+javac.compilerargs=
+javac.deprecation=false
+javac.processorpath=\
+    ${javac.classpath}
+javac.source=1.7
+javac.target=1.7
+javac.test.classpath=\
+    ${javac.classpath}:\
+    ${build.classes.dir}:\
+    ${libs.junit_4.classpath}
+javac.test.processorpath=\
+    ${javac.test.classpath}
+javadoc.additionalparam=
+javadoc.author=false
+javadoc.encoding=${source.encoding}
+javadoc.noindex=false
+javadoc.nonavbar=false
+javadoc.notree=false
+javadoc.private=false
+javadoc.splitindex=true
+javadoc.use=true
+javadoc.version=false
+javadoc.windowtitle=
+main.class=main.Main
+manifest.file=manifest.mf
+meta.inf.dir=${src.dir}/META-INF
+mkdist.disabled=false
+platform.active=default_platform
+project.licensePath=./nbproject/licenseheader.txt
+run.classpath=\
+    ${javac.classpath}:\
+    ${build.classes.dir}
+# Space-separated list of JVM arguments used when running the project.
+# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value.
+# To set system properties for unit tests define test-sys-prop.name=value:
+run.jvmargs=
+run.test.classpath=\
+    ${javac.test.classpath}:\
+    ${build.test.classes.dir}
+source.encoding=UTF-8
+src.dir=src
+test.src.dir=test
diff --git a/src/database/BrandAnalyzerQueue.java b/src/database/BrandAnalyzerQueue.java
new file mode 100644
index 0000000..d4e4029
--- /dev/null
+++ b/src/database/BrandAnalyzerQueue.java
@@ -0,0 +1,88 @@
+package database;
+
+import analysis.BrandChecker;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
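+ * Runnable that determines the brands of each row in a result set and hands the results to a consumer through a bounded queue.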
+ * @author Peter Wu
+ */
+public class BrandAnalyzerQueue implements Runnable {
+
+    private final BrandChecker checker;
+    private final ResultSet data;
+    private final BlockingQueue<Result> queue;
+    private volatile boolean last = false;
+
+    public BrandAnalyzerQueue(ResultSet data) {
+        this.checker = new BrandChecker("brandonlyrules.txt");
+        this.data = data;
+        this.queue = new ArrayBlockingQueue<>(1000);
+    }
+
+    private Logger getLogger() {
+        return Logger.getLogger(BrandAnalyzerQueue.class.getName());
+    }
+
+    @Override
+    public void run() {
+        try {
+            fillQueue();
+        } catch (SQLException ex) {
+            getLogger().log(Level.SEVERE, "Horrible! Database error", ex);
+        } catch (InterruptedException ex) {
+            getLogger().log(Level.SEVERE, "Interrupted!", ex);
+        }
+        try {
+            last = true;
+            queue.put(new Result(-1, null));
+        } catch (InterruptedException ex) {
+            getLogger().log(Level.SEVERE, "Failed to insert suicide pill!");
+        }
+    }
+
+    private void fillQueue() throws SQLException, InterruptedException {
+        while (data.next()) {
+            List<String> brands = checker.getBrands(data.getString("text"));
+            // if there is no brand, add a dummy so we know it got checked
+            if (brands.isEmpty()) {
+                brands.add("no");
+            }
+            long tweetid = data.getLong("tweetid");
+            Result result = new Result(tweetid, brands);
+            queue.put(result);
+        }
+    }
+
+    public Result next() {
+        Result result = null;
+        try {
+            if (!last) {
+                result = queue.take();
+                if (result.brands == null) {
+                    result = null;
+                }
+            }
+        } catch (InterruptedException ex) {
+            getLogger().log(Level.SEVERE, "Interrupted!", ex);
+        }
+        return result;
+    }
+
+    public static class Result {
+
+        public final long tweetid;
+        public final List<String> brands;
+
+        public Result(long tweetid, List<String> brands) {
+            this.tweetid = tweetid;
+            this.brands = brands;
+        }
+    }
+}
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 7685cdb..b620909 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -1,6 +1,7 @@
 package main;
 
 import analysis.BrandChecker;
+import database.BrandAnalyzerQueue;
 import database.NamedPreparedStatement;
 import database.QueryUtils;
 import java.io.File;
@@ -15,10 +16,8 @@ import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Timestamp;
-import java.util.List;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.Locale;
 import java.util.Map.Entry;
 import java.util.Scanner;
 
@@ -186,14 +185,14 @@ public class Analyzor {
         String text;
         String brand;
         String[] words;
-        HashMap<String,HashMap<String, Integer>> wordcloud = new HashMap<>();
+        HashMap<String, HashMap<String, Integer>> wordcloud = new HashMap<>();
 
         while (data.next()) {
             //get brand
             brand = data.getString("brand");
             //make hashmap for each brand
-            if(!wordcloud.containsKey(brand)){
-                wordcloud.put(brand, new HashMap<String,Integer>());
+            if (!wordcloud.containsKey(brand)) {
+                wordcloud.put(brand, new HashMap<String, Integer>());
             }
             //get the text
             text = data.getString("text");
@@ -204,15 +203,14 @@ public class Analyzor {
             //for all words
             for (String word : words) {
                 //if it is empty, a space or a stripe, skip it
-                if(word.equals("") || word.equals(" ") || word.equals("-")){
+                if (word.equals("") || word.equals(" ") || word.equals("-")) {
                     continue;
                 }
                 //if the word is already in the map, increment the amount
-                if(wordcloud.get(brand).containsKey(word)){
+                if (wordcloud.get(brand).containsKey(word)) {
                     wordcloud.get(brand).put(word, wordcloud.get(brand).get(word) + 1);
-                }
-                //if the word is not already in the map, make an entry with amount = 1
-                else{
+                } //if the word is not already in the map, make an entry with amount = 1
+                else {
                     wordcloud.get(brand).put(word, 1);
                 }
             }
@@ -249,123 +247,149 @@ public class Analyzor {
         writer.close();
     }
 
-    public void getBrands() throws SQLException {
+    /**
+     * Obtain the brands of select tweet texts.
+     *
+     * @param queryText The rows to select.
+     * @param reset Whether to reset mentionsbrand.
+     * @throws SQLException If the query is unsuccessful.
+     */
+    public void getBrands(String queryText, boolean reset) throws SQLException {
         PreparedStatement statement;
-        //make a connection to the database and execute the query
-        statement = connection.prepareStatement("delete from mentionsbrand");
-        statement.executeUpdate();
-        BrandChecker checker = new BrandChecker("brandonlyrules.txt");
-        query("select * from tweet");
-        NamedPreparedStatement m_insertBrand = new NamedPreparedStatement(connection, QueryUtils.insertBrand);
-        while (data.next()) {
-            List<String> brands = checker.getBrands(data.getString("text"));
-            if (brands.isEmpty()) {
-                QueryUtils.setInsertBrandParams(m_insertBrand, data.getLong("tweetid"), "no");
-                m_insertBrand.executeUpdate();
-            } else {
-                for (String brand : brands) {
-                    QueryUtils.setInsertBrandParams(m_insertBrand, data.getLong("tweetid"), brand);
-                    m_insertBrand.executeUpdate();
-                }
+        // make a connection to the database and execute the query
+        if (reset) {
+            System.out.println("Cleaning old entries of mentionsbrand.");
+            statement = connection.prepareStatement("delete from mentionsbrand");
+            statement.executeUpdate();
+        }
+
+        System.out.println("Obtaining all selected entries in tweet.");
+        if (queryText.isEmpty()) {
+            query("select * from tweet");
+        } else {
+            query(queryText);
+        }
+        System.out.println("Query finished.");
+
+        NamedPreparedStatement insertBrand = new NamedPreparedStatement(connection, QueryUtils.insertBrand);
+
+        int brandCount = 0;
+        int count = 0;
+        long timestamp = System.currentTimeMillis();
+        BrandAnalyzerQueue analyzer = new BrandAnalyzerQueue(data);
+        BrandAnalyzerQueue.Result result;
+        new Thread(analyzer).start();
+        while ((result = analyzer.next()) != null) {
+            for (String brand : result.brands) {
+                QueryUtils.setInsertBrandParams(insertBrand, result.tweetid, brand);
+                insertBrand.executeUpdate();
+            }
+
+            brandCount += result.brands.size();
+            count++;
+            if (count % 10000 == 0) {
+                System.err.println("Processed " + count + " tweets, inserted "
+                        + brandCount + " in " + ((System.currentTimeMillis() - timestamp) / 1000) + " sec");
             }
         }
+
+        System.err.println("Processed " + count + " tweets in "
+                + ((System.currentTimeMillis() - timestamp) / 1000) + " sec");
+        System.err.println("Finished getBrands, processed " + count
+                + " number of tweets, added " + brandCount + " brands or no.");
     }
 
     //gets the amount of users that tweet about a brand in a timezone
     //makes a csv file timezone, brand, amount
-    public void timezone(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+    public void timezone(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
         query(query);
-        
+
         InputStream inFile = new FileInputStream("timezone.txt");
         Scanner readFile = new Scanner(inFile);
-        HashMap<String,String> toTimezone = new HashMap<>();
+        HashMap<String, String> toTimezone = new HashMap<>();
         while (readFile.hasNextLine()) {
             String line = readFile.nextLine();
-            if(line.split(",").length>1){
+            if (line.split(",").length > 1) {
                 toTimezone.put(line.split(",")[0], line.split(",")[1]);
             }
         }
-        
+
         //hashmap timezone, brand, amount
         HashMap<String, HashMap<String, Integer>> timeMap = new HashMap<>();
         String timezone;
         String brand;
-        
-        while(data.next()){
+
+        while (data.next()) {
             timezone = data.getString("timezone");
-            if (toTimezone.containsKey(timezone)){
-                timezone=toTimezone.get(timezone);
+            if (toTimezone.containsKey(timezone)) {
+                timezone = toTimezone.get(timezone);
             } else {
-                timezone="other";
+                timezone = "other";
             }
             brand = data.getString("brand");
-            
+
             //if the timezone is already in the map
-            if(timeMap.containsKey(timezone)){
+            if (timeMap.containsKey(timezone)) {
                 //if the brand for that timezone is already in the map
-                if(timeMap.get(timezone).containsKey(brand)){
+                if (timeMap.get(timezone).containsKey(brand)) {
                     //increment the amount
                     timeMap.get(timezone).put(brand, timeMap.get(timezone).get(brand) + 1);
-                }
-                //if the brand for that timezone is not yet in the map
-                else{
+                } //if the brand for that timezone is not yet in the map
+                else {
                     //make a new entry for that brand with amount = 1
                     timeMap.get(timezone).put(brand, 1);
                 }
-            }
-            //if the timezone is not yet in the map
-            else{
+            } //if the timezone is not yet in the map
+            else {
                 //make a new hashmap for this map and fill it with the brand and the amount
                 timeMap.put(timezone, new HashMap<String, Integer>());
                 timeMap.get(timezone).put(brand, 1);
             }
         }
-        
+
         //make the CSV out of the map
         ssiMapToCSV(timeMap, "timezone.csv", "timezone,brand,count");
     }
-    
+
     //gets the positivity of the tweets about a brand
     //makes a csv file for posnegVisualizer
-    void posNeg(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+    void posNeg(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
         query(query);
-        
+
         String brand;
         int rating;
         int ratingInterval;
-        
+
         int intervalSize = 10;
         //brand, ratingInterval, amount
         HashMap<String, HashMap<Integer, Integer>> posnegMap = new HashMap<>();
         /*
-          the rating interval is given by an integer, which is the result of the
-          tweets sentiment value divided by interval size rounded down.
-          This puts all data in boxes for the histogram.
-        */
-        
-        while(data.next()){
-            
+         the rating interval is given by an integer, which is the result of the
+         tweets sentiment value divided by interval size rounded down.
+         This puts all data in boxes for the histogram.
+         */
+
+        while (data.next()) {
+
             brand = data.getString("brand");
             rating = data.getInt("rating");
             //ratingInterval is an integer divisible by intervalSize
             //if a rating is between a ratingInterval+-0.5*intervalSize, it belongs in that interval
-            ratingInterval = (rating + (int)(0.5 * intervalSize))/intervalSize*intervalSize;
-            
+            ratingInterval = (rating + (int) (0.5 * intervalSize)) / intervalSize * intervalSize;
+
             //if the brand is already in the map
-            if(posnegMap.containsKey(brand)){
+            if (posnegMap.containsKey(brand)) {
                 //if the brand for that brand is already in the map
-                if(posnegMap.get(brand).containsKey(ratingInterval)){
+                if (posnegMap.get(brand).containsKey(ratingInterval)) {
                     //increment the amount
                     posnegMap.get(brand).put(ratingInterval, posnegMap.get(brand).get(ratingInterval) + 1);
-                }
-                //if the brand for that brand is not yet in the map
-                else{
+                } //if the rating interval for that brand is not yet in the map
+                else {
                     //make a new entry for that brand with amount = 1
                     posnegMap.get(brand).put(ratingInterval, 1);
                 }
-            }
-            //if the brand is not yet in the map
-            else{
+            } //if the brand is not yet in the map
+            else {
                 //make a new hashmap for this map and fill it with the brand and the amount
                 posnegMap.put(brand, new HashMap<Integer, Integer>());
                 posnegMap.get(brand).put(ratingInterval, 1);
@@ -373,39 +397,39 @@ public class Analyzor {
         }
         siiMapToCSV(posnegMap, "posneg.csv", "brand,ratingInterval,count");
     }
-    
+
     /*
-    makes a csv for disco of a process of news spreading
+     makes a csv for disco of a process of news spreading
     
     
-    the query should be as follows:
-        - it should be a union of the following query twice, once with TYPE = retweet, once with TYPE = reply
-            - pick two tables of tweet (t1 and t2) and one of TYPEof
-            - t1.tweetid = TYPEof.TYPEonid and t2.tweetid = TYPEof.TYPEid
-            - t1.tweetid should be named maintweetid
-            - t2.tweetid should be named TYPEid
-            - t1.timestamp should be names maintime
-            - t2.timestamp should be named othertime
-            - t1.userid should be named mainuserid
-            - t2.userid should be named otheruserid
+     the query should be as follows:
+     - it should be a union of the following query twice, once with TYPE = retweet, once with TYPE = reply
+     - pick two tables of tweet (t1 and t2) and one of TYPEof
+     - t1.tweetid = TYPEof.TYPEonid and t2.tweetid = TYPEof.TYPEid
+     - t1.tweetid should be named maintweetid
+     - t2.tweetid should be named TYPEid
+     - t1.timestamp should be named maintime
+     - t2.timestamp should be named othertime
+     - t1.userid should be named mainuserid
+     - t2.userid should be named otheruserid
     
-    so the resulting tables should be:
-    maintweetid, maintime, mainuserid, replyid, retweetid, othertime, otheruserid
+     so the resulting tables should be:
+     maintweetid, maintime, mainuserid, replyid, retweetid, othertime, otheruserid
     
-    note that one of replyid and retweetid has to be null and the other a long for each row
-    how to do this: http://stackoverflow.com/questions/2309943/unioning-two-tables-with-different-number-of-columns
+     note that one of replyid and retweetid has to be null and the other a long for each row
+     how to do this: http://stackoverflow.com/questions/2309943/unioning-two-tables-with-different-number-of-columns
     
     
-    the csv will contain: tweetID of the replied/retweeted on, reply/retweet, timestamp, tweetid of the reply/retweet, userid
-    which corresponds to: caseID                             , activity     , timestamp, resource                    , rescource
-    */ 
-    void newsSpread(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+     the csv will contain: tweetID of the replied/retweeted on, reply/retweet, timestamp, tweetid of the reply/retweet, userid
+     which corresponds to: caseID                             , activity     , timestamp, resource                    , resource
+     */
+    void newsSpread(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
         query(query);
-        
+
         long maintweetID;
         long replyID;
         long retweetID;
-        
+
         //tweetID, set of replyID's
         HashMap<Long, HashSet<Long>> hasReplies = new HashMap<>();
         //tweetID, set of retweetID's
@@ -414,20 +438,19 @@ public class Analyzor {
         HashMap<Long, Timestamp> timestamp = new HashMap<>();
         //tweetID, its userID
         HashMap<Long, Long> user = new HashMap<>();
-        
-        while(data.next()){
-            
+
+        while (data.next()) {
+
             maintweetID = data.getLong("thetweetid");
             replyID = data.getLong("replyid");
             retweetID = data.getLong("retweetid");
-            
+
             //put these in the corresponding maps
             //note that exact one of the two if statements below will hold
-            
             //if the replyID is not null
-            if(replyID != 0){
+            if (replyID != 0) {
                 //if this tweetID has no set yet, make one
-                if(hasReplies.get(maintweetID) == null){
+                if (hasReplies.get(maintweetID) == null) {
                     hasReplies.put(maintweetID, new HashSet<Long>());
                 }
                 //add the replyID to the tweetID
@@ -438,9 +461,9 @@ public class Analyzor {
                 user.put(replyID, data.getLong("otheruser"));
             }
             //if the retweetID is not null
-            if(retweetID != 0){
+            if (retweetID != 0) {
                 //if this tweetID has no set yet, make one
-                if(hasRetweets.get(maintweetID) == null){
+                if (hasRetweets.get(maintweetID) == null) {
                     hasRetweets.put(maintweetID, new HashSet<Long>());
                 }
                 //add the retweetID to the tweetID
@@ -451,28 +474,28 @@ public class Analyzor {
                 user.put(retweetID, data.getLong("otheruser"));
             }
         }
-        
+
         //now use this data to make a csv for disco
         PrintWriter writer = new PrintWriter("newsSpread.csv", "UTF-8");
         //print the first line
         writer.println("caseID,activity,timestamp,tweet,user");
-        
+
         //print all replies
-        for(Long tweetid : hasReplies.keySet()){
-            for(Long replyid : hasReplies.get(tweetid)){
+        for (Long tweetid : hasReplies.keySet()) {
+            for (Long replyid : hasReplies.get(tweetid)) {
                 writer.println(tweetid + ", reply, " + timestamp.get(replyid) + ", " + replyid + ", " + user.get(replyid));
             }
         }
         //print all retweets
-        for(Long tweetid : hasRetweets.keySet()){
-            for(Long retweetid : hasRetweets.get(tweetid)){
+        for (Long tweetid : hasRetweets.keySet()) {
+            for (Long retweetid : hasRetweets.get(tweetid)) {
                 writer.println(tweetid + ", retweet, " + timestamp.get(retweetid) + ", " + retweetid + ", " + user.get(retweetid));
             }
         }
         writer.close();
     }
     
-    void categorize(String file) throws FileNotFoundException, UnsupportedEncodingException{
+        void categorize(String file) throws FileNotFoundException, UnsupportedEncodingException{
         
         //get the division in categories
         InputStream inFile = new FileInputStream("categories.txt");
@@ -482,7 +505,9 @@ public class Analyzor {
         while (readFile.hasNextLine()) {
             String line = readFile.nextLine();
             if(line.split(",").length>1){
-                toCategory.put(line.split(",")[0], line.split(",")[1]);
+                for(String element:line.split(",")[1].split(" ")){
+                    toCategory.put(element, line.split(",")[0]);
+                }
             }
         }
         
@@ -521,7 +546,7 @@ public class Analyzor {
         }
         writer.close();
     }
-    
+
     //replaces punctuation so it will be splitted
     //also removes urls
     private String splitPunctToWords(String text) {
@@ -540,44 +565,44 @@ public class Analyzor {
         text = text.replaceAll("[^a-zA-Z0-9#_-]", " ");
         return text;
     }
-    
+
     //prints a hashmap into a csv for a html application
     //Hashmap<key1, HashMap<key2, value>> becomes key1, key2, value
     //only for String, String, Integer
-    void ssiMapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine) 
-                throws FileNotFoundException, UnsupportedEncodingException{
-        
+    void ssiMapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine)
+            throws FileNotFoundException, UnsupportedEncodingException {
+
         PrintWriter writer = new PrintWriter(fileName, "UTF-8");
-        
+
         writer.println(firstLine);
-          
+
         //loop over brands
-        for(Entry en : map.entrySet()){
+        for (Entry en : map.entrySet()) {
             //loop over words
-            for(Entry e : map.get(en.getKey()).entrySet()){
+            for (Entry e : map.get(en.getKey()).entrySet()) {
                 writer.println(en.getKey() + "," + e.getKey() + "," + e.getValue());
             }
         }
-        
+
         writer.close();
         System.out.println("csv file made, please put it next to html file and run this");
     }
-    
-    void siiMapToCSV(HashMap<String, HashMap<Integer, Integer>> map, String fileName, String firstLine) 
-                throws FileNotFoundException, UnsupportedEncodingException{
-        
+
+    void siiMapToCSV(HashMap<String, HashMap<Integer, Integer>> map, String fileName, String firstLine)
+            throws FileNotFoundException, UnsupportedEncodingException {
+
         PrintWriter writer = new PrintWriter(fileName, "UTF-8");
-        
+
         writer.println(firstLine);
-          
+
         //loop over brands
-        for(Entry en : map.entrySet()){
+        for (Entry en : map.entrySet()) {
             //loop over words
-            for(Entry e : map.get(en.getKey()).entrySet()){
+            for (Entry e : map.get(en.getKey()).entrySet()) {
                 writer.println(en.getKey() + "," + e.getKey() + "," + e.getValue());
             }
         }
-        
+
         writer.close();
         System.out.println("csv file made, please put it next to html file and run this");
     }
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
index 6fa6d5f..da4e0f9 100644
--- a/src/main/FarmShell.java
+++ b/src/main/FarmShell.java
@@ -137,7 +137,6 @@ public class FarmShell {
                 break;
             case timezone:
                 getAnalyzor().timezone(params[0]);
-                break;
             case disco:
                 getAnalyzor().disco(params[0]);
                 break;
@@ -147,11 +146,32 @@ public class FarmShell {
             case newsspread:
                 getAnalyzor().newsSpread(params[0]);
                 break;
-            case categorize:
-                getAnalyzor().categorize(params[0]);
-                break;
             case getBrands:
-                getAnalyzor().getBrands();
+                String trimmed = params[0].trim();
+                String bool = trimmed;
+                String query = null;
+
+                int index = trimmed.indexOf(" ");
+
+                if (index > -1) {
+                    bool = trimmed.substring(0, index);
+                    query = trimmed.substring(index + 1, trimmed.length());
+                }
+
+                boolean reset = false;
+                if (bool.equals("true")) {
+                    reset = true;
+                } else if (bool.equals("false")) {
+                    reset = false;
+                } else {
+                    throw new IllegalArgumentException("getBrands: expected boolean, got " + params[0]);
+                }
+
+                if (query != null) {
+                    getAnalyzor().getBrands(query, reset);
+                } else {
+                    getAnalyzor().getBrands("", reset);
+                }
                 break;
             case help:
                 for (String line : HELP) {
@@ -182,12 +202,11 @@ public class FarmShell {
         filterbots("marks all users as bot or not", 1),
         sentiment("analyzes all tweets on brand positivity (optional arg: tweet/brand selection query)"),
         wordcloud("makes a csv for a wordcloud of the text of the tweets", 1),
-        getBrands("fills the database with the brands of a tweet"),
-        timezone("makes a csv ", 1),
+        getBrands("fills the database with the brands of a tweet, arg: bool indicating whether to reset mentionsbrand (optional arg: tweet selection query", 1),
+        timezone("makes a map per brand for the users", 1),
         disco("makes a outputfile for disco", 1),
         posneg("makes a csv for a histogram for positive or negative tweets", 1),
         newsspread("makes a csv for disco to show a news spread process", 1),
-        categorize("categorizes words in a csv as defined in categories.txt", 1),
         exit("Returns to shell"),
         help("Get help");
author	s123188 <s123188@S123188.campus.tue.nl>	2014-06-11 12:43:54 +0200
committer	s123188 <s123188@S123188.campus.tue.nl>	2014-06-11 12:43:54 +0200
commit	36e2bf88e62ebb2865c0744a9d8c85bd372fbb69 (patch)
tree	5a9de093a142b2a78f81c64b227c000510bcb0b1
parent	a45b8b54685e565eda2a6f81eee10c39974f1175 (diff)
parent	e14c671f3cbb56a08d765bd992e4cf774a0d1353 (diff)
download	Goldfarmer-36e2bf88e62ebb2865c0744a9d8c85bd372fbb69.tar.gz