From e051abbfdbf7ff721bf1318bf0b5939741b1f792 Mon Sep 17 00:00:00 2001
From: Peter Wu <peter@lekensteyn.nl>
Date: Mon, 26 May 2014 11:36:25 +0200
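Subject: Normalize CRLF line endings

Line-ending-only change: every line of the three files listed below is
rewritten; there are no other content differences.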
---
 nbproject/configs/such_database.properties |   2 +-
 nbproject/project.properties               | 162 +++----
 src/main/Analyzor.java                     | 690 ++++++++++++++---------------
 3 files changed, 427 insertions(+), 427 deletions(-)

diff --git a/nbproject/configs/such_database.properties b/nbproject/configs/such_database.properties
index bba41ec..9dffee6 100644
--- a/nbproject/configs/such_database.properties
+++ b/nbproject/configs/such_database.properties
@@ -1 +1 @@
-$label=such database
+$label=such database
diff --git a/nbproject/project.properties b/nbproject/project.properties
index b262ab6..ab8ae05 100644
--- a/nbproject/project.properties
+++ b/nbproject/project.properties
@@ -1,81 +1,81 @@
-annotation.processing.enabled=true
-annotation.processing.enabled.in.editor=false
-annotation.processing.processors.list=
-annotation.processing.run.all.processors=true
-annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
-application.title=Goldfarmer
-application.vendor=maurice
-build.classes.dir=${build.dir}/classes
-build.classes.excludes=**/*.java,**/*.form
-# This directory is removed when the project is cleaned:
-build.dir=build
-build.generated.dir=${build.dir}/generated
-build.generated.sources.dir=${build.dir}/generated-sources
-# Only compile against the classpath explicitly listed here:
-build.sysclasspath=ignore
-build.test.classes.dir=${build.dir}/test/classes
-build.test.results.dir=${build.dir}/test/results
-# Uncomment to specify the preferred debugger connection transport:
-#debug.transport=dt_socket
-debug.classpath=\
-    ${run.classpath}
-debug.test.classpath=\
-    ${run.test.classpath}
-# Files in build.classes.dir which should be excluded from distribution jar
-dist.archive.excludes=
-# This directory is removed when the project is cleaned:
-dist.dir=dist
-dist.jar=${dist.dir}/Goldfarmer.jar
-dist.javadoc.dir=${dist.dir}/javadoc
-endorsed.classpath=
-excludes=
-file.reference.joda-time-2.3.jar=lib/joda-time-2.3.jar
-file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar
-includes=**
-jar.compress=false
-javac.classpath=\
-    ${file.reference.joda-time-2.3.jar}:\
-    ${file.reference.postgresql-9.3-1101.jdbc41.jar}
-# Space-separated list of extra javac options
-javac.compilerargs=
-javac.deprecation=false
-javac.processorpath=\
-    ${javac.classpath}
-javac.source=1.7
-javac.target=1.7
-javac.test.classpath=\
-    ${javac.classpath}:\
-    ${build.classes.dir}:\
-    ${libs.junit_4.classpath}
-javac.test.processorpath=\
-    ${javac.test.classpath}
-javadoc.additionalparam=
-javadoc.author=false
-javadoc.encoding=${source.encoding}
-javadoc.noindex=false
-javadoc.nonavbar=false
-javadoc.notree=false
-javadoc.private=false
-javadoc.splitindex=true
-javadoc.use=true
-javadoc.version=false
-javadoc.windowtitle=
-main.class=main.Main
-manifest.file=manifest.mf
-meta.inf.dir=${src.dir}/META-INF
-mkdist.disabled=false
-platform.active=default_platform
-project.licensePath=./nbproject/licenseheader.txt
-run.classpath=\
-    ${javac.classpath}:\
-    ${build.classes.dir}
-# Space-separated list of JVM arguments used when running the project.
-# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value.
-# To set system properties for unit tests define test-sys-prop.name=value:
-run.jvmargs=
-run.test.classpath=\
-    ${javac.test.classpath}:\
-    ${build.test.classes.dir}
-source.encoding=UTF-8
-src.dir=src
-test.src.dir=test
+annotation.processing.enabled=true
+annotation.processing.enabled.in.editor=false
+annotation.processing.processors.list=
+annotation.processing.run.all.processors=true
+annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
+application.title=Goldfarmer
+application.vendor=maurice
+build.classes.dir=${build.dir}/classes
+build.classes.excludes=**/*.java,**/*.form
+# This directory is removed when the project is cleaned:
+build.dir=build
+build.generated.dir=${build.dir}/generated
+build.generated.sources.dir=${build.dir}/generated-sources
+# Only compile against the classpath explicitly listed here:
+build.sysclasspath=ignore
+build.test.classes.dir=${build.dir}/test/classes
+build.test.results.dir=${build.dir}/test/results
+# Uncomment to specify the preferred debugger connection transport:
+#debug.transport=dt_socket
+debug.classpath=\
+    ${run.classpath}
+debug.test.classpath=\
+    ${run.test.classpath}
+# Files in build.classes.dir which should be excluded from distribution jar
+dist.archive.excludes=
+# This directory is removed when the project is cleaned:
+dist.dir=dist
+dist.jar=${dist.dir}/Goldfarmer.jar
+dist.javadoc.dir=${dist.dir}/javadoc
+endorsed.classpath=
+excludes=
+file.reference.joda-time-2.3.jar=lib/joda-time-2.3.jar
+file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar
+includes=**
+jar.compress=false
+javac.classpath=\
+    ${file.reference.joda-time-2.3.jar}:\
+    ${file.reference.postgresql-9.3-1101.jdbc41.jar}
+# Space-separated list of extra javac options
+javac.compilerargs=
+javac.deprecation=false
+javac.processorpath=\
+    ${javac.classpath}
+javac.source=1.7
+javac.target=1.7
+javac.test.classpath=\
+    ${javac.classpath}:\
+    ${build.classes.dir}:\
+    ${libs.junit_4.classpath}
+javac.test.processorpath=\
+    ${javac.test.classpath}
+javadoc.additionalparam=
+javadoc.author=false
+javadoc.encoding=${source.encoding}
+javadoc.noindex=false
+javadoc.nonavbar=false
+javadoc.notree=false
+javadoc.private=false
+javadoc.splitindex=true
+javadoc.use=true
+javadoc.version=false
+javadoc.windowtitle=
+main.class=main.Main
+manifest.file=manifest.mf
+meta.inf.dir=${src.dir}/META-INF
+mkdist.disabled=false
+platform.active=default_platform
+project.licensePath=./nbproject/licenseheader.txt
+run.classpath=\
+    ${javac.classpath}:\
+    ${build.classes.dir}
+# Space-separated list of JVM arguments used when running the project.
+# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value.
+# To set system properties for unit tests define test-sys-prop.name=value:
+run.jvmargs=
+run.test.classpath=\
+    ${javac.test.classpath}:\
+    ${build.test.classes.dir}
+source.encoding=UTF-8
+src.dir=src
+test.src.dir=test
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 0c3ede3..9c98a9d 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -1,345 +1,345 @@
-package main;
-
-import analysis.BrandChecker;
-import database.NamedPreparedStatement;
-import database.QueryUtils;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.UnsupportedEncodingException;
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.List;
-import java.util.HashMap;
-import java.util.Map.Entry;
-import java.util.Scanner;
-
-/**
- * The sentiment analysis class that rates tweets based on a unigram and bigram
- * set of weights.
- */
-public class Analyzor {
-
-    /**
-     * The map that matches single words to their weights.
-     */
-    private final HashMap<String, Double> unimap = new HashMap();
-
-    /**
-     * The map that matches word pairs to their weights.
-     */
-    private final HashMap<String, Double> bimap = new HashMap();
-
-    /**
-     * The results of a query, maybe return from query().
-     */
-    private ResultSet data;
-
-    /**
-     * The persistent connection to the database.
-     */
-    private final Connection connection;
-
-    /**
-     * @param connection An open connection to the database.
-     */
-    public Analyzor(Connection connection) {
-        this.connection = connection;
-    }
-
-    /**
-     * Read the unigram and bigram lexica.
-     *
-     * @throws FileNotFoundException
-     */
-    public void readLexicon() throws FileNotFoundException {
-        if (!unimap.isEmpty()) {
-            // data is already read.
-            return;
-        }
-        System.err.println("Trying to read lexicons...");
-        // A unigram is in the format (WS = whitespace):
-        // word <WS> rating <WS> ??? <WS> ??
-        // A bigram has an two WS-separated words instead of one.
-        try (Scanner uniScanner = new Scanner(new File("unigrams-pmilexicon.txt"));
-                Scanner biScanner = new Scanner(new File("bigrams-pmilexicon.txt"));) {
-            //Fill the map of unigrams
-            int lineno = 1;
-            while (uniScanner.hasNext()) {
-
-                String words = uniScanner.next();
-                Double d = Double.valueOf(uniScanner.next());
-                unimap.put(words.toLowerCase(), d);
-                if (uniScanner.hasNextLine()) {
-                    uniScanner.nextLine();
-                }
-                lineno++;
-
-            }
-
-            //fill the map of bigrams
-            while (biScanner.hasNext()) {
-                String words = biScanner.next() + " " + biScanner.next();
-                bimap.put(words.toLowerCase(), Double.valueOf(biScanner.next()));
-                if (biScanner.hasNextLine()) {
-                    biScanner.nextLine();
-                }
-            }
-        }
-        System.err.println("Lexicons are read.");
-    }
-
-    /**
-     * Executes a query that the analyzer can analyze.
-     *
-     * @param query The query string to execute.
-     * @throws SQLException When database connection isn't available.
-     */
-    public void query(String query) throws SQLException {
-        PreparedStatement statement;
-        //make a connection to the database and execute the query
-        statement = connection.prepareStatement(query);
-        data = statement.executeQuery();
-    }
-
-    /**
-     * Run a sentiment analysis and fill the database with the output.
-     *
-     * @param query The sql text for the query.
-     * @throws SQLException
-     * @throws IOException
-     */
-    public void sentimentAnalysis(String query) throws SQLException, IOException {
-        query(query);
-
-        //read the lexicons
-        readLexicon();
-
-        //go to the start of te dataset
-        if (data == null) {
-            System.err.println("data is empty, try querying first");
-            return;
-        }
-
-        Double value;
-        String text;
-
-        //for all tuples
-        while (data.next()) {
-            //get the text
-            text = data.getString("text");
-            text = splitPunctToWords(text);
-            // test is the tweet text you are going to analyze
-            String[] words = text.split("\\s+"); // text splitted into separate words
-            double positiverate = 0; // positive rating
-
-            // Rate the text with unigrams
-            for (String word : words) {
-                value = unimap.get(word);
-                if (value != null) {
-                    positiverate += unimap.get(word);
-                }
-            }
-            // Rate the text with bigrams
-            for (int i = 0; i < words.length - 1; i++) {
-                String pair = words[i] + " " + words[i + 1];
-                value = bimap.get(pair);
-                if (value != null) {
-                    positiverate += bimap.get(pair);
-                }
-            }
-            //insert the rating into the database
-            NamedPreparedStatement m_insertRating;
-            m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating);
-            QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"), data.getString("brand"), (int) (positiverate * 10));
-            m_insertRating.executeUpdate();
-            //don't print the rate
-            //System.out.println(text + ": " + (int) (positiverate * 10));
-        }
-    }
-
-    /**
-     * Make a wordcloud of the results of some query.
-     *
-     * @param query The sql text for a query.
-     * @throws SQLException
-     * @throws FileNotFoundException
-     * @throws UnsupportedEncodingException
-     */
-    public void makeWordCloud(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
-
-        query(query);
-        //go to the start of the ResultSet data
-        if (data == null) {
-            System.err.println("data is empty, try querying first");
-            return;
-        }
-
-        String text;
-        String brand;
-        String[] words;
-        HashMap<String,HashMap<String, Integer>> wordcloud = new HashMap<>();
-
-        while (data.next()) {
-            //get brand
-            brand=data.getString("brand");
-            //make hashmap for each brand
-            if(!wordcloud.containsKey(brand)){
-                wordcloud.put(brand, new HashMap<String,Integer>());
-            }
-            //get the text
-            text = data.getString("text");
-            //remove punctuation, convert to lowercase and split on words
-            text = removePunct(text);
-            text = text.toLowerCase();
-            words = text.split("\\s+");
-            //for all words
-            for (String word : words) {
-                //if it is empty, a space or a stripe, skip it
-                if(word.equals("") || word.equals(" ") || word.equals("-")){
-                    continue;
-                }
-                //if the word is already in the map, increment the amount
-                if(wordcloud.get(brand).containsKey(word)){
-                    wordcloud.get(brand).put(word, wordcloud.get(brand).get(word) + 1);
-                }
-                //if the word is not already in the map, make an entry with amount = 1
-                else{
-                    wordcloud.get(brand).put(word, 1);
-                }
-            }
-        }
-        //print the words and their frequency in a csv file
-        mapToCSV(wordcloud, "wordcloud.csv", "brand,word,count");
-    }
-
-    //generate csv for disco from the query
-    public void disco(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
-        //do the query
-        query(query);
-        PrintWriter writer = new PrintWriter("output.csv", "UTF-8");
-        //print the first row
-        for (int i = 1; i < data.getMetaData().getColumnCount(); i++) {
-            writer.print(data.getMetaData().getColumnLabel(i) + ", ");
-        }
-        writer.println(data.getMetaData().getColumnLabel(data.getMetaData().getColumnCount()));
-        //print the values
-        while (data.next()) {
-            for (int i = 1; i < data.getMetaData().getColumnCount(); i++) {
-                if (data.getObject(i) == null) {
-                    writer.print(", ");
-                } else {
-                    writer.print(data.getObject(i).toString().replaceAll("[,\n]", " ") + ", ");
-                }
-            }
-            if (data.getObject(data.getMetaData().getColumnCount()) == null) {
-                writer.println("0");
-            } else {
-                writer.println(data.getObject(data.getMetaData().getColumnCount()).toString().replace(",", " "));
-            }
-        }
-        writer.close();
-    }
-
-    public void getBrands() throws SQLException {
-        PreparedStatement statement;
-        //make a connection to the database and execute the query
-        statement = connection.prepareStatement("delete from mentionsbrand");
-        statement.executeUpdate();
-        BrandChecker checker = new BrandChecker("brandonlyrules.txt");
-        query("select * from tweet");
-        NamedPreparedStatement m_insertBrand = new NamedPreparedStatement(connection, QueryUtils.insertBrand);
-        while (data.next()) {
-            List<String> brands = checker.getBrands(data.getString("text"));
-            if (brands.isEmpty()) {
-                QueryUtils.setInsertBrandParams(m_insertBrand, data.getLong("tweetid"), "no");
-                m_insertBrand.executeUpdate();
-            } else {
-                for (String brand : brands) {
-                    QueryUtils.setInsertBrandParams(m_insertBrand, data.getLong("tweetid"), brand);
-                    m_insertBrand.executeUpdate();
-                }
-            }
-        }
-    }
-
-    //gets the amount of users that tweet about a brand in a timezone
-    //makes a csv file timezone, brand, amount
-    public void timezone(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
-        query(query);
-        //hashmap timezone, brand, amount
-        HashMap<String, HashMap<String, Integer>> timeMap = new HashMap<>();
-        String timezone;
-        String brand;
-        
-        while(data.next()){
-            timezone = data.getString("timezone");
-            brand = data.getString("brand");
-            //if the timezone is already in the map
-            if(timeMap.containsKey(timezone)){
-                //if the brand for that timezone is already in the map
-                if(timeMap.get(timezone).containsKey(brand)){
-                    //increment the amount
-                    timeMap.get(timezone).put(brand, timeMap.get(timezone).get(brand) + 1);
-                }
-                //if the brand for that timezone is not yet in the map
-                else{
-                    //make a new entry for that brand with amount = 1
-                    timeMap.get(timezone).put(brand, 1);
-                }
-            }
-            //if the timezone is not yet in the map
-            else{
-                //make a new hashmap for this map and fill it with the brand and the amount
-                timeMap.put(timezone, new HashMap<String, Integer>());
-                timeMap.get(timezone).put(brand, 1);
-            }
-        }
-        //make the CSV out of the map
-        mapToCSV(timeMap, "timezone.csv", "timezone,brand,count");
-    }
-    
-    //replaces punctuation so it will be splitted
-    //also removes urls
-    private String splitPunctToWords(String text) {
-        text = text.replaceAll("https?://\\S*", "");
-        text = text.replaceAll("[!?):;\"']", " $0");
-        text = text.replaceAll("[.,-](\\s|$)", " $0");
-        text = text.replaceAll("\\s[(\"']", "$0 ");
-        return text;
-    }
-
-    //removes punctuation
-    //also removes urls
-    private String removePunct(String text) {
-        text = text.replaceAll("https?://\\S*", " ");
-        text = text.replaceAll("@\\S*", " ");
-        text = text.replaceAll("[^a-zA-Z0-9#_-]", " ");
-        return text;
-    }
-    
-    //prints a hashmap into a csv for a html application
-    //Hashmap<key1, HashMap<key2, value>> becomes key1, key2, value
-    //only for String, String, Integer
-    void mapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine) 
-                throws FileNotFoundException, UnsupportedEncodingException{
-        
-        PrintWriter writer = new PrintWriter(fileName, "UTF-8");
-        
-        writer.println(firstLine);
-          
-        //loop over brands
-        for(Entry en : map.entrySet()){
-            //loop over words
-            for(Entry e : map.get(en.getKey()).entrySet()){
-                writer.println(en.getKey() + "," + e.getKey() + "," + e.getValue());
-            }
-        }
-        
-        writer.close();
-        System.out.println("csv file made, please put it next to html file and run this");
-    }
-}
+package main;
+
+import analysis.BrandChecker;
+import database.NamedPreparedStatement;
+import database.QueryUtils;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.UnsupportedEncodingException;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.Scanner;
+
+/**
+ * The sentiment analysis class that rates tweets based on a unigram and bigram
+ * set of weights.
+ */
+public class Analyzor {
+
+    /**
+     * The map that matches single words to their weights.
+     */
+    private final HashMap<String, Double> unimap = new HashMap();
+
+    /**
+     * The map that matches word pairs to their weights.
+     */
+    private final HashMap<String, Double> bimap = new HashMap();
+
+    /**
+     * The results of a query, maybe return from query().
+     */
+    private ResultSet data;
+
+    /**
+     * The persistent connection to the database.
+     */
+    private final Connection connection;
+
+    /**
+     * @param connection An open connection to the database.
+     */
+    public Analyzor(Connection connection) {
+        this.connection = connection;
+    }
+
+    /**
+     * Read the unigram and bigram lexica.
+     *
+     * @throws FileNotFoundException
+     */
+    public void readLexicon() throws FileNotFoundException {
+        if (!unimap.isEmpty()) {
+            // data is already read.
+            return;
+        }
+        System.err.println("Trying to read lexicons...");
+        // A unigram is in the format (WS = whitespace):
+        // word <WS> rating <WS> ??? <WS> ??
+        // A bigram has an two WS-separated words instead of one.
+        try (Scanner uniScanner = new Scanner(new File("unigrams-pmilexicon.txt"));
+                Scanner biScanner = new Scanner(new File("bigrams-pmilexicon.txt"));) {
+            //Fill the map of unigrams
+            int lineno = 1;
+            while (uniScanner.hasNext()) {
+
+                String words = uniScanner.next();
+                Double d = Double.valueOf(uniScanner.next());
+                unimap.put(words.toLowerCase(), d);
+                if (uniScanner.hasNextLine()) {
+                    uniScanner.nextLine();
+                }
+                lineno++;
+
+            }
+
+            //fill the map of bigrams
+            while (biScanner.hasNext()) {
+                String words = biScanner.next() + " " + biScanner.next();
+                bimap.put(words.toLowerCase(), Double.valueOf(biScanner.next()));
+                if (biScanner.hasNextLine()) {
+                    biScanner.nextLine();
+                }
+            }
+        }
+        System.err.println("Lexicons are read.");
+    }
+
+    /**
+     * Executes a query that the analyzer can analyze.
+     *
+     * @param query The query string to execute.
+     * @throws SQLException When database connection isn't available.
+     */
+    public void query(String query) throws SQLException {
+        PreparedStatement statement;
+        //make a connection to the database and execute the query
+        statement = connection.prepareStatement(query);
+        data = statement.executeQuery();
+    }
+
+    /**
+     * Run a sentiment analysis and fill the database with the output.
+     *
+     * @param query The sql text for the query.
+     * @throws SQLException
+     * @throws IOException
+     */
+    public void sentimentAnalysis(String query) throws SQLException, IOException {
+        query(query);
+
+        //read the lexicons
+        readLexicon();
+
+        //go to the start of te dataset
+        if (data == null) {
+            System.err.println("data is empty, try querying first");
+            return;
+        }
+
+        Double value;
+        String text;
+
+        //for all tuples
+        while (data.next()) {
+            //get the text
+            text = data.getString("text");
+            text = splitPunctToWords(text);
+            // test is the tweet text you are going to analyze
+            String[] words = text.split("\\s+"); // text splitted into separate words
+            double positiverate = 0; // positive rating
+
+            // Rate the text with unigrams
+            for (String word : words) {
+                value = unimap.get(word);
+                if (value != null) {
+                    positiverate += unimap.get(word);
+                }
+            }
+            // Rate the text with bigrams
+            for (int i = 0; i < words.length - 1; i++) {
+                String pair = words[i] + " " + words[i + 1];
+                value = bimap.get(pair);
+                if (value != null) {
+                    positiverate += bimap.get(pair);
+                }
+            }
+            //insert the rating into the database
+            NamedPreparedStatement m_insertRating;
+            m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating);
+            QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"), data.getString("brand"), (int) (positiverate * 10));
+            m_insertRating.executeUpdate();
+            //don't print the rate
+            //System.out.println(text + ": " + (int) (positiverate * 10));
+        }
+    }
+
+    /**
+     * Make a wordcloud of the results of some query.
+     *
+     * @param query The sql text for a query.
+     * @throws SQLException
+     * @throws FileNotFoundException
+     * @throws UnsupportedEncodingException
+     */
+    public void makeWordCloud(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
+
+        query(query);
+        //go to the start of the ResultSet data
+        if (data == null) {
+            System.err.println("data is empty, try querying first");
+            return;
+        }
+
+        String text;
+        String brand;
+        String[] words;
+        HashMap<String,HashMap<String, Integer>> wordcloud = new HashMap<>();
+
+        while (data.next()) {
+            //get brand
+            brand=data.getString("brand");
+            //make hashmap for each brand
+            if(!wordcloud.containsKey(brand)){
+                wordcloud.put(brand, new HashMap<String,Integer>());
+            }
+            //get the text
+            text = data.getString("text");
+            //remove punctuation, convert to lowercase and split on words
+            text = removePunct(text);
+            text = text.toLowerCase();
+            words = text.split("\\s+");
+            //for all words
+            for (String word : words) {
+                //if it is empty, a space or a stripe, skip it
+                if(word.equals("") || word.equals(" ") || word.equals("-")){
+                    continue;
+                }
+                //if the word is already in the map, increment the amount
+                if(wordcloud.get(brand).containsKey(word)){
+                    wordcloud.get(brand).put(word, wordcloud.get(brand).get(word) + 1);
+                }
+                //if the word is not already in the map, make an entry with amount = 1
+                else{
+                    wordcloud.get(brand).put(word, 1);
+                }
+            }
+        }
+        //print the words and their frequency in a csv file
+        mapToCSV(wordcloud, "wordcloud.csv", "brand,word,count");
+    }
+
+    //generate csv for disco from the query
+    public void disco(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
+        //do the query
+        query(query);
+        PrintWriter writer = new PrintWriter("output.csv", "UTF-8");
+        //print the first row
+        for (int i = 1; i < data.getMetaData().getColumnCount(); i++) {
+            writer.print(data.getMetaData().getColumnLabel(i) + ", ");
+        }
+        writer.println(data.getMetaData().getColumnLabel(data.getMetaData().getColumnCount()));
+        //print the values
+        while (data.next()) {
+            for (int i = 1; i < data.getMetaData().getColumnCount(); i++) {
+                if (data.getObject(i) == null) {
+                    writer.print(", ");
+                } else {
+                    writer.print(data.getObject(i).toString().replaceAll("[,\n]", " ") + ", ");
+                }
+            }
+            if (data.getObject(data.getMetaData().getColumnCount()) == null) {
+                writer.println("0");
+            } else {
+                writer.println(data.getObject(data.getMetaData().getColumnCount()).toString().replace(",", " "));
+            }
+        }
+        writer.close();
+    }
+
+    public void getBrands() throws SQLException {
+        PreparedStatement statement;
+        //make a connection to the database and execute the query
+        statement = connection.prepareStatement("delete from mentionsbrand");
+        statement.executeUpdate();
+        BrandChecker checker = new BrandChecker("brandonlyrules.txt");
+        query("select * from tweet");
+        NamedPreparedStatement m_insertBrand = new NamedPreparedStatement(connection, QueryUtils.insertBrand);
+        while (data.next()) {
+            List<String> brands = checker.getBrands(data.getString("text"));
+            if (brands.isEmpty()) {
+                QueryUtils.setInsertBrandParams(m_insertBrand, data.getLong("tweetid"), "no");
+                m_insertBrand.executeUpdate();
+            } else {
+                for (String brand : brands) {
+                    QueryUtils.setInsertBrandParams(m_insertBrand, data.getLong("tweetid"), brand);
+                    m_insertBrand.executeUpdate();
+                }
+            }
+        }
+    }
+
+    //gets the amount of users that tweet about a brand in a timezone
+    //makes a csv file timezone, brand, amount
+    public void timezone(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+        query(query);
+        //hashmap timezone, brand, amount
+        HashMap<String, HashMap<String, Integer>> timeMap = new HashMap<>();
+        String timezone;
+        String brand;
+        
+        while(data.next()){
+            timezone = data.getString("timezone");
+            brand = data.getString("brand");
+            //if the timezone is already in the map
+            if(timeMap.containsKey(timezone)){
+                //if the brand for that timezone is already in the map
+                if(timeMap.get(timezone).containsKey(brand)){
+                    //increment the amount
+                    timeMap.get(timezone).put(brand, timeMap.get(timezone).get(brand) + 1);
+                }
+                //if the brand for that timezone is not yet in the map
+                else{
+                    //make a new entry for that brand with amount = 1
+                    timeMap.get(timezone).put(brand, 1);
+                }
+            }
+            //if the timezone is not yet in the map
+            else{
+                //make a new hashmap for this map and fill it with the brand and the amount
+                timeMap.put(timezone, new HashMap<String, Integer>());
+                timeMap.get(timezone).put(brand, 1);
+            }
+        }
+        //make the CSV out of the map
+        mapToCSV(timeMap, "timezone.csv", "timezone,brand,count");
+    }
+    
+    //replaces punctuation so it will be splitted
+    //also removes urls
+    private String splitPunctToWords(String text) {
+        text = text.replaceAll("https?://\\S*", "");
+        text = text.replaceAll("[!?):;\"']", " $0");
+        text = text.replaceAll("[.,-](\\s|$)", " $0");
+        text = text.replaceAll("\\s[(\"']", "$0 ");
+        return text;
+    }
+
+    //removes punctuation
+    //also removes urls
+    private String removePunct(String text) {
+        text = text.replaceAll("https?://\\S*", " ");
+        text = text.replaceAll("@\\S*", " ");
+        text = text.replaceAll("[^a-zA-Z0-9#_-]", " ");
+        return text;
+    }
+    
+    //prints a hashmap into a csv for a html application
+    //Hashmap<key1, HashMap<key2, value>> becomes key1, key2, value
+    //only for String, String, Integer
+    void mapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine) 
+                throws FileNotFoundException, UnsupportedEncodingException{
+        
+        PrintWriter writer = new PrintWriter(fileName, "UTF-8");
+        
+        writer.println(firstLine);
+          
+        //loop over brands
+        for(Entry en : map.entrySet()){
+            //loop over words
+            for(Entry e : map.get(en.getKey()).entrySet()){
+                writer.println(en.getKey() + "," + e.getKey() + "," + e.getValue());
+            }
+        }
+        
+        writer.close();
+        System.out.println("csv file made, please put it next to html file and run this");
+    }
+}
-- 
cgit v1.2.1


From 7df2ae452a984cb12986b33034557476cb4a1536 Mon Sep 17 00:00:00 2001
From: Peter Wu <peter@lekensteyn.nl>
Date: Mon, 26 May 2014 12:02:08 +0200
Subject: Optimize sentiment analysis

Do not create a new prepared statement every time. Use an optimized
UPDATE query. Drop requirement for supplying a query.
---
 src/database/NamedPreparedStatement.java | 11 +++++++++++
 src/main/Analyzor.java                   | 33 +++++++++++++++++++-------------
 src/main/FarmShell.java                  | 11 ++++++++---
 3 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/src/database/NamedPreparedStatement.java b/src/database/NamedPreparedStatement.java
index ebb775b..9305d32 100644
--- a/src/database/NamedPreparedStatement.java
+++ b/src/database/NamedPreparedStatement.java
@@ -2,6 +2,7 @@ package database;
 
 import java.sql.Connection;
 import java.sql.PreparedStatement;
+import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Timestamp;
 import java.sql.Types;
@@ -104,4 +105,14 @@ public class NamedPreparedStatement {
             throw ex;
         }
     }
+
+    public ResultSet executeQuery() throws SQLException {
+        try {
+            return getStmt().executeQuery();
+        } catch (SQLException ex) {
+            System.err.println("Query error: " + ex.getMessage());
+            System.err.println(stmt);
+            throw ex;
+        }
+    }
 }
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 9c98a9d..5385a79 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -113,24 +113,33 @@ public class Analyzor {
      * @throws IOException
      */
     public void sentimentAnalysis(String query) throws SQLException, IOException {
-        query(query);
+        NamedPreparedStatement tweetBrandStmt, updateRating;
 
         //read the lexicons
         readLexicon();
 
-        //go to the start of te dataset
-        if (data == null) {
-            System.err.println("data is empty, try querying first");
-            return;
+        // if you ever need to re-apply rating, use something like:
+        // UPDATE mentionsbrand SET rating = NULL WHERE ...
+        if (query.isEmpty()) {
+            query = "SELECT t.tweetid, t.text, b.brand FROM tweet t "
+                    + "JOIN mentionsbrand b USING (tweetid) "
+                    + "WHERE b.rating IS NULL";
         }
+        tweetBrandStmt = new NamedPreparedStatement(connection,
+                query);
+        ResultSet tweetBrandResults = tweetBrandStmt.executeQuery();
+
+        updateRating = new NamedPreparedStatement(connection,
+                "UPDATE mentionsbrand SET rating = :rating "
+                + "WHERE tweetid = :tweetid AND brand = :brand");
 
         Double value;
         String text;
 
         //for all tuples
-        while (data.next()) {
+        while (tweetBrandResults.next()) {
             //get the text
-            text = data.getString("text");
+            text = tweetBrandResults.getString("text");
             text = splitPunctToWords(text);
             // test is the tweet text you are going to analyze
             String[] words = text.split("\\s+"); // text splitted into separate words
@@ -152,12 +161,10 @@ public class Analyzor {
                 }
             }
             //insert the rating into the database
-            NamedPreparedStatement m_insertRating;
-            m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating);
-            QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"), data.getString("brand"), (int) (positiverate * 10));
-            m_insertRating.executeUpdate();
-            //don't print the rate
-            //System.out.println(text + ": " + (int) (positiverate * 10));
+            updateRating.setLong("tweetid", tweetBrandResults.getLong("tweetid"));
+            updateRating.setString("brand", tweetBrandResults.getString("brand"));
+            updateRating.setInt("rating", (int) (positiverate * 10));
+            updateRating.executeUpdate();
         }
     }
 
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
index 1266fd3..ed1a0ff 100644
--- a/src/main/FarmShell.java
+++ b/src/main/FarmShell.java
@@ -125,7 +125,12 @@ public class FarmShell {
                 System.out.println("not yet implemented");
                 break;
             case sentiment:
-                getAnalyzor().sentimentAnalysis(params[0]);
+                // if there is no query, update all unrated items.
+                if (params.length > 0) {
+                    getAnalyzor().sentimentAnalysis(params[0]);
+                } else {
+                    getAnalyzor().sentimentAnalysis("");
+                }
                 break;
             case wordcloud:
                 getAnalyzor().makeWordCloud(params[0]);
@@ -163,10 +168,10 @@ public class FarmShell {
     enum Command {
 
         filterbots("marks all users as bot or not", 1),
-        sentiment("analyzes all tweets on positivity (about a brand)", 1),
+        sentiment("analyzes all tweets on brand positivity (optional arg: tweet/brand selection query)"),
         wordcloud("makes a wordcloud of the text of the tweets", 1),
         getBrands("fills the database with the brands of a tweet"),
-        disco("makes a outputfile for disco",1),
+        disco("makes a outputfile for disco", 1),
         exit("Returns to shell"),
         help("Get help");
 
-- 
cgit v1.2.1


From 00baf4ffc86dac7b723c4fc3d2c963d1fa84729b Mon Sep 17 00:00:00 2001
From: Peter Wu <peter@lekensteyn.nl>
Date: Mon, 26 May 2014 12:03:33 +0200
Subject: Formatting, drop useless license header

Sam Hocevar doesn't know us, nor does he hold any copyright on this
work...
---
 src/analysis/BrandChecker.java      | 32 ++++++---------------
 src/database/QueryUtils.java        |  8 +++---
 src/main/Analyzor.java              | 57 ++++++++++++++++++-------------------
 test/analysis/BrandCheckerTest.java |  4 +--
 4 files changed, 42 insertions(+), 59 deletions(-)

diff --git a/src/analysis/BrandChecker.java b/src/analysis/BrandChecker.java
index 10e22b4..ee9c7b4 100644
--- a/src/analysis/BrandChecker.java
+++ b/src/analysis/BrandChecker.java
@@ -1,17 +1,3 @@
-/*
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE Version 2, December 2004
- *
- * Copyright (C) 2004 Sam Hocevar
- *
- * Everyone is permitted to copy and distribute verbatim or modified copies 
- * of this license document, and changing it is allowed as long as the name is 
- * changed.
- *
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, 
- * DISTRIBUTION AND MODIFICATION
- *
- * 0. You just DO WHAT THE FUCK YOU WANT TO.
- */
 package analysis;
 
 import java.io.FileInputStream;
@@ -93,7 +79,7 @@ public class BrandChecker {
         if (line.isEmpty()) {
             return;
         }
-        
+
         if (!line.contains("-")) {
             System.err.println("illformatted rule: " + line + ", missing -");
         } else {
@@ -110,13 +96,13 @@ public class BrandChecker {
 
             // Read the <name> line.
             String name = parts[0].trim();
-            
+
             // Read the positive words.
-            String positive = parts[1].replaceAll(" ","");            
+            String positive = parts[1].replaceAll(" ", "");
             String[] sequence = positive.split(",");
-            
+
             if (parts.length == 3) {
-                String negative = parts[2].replaceAll(" ", "");                
+                String negative = parts[2].replaceAll(" ", "");
                 String[] blacklist = negative.split(",");
                 ruleset.add(new BrandRule(name, sequence, blacklist));
             } else {
@@ -139,7 +125,7 @@ public class BrandChecker {
          * The words that should be in the text.
          */
         private final HashMap<String, Boolean> names;
-        
+
         /**
          * A blacklist of words that are not interesting.
          */
@@ -164,7 +150,7 @@ public class BrandChecker {
             } else {
                 this.blacklist = null;
             }
-            
+
             for (String name : names) {
                 this.names.put(name, Boolean.FALSE);
             }
@@ -177,7 +163,7 @@ public class BrandChecker {
          */
         public boolean analyze(String[] words) {
             reset();
-            
+
             int found = 0;
 
             for (String word : words) {
@@ -201,7 +187,7 @@ public class BrandChecker {
         public String getBrand() {
             return brand;
         }
-        
+
         private void reset() {
             for (String name : this.names.keySet()) {
                 this.names.put(name, Boolean.FALSE);
diff --git a/src/database/QueryUtils.java b/src/database/QueryUtils.java
index 2cc6fd6..b95903f 100644
--- a/src/database/QueryUtils.java
+++ b/src/database/QueryUtils.java
@@ -1,7 +1,6 @@
 package database;
 
 import java.sql.SQLException;
-import java.util.Locale;
 
 /**
  * Utilities to create queries.
@@ -9,8 +8,9 @@ import java.util.Locale;
  * @author Maurice Laveaux
  */
 public class QueryUtils {
-    public final static String insertRating 
-            = buildQuery("mentionsbrand", new String[]{"tweetid","brand"},"tweetid","brand", "rating");
+
+    public final static String insertRating
+            = buildQuery("mentionsbrand", new String[]{"tweetid", "brand"}, "tweetid", "brand", "rating");
     public final static String insertProfile
             = buildQuery("twitteruser", new String[]{"userid"},
             "userid", "displayname", "timezone", "tweetcount", "followercount",
@@ -96,7 +96,7 @@ public class QueryUtils {
         statement.setLong("tweetid", tweetid);
         statement.setInt("rating", rating);
         statement.setString("brand", brand);
-        
+
     }
 
     public static void setInsertBrandParams(NamedPreparedStatement brandStmt,
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 5385a79..ffd9a5b 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -188,14 +188,14 @@ public class Analyzor {
         String text;
         String brand;
         String[] words;
-        HashMap<String,HashMap<String, Integer>> wordcloud = new HashMap<>();
+        HashMap<String, HashMap<String, Integer>> wordcloud = new HashMap<>();
 
         while (data.next()) {
             //get brand
-            brand=data.getString("brand");
+            brand = data.getString("brand");
             //make hashmap for each brand
-            if(!wordcloud.containsKey(brand)){
-                wordcloud.put(brand, new HashMap<String,Integer>());
+            if (!wordcloud.containsKey(brand)) {
+                wordcloud.put(brand, new HashMap<String, Integer>());
             }
             //get the text
             text = data.getString("text");
@@ -206,15 +206,14 @@ public class Analyzor {
             //for all words
             for (String word : words) {
                 //if it is empty, a space or a stripe, skip it
-                if(word.equals("") || word.equals(" ") || word.equals("-")){
+                if (word.equals("") || word.equals(" ") || word.equals("-")) {
                     continue;
                 }
                 //if the word is already in the map, increment the amount
-                if(wordcloud.get(brand).containsKey(word)){
+                if (wordcloud.get(brand).containsKey(word)) {
                     wordcloud.get(brand).put(word, wordcloud.get(brand).get(word) + 1);
-                }
-                //if the word is not already in the map, make an entry with amount = 1
-                else{
+                } //if the word is not already in the map, make an entry with amount = 1
+                else {
                     wordcloud.get(brand).put(word, 1);
                 }
             }
@@ -275,31 +274,29 @@ public class Analyzor {
 
     //gets the amount of users that tweet about a brand in a timezone
     //makes a csv file timezone, brand, amount
-    public void timezone(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException{
+    public void timezone(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
         query(query);
         //hashmap timezone, brand, amount
         HashMap<String, HashMap<String, Integer>> timeMap = new HashMap<>();
         String timezone;
         String brand;
-        
-        while(data.next()){
+
+        while (data.next()) {
             timezone = data.getString("timezone");
             brand = data.getString("brand");
             //if the timezone is already in the map
-            if(timeMap.containsKey(timezone)){
+            if (timeMap.containsKey(timezone)) {
                 //if the brand for that timezone is already in the map
-                if(timeMap.get(timezone).containsKey(brand)){
+                if (timeMap.get(timezone).containsKey(brand)) {
                     //increment the amount
                     timeMap.get(timezone).put(brand, timeMap.get(timezone).get(brand) + 1);
-                }
-                //if the brand for that timezone is not yet in the map
-                else{
+                } //if the brand for that timezone is not yet in the map
+                else {
                     //make a new entry for that brand with amount = 1
                     timeMap.get(timezone).put(brand, 1);
                 }
-            }
-            //if the timezone is not yet in the map
-            else{
+            } //if the timezone is not yet in the map
+            else {
                 //make a new hashmap for this map and fill it with the brand and the amount
                 timeMap.put(timezone, new HashMap<String, Integer>());
                 timeMap.get(timezone).put(brand, 1);
@@ -308,7 +305,7 @@ public class Analyzor {
         //make the CSV out of the map
         mapToCSV(timeMap, "timezone.csv", "timezone,brand,count");
     }
-    
+
     //replaces punctuation so it will be splitted
     //also removes urls
     private String splitPunctToWords(String text) {
@@ -327,25 +324,25 @@ public class Analyzor {
         text = text.replaceAll("[^a-zA-Z0-9#_-]", " ");
         return text;
     }
-    
+
     //prints a hashmap into a csv for a html application
     //Hashmap<key1, HashMap<key2, value>> becomes key1, key2, value
     //only for String, String, Integer
-    void mapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine) 
-                throws FileNotFoundException, UnsupportedEncodingException{
-        
+    void mapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine)
+            throws FileNotFoundException, UnsupportedEncodingException {
+
         PrintWriter writer = new PrintWriter(fileName, "UTF-8");
-        
+
         writer.println(firstLine);
-          
+
         //loop over brands
-        for(Entry en : map.entrySet()){
+        for (Entry en : map.entrySet()) {
             //loop over words
-            for(Entry e : map.get(en.getKey()).entrySet()){
+            for (Entry e : map.get(en.getKey()).entrySet()) {
                 writer.println(en.getKey() + "," + e.getKey() + "," + e.getValue());
             }
         }
-        
+
         writer.close();
         System.out.println("csv file made, please put it next to html file and run this");
     }
diff --git a/test/analysis/BrandCheckerTest.java b/test/analysis/BrandCheckerTest.java
index 23d8445..f55035b 100644
--- a/test/analysis/BrandCheckerTest.java
+++ b/test/analysis/BrandCheckerTest.java
@@ -82,10 +82,10 @@ public class BrandCheckerTest {
     public void testBullshit() {
         doTest("This applepie is delicious", new String[]{});
     }
-    
+
     @Test
     public void multipleBrands() {
-        doTest("This tweet contains both iphone 4s,galaxy s5 and iphone", new String[]{"iphone 4s","galaxy s5"});
+        doTest("This tweet contains both iphone 4s,galaxy s5 and iphone", new String[]{"iphone 4s", "galaxy s5"});
     }
 
 }
-- 
cgit v1.2.1


From 9969b6a6cbae322680cfcbc27df3d37b0954f00a Mon Sep 17 00:00:00 2001
From: s123188 <s123188@S123188.campus.tue.nl>
Date: Tue, 27 May 2014 17:55:08 +0200
Subject: changed Analyzor.timezone(String query) so that it adds a legend
 "timezone" so that the visualizer can display a legend

---
 src/main/Analyzor.java | 710 +++++++++++++++++++++++++------------------------
 1 file changed, 361 insertions(+), 349 deletions(-)

diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index ffd9a5b..6369ece 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -1,349 +1,361 @@
-package main;
-
-import analysis.BrandChecker;
-import database.NamedPreparedStatement;
-import database.QueryUtils;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.UnsupportedEncodingException;
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.List;
-import java.util.HashMap;
-import java.util.Map.Entry;
-import java.util.Scanner;
-
-/**
- * The sentiment analysis class that rates tweets based on a unigram and bigram
- * set of weights.
- */
-public class Analyzor {
-
-    /**
-     * The map that matches single words to their weights.
-     */
-    private final HashMap<String, Double> unimap = new HashMap();
-
-    /**
-     * The map that matches word pairs to their weights.
-     */
-    private final HashMap<String, Double> bimap = new HashMap();
-
-    /**
-     * The results of a query, maybe return from query().
-     */
-    private ResultSet data;
-
-    /**
-     * The persistent connection to the database.
-     */
-    private final Connection connection;
-
-    /**
-     * @param connection An open connection to the database.
-     */
-    public Analyzor(Connection connection) {
-        this.connection = connection;
-    }
-
-    /**
-     * Read the unigram and bigram lexica.
-     *
-     * @throws FileNotFoundException
-     */
-    public void readLexicon() throws FileNotFoundException {
-        if (!unimap.isEmpty()) {
-            // data is already read.
-            return;
-        }
-        System.err.println("Trying to read lexicons...");
-        // A unigram is in the format (WS = whitespace):
-        // word <WS> rating <WS> ??? <WS> ??
-        // A bigram has an two WS-separated words instead of one.
-        try (Scanner uniScanner = new Scanner(new File("unigrams-pmilexicon.txt"));
-                Scanner biScanner = new Scanner(new File("bigrams-pmilexicon.txt"));) {
-            //Fill the map of unigrams
-            int lineno = 1;
-            while (uniScanner.hasNext()) {
-
-                String words = uniScanner.next();
-                Double d = Double.valueOf(uniScanner.next());
-                unimap.put(words.toLowerCase(), d);
-                if (uniScanner.hasNextLine()) {
-                    uniScanner.nextLine();
-                }
-                lineno++;
-
-            }
-
-            //fill the map of bigrams
-            while (biScanner.hasNext()) {
-                String words = biScanner.next() + " " + biScanner.next();
-                bimap.put(words.toLowerCase(), Double.valueOf(biScanner.next()));
-                if (biScanner.hasNextLine()) {
-                    biScanner.nextLine();
-                }
-            }
-        }
-        System.err.println("Lexicons are read.");
-    }
-
-    /**
-     * Executes a query that the analyzer can analyze.
-     *
-     * @param query The query string to execute.
-     * @throws SQLException When database connection isn't available.
-     */
-    public void query(String query) throws SQLException {
-        PreparedStatement statement;
-        //make a connection to the database and execute the query
-        statement = connection.prepareStatement(query);
-        data = statement.executeQuery();
-    }
-
-    /**
-     * Run a sentiment analysis and fill the database with the output.
-     *
-     * @param query The sql text for the query.
-     * @throws SQLException
-     * @throws IOException
-     */
-    public void sentimentAnalysis(String query) throws SQLException, IOException {
-        NamedPreparedStatement tweetBrandStmt, updateRating;
-
-        //read the lexicons
-        readLexicon();
-
-        // if you ever need to re-apply rating, use something like:
-        // UPDATE mentionsbrand SET rating = NULL WHERE ...
-        if (query.isEmpty()) {
-            query = "SELECT t.tweetid, t.text, b.brand FROM tweet t "
-                    + "JOIN mentionsbrand b USING (tweetid) "
-                    + "WHERE b.rating IS NULL";
-        }
-        tweetBrandStmt = new NamedPreparedStatement(connection,
-                query);
-        ResultSet tweetBrandResults = tweetBrandStmt.executeQuery();
-
-        updateRating = new NamedPreparedStatement(connection,
-                "UPDATE mentionsbrand SET rating = :rating "
-                + "WHERE tweetid = :tweetid AND brand = :brand");
-
-        Double value;
-        String text;
-
-        //for all tuples
-        while (tweetBrandResults.next()) {
-            //get the text
-            text = tweetBrandResults.getString("text");
-            text = splitPunctToWords(text);
-            // test is the tweet text you are going to analyze
-            String[] words = text.split("\\s+"); // text splitted into separate words
-            double positiverate = 0; // positive rating
-
-            // Rate the text with unigrams
-            for (String word : words) {
-                value = unimap.get(word);
-                if (value != null) {
-                    positiverate += unimap.get(word);
-                }
-            }
-            // Rate the text with bigrams
-            for (int i = 0; i < words.length - 1; i++) {
-                String pair = words[i] + " " + words[i + 1];
-                value = bimap.get(pair);
-                if (value != null) {
-                    positiverate += bimap.get(pair);
-                }
-            }
-            //insert the rating into the database
-            updateRating.setLong("tweetid", tweetBrandResults.getLong("tweetid"));
-            updateRating.setString("brand", tweetBrandResults.getString("brand"));
-            updateRating.setInt("rating", (int) (positiverate * 10));
-            updateRating.executeUpdate();
-        }
-    }
-
-    /**
-     * Make a wordcloud of the results of some query.
-     *
-     * @param query The sql text for a query.
-     * @throws SQLException
-     * @throws FileNotFoundException
-     * @throws UnsupportedEncodingException
-     */
-    public void makeWordCloud(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
-
-        query(query);
-        //go to the start of the ResultSet data
-        if (data == null) {
-            System.err.println("data is empty, try querying first");
-            return;
-        }
-
-        String text;
-        String brand;
-        String[] words;
-        HashMap<String, HashMap<String, Integer>> wordcloud = new HashMap<>();
-
-        while (data.next()) {
-            //get brand
-            brand = data.getString("brand");
-            //make hashmap for each brand
-            if (!wordcloud.containsKey(brand)) {
-                wordcloud.put(brand, new HashMap<String, Integer>());
-            }
-            //get the text
-            text = data.getString("text");
-            //remove punctuation, convert to lowercase and split on words
-            text = removePunct(text);
-            text = text.toLowerCase();
-            words = text.split("\\s+");
-            //for all words
-            for (String word : words) {
-                //if it is empty, a space or a stripe, skip it
-                if (word.equals("") || word.equals(" ") || word.equals("-")) {
-                    continue;
-                }
-                //if the word is already in the map, increment the amount
-                if (wordcloud.get(brand).containsKey(word)) {
-                    wordcloud.get(brand).put(word, wordcloud.get(brand).get(word) + 1);
-                } //if the word is not already in the map, make an entry with amount = 1
-                else {
-                    wordcloud.get(brand).put(word, 1);
-                }
-            }
-        }
-        //print the words and their frequency in a csv file
-        mapToCSV(wordcloud, "wordcloud.csv", "brand,word,count");
-    }
-
-    //generate csv for disco from the query
-    public void disco(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
-        //do the query
-        query(query);
-        PrintWriter writer = new PrintWriter("output.csv", "UTF-8");
-        //print the first row
-        for (int i = 1; i < data.getMetaData().getColumnCount(); i++) {
-            writer.print(data.getMetaData().getColumnLabel(i) + ", ");
-        }
-        writer.println(data.getMetaData().getColumnLabel(data.getMetaData().getColumnCount()));
-        //print the values
-        while (data.next()) {
-            for (int i = 1; i < data.getMetaData().getColumnCount(); i++) {
-                if (data.getObject(i) == null) {
-                    writer.print(", ");
-                } else {
-                    writer.print(data.getObject(i).toString().replaceAll("[,\n]", " ") + ", ");
-                }
-            }
-            if (data.getObject(data.getMetaData().getColumnCount()) == null) {
-                writer.println("0");
-            } else {
-                writer.println(data.getObject(data.getMetaData().getColumnCount()).toString().replace(",", " "));
-            }
-        }
-        writer.close();
-    }
-
-    public void getBrands() throws SQLException {
-        PreparedStatement statement;
-        //make a connection to the database and execute the query
-        statement = connection.prepareStatement("delete from mentionsbrand");
-        statement.executeUpdate();
-        BrandChecker checker = new BrandChecker("brandonlyrules.txt");
-        query("select * from tweet");
-        NamedPreparedStatement m_insertBrand = new NamedPreparedStatement(connection, QueryUtils.insertBrand);
-        while (data.next()) {
-            List<String> brands = checker.getBrands(data.getString("text"));
-            if (brands.isEmpty()) {
-                QueryUtils.setInsertBrandParams(m_insertBrand, data.getLong("tweetid"), "no");
-                m_insertBrand.executeUpdate();
-            } else {
-                for (String brand : brands) {
-                    QueryUtils.setInsertBrandParams(m_insertBrand, data.getLong("tweetid"), brand);
-                    m_insertBrand.executeUpdate();
-                }
-            }
-        }
-    }
-
-    //gets the amount of users that tweet about a brand in a timezone
-    //makes a csv file timezone, brand, amount
-    public void timezone(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
-        query(query);
-        //hashmap timezone, brand, amount
-        HashMap<String, HashMap<String, Integer>> timeMap = new HashMap<>();
-        String timezone;
-        String brand;
-
-        while (data.next()) {
-            timezone = data.getString("timezone");
-            brand = data.getString("brand");
-            //if the timezone is already in the map
-            if (timeMap.containsKey(timezone)) {
-                //if the brand for that timezone is already in the map
-                if (timeMap.get(timezone).containsKey(brand)) {
-                    //increment the amount
-                    timeMap.get(timezone).put(brand, timeMap.get(timezone).get(brand) + 1);
-                } //if the brand for that timezone is not yet in the map
-                else {
-                    //make a new entry for that brand with amount = 1
-                    timeMap.get(timezone).put(brand, 1);
-                }
-            } //if the timezone is not yet in the map
-            else {
-                //make a new hashmap for this map and fill it with the brand and the amount
-                timeMap.put(timezone, new HashMap<String, Integer>());
-                timeMap.get(timezone).put(brand, 1);
-            }
-        }
-        //make the CSV out of the map
-        mapToCSV(timeMap, "timezone.csv", "timezone,brand,count");
-    }
-
-    //replaces punctuation so it will be splitted
-    //also removes urls
-    private String splitPunctToWords(String text) {
-        text = text.replaceAll("https?://\\S*", "");
-        text = text.replaceAll("[!?):;\"']", " $0");
-        text = text.replaceAll("[.,-](\\s|$)", " $0");
-        text = text.replaceAll("\\s[(\"']", "$0 ");
-        return text;
-    }
-
-    //removes punctuation
-    //also removes urls
-    private String removePunct(String text) {
-        text = text.replaceAll("https?://\\S*", " ");
-        text = text.replaceAll("@\\S*", " ");
-        text = text.replaceAll("[^a-zA-Z0-9#_-]", " ");
-        return text;
-    }
-
-    //prints a hashmap into a csv for a html application
-    //Hashmap<key1, HashMap<key2, value>> becomes key1, key2, value
-    //only for String, String, Integer
-    void mapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine)
-            throws FileNotFoundException, UnsupportedEncodingException {
-
-        PrintWriter writer = new PrintWriter(fileName, "UTF-8");
-
-        writer.println(firstLine);
-
-        //loop over brands
-        for (Entry en : map.entrySet()) {
-            //loop over words
-            for (Entry e : map.get(en.getKey()).entrySet()) {
-                writer.println(en.getKey() + "," + e.getKey() + "," + e.getValue());
-            }
-        }
-
-        writer.close();
-        System.out.println("csv file made, please put it next to html file and run this");
-    }
-}
+package main;
+
+import analysis.BrandChecker;
+import database.NamedPreparedStatement;
+import database.QueryUtils;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.UnsupportedEncodingException;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.List;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.Scanner;
+
+/**
+ * The sentiment analysis class that rates tweets based on a unigram and bigram
+ * set of weights.
+ */
+public class Analyzor {
+
+    /**
+     * The map that matches single words to their weights.
+     */
+    private final HashMap<String, Double> unimap = new HashMap();
+
+    /**
+     * The map that matches word pairs to their weights.
+     */
+    private final HashMap<String, Double> bimap = new HashMap();
+
+    /**
+     * The results of a query, maybe return from query().
+     */
+    private ResultSet data;
+
+    /**
+     * The persistent connection to the database.
+     */
+    private final Connection connection;
+
+    /**
+     * @param connection An open connection to the database.
+     */
+    public Analyzor(Connection connection) {
+        this.connection = connection;
+    }
+
+    /**
+     * Read the unigram and bigram lexica into the weight maps (only once).
+     * Both files are parsed completely before the maps are touched, so a
+     * failed read never leaves a half-filled lexicon behind the guard below.
+     *
+     * @throws FileNotFoundException When a lexicon file cannot be opened.
+     */
+    public void readLexicon() throws FileNotFoundException {
+        if (!unimap.isEmpty()) {
+            // data is already read.
+            return;
+        }
+        System.err.println("Trying to read lexicons...");
+        HashMap<String, Double> unigrams = new HashMap<>();
+        HashMap<String, Double> bigrams = new HashMap<>();
+        // A unigram is in the format (WS = whitespace):
+        // word <WS> rating <WS> ??? <WS> ??
+        // A bigram has two WS-separated words instead of one.
+        try (Scanner uniScanner = new Scanner(new File("unigrams-pmilexicon.txt"));
+                Scanner biScanner = new Scanner(new File("bigrams-pmilexicon.txt"))) {
+            // Fill the map of unigrams; the rest of each line is skipped.
+            while (uniScanner.hasNext()) {
+                String word = uniScanner.next();
+                unigrams.put(word.toLowerCase(), Double.valueOf(uniScanner.next()));
+                if (uniScanner.hasNextLine()) {
+                    uniScanner.nextLine();
+                }
+            }
+            // Fill the map of bigrams, keyed by "first second".
+            while (biScanner.hasNext()) {
+                String words = biScanner.next() + " " + biScanner.next();
+                bigrams.put(words.toLowerCase(), Double.valueOf(biScanner.next()));
+                if (biScanner.hasNextLine()) {
+                    biScanner.nextLine();
+                }
+            }
+        }
+        unimap.putAll(unigrams);
+        bimap.putAll(bigrams);
+        System.err.println("Lexicons are read.");
+    }
+
+    /**
+     * Runs the given SQL text on the persistent connection and keeps the
+     * resulting rows in {@code data} for the analysis methods to consume.
+     *
+     * @param query The query string to execute.
+     * @throws SQLException When database connection isn't available.
+     */
+    public void query(String query) throws SQLException {
+        final PreparedStatement prepared = connection.prepareStatement(query);
+        final ResultSet rows = prepared.executeQuery();
+        this.data = rows;
+    }
+
+    /**
+     * Run a sentiment analysis and fill the database with the output.
+     * Each selected (tweet, brand) row is rated with the sum of all matching
+     * unigram and bigram weights, times ten, truncated to an int. Matching is
+     * case-insensitive: the tweet is lowercased just like the lexicon keys.
+     *
+     * @param query The sql text for the query; when empty, all mentions
+     * without a rating are selected. Must yield tweetid, text and brand.
+     * @throws SQLException When a query or an update fails.
+     * @throws IOException When a lexicon file cannot be opened.
+     */
+    public void sentimentAnalysis(String query) throws SQLException, IOException {
+        NamedPreparedStatement tweetBrandStmt, updateRating;
+
+        //read the lexicons
+        readLexicon();
+
+        // if you ever need to re-apply rating, use something like:
+        // UPDATE mentionsbrand SET rating = NULL WHERE ...
+        if (query.isEmpty()) {
+            query = "SELECT t.tweetid, t.text, b.brand FROM tweet t "
+                    + "JOIN mentionsbrand b USING (tweetid) "
+                    + "WHERE b.rating IS NULL";
+        }
+        tweetBrandStmt = new NamedPreparedStatement(connection,
+                query);
+        ResultSet tweetBrandResults = tweetBrandStmt.executeQuery();
+
+        updateRating = new NamedPreparedStatement(connection,
+                "UPDATE mentionsbrand SET rating = :rating "
+                + "WHERE tweetid = :tweetid AND brand = :brand");
+
+        //for all tuples
+        while (tweetBrandResults.next()) {
+            // The lexicon keys are stored in lowercase (see readLexicon), so
+            // the tweet must be lowercased too or "Good" would never match.
+            String text = tweetBrandResults.getString("text").toLowerCase();
+            // text splitted into separate words, punctuation as own tokens
+            String[] words = splitPunctToWords(text).split("\\s+");
+            double positiverate = 0; // positive rating
+
+            // Rate the text with unigrams
+            for (String word : words) {
+                Double value = unimap.get(word);
+                if (value != null) {
+                    positiverate += value;
+                }
+            }
+            // Rate the text with bigrams
+            for (int i = 0; i < words.length - 1; i++) {
+                Double value = bimap.get(words[i] + " " + words[i + 1]);
+                if (value != null) {
+                    positiverate += value;
+                }
+            }
+            //insert the rating into the database
+            updateRating.setLong("tweetid", tweetBrandResults.getLong("tweetid"));
+            updateRating.setString("brand", tweetBrandResults.getString("brand"));
+            updateRating.setInt("rating", (int) (positiverate * 10));
+            updateRating.executeUpdate();
+        }
+    }
+
+    /**
+     * Make a wordcloud of the results of some query.
+     * Counts per brand how often each word occurs in the selected tweets and
+     * writes the counts to wordcloud.csv. Urls, @mentions and punctuation are
+     * blanked out and the text is lowercased before the words are counted.
+     *
+     * @param query The sql text for a query; must yield brand and text.
+     * @throws SQLException
+     * @throws FileNotFoundException
+     * @throws UnsupportedEncodingException
+     */
+    public void makeWordCloud(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
+
+        query(query);
+        // query() leaves its result in data; without one there is nothing to do
+        if (data == null) {
+            System.err.println("data is empty, try querying first");
+            return;
+        }
+
+        // brand -> (word -> number of occurrences)
+        HashMap<String, HashMap<String, Integer>> wordcloud = new HashMap<>();
+
+        while (data.next()) {
+            String brand = data.getString("brand");
+            // every brand gets its own counters, created on first sight
+            HashMap<String, Integer> counts = wordcloud.get(brand);
+            if (counts == null) {
+                counts = new HashMap<String, Integer>();
+                wordcloud.put(brand, counts);
+            }
+            // clean the tweet text and cut it into lowercase tokens
+            String raw = data.getString("text");
+            String cleaned = removePunct(raw).toLowerCase();
+            String[] tokens = cleaned.split("\\s+");
+            for (String token : tokens) {
+                // an empty token, a space or a stripe is not a word
+                if (token.isEmpty() || " ".equals(token) || "-".equals(token)) {
+                    continue;
+                }
+                // first occurrence starts at 1, later ones increment
+                Integer seen = counts.get(token);
+                if (seen == null) {
+                    counts.put(token, 1);
+                } else {
+                    counts.put(token, seen + 1);
+                }
+            }
+        }
+
+        //print the words and their frequency in a csv file
+        mapToCSV(wordcloud, "wordcloud.csv", "brand,word,count");
+    }
+
+    /**
+     * Generate output.csv for disco from the query: one header row with the
+     * column labels, then one row per result with its values. Commas and
+     * newlines inside values become spaces so that every result stays one row.
+     */
+    public void disco(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
+        //do the query
+        query(query);
+        final int columns = data.getMetaData().getColumnCount();
+        // try-with-resources: the file is closed even when a read fails
+        try (PrintWriter writer = new PrintWriter("output.csv", "UTF-8")) {
+            //print the first row
+            for (int i = 1; i < columns; i++) {
+                writer.print(data.getMetaData().getColumnLabel(i) + ", ");
+            }
+            writer.println(data.getMetaData().getColumnLabel(columns));
+            //print the values; a null is left empty, or "0" in the last column
+            while (data.next()) {
+                for (int i = 1; i < columns; i++) {
+                    Object cell = data.getObject(i);
+                    writer.print((cell == null ? "" : cell.toString().replaceAll("[,\n]", " ")) + ", ");
+                }
+                Object last = data.getObject(columns);
+                writer.println(last == null ? "0" : last.toString().replaceAll("[,\n]", " "));
+            }
+        }
+    }
+
+    /** Rebuilds mentionsbrand from all tweets; a tweet without brands gets "no". */
+    public void getBrands() throws SQLException {
+        // wipe the old rows; the one-off statement is closed right away
+        try (PreparedStatement wipe = connection.prepareStatement("delete from mentionsbrand")) {
+            wipe.executeUpdate();
+        }
+        BrandChecker checker = new BrandChecker("brandonlyrules.txt");
+        query("select * from tweet");
+        NamedPreparedStatement insertBrand = new NamedPreparedStatement(connection, QueryUtils.insertBrand);
+        while (data.next()) {
+            List<String> brands = checker.getBrands(data.getString("text"));
+            if (brands.isEmpty()) {
+                QueryUtils.setInsertBrandParams(insertBrand, data.getLong("tweetid"), "no");
+                insertBrand.executeUpdate();
+            }
+            for (String brand : brands) {
+                QueryUtils.setInsertBrandParams(insertBrand, data.getLong("tweetid"), brand);
+                insertBrand.executeUpdate();
+            }
+        }
+    }
+
+    /**
+     * Counts, per timezone, how many result rows mention each brand and makes
+     * a csv file timezone.csv with the rows timezone, brand, amount.
+     *
+     * @param query The sql text for a query; must yield timezone and brand.
+     * @throws SQLException
+     * @throws FileNotFoundException
+     * @throws UnsupportedEncodingException
+     */
+    public void timezone(String query) throws SQLException, FileNotFoundException, UnsupportedEncodingException {
+        query(query);
+        // timezone -> (brand -> amount)
+        HashMap<String, HashMap<String, Integer>> timeMap = new HashMap<>();
+
+        while (data.next()) {
+            String timezone = data.getString("timezone");
+            String brand = data.getString("brand");
+            // look up the counters of this timezone, creating them when needed
+            HashMap<String, Integer> perBrand = timeMap.get(timezone);
+            if (perBrand == null) {
+                perBrand = new HashMap<String, Integer>();
+                timeMap.put(timezone, perBrand);
+            }
+            // first sighting counts as 1, every later one increments
+            Integer amount = perBrand.get(brand);
+            if (amount == null) {
+                perBrand.put(brand, 1);
+            } else {
+                perBrand.put(brand, amount + 1);
+            }
+        }
+
+        // add a pseudo timezone "legenda" that will make the legenda for the
+        // timezone map: every listed brand gets the same share of legendaSize
+        final int legendaSize = 6000;
+        final String[] legendaBrands = {"sony", "lg", "huawei", "htc", "samsung", "apple"};
+        HashMap<String, Integer> legenda = new HashMap<String, Integer>();
+        for (String legendaBrand : legendaBrands) {
+            legenda.put(legendaBrand, legendaSize / 6);
+        }
+        timeMap.put("legenda", legenda);
+
+        //make the CSV out of the map
+        mapToCSV(timeMap, "timezone.csv", "timezone,brand,count");
+    }
+
+    /**
+     * Removes urls and spaces punctuation apart so that it splits off as words.
+     */
+    private String splitPunctToWords(String text) {
+        return text.replaceAll("https?://\\S*", "")
+                .replaceAll("[!?):;\"']", " $0")
+                .replaceAll("[.,-](\\s|$)", " $0")
+                .replaceAll("\\s[(\"']", "$0 ");
+    }
+
+    /**
+     * Blanks out urls, @mentions and every character outside a-z A-Z 0-9 # _ -.
+     */
+    private String removePunct(String text) {
+        final String noUrls = text.replaceAll("https?://\\S*", " ");
+        final String noMentions = noUrls.replaceAll("@\\S*", " ");
+        return noMentions.replaceAll("[^a-zA-Z0-9#_-]", " ");
+    }
+
+    /**
+     * Prints a nested hashmap into a csv file for a html application:
+     * {@code HashMap<key1, HashMap<key2, value>>} becomes rows key1,key2,value.
+     * Keys are written as they are, so they must not contain commas.
+     * @param map The counts to write, outer key first.
+     * @param fileName The file to (over)write, in UTF-8.
+     * @param firstLine The header row of the csv.
+     */
+    void mapToCSV(HashMap<String, HashMap<String, Integer>> map, String fileName, String firstLine)
+            throws FileNotFoundException, UnsupportedEncodingException {
+        // try-with-resources closes the file even if writing a row fails
+        try (PrintWriter writer = new PrintWriter(fileName, "UTF-8")) {
+            writer.println(firstLine);
+            for (Entry<String, HashMap<String, Integer>> outer : map.entrySet()) {
+                for (Entry<String, Integer> inner : outer.getValue().entrySet()) {
+                    writer.println(outer.getKey() + "," + inner.getKey() + "," + inner.getValue());
+                }
+            }
+        }
+        System.out.println("csv file made, please put it next to html file and run this");
+    }
+}
-- 
cgit v1.2.1