summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2014-05-15 16:23:11 +0200
committerPeter Wu <peter@lekensteyn.nl>2014-05-15 16:23:11 +0200
commitf94162b0c8e6a7b7bd62087f14fcb1c646a6fe84 (patch)
tree93a5e82515c4a8e2a6c4c4524413717aaad65947 /src
parent53a0049712a50949dc3972c189a9c268907b1d81 (diff)
downloadGoldfarmer-f94162b0c8e6a7b7bd62087f14fcb1c646a6fe84.tar.gz
FUCK CRLF
Diffstat (limited to 'src')
-rw-r--r--src/main/Analyzor.java382
-rw-r--r--src/main/FarmShell.java396
2 files changed, 389 insertions, 389 deletions
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index e7e26fe..9be1101 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -1,191 +1,191 @@
-package main;
-
-import database.NamedPreparedStatement;
-import database.QueryUtils;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.HashMap;
-import java.util.Scanner;
-
-/**
- * The sentiment analysis class that rates tweets based on a unigram and bigram
- * set of weights.
- */
-public class Analyzor {
-
- /**
- * The map that matches single words to their weights.
- */
- private final HashMap<String, Double> unimap = new HashMap();
-
- /**
- * The map that matches word pairs to their weights.
- */
- private final HashMap<String, Double> bimap = new HashMap();
-
- private ResultSet data;
- private final Connection connection;
-
- Analyzor(Connection connection) {
- this.connection = connection;
- }
-
- //reads the lexicons
- void readLexicon() throws FileNotFoundException {
- if (!unimap.isEmpty()) {
- // data is already read.
- return;
- }
- // A unigram is in the format (WS = whitespace):
- // word <WS> rating <WS> ??? <WS> ??
- // A bigram has an two WS-separated words instead of one.
- try (Scanner uniScanner = new Scanner("unigrams-pmilexicon.txt");
- Scanner biScanner = new Scanner("bigrams-pmilexicon.txt");) {
- //Fill the map of unigrams
- while (uniScanner.hasNext()) {
- String words = uniScanner.next();
- unimap.put(words.toLowerCase(), uniScanner.nextDouble());
- if (uniScanner.hasNextLine()) {
- uniScanner.nextLine();
- }
- }
-
- //fill the map of bigrams
- while (biScanner.hasNext()) {
- String words = biScanner.next() + " " + biScanner.next();
- bimap.put(words.toLowerCase(), biScanner.nextDouble());
- if (biScanner.hasNextLine()) {
- biScanner.nextLine();
- }
- }
- }
- }
-
- /**
- * Executes a query that the analyzer can analyze.
- *
- * @param query The query string to execute.
- * @throws SQLException When database connection isn't available.
- */
- public void query(String query) throws SQLException {
- PreparedStatement statement;
- //make a connection to the database and execute the query
- statement = connection.prepareStatement(query);
- data = statement.executeQuery();
- }
-
- /**
- * Run a sentiment analysis and fill the database with the output.
- *
- * @throws SQLException
- * @throws IOException
- */
- public void sentimentAnalysis(String query) throws SQLException, IOException {
- query(query);
-
- //read the lexicons
- readLexicon();
-
- //go to the start of te dataset
- if (data == null) {
- System.err.println("data is empty, try querying first");
- return;
- }
- data.beforeFirst();
-
- Double value;
- String text;
-
- //for all tuples
- while (data.next()) {
- //get the text
- text = data.getString("text");
- text = splitPunctToWords(text);
- // test is the tweet text you are going to analyze
- String[] words = text.split("\\s+"); // text splitted into separate words
- double positiverate = 0; // positive rating
-
- // Rate the text with unigrams
- for (String word : words) {
- value = unimap.get(word);
- if (value != null) {
- positiverate += unimap.get(word);
- }
- }
- // Rate the text with bigrams
- for (int i = 0; i < words.length - 1; i++) {
- String pair = words[i] + " " + words[i + 1];
- value = bimap.get(pair);
- if (value != null) {
- positiverate += bimap.get(pair);
- }
- }
- //insert the rating into the database
- NamedPreparedStatement m_insertRating;
- m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating);
- QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"), data.getString("brand"), (int) (positiverate * 10));
- m_insertRating.executeUpdate();
- //don't print the rate
- //System.out.println(text + ": " + (int) (positiverate * 10));
- }
- }
-
- //makes a wordcloud of the tweets in the ResultSet data
- void makeWordCloud(String query) throws SQLException {
-
- query(query);
- //go to the start of the ResultSet data
- if (data == null) {
- System.err.println("data is empty, try querying first");
- return;
- }
-
- //make the hashmap with the words and their frequency
- HashMap<String, Integer> wordcloud = new HashMap<>();
-
- String text;
- String[] words;
- Integer value;
-
- while (data.next()) {
- //get the text
- text = data.getString("text");
- //remove punctuation, convert to lowercase and split on words
- text = removePunct(text);
- text = text.toLowerCase();
- words = text.split("\\s+");
-
- //count the words
- for (String word : words) {
- value = wordcloud.get(word);
- if (value == null) {
- wordcloud.put(word, 1);
- } else {
- wordcloud.put(word, value++);
- }
- }
- }
- }
-
- //replaces punctuation so it will be splitted
- //also removes urls
- private String splitPunctToWords(String text) {
- text = text.replaceAll("https?://\\S*", "");
- text = text.replaceAll("[!?):;\"']", " $0");
- text = text.replaceAll("[.,-](\\s|$)", " $0");
- text = text.replaceAll("\\s[(\"']", "$0 ");
- return text;
- }
-
- //removes punctuation
- //also removes urls
- private String removePunct(String text) {
- text = text.replaceAll("https?://\\S*", "");
- text = text.replaceAll("[.,!?()-:;\"']", " ");
- return text;
- }
-}
+package main;
+
+import database.NamedPreparedStatement;
+import database.QueryUtils;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.HashMap;
+import java.util.Scanner;
+
+/**
+ * The sentiment analysis class that rates tweets based on a unigram and bigram
+ * set of weights.
+ */
+public class Analyzor {
+
+ /**
+ * The map that matches single words to their weights.
+ */
+ private final HashMap<String, Double> unimap = new HashMap();
+
+ /**
+ * The map that matches word pairs to their weights.
+ */
+ private final HashMap<String, Double> bimap = new HashMap();
+
+ private ResultSet data;
+ private final Connection connection;
+
+ Analyzor(Connection connection) {
+ this.connection = connection;
+ }
+
+ //reads the lexicons
+ void readLexicon() throws FileNotFoundException {
+ if (!unimap.isEmpty()) {
+ // data is already read.
+ return;
+ }
+ // A unigram is in the format (WS = whitespace):
+ // word <WS> rating <WS> ??? <WS> ??
+ // A bigram has an two WS-separated words instead of one.
+ try (Scanner uniScanner = new Scanner("unigrams-pmilexicon.txt");
+ Scanner biScanner = new Scanner("bigrams-pmilexicon.txt");) {
+ //Fill the map of unigrams
+ while (uniScanner.hasNext()) {
+ String words = uniScanner.next();
+ unimap.put(words.toLowerCase(), uniScanner.nextDouble());
+ if (uniScanner.hasNextLine()) {
+ uniScanner.nextLine();
+ }
+ }
+
+ //fill the map of bigrams
+ while (biScanner.hasNext()) {
+ String words = biScanner.next() + " " + biScanner.next();
+ bimap.put(words.toLowerCase(), biScanner.nextDouble());
+ if (biScanner.hasNextLine()) {
+ biScanner.nextLine();
+ }
+ }
+ }
+ }
+
+ /**
+ * Executes a query that the analyzer can analyze.
+ *
+ * @param query The query string to execute.
+ * @throws SQLException When database connection isn't available.
+ */
+ public void query(String query) throws SQLException {
+ PreparedStatement statement;
+ //make a connection to the database and execute the query
+ statement = connection.prepareStatement(query);
+ data = statement.executeQuery();
+ }
+
+ /**
+ * Run a sentiment analysis and fill the database with the output.
+ *
+ * @throws SQLException
+ * @throws IOException
+ */
+ public void sentimentAnalysis(String query) throws SQLException, IOException {
+ query(query);
+
+ //read the lexicons
+ readLexicon();
+
+ //go to the start of te dataset
+ if (data == null) {
+ System.err.println("data is empty, try querying first");
+ return;
+ }
+ data.beforeFirst();
+
+ Double value;
+ String text;
+
+ //for all tuples
+ while (data.next()) {
+ //get the text
+ text = data.getString("text");
+ text = splitPunctToWords(text);
+ // test is the tweet text you are going to analyze
+ String[] words = text.split("\\s+"); // text splitted into separate words
+ double positiverate = 0; // positive rating
+
+ // Rate the text with unigrams
+ for (String word : words) {
+ value = unimap.get(word);
+ if (value != null) {
+ positiverate += unimap.get(word);
+ }
+ }
+ // Rate the text with bigrams
+ for (int i = 0; i < words.length - 1; i++) {
+ String pair = words[i] + " " + words[i + 1];
+ value = bimap.get(pair);
+ if (value != null) {
+ positiverate += bimap.get(pair);
+ }
+ }
+ //insert the rating into the database
+ NamedPreparedStatement m_insertRating;
+ m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating);
+ QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"), data.getString("brand"), (int) (positiverate * 10));
+ m_insertRating.executeUpdate();
+ //don't print the rate
+ //System.out.println(text + ": " + (int) (positiverate * 10));
+ }
+ }
+
+ //makes a wordcloud of the tweets in the ResultSet data
+ void makeWordCloud(String query) throws SQLException {
+
+ query(query);
+ //go to the start of the ResultSet data
+ if (data == null) {
+ System.err.println("data is empty, try querying first");
+ return;
+ }
+
+ //make the hashmap with the words and their frequency
+ HashMap<String, Integer> wordcloud = new HashMap<>();
+
+ String text;
+ String[] words;
+ Integer value;
+
+ while (data.next()) {
+ //get the text
+ text = data.getString("text");
+ //remove punctuation, convert to lowercase and split on words
+ text = removePunct(text);
+ text = text.toLowerCase();
+ words = text.split("\\s+");
+
+ //count the words
+ for (String word : words) {
+ value = wordcloud.get(word);
+ if (value == null) {
+ wordcloud.put(word, 1);
+ } else {
+ wordcloud.put(word, value++);
+ }
+ }
+ }
+ }
+
+ //replaces punctuation so it will be splitted
+ //also removes urls
+ private String splitPunctToWords(String text) {
+ text = text.replaceAll("https?://\\S*", "");
+ text = text.replaceAll("[!?):;\"']", " $0");
+ text = text.replaceAll("[.,-](\\s|$)", " $0");
+ text = text.replaceAll("\\s[(\"']", "$0 ");
+ return text;
+ }
+
+ //removes punctuation
+ //also removes urls
+ private String removePunct(String text) {
+ text = text.replaceAll("https?://\\S*", "");
+ text = text.replaceAll("[.,!?()-:;\"']", " ");
+ return text;
+ }
+}
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
index 9dd1167..3c6f17f 100644
--- a/src/main/FarmShell.java
+++ b/src/main/FarmShell.java
@@ -1,198 +1,198 @@
-package main;
-
-import database.ConnectionBuilder;
-import java.io.IOException;
-import java.sql.Connection;
-import java.sql.SQLException;
-import java.util.Arrays;
-import java.util.NoSuchElementException;
-import java.util.Scanner;
-
-/**
- *
- * @author s123188
- */
-public class FarmShell {
-
- /**
- * A scanner for the stdin.
- */
- private final Scanner scanner = new Scanner(System.in);
-
- private Analyzor cached_analyzor;
- private final ConnectionBuilder dbConnectionBuilder;
-
- FarmShell(ConnectionBuilder dbConnectionBuilder) {
- this.dbConnectionBuilder = dbConnectionBuilder;
- }
-
- private void printPrompt() {
- System.out.print("$ ");
- }
-
- private Analyzor getAnalyzor() throws SQLException {
- if (cached_analyzor == null) {
- Connection dbCon = dbConnectionBuilder.create();
- cached_analyzor = new Analyzor(dbCon);
- }
- return cached_analyzor;
- }
-
- /**
- * Processes commands from stdin until the exit command is received or EOF.
- */
- public void process_forever() {
- System.err.println("Entering interactive shell, type 'help' for help "
- + "or 'exit' to leave. '.' repeats the previous interactive "
- + "command.");
- // print prompt for reading first command
- printPrompt();
- String lastLine = "";
- while (scanner.hasNextLine()) {
- String line = scanner.nextLine().trim();
- // repeat last command
- if (line.equals(".")) {
- line = lastLine;
- }
- if (!execute(line)) {
- // requested to terminate
- break;
- }
- if (!line.isEmpty()) {
- lastLine = line;
- }
- // print prompt for reading next line
- printPrompt();
- }
- // prevent corrupted compressed files when exiting without a command
- throw new NoSuchElementException();
- }
-
- /**
- * Execute a single commands.
- *
- * @param cmd A single line of the command.
- * @return Whether to continue or exit the application.
- */
- public boolean execute(String cmd) {
- String[] args = cmd.trim().split("\\s+", 2);
- if (!args[0].isEmpty()) {
- // non-empty command, let's see whether it makes sense?
- return execute(args);
- }
- return true;
- }
-
- /**
- * Executes a command with optional parameters.
- *
- * @param args An array with the first argument containing the command with
- * optional parameters in following arguments.
- * @return true if more commands are allowed to be executed, false
- * otherwise.
- */
- public boolean execute(String[] args) {
- try {
- Command command = Command.fromString(args[0]);
- String[] params = Arrays.copyOfRange(args, 1, args.length);
- execute(command, params);
- } catch (IllegalArgumentException ex) {
- System.err.println(ex.getMessage());
- } catch (IOException ex) {
- System.err.println("Command " + args[0] + " failed with " + ex);
- ex.printStackTrace();
- } catch (NoSuchElementException ex) {
- // thrown by the "exit" command to signal exit
- return false;
- } catch (SQLException ex) {
- System.err.println("such " + ex);
- }
- // another satisfied customer, next!
- return true;
- }
-
- private void execute(Command command, String[] params) throws SQLException, IOException {
- if (params.length < command.getParamCount()) {
- throw new IllegalArgumentException("Expected "
- + command.getParamCount() + " parameters, got only "
- + params.length);
- }
- switch (command) {
- case filterbots:
- System.out.println("not yet implemented");
- break;
- case sentiment:
- getAnalyzor().sentimentAnalysis(params[0]);
- break;
- case wordcloud:
- getAnalyzor().makeWordCloud(params[0]);
- break;
- case help:
- for (String line : HELP) {
- System.out.println(line);
- }
- for (Command cmd : Command.values()) {
- System.out.printf(" %-10s", cmd.name());
- if (!cmd.getDescription().isEmpty()) {
- System.out.print(" " + cmd.getDescription());
- }
- if (cmd.getParamCount() == 1) {
- System.out.print(" (1 arg)");
- } else if (cmd.getParamCount() > 1) {
- System.out.printf(" (%d args)", cmd.getParamCount());
- }
- System.out.println();
- }
- break;
- case exit:
- throw new NoSuchElementException();
- default:
- throw new AssertionError(command.name());
- }
- }
-
- enum Command {
-
- filterbots("marks all users as bot or not", 1),
- sentiment("analyzes all tweets on positivity (about a brand)", 1),
- wordcloud("makes a wordcloud of the text of the tweets", 1),
- exit("Returns to shell"),
- help("Get help");
-
- private final String description;
- private final int paramCount;
-
- Command(String description) {
- this.description = description;
- this.paramCount = 0;
- }
-
- Command(String description, int paramCount) {
- this.description = description;
- this.paramCount = paramCount;
- }
-
- public String getDescription() {
- return description;
- }
-
- public int getParamCount() {
- return paramCount;
- }
-
- public static Command fromString(String command) {
- for (Command cmd : values()) {
- if (cmd.name().equals(command)) {
- return cmd;
- }
- }
- throw new IllegalArgumentException("Unrecognized command. Hint: help");
- }
- };
-
- private final String[] HELP = new String[]{
- "Interactive TweetShell",
- "",
- "Available commands:"
- };
-}
+package main;
+
+import database.ConnectionBuilder;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.NoSuchElementException;
+import java.util.Scanner;
+
+/**
+ *
+ * @author s123188
+ */
+public class FarmShell {
+
+ /**
+ * A scanner for the stdin.
+ */
+ private final Scanner scanner = new Scanner(System.in);
+
+ private Analyzor cached_analyzor;
+ private final ConnectionBuilder dbConnectionBuilder;
+
+ FarmShell(ConnectionBuilder dbConnectionBuilder) {
+ this.dbConnectionBuilder = dbConnectionBuilder;
+ }
+
+ private void printPrompt() {
+ System.out.print("$ ");
+ }
+
+ private Analyzor getAnalyzor() throws SQLException {
+ if (cached_analyzor == null) {
+ Connection dbCon = dbConnectionBuilder.create();
+ cached_analyzor = new Analyzor(dbCon);
+ }
+ return cached_analyzor;
+ }
+
+ /**
+ * Processes commands from stdin until the exit command is received or EOF.
+ */
+ public void process_forever() {
+ System.err.println("Entering interactive shell, type 'help' for help "
+ + "or 'exit' to leave. '.' repeats the previous interactive "
+ + "command.");
+ // print prompt for reading first command
+ printPrompt();
+ String lastLine = "";
+ while (scanner.hasNextLine()) {
+ String line = scanner.nextLine().trim();
+ // repeat last command
+ if (line.equals(".")) {
+ line = lastLine;
+ }
+ if (!execute(line)) {
+ // requested to terminate
+ break;
+ }
+ if (!line.isEmpty()) {
+ lastLine = line;
+ }
+ // print prompt for reading next line
+ printPrompt();
+ }
+ // prevent corrupted compressed files when exiting without a command
+ throw new NoSuchElementException();
+ }
+
+ /**
+ * Execute a single commands.
+ *
+ * @param cmd A single line of the command.
+ * @return Whether to continue or exit the application.
+ */
+ public boolean execute(String cmd) {
+ String[] args = cmd.trim().split("\\s+", 2);
+ if (!args[0].isEmpty()) {
+ // non-empty command, let's see whether it makes sense?
+ return execute(args);
+ }
+ return true;
+ }
+
+ /**
+ * Executes a command with optional parameters.
+ *
+ * @param args An array with the first argument containing the command with
+ * optional parameters in following arguments.
+ * @return true if more commands are allowed to be executed, false
+ * otherwise.
+ */
+ public boolean execute(String[] args) {
+ try {
+ Command command = Command.fromString(args[0]);
+ String[] params = Arrays.copyOfRange(args, 1, args.length);
+ execute(command, params);
+ } catch (IllegalArgumentException ex) {
+ System.err.println(ex.getMessage());
+ } catch (IOException ex) {
+ System.err.println("Command " + args[0] + " failed with " + ex);
+ ex.printStackTrace();
+ } catch (NoSuchElementException ex) {
+ // thrown by the "exit" command to signal exit
+ return false;
+ } catch (SQLException ex) {
+ System.err.println("such " + ex);
+ }
+ // another satisfied customer, next!
+ return true;
+ }
+
+ private void execute(Command command, String[] params) throws SQLException, IOException {
+ if (params.length < command.getParamCount()) {
+ throw new IllegalArgumentException("Expected "
+ + command.getParamCount() + " parameters, got only "
+ + params.length);
+ }
+ switch (command) {
+ case filterbots:
+ System.out.println("not yet implemented");
+ break;
+ case sentiment:
+ getAnalyzor().sentimentAnalysis(params[0]);
+ break;
+ case wordcloud:
+ getAnalyzor().makeWordCloud(params[0]);
+ break;
+ case help:
+ for (String line : HELP) {
+ System.out.println(line);
+ }
+ for (Command cmd : Command.values()) {
+ System.out.printf(" %-10s", cmd.name());
+ if (!cmd.getDescription().isEmpty()) {
+ System.out.print(" " + cmd.getDescription());
+ }
+ if (cmd.getParamCount() == 1) {
+ System.out.print(" (1 arg)");
+ } else if (cmd.getParamCount() > 1) {
+ System.out.printf(" (%d args)", cmd.getParamCount());
+ }
+ System.out.println();
+ }
+ break;
+ case exit:
+ throw new NoSuchElementException();
+ default:
+ throw new AssertionError(command.name());
+ }
+ }
+
+ enum Command {
+
+ filterbots("marks all users as bot or not", 1),
+ sentiment("analyzes all tweets on positivity (about a brand)", 1),
+ wordcloud("makes a wordcloud of the text of the tweets", 1),
+ exit("Returns to shell"),
+ help("Get help");
+
+ private final String description;
+ private final int paramCount;
+
+ Command(String description) {
+ this.description = description;
+ this.paramCount = 0;
+ }
+
+ Command(String description, int paramCount) {
+ this.description = description;
+ this.paramCount = paramCount;
+ }
+
+ public String getDescription() {
+ return description;
+ }
+
+ public int getParamCount() {
+ return paramCount;
+ }
+
+ public static Command fromString(String command) {
+ for (Command cmd : values()) {
+ if (cmd.name().equals(command)) {
+ return cmd;
+ }
+ }
+ throw new IllegalArgumentException("Unrecognized command. Hint: help");
+ }
+ };
+
+ private final String[] HELP = new String[]{
+ "Interactive TweetShell",
+ "",
+ "Available commands:"
+ };
+}