diff options
author | Peter Wu <peter@lekensteyn.nl> | 2014-05-15 16:23:11 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2014-05-15 16:23:11 +0200 |
commit | f94162b0c8e6a7b7bd62087f14fcb1c646a6fe84 (patch) | |
tree | 93a5e82515c4a8e2a6c4c4524413717aaad65947 /src | |
parent | 53a0049712a50949dc3972c189a9c268907b1d81 (diff) | |
download | Goldfarmer-f94162b0c8e6a7b7bd62087f14fcb1c646a6fe84.tar.gz |
FUCK CRLF
Diffstat (limited to 'src')
-rw-r--r-- | src/main/Analyzor.java | 382 | ||||
-rw-r--r-- | src/main/FarmShell.java | 396 |
2 files changed, 389 insertions, 389 deletions
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index e7e26fe..9be1101 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -1,191 +1,191 @@ -package main;
-
-import database.NamedPreparedStatement;
-import database.QueryUtils;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.HashMap;
-import java.util.Scanner;
-
-/**
- * The sentiment analysis class that rates tweets based on a unigram and bigram
- * set of weights.
- */
-public class Analyzor {
-
- /**
- * The map that matches single words to their weights.
- */
- private final HashMap<String, Double> unimap = new HashMap();
-
- /**
- * The map that matches word pairs to their weights.
- */
- private final HashMap<String, Double> bimap = new HashMap();
-
- private ResultSet data;
- private final Connection connection;
-
- Analyzor(Connection connection) {
- this.connection = connection;
- }
-
- //reads the lexicons
- void readLexicon() throws FileNotFoundException {
- if (!unimap.isEmpty()) {
- // data is already read.
- return;
- }
- // A unigram is in the format (WS = whitespace):
- // word <WS> rating <WS> ??? <WS> ??
- // A bigram has an two WS-separated words instead of one.
- try (Scanner uniScanner = new Scanner("unigrams-pmilexicon.txt");
- Scanner biScanner = new Scanner("bigrams-pmilexicon.txt");) {
- //Fill the map of unigrams
- while (uniScanner.hasNext()) {
- String words = uniScanner.next();
- unimap.put(words.toLowerCase(), uniScanner.nextDouble());
- if (uniScanner.hasNextLine()) {
- uniScanner.nextLine();
- }
- }
-
- //fill the map of bigrams
- while (biScanner.hasNext()) {
- String words = biScanner.next() + " " + biScanner.next();
- bimap.put(words.toLowerCase(), biScanner.nextDouble());
- if (biScanner.hasNextLine()) {
- biScanner.nextLine();
- }
- }
- }
- }
-
- /**
- * Executes a query that the analyzer can analyze.
- *
- * @param query The query string to execute.
- * @throws SQLException When database connection isn't available.
- */
- public void query(String query) throws SQLException {
- PreparedStatement statement;
- //make a connection to the database and execute the query
- statement = connection.prepareStatement(query);
- data = statement.executeQuery();
- }
-
- /**
- * Run a sentiment analysis and fill the database with the output.
- *
- * @throws SQLException
- * @throws IOException
- */
- public void sentimentAnalysis(String query) throws SQLException, IOException {
- query(query);
-
- //read the lexicons
- readLexicon();
-
- //go to the start of te dataset
- if (data == null) {
- System.err.println("data is empty, try querying first");
- return;
- }
- data.beforeFirst();
-
- Double value;
- String text;
-
- //for all tuples
- while (data.next()) {
- //get the text
- text = data.getString("text");
- text = splitPunctToWords(text);
- // test is the tweet text you are going to analyze
- String[] words = text.split("\\s+"); // text splitted into separate words
- double positiverate = 0; // positive rating
-
- // Rate the text with unigrams
- for (String word : words) {
- value = unimap.get(word);
- if (value != null) {
- positiverate += unimap.get(word);
- }
- }
- // Rate the text with bigrams
- for (int i = 0; i < words.length - 1; i++) {
- String pair = words[i] + " " + words[i + 1];
- value = bimap.get(pair);
- if (value != null) {
- positiverate += bimap.get(pair);
- }
- }
- //insert the rating into the database
- NamedPreparedStatement m_insertRating;
- m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating);
- QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"), data.getString("brand"), (int) (positiverate * 10));
- m_insertRating.executeUpdate();
- //don't print the rate
- //System.out.println(text + ": " + (int) (positiverate * 10));
- }
- }
-
- //makes a wordcloud of the tweets in the ResultSet data
- void makeWordCloud(String query) throws SQLException {
-
- query(query);
- //go to the start of the ResultSet data
- if (data == null) {
- System.err.println("data is empty, try querying first");
- return;
- }
-
- //make the hashmap with the words and their frequency
- HashMap<String, Integer> wordcloud = new HashMap<>();
-
- String text;
- String[] words;
- Integer value;
-
- while (data.next()) {
- //get the text
- text = data.getString("text");
- //remove punctuation, convert to lowercase and split on words
- text = removePunct(text);
- text = text.toLowerCase();
- words = text.split("\\s+");
-
- //count the words
- for (String word : words) {
- value = wordcloud.get(word);
- if (value == null) {
- wordcloud.put(word, 1);
- } else {
- wordcloud.put(word, value++);
- }
- }
- }
- }
-
- //replaces punctuation so it will be splitted
- //also removes urls
- private String splitPunctToWords(String text) {
- text = text.replaceAll("https?://\\S*", "");
- text = text.replaceAll("[!?):;\"']", " $0");
- text = text.replaceAll("[.,-](\\s|$)", " $0");
- text = text.replaceAll("\\s[(\"']", "$0 ");
- return text;
- }
-
- //removes punctuation
- //also removes urls
- private String removePunct(String text) {
- text = text.replaceAll("https?://\\S*", "");
- text = text.replaceAll("[.,!?()-:;\"']", " ");
- return text;
- }
-}
+package main; + +import database.NamedPreparedStatement; +import database.QueryUtils; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.HashMap; +import java.util.Scanner; + +/** + * The sentiment analysis class that rates tweets based on a unigram and bigram + * set of weights. + */ +public class Analyzor { + + /** + * The map that matches single words to their weights. + */ + private final HashMap<String, Double> unimap = new HashMap(); + + /** + * The map that matches word pairs to their weights. + */ + private final HashMap<String, Double> bimap = new HashMap(); + + private ResultSet data; + private final Connection connection; + + Analyzor(Connection connection) { + this.connection = connection; + } + + //reads the lexicons + void readLexicon() throws FileNotFoundException { + if (!unimap.isEmpty()) { + // data is already read. + return; + } + // A unigram is in the format (WS = whitespace): + // word <WS> rating <WS> ??? <WS> ?? + // A bigram has an two WS-separated words instead of one. + try (Scanner uniScanner = new Scanner("unigrams-pmilexicon.txt"); + Scanner biScanner = new Scanner("bigrams-pmilexicon.txt");) { + //Fill the map of unigrams + while (uniScanner.hasNext()) { + String words = uniScanner.next(); + unimap.put(words.toLowerCase(), uniScanner.nextDouble()); + if (uniScanner.hasNextLine()) { + uniScanner.nextLine(); + } + } + + //fill the map of bigrams + while (biScanner.hasNext()) { + String words = biScanner.next() + " " + biScanner.next(); + bimap.put(words.toLowerCase(), biScanner.nextDouble()); + if (biScanner.hasNextLine()) { + biScanner.nextLine(); + } + } + } + } + + /** + * Executes a query that the analyzer can analyze. + * + * @param query The query string to execute. + * @throws SQLException When database connection isn't available. + */ + public void query(String query) throws SQLException { + PreparedStatement statement; + //make a connection to the database and execute the query + statement = connection.prepareStatement(query); + data = statement.executeQuery(); + } + + /** + * Run a sentiment analysis and fill the database with the output. + * + * @throws SQLException + * @throws IOException + */ + public void sentimentAnalysis(String query) throws SQLException, IOException { + query(query); + + //read the lexicons + readLexicon(); + + //go to the start of te dataset + if (data == null) { + System.err.println("data is empty, try querying first"); + return; + } + data.beforeFirst(); + + Double value; + String text; + + //for all tuples + while (data.next()) { + //get the text + text = data.getString("text"); + text = splitPunctToWords(text); + // test is the tweet text you are going to analyze + String[] words = text.split("\\s+"); // text splitted into separate words + double positiverate = 0; // positive rating + + // Rate the text with unigrams + for (String word : words) { + value = unimap.get(word); + if (value != null) { + positiverate += unimap.get(word); + } + } + // Rate the text with bigrams + for (int i = 0; i < words.length - 1; i++) { + String pair = words[i] + " " + words[i + 1]; + value = bimap.get(pair); + if (value != null) { + positiverate += bimap.get(pair); + } + } + //insert the rating into the database + NamedPreparedStatement m_insertRating; + m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating); + QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"), data.getString("brand"), (int) (positiverate * 10)); + m_insertRating.executeUpdate(); + //don't print the rate + //System.out.println(text + ": " + (int) (positiverate * 10)); + } + } + + //makes a wordcloud of the tweets in the ResultSet data + void makeWordCloud(String query) throws SQLException { + + query(query); + //go to the start of the ResultSet data + if (data == null) { + System.err.println("data is empty, try querying first"); + return; + } + + //make the hashmap with the words and their frequency + HashMap<String, Integer> wordcloud = new HashMap<>(); + + String text; + String[] words; + Integer value; + + while (data.next()) { + //get the text + text = data.getString("text"); + //remove punctuation, convert to lowercase and split on words + text = removePunct(text); + text = text.toLowerCase(); + words = text.split("\\s+"); + + //count the words + for (String word : words) { + value = wordcloud.get(word); + if (value == null) { + wordcloud.put(word, 1); + } else { + wordcloud.put(word, value++); + } + } + } + } + + //replaces punctuation so it will be splitted + //also removes urls + private String splitPunctToWords(String text) { + text = text.replaceAll("https?://\\S*", ""); + text = text.replaceAll("[!?):;\"']", " $0"); + text = text.replaceAll("[.,-](\\s|$)", " $0"); + text = text.replaceAll("\\s[(\"']", "$0 "); + return text; + } + + //removes punctuation + //also removes urls + private String removePunct(String text) { + text = text.replaceAll("https?://\\S*", ""); + text = text.replaceAll("[.,!?()-:;\"']", " "); + return text; + } +} diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java index 9dd1167..3c6f17f 100644 --- a/src/main/FarmShell.java +++ b/src/main/FarmShell.java @@ -1,198 +1,198 @@ -package main;
-
-import database.ConnectionBuilder;
-import java.io.IOException;
-import java.sql.Connection;
-import java.sql.SQLException;
-import java.util.Arrays;
-import java.util.NoSuchElementException;
-import java.util.Scanner;
-
-/**
- *
- * @author s123188
- */
-public class FarmShell {
-
- /**
- * A scanner for the stdin.
- */
- private final Scanner scanner = new Scanner(System.in);
-
- private Analyzor cached_analyzor;
- private final ConnectionBuilder dbConnectionBuilder;
-
- FarmShell(ConnectionBuilder dbConnectionBuilder) {
- this.dbConnectionBuilder = dbConnectionBuilder;
- }
-
- private void printPrompt() {
- System.out.print("$ ");
- }
-
- private Analyzor getAnalyzor() throws SQLException {
- if (cached_analyzor == null) {
- Connection dbCon = dbConnectionBuilder.create();
- cached_analyzor = new Analyzor(dbCon);
- }
- return cached_analyzor;
- }
-
- /**
- * Processes commands from stdin until the exit command is received or EOF.
- */
- public void process_forever() {
- System.err.println("Entering interactive shell, type 'help' for help "
- + "or 'exit' to leave. '.' repeats the previous interactive "
- + "command.");
- // print prompt for reading first command
- printPrompt();
- String lastLine = "";
- while (scanner.hasNextLine()) {
- String line = scanner.nextLine().trim();
- // repeat last command
- if (line.equals(".")) {
- line = lastLine;
- }
- if (!execute(line)) {
- // requested to terminate
- break;
- }
- if (!line.isEmpty()) {
- lastLine = line;
- }
- // print prompt for reading next line
- printPrompt();
- }
- // prevent corrupted compressed files when exiting without a command
- throw new NoSuchElementException();
- }
-
- /**
- * Execute a single commands.
- *
- * @param cmd A single line of the command.
- * @return Whether to continue or exit the application.
- */
- public boolean execute(String cmd) {
- String[] args = cmd.trim().split("\\s+", 2);
- if (!args[0].isEmpty()) {
- // non-empty command, let's see whether it makes sense?
- return execute(args);
- }
- return true;
- }
-
- /**
- * Executes a command with optional parameters.
- *
- * @param args An array with the first argument containing the command with
- * optional parameters in following arguments.
- * @return true if more commands are allowed to be executed, false
- * otherwise.
- */
- public boolean execute(String[] args) {
- try {
- Command command = Command.fromString(args[0]);
- String[] params = Arrays.copyOfRange(args, 1, args.length);
- execute(command, params);
- } catch (IllegalArgumentException ex) {
- System.err.println(ex.getMessage());
- } catch (IOException ex) {
- System.err.println("Command " + args[0] + " failed with " + ex);
- ex.printStackTrace();
- } catch (NoSuchElementException ex) {
- // thrown by the "exit" command to signal exit
- return false;
- } catch (SQLException ex) {
- System.err.println("such " + ex);
- }
- // another satisfied customer, next!
- return true;
- }
-
- private void execute(Command command, String[] params) throws SQLException, IOException {
- if (params.length < command.getParamCount()) {
- throw new IllegalArgumentException("Expected "
- + command.getParamCount() + " parameters, got only "
- + params.length);
- }
- switch (command) {
- case filterbots:
- System.out.println("not yet implemented");
- break;
- case sentiment:
- getAnalyzor().sentimentAnalysis(params[0]);
- break;
- case wordcloud:
- getAnalyzor().makeWordCloud(params[0]);
- break;
- case help:
- for (String line : HELP) {
- System.out.println(line);
- }
- for (Command cmd : Command.values()) {
- System.out.printf(" %-10s", cmd.name());
- if (!cmd.getDescription().isEmpty()) {
- System.out.print(" " + cmd.getDescription());
- }
- if (cmd.getParamCount() == 1) {
- System.out.print(" (1 arg)");
- } else if (cmd.getParamCount() > 1) {
- System.out.printf(" (%d args)", cmd.getParamCount());
- }
- System.out.println();
- }
- break;
- case exit:
- throw new NoSuchElementException();
- default:
- throw new AssertionError(command.name());
- }
- }
-
- enum Command {
-
- filterbots("marks all users as bot or not", 1),
- sentiment("analyzes all tweets on positivity (about a brand)", 1),
- wordcloud("makes a wordcloud of the text of the tweets", 1),
- exit("Returns to shell"),
- help("Get help");
-
- private final String description;
- private final int paramCount;
-
- Command(String description) {
- this.description = description;
- this.paramCount = 0;
- }
-
- Command(String description, int paramCount) {
- this.description = description;
- this.paramCount = paramCount;
- }
-
- public String getDescription() {
- return description;
- }
-
- public int getParamCount() {
- return paramCount;
- }
-
- public static Command fromString(String command) {
- for (Command cmd : values()) {
- if (cmd.name().equals(command)) {
- return cmd;
- }
- }
- throw new IllegalArgumentException("Unrecognized command. Hint: help");
- }
- };
-
- private final String[] HELP = new String[]{
- "Interactive TweetShell",
- "",
- "Available commands:"
- };
-}
+package main; + +import database.ConnectionBuilder; +import java.io.IOException; +import java.sql.Connection; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.NoSuchElementException; +import java.util.Scanner; + +/** + * + * @author s123188 + */ +public class FarmShell { + + /** + * A scanner for the stdin. + */ + private final Scanner scanner = new Scanner(System.in); + + private Analyzor cached_analyzor; + private final ConnectionBuilder dbConnectionBuilder; + + FarmShell(ConnectionBuilder dbConnectionBuilder) { + this.dbConnectionBuilder = dbConnectionBuilder; + } + + private void printPrompt() { + System.out.print("$ "); + } + + private Analyzor getAnalyzor() throws SQLException { + if (cached_analyzor == null) { + Connection dbCon = dbConnectionBuilder.create(); + cached_analyzor = new Analyzor(dbCon); + } + return cached_analyzor; + } + + /** + * Processes commands from stdin until the exit command is received or EOF. + */ + public void process_forever() { + System.err.println("Entering interactive shell, type 'help' for help " + + "or 'exit' to leave. '.' repeats the previous interactive " + + "command."); + // print prompt for reading first command + printPrompt(); + String lastLine = ""; + while (scanner.hasNextLine()) { + String line = scanner.nextLine().trim(); + // repeat last command + if (line.equals(".")) { + line = lastLine; + } + if (!execute(line)) { + // requested to terminate + break; + } + if (!line.isEmpty()) { + lastLine = line; + } + // print prompt for reading next line + printPrompt(); + } + // prevent corrupted compressed files when exiting without a command + throw new NoSuchElementException(); + } + + /** + * Execute a single commands. + * + * @param cmd A single line of the command. + * @return Whether to continue or exit the application. + */ + public boolean execute(String cmd) { + String[] args = cmd.trim().split("\\s+", 2); + if (!args[0].isEmpty()) { + // non-empty command, let's see whether it makes sense? + return execute(args); + } + return true; + } + + /** + * Executes a command with optional parameters. + * + * @param args An array with the first argument containing the command with + * optional parameters in following arguments. + * @return true if more commands are allowed to be executed, false + * otherwise. + */ + public boolean execute(String[] args) { + try { + Command command = Command.fromString(args[0]); + String[] params = Arrays.copyOfRange(args, 1, args.length); + execute(command, params); + } catch (IllegalArgumentException ex) { + System.err.println(ex.getMessage()); + } catch (IOException ex) { + System.err.println("Command " + args[0] + " failed with " + ex); + ex.printStackTrace(); + } catch (NoSuchElementException ex) { + // thrown by the "exit" command to signal exit + return false; + } catch (SQLException ex) { + System.err.println("such " + ex); + } + // another satisfied customer, next! + return true; + } + + private void execute(Command command, String[] params) throws SQLException, IOException { + if (params.length < command.getParamCount()) { + throw new IllegalArgumentException("Expected " + + command.getParamCount() + " parameters, got only " + + params.length); + } + switch (command) { + case filterbots: + System.out.println("not yet implemented"); + break; + case sentiment: + getAnalyzor().sentimentAnalysis(params[0]); + break; + case wordcloud: + getAnalyzor().makeWordCloud(params[0]); + break; + case help: + for (String line : HELP) { + System.out.println(line); + } + for (Command cmd : Command.values()) { + System.out.printf(" %-10s", cmd.name()); + if (!cmd.getDescription().isEmpty()) { + System.out.print(" " + cmd.getDescription()); + } + if (cmd.getParamCount() == 1) { + System.out.print(" (1 arg)"); + } else if (cmd.getParamCount() > 1) { + System.out.printf(" (%d args)", cmd.getParamCount()); + } + System.out.println(); + } + break; + case exit: + throw new NoSuchElementException(); + default: + throw new AssertionError(command.name()); + } + } + + enum Command { + + filterbots("marks all users as bot or not", 1), + sentiment("analyzes all tweets on positivity (about a brand)", 1), + wordcloud("makes a wordcloud of the text of the tweets", 1), + exit("Returns to shell"), + help("Get help"); + + private final String description; + private final int paramCount; + + Command(String description) { + this.description = description; + this.paramCount = 0; + } + + Command(String description, int paramCount) { + this.description = description; + this.paramCount = paramCount; + } + + public String getDescription() { + return description; + } + + public int getParamCount() { + return paramCount; + } + + public static Command fromString(String command) { + for (Command cmd : values()) { + if (cmd.name().equals(command)) { + return cmd; + } + } + throw new IllegalArgumentException("Unrecognized command. Hint: help"); + } + }; + + private final String[] HELP = new String[]{ + "Interactive TweetShell", + "", + "Available commands:" + }; +} |