From f94162b0c8e6a7b7bd62087f14fcb1c646a6fe84 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Thu, 15 May 2014 16:23:11 +0200 Subject: FUCK CRLF --- src/main/Analyzor.java | 382 +++++++++++++++++++++++----------------------- src/main/FarmShell.java | 396 ++++++++++++++++++++++++------------------------ 2 files changed, 389 insertions(+), 389 deletions(-) (limited to 'src') diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index e7e26fe..9be1101 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -1,191 +1,191 @@ -package main; - -import database.NamedPreparedStatement; -import database.QueryUtils; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.sql.Connection; -import java.sql.PreparedStatement; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.HashMap; -import java.util.Scanner; - -/** - * The sentiment analysis class that rates tweets based on a unigram and bigram - * set of weights. - */ -public class Analyzor { - - /** - * The map that matches single words to their weights. - */ - private final HashMap unimap = new HashMap(); - - /** - * The map that matches word pairs to their weights. - */ - private final HashMap bimap = new HashMap(); - - private ResultSet data; - private final Connection connection; - - Analyzor(Connection connection) { - this.connection = connection; - } - - //reads the lexicons - void readLexicon() throws FileNotFoundException { - if (!unimap.isEmpty()) { - // data is already read. - return; - } - // A unigram is in the format (WS = whitespace): - // word rating ??? ?? - // A bigram has an two WS-separated words instead of one. - try (Scanner uniScanner = new Scanner("unigrams-pmilexicon.txt"); - Scanner biScanner = new Scanner("bigrams-pmilexicon.txt");) { - //Fill the map of unigrams - while (uniScanner.hasNext()) { - String words = uniScanner.next(); - unimap.put(words.toLowerCase(), uniScanner.nextDouble()); - if (uniScanner.hasNextLine()) { - uniScanner.nextLine(); - } - } - - //fill the map of bigrams - while (biScanner.hasNext()) { - String words = biScanner.next() + " " + biScanner.next(); - bimap.put(words.toLowerCase(), biScanner.nextDouble()); - if (biScanner.hasNextLine()) { - biScanner.nextLine(); - } - } - } - } - - /** - * Executes a query that the analyzer can analyze. - * - * @param query The query string to execute. - * @throws SQLException When database connection isn't available. - */ - public void query(String query) throws SQLException { - PreparedStatement statement; - //make a connection to the database and execute the query - statement = connection.prepareStatement(query); - data = statement.executeQuery(); - } - - /** - * Run a sentiment analysis and fill the database with the output. - * - * @throws SQLException - * @throws IOException - */ - public void sentimentAnalysis(String query) throws SQLException, IOException { - query(query); - - //read the lexicons - readLexicon(); - - //go to the start of te dataset - if (data == null) { - System.err.println("data is empty, try querying first"); - return; - } - data.beforeFirst(); - - Double value; - String text; - - //for all tuples - while (data.next()) { - //get the text - text = data.getString("text"); - text = splitPunctToWords(text); - // test is the tweet text you are going to analyze - String[] words = text.split("\\s+"); // text splitted into separate words - double positiverate = 0; // positive rating - - // Rate the text with unigrams - for (String word : words) { - value = unimap.get(word); - if (value != null) { - positiverate += unimap.get(word); - } - } - // Rate the text with bigrams - for (int i = 0; i < words.length - 1; i++) { - String pair = words[i] + " " + words[i + 1]; - value = bimap.get(pair); - if (value != null) { - positiverate += bimap.get(pair); - } - } - //insert the rating into the database - NamedPreparedStatement m_insertRating; - m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating); - QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"), data.getString("brand"), (int) (positiverate * 10)); - m_insertRating.executeUpdate(); - //don't print the rate - //System.out.println(text + ": " + (int) (positiverate * 10)); - } - } - - //makes a wordcloud of the tweets in the ResultSet data - void makeWordCloud(String query) throws SQLException { - - query(query); - //go to the start of the ResultSet data - if (data == null) { - System.err.println("data is empty, try querying first"); - return; - } - - //make the hashmap with the words and their frequency - HashMap wordcloud = new HashMap<>(); - - String text; - String[] words; - Integer value; - - while (data.next()) { - //get the text - text = data.getString("text"); - //remove punctuation, convert to lowercase and split on words - text = removePunct(text); - text = text.toLowerCase(); - words = text.split("\\s+"); - - //count the words - for (String word : words) { - value = wordcloud.get(word); - if (value == null) { - wordcloud.put(word, 1); - } else { - wordcloud.put(word, value++); - } - } - } - } - - //replaces punctuation so it will be splitted - //also removes urls - private String splitPunctToWords(String text) { - text = text.replaceAll("https?://\\S*", ""); - text = text.replaceAll("[!?):;\"']", " $0"); - text = text.replaceAll("[.,-](\\s|$)", " $0"); - text = text.replaceAll("\\s[(\"']", "$0 "); - return text; - } - - //removes punctuation - //also removes urls - private String removePunct(String text) { - text = text.replaceAll("https?://\\S*", ""); - text = text.replaceAll("[.,!?()-:;\"']", " "); - return text; - } -} +package main; + +import database.NamedPreparedStatement; +import database.QueryUtils; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.HashMap; +import java.util.Scanner; + +/** + * The sentiment analysis class that rates tweets based on a unigram and bigram + * set of weights. + */ +public class Analyzor { + + /** + * The map that matches single words to their weights. + */ + private final HashMap unimap = new HashMap(); + + /** + * The map that matches word pairs to their weights. + */ + private final HashMap bimap = new HashMap(); + + private ResultSet data; + private final Connection connection; + + Analyzor(Connection connection) { + this.connection = connection; + } + + //reads the lexicons + void readLexicon() throws FileNotFoundException { + if (!unimap.isEmpty()) { + // data is already read. + return; + } + // A unigram is in the format (WS = whitespace): + // word rating ??? ?? + // A bigram has an two WS-separated words instead of one. + try (Scanner uniScanner = new Scanner("unigrams-pmilexicon.txt"); + Scanner biScanner = new Scanner("bigrams-pmilexicon.txt");) { + //Fill the map of unigrams + while (uniScanner.hasNext()) { + String words = uniScanner.next(); + unimap.put(words.toLowerCase(), uniScanner.nextDouble()); + if (uniScanner.hasNextLine()) { + uniScanner.nextLine(); + } + } + + //fill the map of bigrams + while (biScanner.hasNext()) { + String words = biScanner.next() + " " + biScanner.next(); + bimap.put(words.toLowerCase(), biScanner.nextDouble()); + if (biScanner.hasNextLine()) { + biScanner.nextLine(); + } + } + } + } + + /** + * Executes a query that the analyzer can analyze. + * + * @param query The query string to execute. + * @throws SQLException When database connection isn't available. + */ + public void query(String query) throws SQLException { + PreparedStatement statement; + //make a connection to the database and execute the query + statement = connection.prepareStatement(query); + data = statement.executeQuery(); + } + + /** + * Run a sentiment analysis and fill the database with the output. + * + * @throws SQLException + * @throws IOException + */ + public void sentimentAnalysis(String query) throws SQLException, IOException { + query(query); + + //read the lexicons + readLexicon(); + + //go to the start of te dataset + if (data == null) { + System.err.println("data is empty, try querying first"); + return; + } + data.beforeFirst(); + + Double value; + String text; + + //for all tuples + while (data.next()) { + //get the text + text = data.getString("text"); + text = splitPunctToWords(text); + // test is the tweet text you are going to analyze + String[] words = text.split("\\s+"); // text splitted into separate words + double positiverate = 0; // positive rating + + // Rate the text with unigrams + for (String word : words) { + value = unimap.get(word); + if (value != null) { + positiverate += unimap.get(word); + } + } + // Rate the text with bigrams + for (int i = 0; i < words.length - 1; i++) { + String pair = words[i] + " " + words[i + 1]; + value = bimap.get(pair); + if (value != null) { + positiverate += bimap.get(pair); + } + } + //insert the rating into the database + NamedPreparedStatement m_insertRating; + m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating); + QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"), data.getString("brand"), (int) (positiverate * 10)); + m_insertRating.executeUpdate(); + //don't print the rate + //System.out.println(text + ": " + (int) (positiverate * 10)); + } + } + + //makes a wordcloud of the tweets in the ResultSet data + void makeWordCloud(String query) throws SQLException { + + query(query); + //go to the start of the ResultSet data + if (data == null) { + System.err.println("data is empty, try querying first"); + return; + } + + //make the hashmap with the words and their frequency + HashMap wordcloud = new HashMap<>(); + + String text; + String[] words; + Integer value; + + while (data.next()) { + //get the text + text = data.getString("text"); + //remove punctuation, convert to lowercase and split on words + text = removePunct(text); + text = text.toLowerCase(); + words = text.split("\\s+"); + + //count the words + for (String word : words) { + value = wordcloud.get(word); + if (value == null) { + wordcloud.put(word, 1); + } else { + wordcloud.put(word, value++); + } + } + } + } + + //replaces punctuation so it will be splitted + //also removes urls + private String splitPunctToWords(String text) { + text = text.replaceAll("https?://\\S*", ""); + text = text.replaceAll("[!?):;\"']", " $0"); + text = text.replaceAll("[.,-](\\s|$)", " $0"); + text = text.replaceAll("\\s[(\"']", "$0 "); + return text; + } + + //removes punctuation + //also removes urls + private String removePunct(String text) { + text = text.replaceAll("https?://\\S*", ""); + text = text.replaceAll("[.,!?()-:;\"']", " "); + return text; + } +} diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java index 9dd1167..3c6f17f 100644 --- a/src/main/FarmShell.java +++ b/src/main/FarmShell.java @@ -1,198 +1,198 @@ -package main; - -import database.ConnectionBuilder; -import java.io.IOException; -import java.sql.Connection; -import java.sql.SQLException; -import java.util.Arrays; -import java.util.NoSuchElementException; -import java.util.Scanner; - -/** - * - * @author s123188 - */ -public class FarmShell { - - /** - * A scanner for the stdin. - */ - private final Scanner scanner = new Scanner(System.in); - - private Analyzor cached_analyzor; - private final ConnectionBuilder dbConnectionBuilder; - - FarmShell(ConnectionBuilder dbConnectionBuilder) { - this.dbConnectionBuilder = dbConnectionBuilder; - } - - private void printPrompt() { - System.out.print("$ "); - } - - private Analyzor getAnalyzor() throws SQLException { - if (cached_analyzor == null) { - Connection dbCon = dbConnectionBuilder.create(); - cached_analyzor = new Analyzor(dbCon); - } - return cached_analyzor; - } - - /** - * Processes commands from stdin until the exit command is received or EOF. - */ - public void process_forever() { - System.err.println("Entering interactive shell, type 'help' for help " - + "or 'exit' to leave. '.' repeats the previous interactive " - + "command."); - // print prompt for reading first command - printPrompt(); - String lastLine = ""; - while (scanner.hasNextLine()) { - String line = scanner.nextLine().trim(); - // repeat last command - if (line.equals(".")) { - line = lastLine; - } - if (!execute(line)) { - // requested to terminate - break; - } - if (!line.isEmpty()) { - lastLine = line; - } - // print prompt for reading next line - printPrompt(); - } - // prevent corrupted compressed files when exiting without a command - throw new NoSuchElementException(); - } - - /** - * Execute a single commands. - * - * @param cmd A single line of the command. - * @return Whether to continue or exit the application. - */ - public boolean execute(String cmd) { - String[] args = cmd.trim().split("\\s+", 2); - if (!args[0].isEmpty()) { - // non-empty command, let's see whether it makes sense? - return execute(args); - } - return true; - } - - /** - * Executes a command with optional parameters. - * - * @param args An array with the first argument containing the command with - * optional parameters in following arguments. - * @return true if more commands are allowed to be executed, false - * otherwise. - */ - public boolean execute(String[] args) { - try { - Command command = Command.fromString(args[0]); - String[] params = Arrays.copyOfRange(args, 1, args.length); - execute(command, params); - } catch (IllegalArgumentException ex) { - System.err.println(ex.getMessage()); - } catch (IOException ex) { - System.err.println("Command " + args[0] + " failed with " + ex); - ex.printStackTrace(); - } catch (NoSuchElementException ex) { - // thrown by the "exit" command to signal exit - return false; - } catch (SQLException ex) { - System.err.println("such " + ex); - } - // another satisfied customer, next! - return true; - } - - private void execute(Command command, String[] params) throws SQLException, IOException { - if (params.length < command.getParamCount()) { - throw new IllegalArgumentException("Expected " - + command.getParamCount() + " parameters, got only " - + params.length); - } - switch (command) { - case filterbots: - System.out.println("not yet implemented"); - break; - case sentiment: - getAnalyzor().sentimentAnalysis(params[0]); - break; - case wordcloud: - getAnalyzor().makeWordCloud(params[0]); - break; - case help: - for (String line : HELP) { - System.out.println(line); - } - for (Command cmd : Command.values()) { - System.out.printf(" %-10s", cmd.name()); - if (!cmd.getDescription().isEmpty()) { - System.out.print(" " + cmd.getDescription()); - } - if (cmd.getParamCount() == 1) { - System.out.print(" (1 arg)"); - } else if (cmd.getParamCount() > 1) { - System.out.printf(" (%d args)", cmd.getParamCount()); - } - System.out.println(); - } - break; - case exit: - throw new NoSuchElementException(); - default: - throw new AssertionError(command.name()); - } - } - - enum Command { - - filterbots("marks all users as bot or not", 1), - sentiment("analyzes all tweets on positivity (about a brand)", 1), - wordcloud("makes a wordcloud of the text of the tweets", 1), - exit("Returns to shell"), - help("Get help"); - - private final String description; - private final int paramCount; - - Command(String description) { - this.description = description; - this.paramCount = 0; - } - - Command(String description, int paramCount) { - this.description = description; - this.paramCount = paramCount; - } - - public String getDescription() { - return description; - } - - public int getParamCount() { - return paramCount; - } - - public static Command fromString(String command) { - for (Command cmd : values()) { - if (cmd.name().equals(command)) { - return cmd; - } - } - throw new IllegalArgumentException("Unrecognized command. Hint: help"); - } - }; - - private final String[] HELP = new String[]{ - "Interactive TweetShell", - "", - "Available commands:" - }; -} +package main; + +import database.ConnectionBuilder; +import java.io.IOException; +import java.sql.Connection; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.NoSuchElementException; +import java.util.Scanner; + +/** + * + * @author s123188 + */ +public class FarmShell { + + /** + * A scanner for the stdin. + */ + private final Scanner scanner = new Scanner(System.in); + + private Analyzor cached_analyzor; + private final ConnectionBuilder dbConnectionBuilder; + + FarmShell(ConnectionBuilder dbConnectionBuilder) { + this.dbConnectionBuilder = dbConnectionBuilder; + } + + private void printPrompt() { + System.out.print("$ "); + } + + private Analyzor getAnalyzor() throws SQLException { + if (cached_analyzor == null) { + Connection dbCon = dbConnectionBuilder.create(); + cached_analyzor = new Analyzor(dbCon); + } + return cached_analyzor; + } + + /** + * Processes commands from stdin until the exit command is received or EOF. + */ + public void process_forever() { + System.err.println("Entering interactive shell, type 'help' for help " + + "or 'exit' to leave. '.' repeats the previous interactive " + + "command."); + // print prompt for reading first command + printPrompt(); + String lastLine = ""; + while (scanner.hasNextLine()) { + String line = scanner.nextLine().trim(); + // repeat last command + if (line.equals(".")) { + line = lastLine; + } + if (!execute(line)) { + // requested to terminate + break; + } + if (!line.isEmpty()) { + lastLine = line; + } + // print prompt for reading next line + printPrompt(); + } + // prevent corrupted compressed files when exiting without a command + throw new NoSuchElementException(); + } + + /** + * Execute a single commands. + * + * @param cmd A single line of the command. + * @return Whether to continue or exit the application. + */ + public boolean execute(String cmd) { + String[] args = cmd.trim().split("\\s+", 2); + if (!args[0].isEmpty()) { + // non-empty command, let's see whether it makes sense? + return execute(args); + } + return true; + } + + /** + * Executes a command with optional parameters. + * + * @param args An array with the first argument containing the command with + * optional parameters in following arguments. + * @return true if more commands are allowed to be executed, false + * otherwise. + */ + public boolean execute(String[] args) { + try { + Command command = Command.fromString(args[0]); + String[] params = Arrays.copyOfRange(args, 1, args.length); + execute(command, params); + } catch (IllegalArgumentException ex) { + System.err.println(ex.getMessage()); + } catch (IOException ex) { + System.err.println("Command " + args[0] + " failed with " + ex); + ex.printStackTrace(); + } catch (NoSuchElementException ex) { + // thrown by the "exit" command to signal exit + return false; + } catch (SQLException ex) { + System.err.println("such " + ex); + } + // another satisfied customer, next! + return true; + } + + private void execute(Command command, String[] params) throws SQLException, IOException { + if (params.length < command.getParamCount()) { + throw new IllegalArgumentException("Expected " + + command.getParamCount() + " parameters, got only " + + params.length); + } + switch (command) { + case filterbots: + System.out.println("not yet implemented"); + break; + case sentiment: + getAnalyzor().sentimentAnalysis(params[0]); + break; + case wordcloud: + getAnalyzor().makeWordCloud(params[0]); + break; + case help: + for (String line : HELP) { + System.out.println(line); + } + for (Command cmd : Command.values()) { + System.out.printf(" %-10s", cmd.name()); + if (!cmd.getDescription().isEmpty()) { + System.out.print(" " + cmd.getDescription()); + } + if (cmd.getParamCount() == 1) { + System.out.print(" (1 arg)"); + } else if (cmd.getParamCount() > 1) { + System.out.printf(" (%d args)", cmd.getParamCount()); + } + System.out.println(); + } + break; + case exit: + throw new NoSuchElementException(); + default: + throw new AssertionError(command.name()); + } + } + + enum Command { + + filterbots("marks all users as bot or not", 1), + sentiment("analyzes all tweets on positivity (about a brand)", 1), + wordcloud("makes a wordcloud of the text of the tweets", 1), + exit("Returns to shell"), + help("Get help"); + + private final String description; + private final int paramCount; + + Command(String description) { + this.description = description; + this.paramCount = 0; + } + + Command(String description, int paramCount) { + this.description = description; + this.paramCount = paramCount; + } + + public String getDescription() { + return description; + } + + public int getParamCount() { + return paramCount; + } + + public static Command fromString(String command) { + for (Command cmd : values()) { + if (cmd.name().equals(command)) { + return cmd; + } + } + throw new IllegalArgumentException("Unrecognized command. Hint: help"); + } + }; + + private final String[] HELP = new String[]{ + "Interactive TweetShell", + "", + "Available commands:" + }; +} -- cgit v1.2.1