summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author s123188 <s123188@S123188.campus.tue.nl> 2014-05-14 15:11:55 +0200
committer s123188 <s123188@S123188.campus.tue.nl> 2014-05-14 15:11:55 +0200
commit a71684df8deebf3b1e42fd70559d78c35465283b (patch)
tree 77f56b0b582006d63198aef4612c078d4f61c60e /src
parent 5b495d441550cea4bd18d9987a4d4e2c6872c0e9 (diff)
download Goldfarmer-a71684df8deebf3b1e42fd70559d78c35465283b.tar.gz
Added shell, implemented query and a base version of sentimentanalysis
Diffstat (limited to 'src')
-rw-r--r-- src/database/ConnectionBuilder.java 65
-rw-r--r-- src/database/NamedPreparedStatement.java 107
-rw-r--r-- src/main/Analyzor.java 121
-rw-r--r-- src/main/FarmShell.java 193
-rw-r--r-- src/main/Main.java 84
5 files changed, 525 insertions, 45 deletions
diff --git a/src/database/ConnectionBuilder.java b/src/database/ConnectionBuilder.java
new file mode 100644
index 0000000..74e5c33
--- /dev/null
+++ b/src/database/ConnectionBuilder.java
@@ -0,0 +1,65 @@
+package database;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+
+/**
+ * Fluent builder for JDBC {@link Connection}s.
+ * <p>
+ * Every setter returns the builder itself so calls can be chained. The
+ * connection URL is assembled as
+ * {@code jdbc:<dbms>://<serverName>:<port>/<dbName>} when {@link #create()} is
+ * called.
+ *
+ * @author Peter Wu
+ */
+public class ConnectionBuilder {
+
+    private String dbms;
+    private String serverName;
+    private int port;
+    private String dbName;
+    private String username;
+    private String password;
+
+    /**
+     * Sets up a ConnectionBuilder for PostgreSQL on localhost, port 5432. The
+     * username, password and database name must still be supplied.
+     */
+    public ConnectionBuilder() {
+        dbms = "postgresql";
+        serverName = "localhost";
+        port = 5432;
+    }
+
+    /**
+     * @param dbms the JDBC sub-protocol placed directly after {@code jdbc:}
+     * @return this builder
+     */
+    public ConnectionBuilder setDbms(String dbms) {
+        this.dbms = dbms;
+        return this;
+    }
+
+    /**
+     * @param serverName host name of the database server
+     * @return this builder
+     */
+    public ConnectionBuilder setServerName(String serverName) {
+        this.serverName = serverName;
+        return this;
+    }
+
+    /**
+     * @param port TCP port of the database server
+     * @return this builder
+     */
+    public ConnectionBuilder setPort(int port) {
+        this.port = port;
+        return this;
+    }
+
+    /**
+     * @param dbName name of the database to connect to
+     * @return this builder
+     */
+    public ConnectionBuilder setDbName(String dbName) {
+        this.dbName = dbName;
+        return this;
+    }
+
+    /**
+     * @param username user to authenticate as
+     * @return this builder
+     */
+    public ConnectionBuilder setUsername(String username) {
+        this.username = username;
+        return this;
+    }
+
+    /**
+     * @param password password of the user
+     * @return this builder
+     */
+    public ConnectionBuilder setPassword(String password) {
+        this.password = password;
+        return this;
+    }
+
+    /**
+     * Opens a new connection with the current settings.
+     *
+     * @return a new connection; closing it is up to the caller
+     * @throws SQLException if no database name was set or the driver manager
+     * cannot open the connection
+     */
+    public Connection create() throws SQLException {
+        // without this check the URL would silently end in "/null"
+        if (dbName == null) {
+            throw new SQLException("No database name set; call setDbName first");
+        }
+        String url = "jdbc:" + dbms + "://" + serverName + ":" + port + "/" + dbName;
+        return DriverManager.getConnection(url, username, password);
+    }
+}
diff --git a/src/database/NamedPreparedStatement.java b/src/database/NamedPreparedStatement.java
new file mode 100644
index 0000000..ebb775b
--- /dev/null
+++ b/src/database/NamedPreparedStatement.java
@@ -0,0 +1,107 @@
+package database;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.sql.Timestamp;
+import java.sql.Types;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.List;
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.joda.time.DateTime;
+
+/**
+ * Wraps a {@link PreparedStatement} so that parameters can be written as
+ * {@code :name} in the query text instead of positional question marks.
+ * <p>
+ * A name may occur several times; one setter call binds every occurrence. A
+ * cast written directly after a parameter ({@code :id::bigint}) is kept in the
+ * generated SQL, and a colon that directly follows another colon never starts
+ * a parameter. The query text is not parsed as SQL, so a {@code :word} inside
+ * a string literal or comment is replaced as well.
+ *
+ * @author Peter Wu
+ */
+public class NamedPreparedStatement implements AutoCloseable {
+
+    /** Matches {@code :name}, optionally followed by a {@code ::type} cast. */
+    private static final Pattern PARAM_PATTERN
+            = Pattern.compile("(?<!:):(\\w+)(::\\w+)?");
+
+    /** Parameter names in order of appearance; position i is JDBC index i + 1. */
+    private final List<String> fields;
+    private final PreparedStatement stmt;
+
+    /**
+     * Prepares {@code query} on {@code conn} after replacing every named
+     * parameter by a question mark.
+     *
+     * @param conn connection to prepare the statement on
+     * @param query SQL text with {@code :name} parameters
+     * @throws SQLException if the statement cannot be prepared
+     */
+    public NamedPreparedStatement(Connection conn, String query) throws SQLException {
+        fields = new ArrayList<>();
+        Matcher matcher = PARAM_PATTERN.matcher(query);
+        while (matcher.find()) {
+            fields.add(matcher.group(1));
+        }
+        // replaceAll resets the matcher itself; "$2" keeps an optional cast
+        String sql = matcher.replaceAll("?$2");
+        stmt = conn.prepareStatement(sql);
+    }
+
+    /**
+     * Finds all positional (1-based) indices that belong to a parameter name.
+     *
+     * @throws RuntimeException if the name does not occur in the query
+     */
+    private List<Integer> getParamIndices(String fieldName) {
+        List<Integer> indices = new ArrayList<>();
+        for (int i = 0; i < fields.size(); i++) {
+            if (fields.get(i).equals(fieldName)) {
+                indices.add(i + 1);
+            }
+        }
+        if (indices.isEmpty()) {
+            System.err.println(stmt);
+            throw new RuntimeException("Missing " + fieldName + " in query!");
+        }
+        return indices;
+    }
+
+    /**
+     * Binds an integer, or SQL NULL when {@code i} is null.
+     *
+     * @throws SQLException if the driver rejects the value
+     */
+    public void setInt(String name, Integer i) throws SQLException {
+        for (int paramIndex : getParamIndices(name)) {
+            if (i == null) {
+                stmt.setNull(paramIndex, Types.INTEGER);
+            } else {
+                stmt.setInt(paramIndex, i);
+            }
+        }
+    }
+
+    /**
+     * Binds a long, or SQL NULL when {@code l} is null.
+     *
+     * @throws SQLException if the driver rejects the value
+     */
+    public void setLong(String name, Long l) throws SQLException {
+        for (int paramIndex : getParamIndices(name)) {
+            if (l == null) {
+                stmt.setNull(paramIndex, Types.BIGINT);
+            } else {
+                stmt.setLong(paramIndex, l);
+            }
+        }
+    }
+
+    /**
+     * Binds a string; a null string is passed on to the driver unchanged.
+     *
+     * @throws SQLException if the driver rejects the value
+     */
+    public void setString(String name, String str) throws SQLException {
+        for (int paramIndex : getParamIndices(name)) {
+            stmt.setString(paramIndex, str);
+        }
+    }
+
+    /**
+     * Binds a timestamp, or SQL NULL when {@code dt} is null. The calendar
+     * derived from {@code dt} is handed to the driver together with the
+     * instant.
+     *
+     * @throws SQLException if the driver rejects the value
+     */
+    public void setTimestamp(String name, DateTime dt) throws SQLException {
+        List<Integer> indices = getParamIndices(name);
+        if (dt == null) {
+            // same null handling as setInt and setLong instead of an NPE
+            for (int paramIndex : indices) {
+                stmt.setNull(paramIndex, Types.TIMESTAMP);
+            }
+            return;
+        }
+        Timestamp tsp = new Timestamp(dt.getMillis());
+        Calendar calendar = dt.toCalendar(Locale.ENGLISH);
+        for (int paramIndex : indices) {
+            stmt.setTimestamp(paramIndex, tsp, calendar);
+        }
+    }
+
+    /**
+     * Binds a boolean.
+     *
+     * @throws SQLException if the driver rejects the value
+     */
+    public void setBoolean(String name, boolean b) throws SQLException {
+        for (int paramIndex : getParamIndices(name)) {
+            stmt.setBoolean(paramIndex, b);
+        }
+    }
+
+    /**
+     * @return the wrapped statement, e.g. for executing a query
+     */
+    public PreparedStatement getStmt() {
+        return stmt;
+    }
+
+    /**
+     * Executes the statement as an update. On failure the error and the
+     * statement are written to stderr before the exception is rethrown.
+     *
+     * @throws SQLException if the update fails
+     */
+    public void executeUpdate() throws SQLException {
+        try {
+            getStmt().executeUpdate();
+        } catch (SQLException ex) {
+            System.err.println("Query error: " + ex.getMessage());
+            System.err.println(stmt);
+            throw ex;
+        }
+    }
+
+    /**
+     * Closes the wrapped statement, which allows use in try-with-resources.
+     *
+     * @throws SQLException if closing fails
+     */
+    @Override
+    public void close() throws SQLException {
+        stmt.close();
+    }
+}
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index bc81440..831231a 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -3,31 +3,122 @@
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
-
package main;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.HashMap;
+import java.util.Scanner;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
/**
*
* @author s123188
*/
public class Analyzor {
- // test is the tweet text you are going to analyze
- String[] testlist = test.split("\\s+"); // text splitted into separate words
- double positiverate = 0; // positive rating
- for (String word : testlist) { // Rate the text with each word with uni
- if (unimap.containsKey(word)) {
- positiverate += unimap.get(word);
- }
+ //maps for the lexicons
+ HashMap<String, Double> unimap = new HashMap<String, Double>(); // Map for uni
+ HashMap<String, Double> bimap = new HashMap<String, Double>(); // Map for bi
+
+ //the resultset of the query or the import
+ ResultSet data;
+
+ /**
+ * Reads the unigram and bigram lexicons and stores their scores in unimap
+ * and bimap.
+ *
+ * Both files are opened by relative name, so they are resolved against the
+ * working directory of the process. Entries are added to the existing maps;
+ * nothing is cleared first.
+ *
+ * @throws FileNotFoundException if one of the two lexicon files cannot be
+ * opened
+ */
+ void readLexicon() throws FileNotFoundException{
+
+ File uniFile = new File("unigrams-pmilexicon.txt"); // get uni
+ File biFile = new File("bigrams-pmilexicon.txt"); // get bi
+
+ // NOTE(review): neither Scanner is closed in this method
+ Scanner uniScanner = new Scanner(uniFile);
+ Scanner biScanner = new Scanner(biFile);
+
+ //Fill the map of unigrams
+ // per line: first token is the word, second token its score; whatever
+ // else is on the line is skipped
+ while (uniScanner.hasNext()) {
+ unimap.put(uniScanner.next(), Double.parseDouble(uniScanner.next()));
+ if (uniScanner.hasNextLine()) {
+ uniScanner.nextLine();
}
+ }
- for (int i = 0; i < testlist.length-1; i++) { // Rate text with pair words with bi
- String pair = testlist[i] + " " + testlist[i+1];
- if (bimap.containsKey(pair)) {
- positiverate += bimap.get(pair);
- }
+ //fill the map of bigrams
+ // per line: two tokens joined by one space form the key, the third token
+ // is the score; the rest of the line is skipped
+ while (biScanner.hasNext()) {
+ bimap.put(biScanner.next() + " " + biScanner.next(), Double.parseDouble(biScanner.next()));
+ if (biScanner.hasNextLine()) {
+ biScanner.nextLine();
}
+ }
+ }
+
+    /**
+     * Runs a query on the database configured in Main.cb and keeps the result
+     * in {@code data} for the analysis commands.
+     * <p>
+     * The rows are copied into a disconnected row set before the connection is
+     * closed, so {@code data} stays readable afterwards. The whole result is
+     * therefore held in memory. When the query fails, {@code data} is left
+     * unchanged and the error is reported on stderr.
+     *
+     * @param query the SQL text to execute
+     */
+    void Query(String query) {
+        try (Connection connection = Main.cb.create();
+                PreparedStatement statement = connection.prepareStatement(query);
+                ResultSet rows = statement.executeQuery()) {
+            // a plain ResultSet dies with its connection; a CachedRowSet does not
+            javax.sql.rowset.CachedRowSet cached
+                    = javax.sql.rowset.RowSetProvider.newFactory().createCachedRowSet();
+            cached.populate(rows);
+            data = cached;
+        } catch (SQLException ex) {
+            // covers connection problems as well as errors in the query itself
+            System.err.println("query failed: " + ex.getMessage());
+        }
+    }
- System.out.println(test + ": " + (int) (positiverate * 10));
- // print rate as int. Alter to return if you like
+    /**
+     * Rates every remaining row of {@code data} on positivity and prints the
+     * result. This is just a base version.
+     * <p>
+     * The rating of a text is the sum of the lexicon scores of all its words
+     * (unigrams) and of all pairs of neighbouring words (bigrams); words and
+     * pairs that are not in a lexicon count as zero. The text is printed
+     * followed by the rating times ten, truncated to an int. Rows whose
+     * {@code text} column is NULL are skipped.
+     * <p>
+     * Nothing is analyzed when no query was run yet or when the lexicons
+     * cannot be read.
+     */
+    void sentimentAnalysis() {
+        if (data == null) {
+            System.err.println("no dataset available: query first");
+            return;
+        }
+
+        try {
+            readLexicon();
+        } catch (FileNotFoundException ex) {
+            System.out.println("could not find the lexicons, please try again");
+            return;
+        }
+
+        try {
+            // for all tuples
+            while (data.next()) {
+                String text = data.getString("text");
+                if (text == null) {
+                    // SQL NULL: there is nothing to rate
+                    continue;
+                }
+                String[] words = text.split("\\s+");
+                double positiverate = 0;
+
+                // rate the text with unigrams
+                for (String word : words) {
+                    Double value = unimap.get(word);
+                    if (value != null) {
+                        positiverate += value;
+                    }
+                }
+                // rate the text with bigrams
+                for (int i = 0; i < words.length - 1; i++) {
+                    Double value = bimap.get(words[i] + " " + words[i + 1]);
+                    if (value != null) {
+                        positiverate += value;
+                    }
+                }
+
+                // prints the rate
+                System.out.println(text + ": " + (int) (positiverate * 10));
+            }
+        } catch (SQLException ex) {
+            System.err.println("could not read the text of a tweet: " + ex.getMessage());
+        }
+    }
}
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
new file mode 100644
index 0000000..82478ec
--- /dev/null
+++ b/src/main/FarmShell.java
@@ -0,0 +1,193 @@
+/*
+ * To change this license header, choose License Headers in Project Properties.
+ * To change this template file, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package main;
+
+import java.io.IOException;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Arrays;
+import java.util.NoSuchElementException;
+import java.util.Scanner;
+import java.util.Set;
+
+/**
+ * Small command shell that hands textual commands to an {@link Analyzor}.
+ * <p>
+ * Commands can come from stdin ({@link #process_forever()}) or be passed in
+ * directly ({@link #execute(String)}, {@link #execute(String[])}). The same
+ * Analyzor is used for all commands of one shell, so the result of a
+ * {@code query} command is still available to a later {@code sentiment}
+ * command.
+ *
+ * @author s123188
+ */
+public class FarmShell {
+
+    /** Header lines of the help output. */
+    private static final String[] HELP = {
+        "Interactive TweetShell",
+        "",
+        "Available commands:"
+    };
+
+    private final Scanner scanner = new Scanner(System.in);
+
+    /** Created on first use and then shared by all commands. */
+    Analyzor analyzor;
+
+    /**
+     * Processes commands from stdin until the exit command is received or EOF.
+     * A line consisting of a single dot repeats the last non-empty line.
+     *
+     * @throws NoSuchElementException always, once no more commands are read;
+     * this is the same signal the exit command uses
+     */
+    public void process_forever() {
+        System.err.println("Entering interactive shell, type 'help' for help "
+                + "or 'exit' to leave. '.' repeats the previous interactive "
+                + "command.");
+        // TODO: print a prompt before reading each command
+        String lastLine = "";
+        while (scanner.hasNextLine()) {
+            String line = scanner.nextLine().trim();
+            // repeat last command
+            if (line.equals(".")) {
+                line = lastLine;
+            }
+            if (!execute(line)) {
+                // requested to terminate
+                break;
+            }
+            if (!line.isEmpty()) {
+                lastLine = line;
+            }
+        }
+        // NOTE(review): thrown after a regular exit as well as on EOF; kept
+        // because callers may rely on it to notice that the shell has ended
+        throw new NoSuchElementException();
+    }
+
+    /**
+     * Executes one command line. The first word is the command, the rest of
+     * the line is passed on as its single parameter.
+     *
+     * @param cmd the command line; blank lines are ignored
+     * @return true if more commands are allowed to be executed, false
+     * otherwise.
+     */
+    public boolean execute(String cmd) {
+        String[] args = cmd.trim().split("\\s+", 2);
+        if (args[0].isEmpty()) {
+            return true;
+        }
+        // non-empty command, let's see whether it makes sense?
+        return execute(args);
+    }
+
+    /**
+     * Executes a command with optional parameters.
+     *
+     * @param args An array with the first argument containing the command with
+     * optional parameters in following arguments. An empty array is ignored.
+     * @return true if more commands are allowed to be executed, false
+     * otherwise.
+     */
+    public boolean execute(String[] args) {
+        if (args == null || args.length == 0) {
+            // nothing to execute, e.g. a program started without arguments
+            return true;
+        }
+        if (analyzor == null) {
+            // one Analyzor for the whole session: a fresh one per command
+            // would throw away the query result before it can be analyzed
+            analyzor = new Analyzor();
+        }
+
+        try {
+            Command command = Command.fromString(args[0]);
+            String[] params = Arrays.copyOfRange(args, 1, args.length);
+            execute(command, params);
+        } catch (IllegalArgumentException ex) {
+            System.err.println(ex.getMessage());
+        } catch (IOException ex) {
+            System.err.println("Command " + args[0] + " failed with " + ex);
+            ex.printStackTrace();
+        } catch (NoSuchElementException ex) {
+            // thrown by the "exit" command to signal exit
+            return false;
+        }
+        // another satisfied customer, next!
+        return true;
+    }
+
+    /**
+     * The commands of the shell. The constant name is the word the user types.
+     */
+    enum Command {
+
+        query("make a query to the database; needed to do analysis", 1),
+        filterbots("marks all users as bot or not"),
+        sentiment("analyzes all tweets on positivity (about a brand)"),
+        exit("Returns to shell"),
+        help("Get help");
+
+        private final String description;
+        /** Minimum number of parameters the command needs. */
+        private final int paramCount;
+
+        Command(String description) {
+            this(description, 0);
+        }
+
+        Command(String description, int paramCount) {
+            this.description = description;
+            this.paramCount = paramCount;
+        }
+
+        public String getDescription() {
+            return description;
+        }
+
+        public int getParamCount() {
+            return paramCount;
+        }
+
+        /**
+         * Looks up a command by its exact (case-sensitive) name.
+         *
+         * @throws IllegalArgumentException if there is no such command
+         */
+        public static Command fromString(String command) {
+            for (Command cmd : values()) {
+                if (cmd.name().equals(command)) {
+                    return cmd;
+                }
+            }
+            throw new IllegalArgumentException("Unrecognized command. Hint: help");
+        }
+    }
+
+    /**
+     * Runs a single, already recognized command.
+     *
+     * @throws IllegalArgumentException if fewer parameters are given than the
+     * command needs
+     * @throws NoSuchElementException for the exit command
+     * @throws IOException declared for commands that do I/O
+     */
+    private void execute(Command command, String[] params) throws IOException {
+        if (params.length < command.getParamCount()) {
+            throw new IllegalArgumentException("Expected "
+                    + command.getParamCount() + " parameters, got only "
+                    + params.length);
+        }
+        switch (command) {
+            case query:
+                analyzor.Query(params[0]);
+                break;
+            case filterbots:
+                System.out.println("not yet implemented");
+                break;
+            case sentiment:
+                analyzor.sentimentAnalysis();
+                break;
+            case help:
+                printHelp();
+                break;
+            case exit:
+                throw new NoSuchElementException();
+            default:
+                throw new AssertionError(command.name());
+        }
+    }
+
+    /** Prints the help header followed by one line per command. */
+    private void printHelp() {
+        for (String line : HELP) {
+            System.out.println(line);
+        }
+        for (Command cmd : Command.values()) {
+            System.out.printf(" %-10s", cmd.name());
+            if (!cmd.getDescription().isEmpty()) {
+                System.out.print(" " + cmd.getDescription());
+            }
+            if (cmd.getParamCount() == 1) {
+                System.out.print(" (1 arg)");
+            } else if (cmd.getParamCount() > 1) {
+                System.out.printf(" (%d args)", cmd.getParamCount());
+            }
+            System.out.println();
+        }
+    }
+}
diff --git a/src/main/Main.java b/src/main/Main.java
index 81b6df9..1ba2579 100644
--- a/src/main/Main.java
+++ b/src/main/Main.java
@@ -1,7 +1,9 @@
package main;
+import database.ConnectionBuilder;
import java.io.File;
import java.io.FileNotFoundException;
+import java.io.IOException;
import java.util.HashMap;
import java.util.Scanner;
@@ -12,39 +14,61 @@ import java.util.Scanner;
*/
public class Main {
- /**
- * @param args the command line arguments
- */
-
- /**
- * TODO: parametrized query
- * choose query
- * choose filters (analyzor)
- */
-
-
-public static void main(String[] args) throws FileNotFoundException {
- HashMap<String, Double> unimap = new HashMap<String, Double>(); // Map for uni
- HashMap<String, Double> bimap = new HashMap<String, Double>(); // Map for bi
-
- File uniFile = new File("unigrams-pmilexicon.txt"); // scan uni
- Scanner uniScanner = new Scanner(uniFile);
- File biFile = new File("bigrams-pmilexicon.txt"); // scan bi
- Scanner biScanner = new Scanner(biFile);
-
- while (uniScanner.hasNext()) { // Set up map with uni
- unimap.put(uniScanner.next(), Double.parseDouble(uniScanner.next()));
- if (uniScanner.hasNextLine()) {
- uniScanner.nextLine();
- }
+ static public ConnectionBuilder cb;
+
+ /**
+ * Program entry point: constructs a Main from the command line arguments.
+ * An IllegalArgumentException thrown by the constructor is reported on
+ * stderr and ends the process with exit status 1.
+ *
+ * @param args the command line arguments
+ */
+ public static void main(String[] args) {
+ // the instance is not used after construction; all work happens in the
+ // constructor
+ Main main;
+ try {
+ main = new Main(args);
+ } catch (IllegalArgumentException ex) {
+ System.err.println(ex.getMessage());
+ System.exit(1);
+ return;
}
+ }
+
+ /**
+ * Sets up the shared ConnectionBuilder cb with default settings, applies
+ * the global options found in args and then lets a FarmShell execute args.
+ *
+ * An IllegalArgumentException raised while the shell executes is reported
+ * on stderr and ends the process with exit status 1; one raised by
+ * parseGlobalOptions is passed on to the caller.
+ *
+ * @param args the command line arguments
+ */
+ public Main(String[] args) {
+ // NOTE(review): default credentials are hard-coded here; they can be
+ // overridden with the --db* options handled by parseGlobalOptions
+ cb = new ConnectionBuilder()
+ .setServerName("localhost")
+ .setUsername("twitter")
+ .setPassword("2IOC02")
+ .setDbName("twitter");
+ parseGlobalOptions(args);
+ try {
- while (biScanner.hasNext()) { // Set up map with bi
- bimap.put(biScanner.next() + " " + biScanner.next(), Double.parseDouble(biScanner.next()));
- if (biScanner.hasNextLine()) {
- biScanner.nextLine();
+ FarmShell shell = new FarmShell();
+ // NOTE(review): the complete args array is passed on, global options
+ // included, and the shell takes args[0] as the command name - confirm
+ // that this is intended
+ shell.execute(args);
+ } catch (IllegalArgumentException ex) {
+ System.err.println(ex.getMessage());
+ System.exit(1);
+ }
+ }
+
+ /**
+ * Applies the database options in args to the shared ConnectionBuilder cb.
+ *
+ * Recognized options are --dbhost, --dbuser, --dbpass, --dbport and
+ * --dbname; each takes the following argument as its value. Arguments that
+ * do not start with a dash are skipped.
+ *
+ * @param args the command line arguments
+ * @throws IllegalArgumentException for an unknown argument that starts with
+ * a dash; Integer.valueOf also throws one (a NumberFormatException) when
+ * the --dbport value is not a number
+ */
+ private void parseGlobalOptions(String[] args)
+ throws IllegalArgumentException {
+ /* parse global options */
+ for (int i = 0; i < args.length; i++) {
+ // ++i moves on to the option's value, so the value is not looked at
+ // again as an option in the next iteration
+ if ("--dbhost".equals(args[i])) {
+ cb.setServerName(getArg(args, ++i, "--dbhost"));
+ } else if ("--dbuser".equals(args[i])) {
+ cb.setUsername(getArg(args, ++i, "--dbuser"));
+ } else if ("--dbpass".equals(args[i])) {
+ cb.setPassword(getArg(args, ++i, "--dbpass"));
+ } else if ("--dbport".equals(args[i])) {
+ cb.setPort(Integer.valueOf(getArg(args, ++i, "--dbport")));
+ } else if ("--dbname".equals(args[i])) {
+ cb.setDbName(getArg(args, ++i, "--dbname"));
+ } else if (args[i].startsWith("-")) {
+ throw new IllegalArgumentException("Invalid option: " + args[i]);
}
}
}
-
+
+    /**
+     * Returns the value that belongs to a command line option.
+     * <p>
+     * A missing value is reported with an IllegalArgumentException, like the
+     * other option errors; main prints the message to stderr and exits with
+     * status 1.
+     *
+     * @param params all command line arguments
+     * @param index position at which the value is expected
+     * @param name name of the option, used in the error message
+     * @return the argument at {@code index}
+     * @throws IllegalArgumentException if there is no argument at
+     * {@code index}
+     */
+    private String getArg(String[] params, int index, String name) {
+        if (index >= params.length) {
+            throw new IllegalArgumentException(
+                    "Missing argument for parameter " + name);
+        }
+        return params[index];
+    }
}