diff options
-rw-r--r-- | lib/postgresql-9.3-1101.jdbc41.jar | bin | 0 -> 588901 bytes | |||
-rw-r--r-- | nbproject/project.properties | 2 | ||||
-rw-r--r-- | src/main/Analyzor.java | 112 | ||||
-rw-r--r-- | src/main/FarmShell.java | 48 | ||||
-rw-r--r-- | src/main/Main.java | 31 |
5 files changed, 84 insertions, 109 deletions
diff --git a/lib/postgresql-9.3-1101.jdbc41.jar b/lib/postgresql-9.3-1101.jdbc41.jar Binary files differnew file mode 100644 index 0000000..06fd998 --- /dev/null +++ b/lib/postgresql-9.3-1101.jdbc41.jar diff --git a/nbproject/project.properties b/nbproject/project.properties index 1dbb4ec..fd48dd8 100644 --- a/nbproject/project.properties +++ b/nbproject/project.properties @@ -30,7 +30,7 @@ dist.javadoc.dir=${dist.dir}/javadoc endorsed.classpath=
excludes=
file.reference.joda-time-2.3.jar=lib/joda-time-2.3.jar
-file.reference.postgresql-9.3-1101.jdbc41.jar=D:\\Documents\\NetBeansProjects\\Datafiller\\lib\\postgresql-9.3-1101.jdbc41.jar
+file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar
includes=**
jar.compress=false
javac.classpath=\
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index 3eb93f9..e7e26fe 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -1,14 +1,7 @@ -/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
package main;
-import database.ConnectionBuilder;
import database.NamedPreparedStatement;
import database.QueryUtils;
-import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.Connection;
@@ -19,8 +12,8 @@ import java.util.HashMap; import java.util.Scanner;
/**
- * The sentiment analysis class that rates tweets based on a unigram and
- * bigram set of weights.
+ * The sentiment analysis class that rates tweets based on a unigram and bigram
+ * set of weights.
*/
public class Analyzor {
@@ -28,78 +21,70 @@ public class Analyzor { * The map that matches single words to their weights.
*/
private final HashMap<String, Double> unimap = new HashMap();
-
+
/**
* The map that matches word pairs to their weights.
*/
private final HashMap<String, Double> bimap = new HashMap();
- /**
- * The results of the query (can be null)
- */
- private ResultSet data = null;
-
- /**
- * The connection to the database.
- */
- private Connection connection;
-
- /**
- * The connection builder to initialize the connection.
- */
- private ConnectionBuilder builder;
+ private ResultSet data;
+ private final Connection connection;
- public Analyzor(ConnectionBuilder builder) {
- this.builder = builder;
+ Analyzor(Connection connection) {
+ this.connection = connection;
}
//reads the lexicons
- private void readLexicon() throws FileNotFoundException {
- //TODO: fix? hardcoded filenames.
- Scanner uniScanner = new Scanner(new File("unigrams-pmilexicon.txt"));
- Scanner biScanner = new Scanner(new File("bigrams-pmilexicon.txt"));
-
- //Fill the map of unigrams
- while (uniScanner.hasNext()) {
- unimap.put(uniScanner.next(), Double.parseDouble(uniScanner.next()));
- if (uniScanner.hasNextLine()) {
- uniScanner.nextLine();
- }
- // NumberFormatException is not handled.
+ void readLexicon() throws FileNotFoundException {
+ if (!unimap.isEmpty()) {
+ // data is already read.
+ return;
}
+ // A unigram is in the format (WS = whitespace):
+ // word <WS> rating <WS> ??? <WS> ??
+ // A bigram has two WS-separated words instead of one.
+ try (Scanner uniScanner = new Scanner("unigrams-pmilexicon.txt");
+ Scanner biScanner = new Scanner("bigrams-pmilexicon.txt");) {
+ //Fill the map of unigrams
+ while (uniScanner.hasNext()) {
+ String words = uniScanner.next();
+ unimap.put(words.toLowerCase(), uniScanner.nextDouble());
+ if (uniScanner.hasNextLine()) {
+ uniScanner.nextLine();
+ }
+ }
- //fill the map of bigrams
- while (biScanner.hasNext()) {
- bimap.put(biScanner.next() + " " + biScanner.next(), Double.parseDouble(biScanner.next()));
- if (biScanner.hasNextLine()) {
- biScanner.nextLine();
+ //fill the map of bigrams
+ while (biScanner.hasNext()) {
+ String words = biScanner.next() + " " + biScanner.next();
+ bimap.put(words.toLowerCase(), biScanner.nextDouble());
+ if (biScanner.hasNextLine()) {
+ biScanner.nextLine();
+ }
}
- // NumberFormatException is not handled.
}
}
/**
* Executes a query that the analyzer can analyze.
- *
+ *
* @param query The query string to execute.
* @throws SQLException When database connection isn't available.
*/
public void query(String query) throws SQLException {
-
PreparedStatement statement;
//make a connection to the database and execute the query
- connection = builder.create();
statement = connection.prepareStatement(query);
data = statement.executeQuery();
}
/**
* Run a sentiment analysis and fill the database with the output.
- *
+ *
* @throws SQLException
- * @throws IOException
+ * @throws IOException
*/
- public void sentimentAnalysis(String query) throws SQLException, IOException {
+ public void sentimentAnalysis(String query) throws SQLException, IOException {
query(query);
//read the lexicons
@@ -119,7 +104,7 @@ public class Analyzor { while (data.next()) {
//get the text
text = data.getString("text");
- text = replacePunct(text);
+ text = splitPunctToWords(text);
// test is the tweet text you are going to analyze
String[] words = text.split("\\s+"); // text splitted into separate words
double positiverate = 0; // positive rating
@@ -151,36 +136,35 @@ public class Analyzor { //makes a wordcloud of the tweets in the ResultSet data
void makeWordCloud(String query) throws SQLException {
-
+
query(query);
//go to the start of the ResultSet data
if (data == null) {
System.err.println("data is empty, try querying first");
return;
}
-
+
//make the hashmap with the words and their frequency
HashMap<String, Integer> wordcloud = new HashMap<>();
-
+
String text;
String[] words;
Integer value;
-
- while(data.next()){
+
+ while (data.next()) {
//get the text
text = data.getString("text");
//remove punctuation, convert to lowercase and split on words
text = removePunct(text);
text = text.toLowerCase();
words = text.split("\\s+");
-
+
//count the words
- for(String word : words){
+ for (String word : words) {
value = wordcloud.get(word);
- if(value == null){
+ if (value == null) {
wordcloud.put(word, 1);
- }
- else{
+ } else {
wordcloud.put(word, value++);
}
}
@@ -189,17 +173,17 @@ public class Analyzor { //replaces punctuation so it will be splitted
//also removes urls
- private String replacePunct(String text) {
+ private String splitPunctToWords(String text) {
text = text.replaceAll("https?://\\S*", "");
text = text.replaceAll("[!?):;\"']", " $0");
text = text.replaceAll("[.,-](\\s|$)", " $0");
text = text.replaceAll("\\s[(\"']", "$0 ");
return text;
}
-
+
//removes punctuation
//also removes urls
- private String removePunct(String text){
+ private String removePunct(String text) {
text = text.replaceAll("https?://\\S*", "");
text = text.replaceAll("[.,!?()-:;\"']", " ");
return text;
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java index f3c8011..9dd1167 100644 --- a/src/main/FarmShell.java +++ b/src/main/FarmShell.java @@ -2,6 +2,7 @@ package main; import database.ConnectionBuilder;
import java.io.IOException;
+import java.sql.Connection;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.NoSuchElementException;
@@ -18,18 +19,25 @@ public class FarmShell { */
private final Scanner scanner = new Scanner(System.in);
- /**
- * The sentiment analysis class.
- */
- private final Analyzor analyzor;
+ private Analyzor cached_analyzor;
+ private final ConnectionBuilder dbConnectionBuilder;
- /**
- * @param builder The connection builder for the database.
- */
- public FarmShell(final ConnectionBuilder builder) {
- analyzor = new Analyzor(builder);
+ FarmShell(ConnectionBuilder dbConnectionBuilder) {
+ this.dbConnectionBuilder = dbConnectionBuilder;
+ }
+
+ private void printPrompt() {
+ System.out.print("$ ");
+ }
+
+ private Analyzor getAnalyzor() throws SQLException {
+ if (cached_analyzor == null) {
+ Connection dbCon = dbConnectionBuilder.create();
+ cached_analyzor = new Analyzor(dbCon);
+ }
+ return cached_analyzor;
}
-
+
/**
* Processes commands from stdin until the exit command is received or EOF.
*/
@@ -62,7 +70,7 @@ public class FarmShell { /**
* Execute a single commands.
- *
+ *
* @param cmd A single line of the command.
* @return Whether to continue or exit the application.
*/
@@ -84,7 +92,6 @@ public class FarmShell { * otherwise.
*/
public boolean execute(String[] args) {
-
try {
Command command = Command.fromString(args[0]);
String[] params = Arrays.copyOfRange(args, 1, args.length);
@@ -97,13 +104,13 @@ public class FarmShell { } catch (NoSuchElementException ex) {
// thrown by the "exit" command to signal exit
return false;
- } catch (SQLException ex){
+ } catch (SQLException ex) {
System.err.println("such " + ex);
}
// another satisfied customer, next!
return true;
}
-
+
private void execute(Command command, String[] params) throws SQLException, IOException {
if (params.length < command.getParamCount()) {
throw new IllegalArgumentException("Expected "
@@ -111,17 +118,14 @@ public class FarmShell { + params.length);
}
switch (command) {
- case query:
- System.err.println("isn't supported anymore, now enter query after analysis type");
- break;
case filterbots:
System.out.println("not yet implemented");
break;
case sentiment:
- analyzor.sentimentAnalysis(params[0]);
+ getAnalyzor().sentimentAnalysis(params[0]);
break;
case wordcloud:
- analyzor.makeWordCloud(params[0]);
+ getAnalyzor().makeWordCloud(params[0]);
break;
case help:
for (String line : HELP) {
@@ -146,13 +150,9 @@ public class FarmShell { throw new AssertionError(command.name());
}
}
-
- private void printPrompt() {
- System.out.print("$ ");
- }
enum Command {
- query("make a query to the database; needed to do analysis", 1),
+
filterbots("marks all users as bot or not", 1),
sentiment("analyzes all tweets on positivity (about a brand)", 1),
wordcloud("makes a wordcloud of the text of the tweets", 1),
diff --git a/src/main/Main.java b/src/main/Main.java index 9102ecd..02673b2 100644 --- a/src/main/Main.java +++ b/src/main/Main.java @@ -1,17 +1,3 @@ -/* - * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE Version 2, December 2004 - * - * Copyright (C) 2004 Sam Hocevar - * - * Everyone is permitted to copy and distribute verbatim or modified copies - * of this license document, and changing it is allowed as long as the name is - * changed. - * - * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, - * DISTRIBUTION AND MODIFICATION - * - * 0. You just DO WHAT THE FUCK YOU WANT TO. - */ package main; import database.ConnectionBuilder; @@ -22,15 +8,19 @@ import java.util.Arrays; */ public class Main { + private final ConnectionBuilder cb; + public static void main(String[] args) { + Main main; try { - Main main = new Main(args); + main = new Main(args); } catch (IllegalArgumentException ex) { System.err.println(ex.getMessage()); + System.exit(1); + return; } + main.run(); } - - private final ConnectionBuilder cb; private String[] leftover_params; @@ -40,11 +30,12 @@ public class Main { .setUsername("twitter") .setPassword("2IOC02") .setDbName("twitter"); - + parseGlobalOptions(args); - - try { + } + public void run() { + try { FarmShell shell = new FarmShell(cb); if (leftover_params != null && leftover_params.length > 0) { shell.execute(leftover_params); |