summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- lib/postgresql-9.3-1101.jdbc41.jar bin 0 -> 588901 bytes
-rw-r--r-- nbproject/project.properties 2
-rw-r--r-- src/main/Analyzor.java 112
-rw-r--r-- src/main/FarmShell.java 48
-rw-r--r-- src/main/Main.java 31
5 files changed, 84 insertions, 109 deletions
diff --git a/lib/postgresql-9.3-1101.jdbc41.jar b/lib/postgresql-9.3-1101.jdbc41.jar
new file mode 100644
index 0000000..06fd998
--- /dev/null
+++ b/lib/postgresql-9.3-1101.jdbc41.jar
Binary files differ
diff --git a/nbproject/project.properties b/nbproject/project.properties
index 1dbb4ec..fd48dd8 100644
--- a/nbproject/project.properties
+++ b/nbproject/project.properties
@@ -30,7 +30,7 @@ dist.javadoc.dir=${dist.dir}/javadoc
endorsed.classpath=
excludes=
file.reference.joda-time-2.3.jar=lib/joda-time-2.3.jar
-file.reference.postgresql-9.3-1101.jdbc41.jar=D:\\Documents\\NetBeansProjects\\Datafiller\\lib\\postgresql-9.3-1101.jdbc41.jar
+file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar
includes=**
jar.compress=false
javac.classpath=\
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 3eb93f9..e7e26fe 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -1,14 +1,7 @@
-/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
package main;
-import database.ConnectionBuilder;
import database.NamedPreparedStatement;
import database.QueryUtils;
-import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.Connection;
@@ -19,8 +12,8 @@ import java.util.HashMap;
import java.util.Scanner;
/**
- * The sentiment analysis class that rates tweets based on a unigram and
- * bigram set of weights.
+ * The sentiment analysis class that rates tweets based on a unigram and bigram
+ * set of weights.
*/
public class Analyzor {
@@ -28,78 +21,70 @@ public class Analyzor {
* The map that matches single words to their weights.
*/
private final HashMap<String, Double> unimap = new HashMap();
-
+
/**
* The map that matches word pairs to their weights.
*/
private final HashMap<String, Double> bimap = new HashMap();
- /**
- * The results of the query (can be null)
- */
- private ResultSet data = null;
-
- /**
- * The connection to the database.
- */
- private Connection connection;
-
- /**
- * The connection builder to initialize the connection.
- */
- private ConnectionBuilder builder;
+ private ResultSet data;
+ private final Connection connection;
- public Analyzor(ConnectionBuilder builder) {
- this.builder = builder;
+ Analyzor(Connection connection) {
+ this.connection = connection;
}
//reads the lexicons
- private void readLexicon() throws FileNotFoundException {
- //TODO: fix? hardcoded filenames.
- Scanner uniScanner = new Scanner(new File("unigrams-pmilexicon.txt"));
- Scanner biScanner = new Scanner(new File("bigrams-pmilexicon.txt"));
-
- //Fill the map of unigrams
- while (uniScanner.hasNext()) {
- unimap.put(uniScanner.next(), Double.parseDouble(uniScanner.next()));
- if (uniScanner.hasNextLine()) {
- uniScanner.nextLine();
- }
- // NumberFormatException is not handled.
+ void readLexicon() throws FileNotFoundException {
+ if (!unimap.isEmpty()) {
+ // data is already read.
+ return;
}
+ // A unigram is in the format (WS = whitespace):
+ // word <WS> rating <WS> ??? <WS> ??
+ // A bigram has two WS-separated words instead of one.
+ try (Scanner uniScanner = new Scanner("unigrams-pmilexicon.txt");
+ Scanner biScanner = new Scanner("bigrams-pmilexicon.txt");) {
+ //Fill the map of unigrams
+ while (uniScanner.hasNext()) {
+ String words = uniScanner.next();
+ unimap.put(words.toLowerCase(), uniScanner.nextDouble());
+ if (uniScanner.hasNextLine()) {
+ uniScanner.nextLine();
+ }
+ }
- //fill the map of bigrams
- while (biScanner.hasNext()) {
- bimap.put(biScanner.next() + " " + biScanner.next(), Double.parseDouble(biScanner.next()));
- if (biScanner.hasNextLine()) {
- biScanner.nextLine();
+ //fill the map of bigrams
+ while (biScanner.hasNext()) {
+ String words = biScanner.next() + " " + biScanner.next();
+ bimap.put(words.toLowerCase(), biScanner.nextDouble());
+ if (biScanner.hasNextLine()) {
+ biScanner.nextLine();
+ }
}
- // NumberFormatException is not handled.
}
}
/**
* Executes a query that the analyzer can analyze.
- *
+ *
* @param query The query string to execute.
* @throws SQLException When database connection isn't available.
*/
public void query(String query) throws SQLException {
-
PreparedStatement statement;
//make a connection to the database and execute the query
- connection = builder.create();
statement = connection.prepareStatement(query);
data = statement.executeQuery();
}
/**
* Run a sentiment analysis and fill the database with the output.
- *
+ *
* @throws SQLException
- * @throws IOException
+ * @throws IOException
*/
- public void sentimentAnalysis(String query) throws SQLException, IOException {
+ public void sentimentAnalysis(String query) throws SQLException, IOException {
query(query);
//read the lexicons
@@ -119,7 +104,7 @@ public class Analyzor {
while (data.next()) {
//get the text
text = data.getString("text");
- text = replacePunct(text);
+ text = splitPunctToWords(text);
// text is the tweet text you are going to analyze
String[] words = text.split("\\s+"); // text splitted into separate words
double positiverate = 0; // positive rating
@@ -151,36 +136,35 @@ public class Analyzor {
//makes a wordcloud of the tweets in the ResultSet data
void makeWordCloud(String query) throws SQLException {
-
+
query(query);
//go to the start of the ResultSet data
if (data == null) {
System.err.println("data is empty, try querying first");
return;
}
-
+
//make the hashmap with the words and their frequency
HashMap<String, Integer> wordcloud = new HashMap<>();
-
+
String text;
String[] words;
Integer value;
-
- while(data.next()){
+
+ while (data.next()) {
//get the text
text = data.getString("text");
//remove punctuation, convert to lowercase and split on words
text = removePunct(text);
text = text.toLowerCase();
words = text.split("\\s+");
-
+
//count the words
- for(String word : words){
+ for (String word : words) {
value = wordcloud.get(word);
- if(value == null){
+ if (value == null) {
wordcloud.put(word, 1);
- }
- else{
+ } else {
wordcloud.put(word, value++);
}
}
@@ -189,17 +173,17 @@ public class Analyzor {
//replaces punctuation so it will be split
//also removes urls
- private String replacePunct(String text) {
+ private String splitPunctToWords(String text) {
text = text.replaceAll("https?://\\S*", "");
text = text.replaceAll("[!?):;\"']", " $0");
text = text.replaceAll("[.,-](\\s|$)", " $0");
text = text.replaceAll("\\s[(\"']", "$0 ");
return text;
}
-
+
//removes punctuation
//also removes urls
- private String removePunct(String text){
+ private String removePunct(String text) {
text = text.replaceAll("https?://\\S*", "");
text = text.replaceAll("[.,!?()-:;\"']", " ");
return text;
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
index f3c8011..9dd1167 100644
--- a/src/main/FarmShell.java
+++ b/src/main/FarmShell.java
@@ -2,6 +2,7 @@ package main;
import database.ConnectionBuilder;
import java.io.IOException;
+import java.sql.Connection;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.NoSuchElementException;
@@ -18,18 +19,25 @@ public class FarmShell {
*/
private final Scanner scanner = new Scanner(System.in);
- /**
- * The sentiment analysis class.
- */
- private final Analyzor analyzor;
+ private Analyzor cached_analyzor;
+ private final ConnectionBuilder dbConnectionBuilder;
- /**
- * @param builder The connection builder for the database.
- */
- public FarmShell(final ConnectionBuilder builder) {
- analyzor = new Analyzor(builder);
+ FarmShell(ConnectionBuilder dbConnectionBuilder) {
+ this.dbConnectionBuilder = dbConnectionBuilder;
+ }
+
+ private void printPrompt() {
+ System.out.print("$ ");
+ }
+
+ private Analyzor getAnalyzor() throws SQLException {
+ if (cached_analyzor == null) {
+ Connection dbCon = dbConnectionBuilder.create();
+ cached_analyzor = new Analyzor(dbCon);
+ }
+ return cached_analyzor;
}
-
+
/**
* Processes commands from stdin until the exit command is received or EOF.
*/
@@ -62,7 +70,7 @@ public class FarmShell {
/**
* Execute a single command.
- *
+ *
* @param cmd A single line of the command.
* @return Whether to continue or exit the application.
*/
@@ -84,7 +92,6 @@ public class FarmShell {
* otherwise.
*/
public boolean execute(String[] args) {
-
try {
Command command = Command.fromString(args[0]);
String[] params = Arrays.copyOfRange(args, 1, args.length);
@@ -97,13 +104,13 @@ public class FarmShell {
} catch (NoSuchElementException ex) {
// thrown by the "exit" command to signal exit
return false;
- } catch (SQLException ex){
+ } catch (SQLException ex) {
System.err.println("such " + ex);
}
// another satisfied customer, next!
return true;
}
-
+
private void execute(Command command, String[] params) throws SQLException, IOException {
if (params.length < command.getParamCount()) {
throw new IllegalArgumentException("Expected "
@@ -111,17 +118,14 @@ public class FarmShell {
+ params.length);
}
switch (command) {
- case query:
- System.err.println("isn't supported anymore, now enter query after analysis type");
- break;
case filterbots:
System.out.println("not yet implemented");
break;
case sentiment:
- analyzor.sentimentAnalysis(params[0]);
+ getAnalyzor().sentimentAnalysis(params[0]);
break;
case wordcloud:
- analyzor.makeWordCloud(params[0]);
+ getAnalyzor().makeWordCloud(params[0]);
break;
case help:
for (String line : HELP) {
@@ -146,13 +150,9 @@ public class FarmShell {
throw new AssertionError(command.name());
}
}
-
- private void printPrompt() {
- System.out.print("$ ");
- }
enum Command {
- query("make a query to the database; needed to do analysis", 1),
+
filterbots("marks all users as bot or not", 1),
sentiment("analyzes all tweets on positivity (about a brand)", 1),
wordcloud("makes a wordcloud of the text of the tweets", 1),
diff --git a/src/main/Main.java b/src/main/Main.java
index 9102ecd..02673b2 100644
--- a/src/main/Main.java
+++ b/src/main/Main.java
@@ -1,17 +1,3 @@
-/*
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE Version 2, December 2004
- *
- * Copyright (C) 2004 Sam Hocevar
- *
- * Everyone is permitted to copy and distribute verbatim or modified copies
- * of this license document, and changing it is allowed as long as the name is
- * changed.
- *
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING,
- * DISTRIBUTION AND MODIFICATION
- *
- * 0. You just DO WHAT THE FUCK YOU WANT TO.
- */
package main;
import database.ConnectionBuilder;
@@ -22,15 +8,19 @@ import java.util.Arrays;
*/
public class Main {
+ private final ConnectionBuilder cb;
+
public static void main(String[] args) {
+ Main main;
try {
- Main main = new Main(args);
+ main = new Main(args);
} catch (IllegalArgumentException ex) {
System.err.println(ex.getMessage());
+ System.exit(1);
+ return;
}
+ main.run();
}
-
- private final ConnectionBuilder cb;
private String[] leftover_params;
@@ -40,11 +30,12 @@ public class Main {
.setUsername("twitter")
.setPassword("2IOC02")
.setDbName("twitter");
-
+
parseGlobalOptions(args);
-
- try {
+ }
+ public void run() {
+ try {
FarmShell shell = new FarmShell(cb);
if (leftover_params != null && leftover_params.length > 0) {
shell.execute(leftover_params);