summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2014-05-26 12:02:08 +0200
committerPeter Wu <peter@lekensteyn.nl>2014-05-26 12:02:08 +0200
commit7df2ae452a984cb12986b33034557476cb4a1536 (patch)
tree6058fd9c8e5b33fb8545e9cad4b1c0c75f5d54c8
parente051abbfdbf7ff721bf1318bf0b5939741b1f792 (diff)
downloadGoldfarmer-7df2ae452a984cb12986b33034557476cb4a1536.tar.gz
Optimize sentiment analysis
Do not create a new prepared statement every time. Use an optimized UPDATE query. Drop requirement for supplying a query.
-rw-r--r--src/database/NamedPreparedStatement.java11
-rw-r--r--src/main/Analyzor.java33
-rw-r--r--src/main/FarmShell.java11
3 files changed, 39 insertions, 16 deletions
diff --git a/src/database/NamedPreparedStatement.java b/src/database/NamedPreparedStatement.java
index ebb775b..9305d32 100644
--- a/src/database/NamedPreparedStatement.java
+++ b/src/database/NamedPreparedStatement.java
@@ -2,6 +2,7 @@ package database;
import java.sql.Connection;
import java.sql.PreparedStatement;
+import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.sql.Types;
@@ -104,4 +105,14 @@ public class NamedPreparedStatement {
throw ex;
}
}
+
+ public ResultSet executeQuery() throws SQLException {
+ try {
+ return getStmt().executeQuery();
+ } catch (SQLException ex) {
+ System.err.println("Query error: " + ex.getMessage());
+ System.err.println(stmt);
+ throw ex;
+ }
+ }
}
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 9c98a9d..5385a79 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -113,24 +113,33 @@ public class Analyzor {
* @throws IOException
*/
public void sentimentAnalysis(String query) throws SQLException, IOException {
- query(query);
+ NamedPreparedStatement tweetBrandStmt, updateRating;
//read the lexicons
readLexicon();
- //go to the start of te dataset
- if (data == null) {
- System.err.println("data is empty, try querying first");
- return;
+ // if you ever need to re-apply rating, use something like:
+ // UPDATE mentionsbrand SET rating = NULL WHERE ...
+ if (query.isEmpty()) {
+ query = "SELECT t.tweetid, t.text, b.brand FROM tweet t "
+ + "JOIN mentionsbrand b USING (tweetid) "
+ + "WHERE b.rating IS NULL";
}
+ tweetBrandStmt = new NamedPreparedStatement(connection,
+ query);
+ ResultSet tweetBrandResults = tweetBrandStmt.executeQuery();
+
+ updateRating = new NamedPreparedStatement(connection,
+ "UPDATE mentionsbrand SET rating = :rating "
+ + "WHERE tweetid = :tweetid AND brand = :brand");
Double value;
String text;
//for all tuples
- while (data.next()) {
+ while (tweetBrandResults.next()) {
//get the text
- text = data.getString("text");
+ text = tweetBrandResults.getString("text");
text = splitPunctToWords(text);
// test is the tweet text you are going to analyze
String[] words = text.split("\\s+"); // text splitted into separate words
@@ -152,12 +161,10 @@ public class Analyzor {
}
}
//insert the rating into the database
- NamedPreparedStatement m_insertRating;
- m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating);
- QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"), data.getString("brand"), (int) (positiverate * 10));
- m_insertRating.executeUpdate();
- //don't print the rate
- //System.out.println(text + ": " + (int) (positiverate * 10));
+ updateRating.setLong("tweetid", tweetBrandResults.getLong("tweetid"));
+ updateRating.setString("brand", tweetBrandResults.getString("brand"));
+ updateRating.setInt("rating", (int) (positiverate * 10));
+ updateRating.executeUpdate();
}
}
diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java
index 1266fd3..ed1a0ff 100644
--- a/src/main/FarmShell.java
+++ b/src/main/FarmShell.java
@@ -125,7 +125,12 @@ public class FarmShell {
System.out.println("not yet implemented");
break;
case sentiment:
- getAnalyzor().sentimentAnalysis(params[0]);
+ // if there is no query, update all unrated items.
+ if (params.length > 0) {
+ getAnalyzor().sentimentAnalysis(params[0]);
+ } else {
+ getAnalyzor().sentimentAnalysis("");
+ }
break;
case wordcloud:
getAnalyzor().makeWordCloud(params[0]);
@@ -163,10 +168,10 @@ public class FarmShell {
enum Command {
filterbots("marks all users as bot or not", 1),
- sentiment("analyzes all tweets on positivity (about a brand)", 1),
+ sentiment("analyzes all tweets on brand positivity (optional arg: tweet/brand selection query)"),
wordcloud("makes a wordcloud of the text of the tweets", 1),
getBrands("fills the database with the brands of a tweet"),
- disco("makes a outputfile for disco",1),
+ disco("makes a outputfile for disco", 1),
exit("Returns to shell"),
help("Get help");