From 7df2ae452a984cb12986b33034557476cb4a1536 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Mon, 26 May 2014 12:02:08 +0200 Subject: Optimize sentiment analysis Do not create a new prepared statement every time. Use an optimized UPDATE query. Drop requirement for supplying a query. --- src/database/NamedPreparedStatement.java | 11 +++++++++++ src/main/Analyzor.java | 33 +++++++++++++++++++------------- src/main/FarmShell.java | 11 ++++++++--- 3 files changed, 39 insertions(+), 16 deletions(-) diff --git a/src/database/NamedPreparedStatement.java b/src/database/NamedPreparedStatement.java index ebb775b..9305d32 100644 --- a/src/database/NamedPreparedStatement.java +++ b/src/database/NamedPreparedStatement.java @@ -2,6 +2,7 @@ package database; import java.sql.Connection; import java.sql.PreparedStatement; +import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Timestamp; import java.sql.Types; @@ -104,4 +105,14 @@ public class NamedPreparedStatement { throw ex; } } + + public ResultSet executeQuery() throws SQLException { + try { + return getStmt().executeQuery(); + } catch (SQLException ex) { + System.err.println("Query error: " + ex.getMessage()); + System.err.println(stmt); + throw ex; + } + } } diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index 9c98a9d..5385a79 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -113,24 +113,33 @@ public class Analyzor { * @throws IOException */ public void sentimentAnalysis(String query) throws SQLException, IOException { - query(query); + NamedPreparedStatement tweetBrandStmt, updateRating; //read the lexicons readLexicon(); - //go to the start of te dataset - if (data == null) { - System.err.println("data is empty, try querying first"); - return; + // if you ever need to re-apply rating, use something like: + // UPDATE mentionsbrand SET rating = NULL WHERE ... + if (query.isEmpty()) { + query = "SELECT t.tweetid, t.text, b.brand FROM tweet t " + + "JOIN mentionsbrand b USING (tweetid) " + + "WHERE b.rating IS NULL"; } + tweetBrandStmt = new NamedPreparedStatement(connection, + query); + ResultSet tweetBrandResults = tweetBrandStmt.executeQuery(); + + updateRating = new NamedPreparedStatement(connection, + "UPDATE mentionsbrand SET rating = :rating " + + "WHERE tweetid = :tweetid AND brand = :brand"); Double value; String text; //for all tuples - while (data.next()) { + while (tweetBrandResults.next()) { //get the text - text = data.getString("text"); + text = tweetBrandResults.getString("text"); text = splitPunctToWords(text); // test is the tweet text you are going to analyze String[] words = text.split("\\s+"); // text splitted into separate words @@ -152,12 +161,10 @@ public class Analyzor { } } //insert the rating into the database - NamedPreparedStatement m_insertRating; - m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating); - QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"), data.getString("brand"), (int) (positiverate * 10)); - m_insertRating.executeUpdate(); - //don't print the rate - //System.out.println(text + ": " + (int) (positiverate * 10)); + updateRating.setLong("tweetid", tweetBrandResults.getLong("tweetid")); + updateRating.setString("brand", tweetBrandResults.getString("brand")); + updateRating.setInt("rating", (int) (positiverate * 10)); + updateRating.executeUpdate(); } } diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java index 1266fd3..ed1a0ff 100644 --- a/src/main/FarmShell.java +++ b/src/main/FarmShell.java @@ -125,7 +125,12 @@ public class FarmShell { System.out.println("not yet implemented"); break; case sentiment: - getAnalyzor().sentimentAnalysis(params[0]); + // if there is no query, update all unrated items. + if (params.length > 0) { + getAnalyzor().sentimentAnalysis(params[0]); + } else { + getAnalyzor().sentimentAnalysis(""); + } break; case wordcloud: getAnalyzor().makeWordCloud(params[0]); @@ -163,10 +168,10 @@ public class FarmShell { enum Command { filterbots("marks all users as bot or not", 1), - sentiment("analyzes all tweets on positivity (about a brand)", 1), + sentiment("analyzes all tweets on brand positivity (optional arg: tweet/brand selection query)"), wordcloud("makes a wordcloud of the text of the tweets", 1), getBrands("fills the database with the brands of a tweet"), - disco("makes a outputfile for disco",1), + disco("makes a outputfile for disco", 1), exit("Returns to shell"), help("Get help"); -- cgit v1.2.1