summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2014-05-09 14:31:37 +0200
committerPeter Wu <peter@lekensteyn.nl>2014-05-09 14:31:37 +0200
commitb1a7bb58774ecee3c2ef79f684bc255cb8e6ef7c (patch)
treecc6f8b37d8edcdb92c8ce5d769d16ab8ed9acfab /src
parentecb00abefb73ae8aec58cd549cfcbbc1af209ed0 (diff)
downloadDatafiller-b1a7bb58774ecee3c2ef79f684bc255cb8e6ef7c.tar.gz
Use upsert queries, convert to named parameter statements
Get rid of ispostedby, move it to tweet table ("userid")
Diffstat (limited to 'src')
-rw-r--r--src/database/NamedPreparedStatement.java69
-rw-r--r--src/database/QueryUtils.java183
-rw-r--r--src/main/DataFiller.java46
3 files changed, 190 insertions, 108 deletions
diff --git a/src/database/NamedPreparedStatement.java b/src/database/NamedPreparedStatement.java
new file mode 100644
index 0000000..c98edac
--- /dev/null
+++ b/src/database/NamedPreparedStatement.java
@@ -0,0 +1,69 @@
+package database;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Allows a prepared statement to contain named parameters instead of a question
+ * mark position marker.
+ *
+ * @author Peter Wu
+ */
+public class NamedPreparedStatement {
+
+ private final List<String> fields;
+ private final PreparedStatement stmt;
+
+ public NamedPreparedStatement(Connection conn, String query) throws SQLException {
+ fields = new ArrayList<>();
+ Pattern pattern = Pattern.compile(":\\w+");
+ Matcher matcher = pattern.matcher(query);
+ while (matcher.find()) {
+ fields.add(matcher.group());
+ }
+ String sql = query.replaceAll(pattern.pattern(), "");
+ stmt = conn.prepareStatement(sql);
+ }
+
+ private List<Integer> getParamIndices(String fieldName) {
+ List<Integer> indices = new ArrayList<>();
+ int index = 0;
+ for (String name : fields) {
+ ++index;
+ if (name.equals(fieldName)) {
+ indices.add(index);
+ }
+ }
+ if (indices.isEmpty()) {
+ throw new RuntimeException("Missing " + fieldName + " in query!");
+ }
+ return indices;
+ }
+
+ public void setLong(String name, long l) throws SQLException {
+ for (int paramIndex : getParamIndices(name)) {
+ stmt.setLong(paramIndex, l);
+ }
+ }
+
+ public void setString(String name, String str) throws SQLException {
+ for (int paramIndex : getParamIndices(name)) {
+ stmt.setString(paramIndex, str);
+ }
+ }
+
+ public void setString(String name, int i) throws SQLException {
+ for (int paramIndex : getParamIndices(name)) {
+ stmt.setInt(paramIndex, i);
+ }
+ }
+
+ public PreparedStatement getStmt() {
+ return stmt;
+ }
+}
diff --git a/src/database/QueryUtils.java b/src/database/QueryUtils.java
index ceec1e3..f87ba47 100644
--- a/src/database/QueryUtils.java
+++ b/src/database/QueryUtils.java
@@ -13,115 +13,134 @@ import java.sql.SQLException;
public class QueryUtils {
public final static String insertProfile
- = "INSERT INTO twitteruser (userid,displayname,timezone,tweetcount,"
- + "followercount,followedcount,location,tweetname,createdat,language) "
- + "SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM twitteruser WHERE userid= ? )";
+ = buildQuery("twitteruser", new String[]{"userid"},
+ "userid", "displayname", "timezone", "tweetcount", "followercount",
+ "followedcount", "location", "tweetname", "createdat", "language");
public final static String insertTweet
- = "INSERT INTO tweet (tweetid,createdat,favcount,retweetcount,text,"
- + "coordinates,language,retweetid,replyid,place) "
- + "SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM tweet WHERE tweetid=? )";
+ = buildQuery("tweet", new String[]{"tweetid"},
+ "tweetid", "createdat", "favcount", "retweetcount", "text",
+ "coordinates", "language", "retweetid", "replyid", "place",
+ "userid");
public final static String insertHash
- = "INSERT INTO hashtag (tweetid, hashtag) "
- + "SELECT ?, ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM hashtag WHERE tweetid=? and hashtag =? )";
+ = buildQuery("hashtag", null, "tweetid", "hashtag");
+ // TODO: split url to userUrl and tweetUrl
public final static String insertUrl
- = "INSERT INTO url (tweetid, url) "
- + "SELECT ?, ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM url WHERE tweetid=? and url =? )";
+ = buildQuery("url", null, "tweetid", "url");
public final static String insertMentions
- = "INSERT INTO mentionsuser (tweetid, userid) "
- + "SELECT ?, ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM mentionsuser WHERE tweetid=? and userid =? )";
-
- public final static String insertPosted
- = "INSERT INTO ispostedby (tweetid,userid) "
- + "SELECT ? , ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM ispostedby WHERE tweetid= ? )";
+ = buildQuery("mentionsuser", null, "tweetid", "userid");
public final static String insertBrand
- = "INSERT INTO mentionsbrand (tweetid,brand) "
- + "SELECT ? , ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM mentionsbrand WHERE tweetid = ? AND brand = ?)";
-
- public static void setInsertParams(PreparedStatement tweetStatement,
- PreparedStatement profileStatement,
- PreparedStatement postedStatement,
+ = buildQuery("mentionsbrand", null, "tweetid", "brand");
+
+ /**
+ * Builds an upsert query for a table and keys.
+ *
+ * @param table Table to insert or update the data.
+ * @param primaryKeys The primary keys for the table or null if all keys
+ * form the primary key.
+ * @param keys The keys to be inserted or updated.
+ * @return A SQL query containing named parameters.
+ */
+ private static String buildQuery(String table, String[] primaryKeys,
+ String... keys) {
+ String sep = "";
+ String fields = ""; // a, b, c
+ String values = ""; // :a, :b, :c
+ String set_values = ""; // a = nv.a, b = nv.b, c = nv.c
+ String pkey_matches = "";
+ for (String key : keys) {
+ fields += sep + key;
+ values += sep + ":" + key;
+ // "u" is "table that gets updated", "nv" is "new values"
+ set_values += sep + key + " = nv." + key;
+ sep = ", ";
+ }
+ sep = "";
+ // assume that all fields form the primary key if pks are missing
+ if (primaryKeys == null) {
+ primaryKeys = keys;
+ }
+ for (String pkey : primaryKeys) {
+ pkey_matches = sep + "u." + pkey + " = nv." + pkey;
+ sep = " AND ";
+ }
+
+ String sql;
+ // CTE that holds new values
+ sql = "WITH nv (" + fields + ") AS (\n";
+ sql += " VALUES (" + values + ")\n";
+ sql += ")\n";
+ // CTE that tries to update the table with new values (PSQL extension)
+ sql += ", upsert AS (\n";
+ sql += " UPDATE " + table + " SET " + set_values + " FROM nv\n";
+ sql += " WHERE " + pkey_matches + " RETURNING 1\n";
+ sql += ")\n";
+ // if nothing got updated, insert new entry
+ sql += "INSERT INTO " + table + " (" + fields + ")\n";
+ sql += "SELECT " + fields + " FROM nv\n";
+ sql += "WHERE NOT EXISTS (SELECT 1 FROM upsert)";
+ return sql;
+ }
+
+ public static void setInsertParams(NamedPreparedStatement tweetStatement,
+ NamedPreparedStatement profileStatement,
Tweet tweet) throws SQLException {
- tweetStatement.setLong(1, tweet.id);
- tweetStatement.setString(2, tweet.created_at);
- tweetStatement.setLong(3, tweet.favorite_count);
- tweetStatement.setLong(4, tweet.retweet_count);
- tweetStatement.setString(5, tweet.text);
- tweetStatement.setString(6, tweet.coordinates);
- tweetStatement.setString(7, tweet.lang);
+ tweetStatement.setLong("id", tweet.id);
+ tweetStatement.setString("createdat", tweet.created_at);
+ tweetStatement.setLong("favoritecount", tweet.favorite_count);
+ tweetStatement.setLong("retweetcount", tweet.retweet_count);
+ tweetStatement.setString("text", tweet.text);
+ tweetStatement.setString("coordinates", tweet.coordinates);
+ tweetStatement.setString("language", tweet.lang);
if (tweet.retweeted_status != null) {
- tweetStatement.setLong(8, tweet.retweeted_status.id);
+ tweetStatement.setLong("retweetid", tweet.retweeted_status.id);
} else {
- tweetStatement.setLong(8, 0);
+ tweetStatement.setLong("retweetid", 0);
}
- tweetStatement.setLong(9, tweet.in_reply_to_user_id);
- tweetStatement.setString(10, tweet.place);
- tweetStatement.setLong(11, tweet.id);
+ tweetStatement.setLong("replyid", tweet.in_reply_to_user_id);
+ // TODO: place is not a string...
+ tweetStatement.setString("place", tweet.place);
User twuser = tweet.user;
- profileStatement.setLong(1, twuser.id);
- profileStatement.setString(2, twuser.name);
- profileStatement.setString(3, twuser.time_zone);
- profileStatement.setLong(4, twuser.statuses_count);
- profileStatement.setLong(5, twuser.followers_count);
- profileStatement.setLong(6, twuser.friends_count);
- profileStatement.setString(7, twuser.location);
- profileStatement.setString(8, twuser.screen_name);
- profileStatement.setString(9, twuser.created_at);
- profileStatement.setString(10, twuser.lang);
- profileStatement.setLong(11, twuser.id);
- postedStatement.setLong(1, tweet.id);
- postedStatement.setLong(2, twuser.id);
- postedStatement.setLong(3, tweet.id);
+ tweetStatement.setLong("userid", twuser.id);
+ profileStatement.setLong("userid", twuser.id);
+ profileStatement.setString("displayname", twuser.name);
+ profileStatement.setString("timezone", twuser.time_zone);
+ profileStatement.setLong("tweetcount", twuser.statuses_count);
+ profileStatement.setLong("followercount", twuser.followers_count);
+ profileStatement.setLong("followedcount", twuser.friends_count);
+ profileStatement.setString("location", twuser.location);
+ profileStatement.setString("tweetname", twuser.screen_name);
+ profileStatement.setString("createdat", twuser.created_at);
+ profileStatement.setString("language", twuser.lang);
}
- public static void setInsertBrandParams(PreparedStatement brandStatement,
+ public static void setInsertBrandParams(NamedPreparedStatement brandStmt,
long id, String brand) throws SQLException {
- brandStatement.setLong(1, id);
- brandStatement.setString(2, brand);
- brandStatement.setLong(3, id);
- brandStatement.setString(4, brand);
+ brandStmt.setLong("tweetid", id);
+ brandStmt.setString("brand", brand);
+ // TODO: rating (positive)
}
- public static void setInsertHashParams(PreparedStatement hashStatement,
+ public static void setInsertHashParams(NamedPreparedStatement hashStmt,
long id, String text) throws SQLException {
- hashStatement.setLong(1, id);
- hashStatement.setString(2, text);
- hashStatement.setLong(3, id);
- hashStatement.setString(4, text);
+ hashStmt.setLong("tweetid", id);
+ hashStmt.setString("hashtag", text);
}
- public static void setInsertUrlParams(PreparedStatement UrlStatement,
+ public static void setInsertUrlParams(NamedPreparedStatement urlStmt,
long id, String text) throws SQLException {
- UrlStatement.setLong(1, id);
- UrlStatement.setString(2, text);
- UrlStatement.setLong(3, id);
- UrlStatement.setString(4, text);
+ urlStmt.setLong("tweetid", id);
+ urlStmt.setString("url", text);
}
- public static void setInsertMentionsParams(PreparedStatement UrlStatement,
+ public static void setInsertMentionsParams(NamedPreparedStatement mentStmt,
long id, long userid) throws SQLException {
- UrlStatement.setLong(1, id);
- UrlStatement.setLong(2, userid);
- UrlStatement.setLong(3, id);
- UrlStatement.setLong(4, userid);
+ mentStmt.setLong("tweetid", id);
+ mentStmt.setLong("userid", userid);
}
}
diff --git a/src/main/DataFiller.java b/src/main/DataFiller.java
index 4f06006..10ed774 100644
--- a/src/main/DataFiller.java
+++ b/src/main/DataFiller.java
@@ -1,9 +1,9 @@
package main;
import data.Tweet;
+import database.NamedPreparedStatement;
import database.QueryUtils;
import java.sql.Connection;
-import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
@@ -25,36 +25,32 @@ public class DataFiller {
/**
* A single insert tweet that can be used.
*/
- private final PreparedStatement m_insertTweet;
+ private final NamedPreparedStatement m_insertTweet;
/**
* A single insert profiles that can be used.
*/
- private final PreparedStatement m_insertProfile;
- /**
- * A single insert ispostedby that can be used.
- */
- private final PreparedStatement m_insertPosted;
+ private final NamedPreparedStatement m_insertProfile;
/**
* A single insert brand that can be used.
*/
- private final PreparedStatement m_insertBrand;
+ private final NamedPreparedStatement m_insertBrand;
/**
* A single insert hashtag that can be used.
*/
- private final PreparedStatement m_insertHash;
+ private final NamedPreparedStatement m_insertHash;
/**
* A single insert url that can be used.
*/
- private final PreparedStatement m_insertUrl;
+ private final NamedPreparedStatement m_insertUrl;
/**
* A single insert url that can be used.
*/
- private final PreparedStatement m_insertMentions;
+ private final NamedPreparedStatement m_insertMentions;
/**
* Create the datafiller object.
@@ -64,13 +60,12 @@ public class DataFiller {
public DataFiller(Connection connection) {
try {
m_connection = connection;
- m_insertTweet = m_connection.prepareStatement(QueryUtils.insertTweet);
- m_insertProfile = m_connection.prepareStatement(QueryUtils.insertProfile);
- m_insertPosted = m_connection.prepareStatement(QueryUtils.insertPosted);
- m_insertBrand = m_connection.prepareStatement(QueryUtils.insertBrand);
- m_insertHash = m_connection.prepareStatement(QueryUtils.insertHash);
- m_insertUrl = m_connection.prepareStatement(QueryUtils.insertUrl);
- m_insertMentions = m_connection.prepareStatement(QueryUtils.insertMentions);
+ m_insertTweet = new NamedPreparedStatement(m_connection, QueryUtils.insertTweet);
+ m_insertProfile = new NamedPreparedStatement(m_connection, QueryUtils.insertProfile);
+ m_insertBrand = new NamedPreparedStatement(m_connection, QueryUtils.insertBrand);
+ m_insertHash = new NamedPreparedStatement(m_connection, QueryUtils.insertHash);
+ m_insertUrl = new NamedPreparedStatement(m_connection, QueryUtils.insertUrl);
+ m_insertMentions = new NamedPreparedStatement(m_connection, QueryUtils.insertMentions);
} catch (SQLException ex) {
throw new RuntimeException(ex.getMessage());
}
@@ -80,25 +75,24 @@ public class DataFiller {
try {
for (Tweet.Hashtag hashtag : tweet.entities.hashtags) {
QueryUtils.setInsertHashParams(m_insertHash, tweet.id, hashtag.text);
- m_insertHash.executeUpdate();
+ m_insertHash.getStmt().executeUpdate();
}
for (Tweet.Url url : tweet.entities.urls) {
QueryUtils.setInsertHashParams(m_insertUrl, tweet.id, url.expanded_url);
- m_insertUrl.executeUpdate();
+ m_insertUrl.getStmt().executeUpdate();
}
for (Tweet.Mention mention : tweet.entities.user_mentions) {
QueryUtils.setInsertMentionsParams(m_insertMentions, tweet.id, mention.id);
- m_insertMentions.executeUpdate();
+ m_insertMentions.getStmt().executeUpdate();
}
- QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, m_insertPosted, tweet);
- m_insertTweet.executeUpdate();
- m_insertProfile.executeUpdate();
- m_insertPosted.executeUpdate();
+ QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, tweet);
+ m_insertTweet.getStmt().executeUpdate();
+ m_insertProfile.getStmt().executeUpdate();
List<String> brands = getBrands(tweet);
for (String brand : brands) {
QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand);
- m_insertBrand.executeUpdate();
+ m_insertBrand.getStmt().executeUpdate();
}
} catch (SQLException ex) {
Logger.getLogger(DataFiller.class.getName()).log(Level.SEVERE, null, ex);