From b1a7bb58774ecee3c2ef79f684bc255cb8e6ef7c Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Fri, 9 May 2014 14:31:37 +0200 Subject: Use upsert queries, convert to named parameter statements Get rid of ispostedby, move it to tweet table ("userid") --- src/database/NamedPreparedStatement.java | 69 ++++++++++++ src/database/QueryUtils.java | 183 +++++++++++++++++-------------- src/main/DataFiller.java | 46 ++++---- 3 files changed, 190 insertions(+), 108 deletions(-) create mode 100644 src/database/NamedPreparedStatement.java (limited to 'src') diff --git a/src/database/NamedPreparedStatement.java b/src/database/NamedPreparedStatement.java new file mode 100644 index 0000000..c98edac --- /dev/null +++ b/src/database/NamedPreparedStatement.java @@ -0,0 +1,69 @@ +package database; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Allows a prepared statement to contain named parameters instead of a question + * mark position marker. + * + * @author Peter Wu + */ +public class NamedPreparedStatement { + + private final List fields; + private final PreparedStatement stmt; + + public NamedPreparedStatement(Connection conn, String query) throws SQLException { + fields = new ArrayList<>(); + Pattern pattern = Pattern.compile(":\\w+"); + Matcher matcher = pattern.matcher(query); + while (matcher.find()) { + fields.add(matcher.group()); + } + String sql = query.replaceAll(pattern.pattern(), ""); + stmt = conn.prepareStatement(sql); + } + + private List getParamIndices(String fieldName) { + List indices = new ArrayList<>(); + int index = 0; + for (String name : fields) { + ++index; + if (name.equals(fieldName)) { + indices.add(index); + } + } + if (indices.isEmpty()) { + throw new RuntimeException("Missing " + fieldName + " in query!"); + } + return indices; + } + + public void setLong(String name, long l) throws SQLException { + for (int paramIndex : getParamIndices(name)) { + stmt.setLong(paramIndex, l); + } + } + + public void setString(String name, String str) throws SQLException { + for (int paramIndex : getParamIndices(name)) { + stmt.setString(paramIndex, str); + } + } + + public void setString(String name, int i) throws SQLException { + for (int paramIndex : getParamIndices(name)) { + stmt.setInt(paramIndex, i); + } + } + + public PreparedStatement getStmt() { + return stmt; + } +} diff --git a/src/database/QueryUtils.java b/src/database/QueryUtils.java index ceec1e3..f87ba47 100644 --- a/src/database/QueryUtils.java +++ b/src/database/QueryUtils.java @@ -13,115 +13,134 @@ import java.sql.SQLException; public class QueryUtils { public final static String insertProfile - = "INSERT INTO twitteruser (userid,displayname,timezone,tweetcount," - + "followercount,followedcount,location,tweetname,createdat,language) " - + "SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM twitteruser WHERE userid= ? )"; + = buildQuery("twitteruser", new String[]{"userid"}, + "userid", "displayname", "timezone", "tweetcount", "followercount", + "followedcount", "location", "tweetname", "createdat", "language"); public final static String insertTweet - = "INSERT INTO tweet (tweetid,createdat,favcount,retweetcount,text," - + "coordinates,language,retweetid,replyid,place) " - + "SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM tweet WHERE tweetid=? )"; + = buildQuery("tweet", new String[]{"tweetid"}, + "tweetid", "createdat", "favcount", "retweetcount", "text", + "coordinates", "language", "retweetid", "replyid", "place", + "userid"); public final static String insertHash - = "INSERT INTO hashtag (tweetid, hashtag) " - + "SELECT ?, ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM hashtag WHERE tweetid=? and hashtag =? )"; + = buildQuery("hashtag", null, "tweetid", "hashtag"); + // TODO: split url to userUrl and tweetUrl public final static String insertUrl - = "INSERT INTO url (tweetid, url) " - + "SELECT ?, ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM url WHERE tweetid=? and url =? )"; + = buildQuery("url", null, "tweetid", "url"); public final static String insertMentions - = "INSERT INTO mentionsuser (tweetid, userid) " - + "SELECT ?, ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM mentionsuser WHERE tweetid=? and userid =? )"; - - public final static String insertPosted - = "INSERT INTO ispostedby (tweetid,userid) " - + "SELECT ? , ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM ispostedby WHERE tweetid= ? )"; + = buildQuery("mentionsuser", null, "tweetid", "userid"); public final static String insertBrand - = "INSERT INTO mentionsbrand (tweetid,brand) " - + "SELECT ? , ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM mentionsbrand WHERE tweetid = ? AND brand = ?)"; - - public static void setInsertParams(PreparedStatement tweetStatement, - PreparedStatement profileStatement, - PreparedStatement postedStatement, + = buildQuery("mentionsbrand", null, "tweetid", "brand"); + + /** + * Builds an upsert query for a table and keys. + * + * @param table Table to insert or update the data. + * @param primaryKeys The primary keys for the table or null if all keys + * form the primary key. + * @param keys The keys to be inserted or updated. + * @return A SQL query containing named parameters. + */ + private static String buildQuery(String table, String[] primaryKeys, + String... keys) { + String sep = ""; + String fields = ""; // a, b, c + String values = ""; // :a, :b, :c + String set_values = ""; // a = nv.a, b = nv.b, c = nv.c + String pkey_matches = ""; + for (String key : keys) { + fields += sep + key; + values += sep + ":" + key; + // "u" is "table that gets updated", "nv" is "new values" + set_values += sep + key + " = nv." + key; + sep = ", "; + } + sep = ""; + // assume that all fields form the primary key if pks are missing + if (primaryKeys == null) { + primaryKeys = keys; + } + for (String pkey : primaryKeys) { + pkey_matches = sep + "u." + pkey + " = nv." + pkey; + sep = " AND "; + } + + String sql; + // CTE that holds new values + sql = "WITH nv (" + fields + ") AS (\n"; + sql += " VALUES (" + values + ")\n"; + sql += ")\n"; + // CTE that tries to update the table with new values (PSQL extension) + sql += ", upsert AS (\n"; + sql += " UPDATE " + table + " SET " + set_values + " FROM nv\n"; + sql += " WHERE " + pkey_matches + " RETURNING 1\n"; + sql += ")\n"; + // if nothing got updated, insert new entry + sql += "INSERT INTO " + table + " (" + fields + ")\n"; + sql += "SELECT " + fields + " FROM nv\n"; + sql += "WHERE NOT EXISTS (SELECT 1 FROM upsert)"; + return sql; + } + + public static void setInsertParams(NamedPreparedStatement tweetStatement, + NamedPreparedStatement profileStatement, Tweet tweet) throws SQLException { - tweetStatement.setLong(1, tweet.id); - tweetStatement.setString(2, tweet.created_at); - tweetStatement.setLong(3, tweet.favorite_count); - tweetStatement.setLong(4, tweet.retweet_count); - tweetStatement.setString(5, tweet.text); - tweetStatement.setString(6, tweet.coordinates); - tweetStatement.setString(7, tweet.lang); + tweetStatement.setLong("id", tweet.id); + tweetStatement.setString("createdat", tweet.created_at); + tweetStatement.setLong("favoritecount", tweet.favorite_count); + tweetStatement.setLong("retweetcount", tweet.retweet_count); + tweetStatement.setString("text", tweet.text); + tweetStatement.setString("coordinates", tweet.coordinates); + tweetStatement.setString("language", tweet.lang); if (tweet.retweeted_status != null) { - tweetStatement.setLong(8, tweet.retweeted_status.id); + tweetStatement.setLong("retweetid", tweet.retweeted_status.id); } else { - tweetStatement.setLong(8, 0); + tweetStatement.setLong("retweetid", 0); } - tweetStatement.setLong(9, tweet.in_reply_to_user_id); - tweetStatement.setString(10, tweet.place); - tweetStatement.setLong(11, tweet.id); + tweetStatement.setLong("replyid", tweet.in_reply_to_user_id); + // TODO: place is not a string... + tweetStatement.setString("place", tweet.place); User twuser = tweet.user; - profileStatement.setLong(1, twuser.id); - profileStatement.setString(2, twuser.name); - profileStatement.setString(3, twuser.time_zone); - profileStatement.setLong(4, twuser.statuses_count); - profileStatement.setLong(5, twuser.followers_count); - profileStatement.setLong(6, twuser.friends_count); - profileStatement.setString(7, twuser.location); - profileStatement.setString(8, twuser.screen_name); - profileStatement.setString(9, twuser.created_at); - profileStatement.setString(10, twuser.lang); - profileStatement.setLong(11, twuser.id); - postedStatement.setLong(1, tweet.id); - postedStatement.setLong(2, twuser.id); - postedStatement.setLong(3, tweet.id); + tweetStatement.setLong("userid", twuser.id); + profileStatement.setLong("userid", twuser.id); + profileStatement.setString("displayname", twuser.name); + profileStatement.setString("timezone", twuser.time_zone); + profileStatement.setLong("tweetcount", twuser.statuses_count); + profileStatement.setLong("followercount", twuser.followers_count); + profileStatement.setLong("followedcount", twuser.friends_count); + profileStatement.setString("location", twuser.location); + profileStatement.setString("tweetname", twuser.screen_name); + profileStatement.setString("createdat", twuser.created_at); + profileStatement.setString("language", twuser.lang); } - public static void setInsertBrandParams(PreparedStatement brandStatement, + public static void setInsertBrandParams(NamedPreparedStatement brandStmt, long id, String brand) throws SQLException { - brandStatement.setLong(1, id); - brandStatement.setString(2, brand); - brandStatement.setLong(3, id); - brandStatement.setString(4, brand); + brandStmt.setLong("tweetid", id); + brandStmt.setString("brand", brand); + // TODO: rating (positive) } - public static void setInsertHashParams(PreparedStatement hashStatement, + public static void setInsertHashParams(NamedPreparedStatement hashStmt, long id, String text) throws SQLException { - hashStatement.setLong(1, id); - hashStatement.setString(2, text); - hashStatement.setLong(3, id); - hashStatement.setString(4, text); + hashStmt.setLong("tweetid", id); + hashStmt.setString("hashtag", text); } - public static void setInsertUrlParams(PreparedStatement UrlStatement, + public static void setInsertUrlParams(NamedPreparedStatement urlStmt, long id, String text) throws SQLException { - UrlStatement.setLong(1, id); - UrlStatement.setString(2, text); - UrlStatement.setLong(3, id); - UrlStatement.setString(4, text); + urlStmt.setLong("tweetid", id); + urlStmt.setString("url", text); } - public static void setInsertMentionsParams(PreparedStatement UrlStatement, + public static void setInsertMentionsParams(NamedPreparedStatement mentStmt, long id, long userid) throws SQLException { - UrlStatement.setLong(1, id); - UrlStatement.setLong(2, userid); - UrlStatement.setLong(3, id); - UrlStatement.setLong(4, userid); + mentStmt.setLong("tweetid", id); + mentStmt.setLong("userid", userid); } } diff --git a/src/main/DataFiller.java b/src/main/DataFiller.java index 4f06006..10ed774 100644 --- a/src/main/DataFiller.java +++ b/src/main/DataFiller.java @@ -1,9 +1,9 @@ package main; import data.Tweet; +import database.NamedPreparedStatement; import database.QueryUtils; import java.sql.Connection; -import java.sql.PreparedStatement; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; @@ -25,36 +25,32 @@ public class DataFiller { /** * A single insert tweet that can be used. */ - private final PreparedStatement m_insertTweet; + private final NamedPreparedStatement m_insertTweet; /** * A single insert profiles that can be used. */ - private final PreparedStatement m_insertProfile; - /** - * A single insert ispostedby that can be used. - */ - private final PreparedStatement m_insertPosted; + private final NamedPreparedStatement m_insertProfile; /** * A single insert brand that can be used. */ - private final PreparedStatement m_insertBrand; + private final NamedPreparedStatement m_insertBrand; /** * A single insert hashtag that can be used. */ - private final PreparedStatement m_insertHash; + private final NamedPreparedStatement m_insertHash; /** * A single insert url that can be used. */ - private final PreparedStatement m_insertUrl; + private final NamedPreparedStatement m_insertUrl; /** * A single insert url that can be used. */ - private final PreparedStatement m_insertMentions; + private final NamedPreparedStatement m_insertMentions; /** * Create the datafiller object. @@ -64,13 +60,12 @@ public class DataFiller { public DataFiller(Connection connection) { try { m_connection = connection; - m_insertTweet = m_connection.prepareStatement(QueryUtils.insertTweet); - m_insertProfile = m_connection.prepareStatement(QueryUtils.insertProfile); - m_insertPosted = m_connection.prepareStatement(QueryUtils.insertPosted); - m_insertBrand = m_connection.prepareStatement(QueryUtils.insertBrand); - m_insertHash = m_connection.prepareStatement(QueryUtils.insertHash); - m_insertUrl = m_connection.prepareStatement(QueryUtils.insertUrl); - m_insertMentions = m_connection.prepareStatement(QueryUtils.insertMentions); + m_insertTweet = new NamedPreparedStatement(m_connection, QueryUtils.insertTweet); + m_insertProfile = new NamedPreparedStatement(m_connection, QueryUtils.insertProfile); + m_insertBrand = new NamedPreparedStatement(m_connection, QueryUtils.insertBrand); + m_insertHash = new NamedPreparedStatement(m_connection, QueryUtils.insertHash); + m_insertUrl = new NamedPreparedStatement(m_connection, QueryUtils.insertUrl); + m_insertMentions = new NamedPreparedStatement(m_connection, QueryUtils.insertMentions); } catch (SQLException ex) { throw new RuntimeException(ex.getMessage()); } @@ -80,25 +75,24 @@ public class DataFiller { try { for (Tweet.Hashtag hashtag : tweet.entities.hashtags) { QueryUtils.setInsertHashParams(m_insertHash, tweet.id, hashtag.text); - m_insertHash.executeUpdate(); + m_insertHash.getStmt().executeUpdate(); } for (Tweet.Url url : tweet.entities.urls) { QueryUtils.setInsertHashParams(m_insertUrl, tweet.id, url.expanded_url); - m_insertUrl.executeUpdate(); + m_insertUrl.getStmt().executeUpdate(); } for (Tweet.Mention mention : tweet.entities.user_mentions) { QueryUtils.setInsertMentionsParams(m_insertMentions, tweet.id, mention.id); - m_insertMentions.executeUpdate(); + m_insertMentions.getStmt().executeUpdate(); } - QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, m_insertPosted, tweet); - m_insertTweet.executeUpdate(); - m_insertProfile.executeUpdate(); - m_insertPosted.executeUpdate(); + QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, tweet); + m_insertTweet.getStmt().executeUpdate(); + m_insertProfile.getStmt().executeUpdate(); List brands = getBrands(tweet); for (String brand : brands) { QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand); - m_insertBrand.executeUpdate(); + m_insertBrand.getStmt().executeUpdate(); } } catch (SQLException ex) { Logger.getLogger(DataFiller.class.getName()).log(Level.SEVERE, null, ex); -- cgit v1.2.1