diff options
-rw-r--r-- | lib/commons-lang3-3.3.2.jar | bin | 0 -> 412740 bytes | |||
-rw-r--r-- | nbproject/project.properties | 4 | ||||
-rw-r--r-- | src/data/DebuggingJsonDeserializer.java | 41 | ||||
-rw-r--r-- | src/data/Tweet.java | 27 | ||||
-rw-r--r-- | src/data/User.java | 2 | ||||
-rw-r--r-- | src/data/ValidatingJsonDeserializer.java | 88 | ||||
-rw-r--r-- | src/database/ConnectionBuilder.java | 65 | ||||
-rw-r--r-- | src/database/DBConnection.java | 59 | ||||
-rw-r--r-- | src/database/NamedPreparedStatement.java | 69 | ||||
-rw-r--r-- | src/database/QueryUtils.java | 211 | ||||
-rw-r--r-- | src/io/TweetReader.java | 7 | ||||
-rw-r--r-- | src/main/DataFiller.java | 53 | ||||
-rw-r--r-- | src/main/Main.java | 111 | ||||
-rw-r--r-- | test/data/ValidatingJsonDeserializerTest.java | 165 |
14 files changed, 654 insertions, 248 deletions
diff --git a/lib/commons-lang3-3.3.2.jar b/lib/commons-lang3-3.3.2.jar Binary files differnew file mode 100644 index 0000000..bb06979 --- /dev/null +++ b/lib/commons-lang3-3.3.2.jar diff --git a/nbproject/project.properties b/nbproject/project.properties index d738523..f6acd32 100644 --- a/nbproject/project.properties +++ b/nbproject/project.properties @@ -29,13 +29,15 @@ dist.jar=${dist.dir}/Datafiller.jar dist.javadoc.dir=${dist.dir}/javadoc endorsed.classpath= excludes= +file.reference.commons-lang3-3.3.2.jar=lib/commons-lang3-3.3.2.jar file.reference.gson-2.2.4.jar=lib/gson-2.2.4.jar file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar includes=** jar.compress=false javac.classpath=\ ${file.reference.postgresql-9.3-1101.jdbc41.jar}:\ - ${file.reference.gson-2.2.4.jar} + ${file.reference.gson-2.2.4.jar}:\ + ${file.reference.commons-lang3-3.3.2.jar} # Space-separated list of extra javac options javac.compilerargs= javac.deprecation=false diff --git a/src/data/DebuggingJsonDeserializer.java b/src/data/DebuggingJsonDeserializer.java new file mode 100644 index 0000000..1b00d4d --- /dev/null +++ b/src/data/DebuggingJsonDeserializer.java @@ -0,0 +1,41 @@ +package data; + +import com.google.gson.GsonBuilder; +import com.google.gson.JsonDeserializationContext; +import com.google.gson.JsonElement; +import com.google.gson.JsonParseException; +import java.lang.reflect.Type; + +/** + * Validates the deserialization of a JSON string. The main motivation of this + * class is to print the property name of an object that throws an + * IllegalStateException because a string was expected, but a different type got + * returned. + * + * @author Peter Wu + */ +public class DebuggingJsonDeserializer extends ValidatingJsonDeserializer { + + private final Class overriddenClass; + + private DebuggingJsonDeserializer(Class overriddenClass) { + this.overriddenClass = overriddenClass; + } + + @Override + public Object deserialize(JsonElement je, Type type, + JsonDeserializationContext jdc) throws JsonParseException { + checkObject("", je, overriddenClass); + return null; + } + + public static void tryValidate(String json, Class cls) { + new GsonBuilder() + .registerTypeAdapter(Dummy.class, new DebuggingJsonDeserializer(cls)) + .create() + .fromJson(json, Dummy.class); + } + + private class Dummy { + } +} diff --git a/src/data/Tweet.java b/src/data/Tweet.java index 577b620..3caae6a 100644 --- a/src/data/Tweet.java +++ b/src/data/Tweet.java @@ -10,13 +10,19 @@ public class Tweet { public long id; public String lang; + @ValidatingJsonDeserializer.Nullable public long in_reply_to_user_id; public String created_at; public long favorite_count; - public String place; - public String coordinates; + @ValidatingJsonDeserializer.Nullable + @ValidatingJsonDeserializer.Validator + public Place place; + @ValidatingJsonDeserializer.Nullable + @ValidatingJsonDeserializer.Validator + public Coordinates coordinates; public String text; @ValidatingJsonDeserializer.Nullable + @ValidatingJsonDeserializer.Validator public Tweet retweeted_status; @ValidatingJsonDeserializer.Validator public Entities entities; @@ -30,6 +36,23 @@ public class Tweet { return gson.toJson(this); } + public static class Place { + + //public String id; // "a5b6bdd8008412b1" + //public String name; // "Danbury" + //public String country_code; // "US" + public String country; // "United States" + //public String url; // "https://api.twitter.com/1.1/geo/id/a5b6bdd8008412b1.json" + public String full_name; // "Danbury, CT" + } + + public static class Coordinates { + + //public String type; // always "Point"? + @ValidatingJsonDeserializer.ArrayValidator(minLen = 2, maxLen = 2) + public float[] coordinates; // e.g. [-73.49513755, 41.43286284] + } + public static class Entities { @ValidatingJsonDeserializer.Validator diff --git a/src/data/User.java b/src/data/User.java index 390d986..91b5e07 100644 --- a/src/data/User.java +++ b/src/data/User.java @@ -7,6 +7,7 @@ public class User { public long id; public String name; + @ValidatingJsonDeserializer.Nullable public String time_zone; public long statuses_count; public long followers_count; @@ -14,6 +15,7 @@ public class User { public String location; public String screen_name; public String created_at; + @ValidatingJsonDeserializer.Nullable @ValidatingJsonDeserializer.Validator public Entities entities; public String lang; diff --git a/src/data/ValidatingJsonDeserializer.java b/src/data/ValidatingJsonDeserializer.java index 8711f59..d5a8dbc 100644 --- a/src/data/ValidatingJsonDeserializer.java +++ b/src/data/ValidatingJsonDeserializer.java @@ -1,7 +1,6 @@ package data; import com.google.gson.Gson; -import com.google.gson.GsonBuilder; import com.google.gson.JsonArray; import com.google.gson.JsonDeserializationContext; import com.google.gson.JsonDeserializer; @@ -25,46 +24,83 @@ public class ValidatingJsonDeserializer<T> implements JsonDeserializer<T> { @Override public T deserialize(JsonElement je, Type type, JsonDeserializationContext jdc) throws JsonParseException { - T obj = new Gson().fromJson(je, type); - checkObject(je, obj.getClass()); + T obj; + try { + obj = new Gson().fromJson(je, type); + } catch (JsonParseException jpe) { + DebuggingJsonDeserializer.tryValidate(je.toString(), (Class) type); + throw new JsonParseException("Debugger could not find a bug", jpe); + } + checkObject("", je, obj.getClass()); return obj; } - private void checkObject(JsonElement je, Class type) { - if (!je.isJsonObject()) { - throw new JsonParseException("Expected object: " + type.getName()); - } + void checkObject(String path, JsonElement je, Class type) + throws JsonParseException { JsonObject jsonObj = je.getAsJsonObject(); for (Field f : type.getDeclaredFields()) { - System.err.println("verif " + f.getName()); - if (!jsonObj.has(f.getName())) { + JsonElement val = jsonObj.get(f.getName()); + if (!jsonObj.has(f.getName()) || val.isJsonNull()) { if (f.getAnnotation(Nullable.class) != null) { // null allowed, skip continue; } - throw new JsonParseException("Missing field: " + f.getName()); + throw new JsonParseException("Missing field: " + path + f.getName()); } - tryValidateProperty(jsonObj.get(f.getName()), f); + if (f.getType().equals(String.class)) { + if (!val.isJsonPrimitive() || !val.getAsJsonPrimitive().isString()) { + throw new JsonParseException("Expected string: " + path + f.getName()); + } + } + tryValidateProperty(path, val, f); // TODO: validate type? } } - private void tryValidateProperty(JsonElement je, Field f) { + private void tryValidateProperty(String path, JsonElement je, Field f) + throws JsonParseException { + Class<?> type = f.getType(); // assume that this annotation is only applied to objects Validator v = f.getAnnotation(Validator.class); + ArrayValidator av = f.getAnnotation(ArrayValidator.class); + path += f.getName(); + if (av != null) { + if (!type.isArray()) { + throw new RuntimeException("Invalid " + av.getClass().getName() + + " + annotation for " + path); + } + if (!je.isJsonArray()) { + throw new JsonParseException("Expected array: " + path); + } + JsonArray ja = je.getAsJsonArray(); + int minLen = av.minLen(), maxLen = av.maxLen(); + if (minLen >= 0 && ja.size() < minLen) { + throw new JsonParseException("Array smaller than " + + minLen + ": " + path); + } + if (maxLen >= 0 && ja.size() > maxLen) { + throw new JsonParseException("Array larger than " + + maxLen + ": " + path); + } + } if (v != null) { - Class<?> type = f.getType(); if (type.isArray()) { + // the class expects an array, so the value must have one too. if (!je.isJsonArray()) { - throw new JsonParseException("Not an array: " + f.getName()); + throw new JsonParseException("Expected array: " + path); } JsonArray ja = je.getAsJsonArray(); type = type.getComponentType(); + // for each array element, check if the object is valid. for (JsonElement arr_je : ja) { - checkObject(arr_je, type); + checkObject(path + ".", arr_je, type); } } else { - checkObject(je, type); + // not an array, assume a verifiable object + if (!je.isJsonObject()) { + throw new JsonParseException("Expected object: " + path); + } + checkObject(path + ".", je, type); } } } @@ -74,9 +110,25 @@ public class ValidatingJsonDeserializer<T> implements JsonDeserializer<T> { */ @Retention(RetentionPolicy.RUNTIME) @Target(ElementType.FIELD) - public @interface Validator { + public @interface ArrayValidator { + + /** + * @return Minimal length for array types (-1 if not checked). + */ + int minLen() default -1; - Class deserializer() default ValidatingJsonDeserializer.class; + /** + * @return Maximum length for array types (-1 if not checked). + */ + int maxLen() default -1; + } + + /** + * Marks a member as object that should be validated too. + */ + @Retention(RetentionPolicy.RUNTIME) + @Target(ElementType.FIELD) + public @interface Validator { } /** diff --git a/src/database/ConnectionBuilder.java b/src/database/ConnectionBuilder.java new file mode 100644 index 0000000..74e5c33 --- /dev/null +++ b/src/database/ConnectionBuilder.java @@ -0,0 +1,65 @@ +package database; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; + +/** + * Builder for connection. + * + * @author Peter Wu + */ +public class ConnectionBuilder { + + private String dbms; + private String serverName; + private int port; + private String dbName; + private String username; + private String password; + + /** + * Sets up a ConnectionBuilder for PostgreSQL and serverName localhost. The + * username, password and database name must still be supplied. + */ + public ConnectionBuilder() { + dbms = "postgresql"; + serverName = "localhost"; + port = 5432; + } + + public ConnectionBuilder setDbms(String dbms) { + this.dbms = dbms; + return this; + } + + public ConnectionBuilder setServerName(String serverName) { + this.serverName = serverName; + return this; + } + + public ConnectionBuilder setPort(int port) { + this.port = port; + return this; + } + + public ConnectionBuilder setDbName(String dbName) { + this.dbName = dbName; + return this; + } + + public ConnectionBuilder setUsername(String username) { + this.username = username; + return this; + } + + public ConnectionBuilder setPassword(String password) { + this.password = password; + return this; + } + + public Connection create() throws SQLException { + String url = "jdbc:" + dbms + "://" + serverName + ":" + port + "/" + dbName; + return DriverManager.getConnection(url, username, password); + } +} diff --git a/src/database/DBConnection.java b/src/database/DBConnection.java deleted file mode 100644 index 330c2a5..0000000 --- a/src/database/DBConnection.java +++ /dev/null @@ -1,59 +0,0 @@ -package database; - -import java.sql.Connection; -import java.sql.DriverManager; -import java.sql.PreparedStatement; -import java.sql.SQLException; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * Create a persistent database connection. - * - * @author Maurice Laveaux - */ -public class DBConnection { - - /* The interface to the postgresql database connection. */ - private Connection m_connection; - - public DBConnection(final String hostaddress, - final String port, - final String databasename, - final String username, - final String password) { - - String url = "jdbc:postgresql://" + hostaddress + ":" + port + "/" + databasename; - - try { - m_connection = DriverManager.getConnection(url, username, password); - } catch (SQLException ex) { - //TODO: retry when db connection fails or something. - throw new RuntimeException("cannot connect to host: " + url); - } - } - - /** - * prepares a statement. - * - * @param query The query to prepare. - * @return A prepared statement. - */ - public PreparedStatement create(final String query) throws SQLException { - return m_connection.prepareStatement(query); - } - - /** - * Closes the connection if it exists. - */ - public void close() { - if (m_connection != null) { - try { - m_connection.close(); - } catch (SQLException ex) { - /* TODO: what to do here else. */ - Logger.getLogger(DBConnection.class.getName()).log(Level.SEVERE, null, ex); - } - } - } -} diff --git a/src/database/NamedPreparedStatement.java b/src/database/NamedPreparedStatement.java new file mode 100644 index 0000000..c98edac --- /dev/null +++ b/src/database/NamedPreparedStatement.java @@ -0,0 +1,69 @@ +package database; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Allows a prepared statement to contain named parameters instead of a question + * mark position marker. + * + * @author Peter Wu + */ +public class NamedPreparedStatement { + + private final List<String> fields; + private final PreparedStatement stmt; + + public NamedPreparedStatement(Connection conn, String query) throws SQLException { + fields = new ArrayList<>(); + Pattern pattern = Pattern.compile(":\\w+"); + Matcher matcher = pattern.matcher(query); + while (matcher.find()) { + fields.add(matcher.group()); + } + String sql = query.replaceAll(pattern.pattern(), ""); + stmt = conn.prepareStatement(sql); + } + + private List<Integer> getParamIndices(String fieldName) { + List<Integer> indices = new ArrayList<>(); + int index = 0; + for (String name : fields) { + ++index; + if (name.equals(fieldName)) { + indices.add(index); + } + } + if (indices.isEmpty()) { + throw new RuntimeException("Missing " + fieldName + " in query!"); + } + return indices; + } + + public void setLong(String name, long l) throws SQLException { + for (int paramIndex : getParamIndices(name)) { + stmt.setLong(paramIndex, l); + } + } + + public void setString(String name, String str) throws SQLException { + for (int paramIndex : getParamIndices(name)) { + stmt.setString(paramIndex, str); + } + } + + public void setString(String name, int i) throws SQLException { + for (int paramIndex : getParamIndices(name)) { + stmt.setInt(paramIndex, i); + } + } + + public PreparedStatement getStmt() { + return stmt; + } +} diff --git a/src/database/QueryUtils.java b/src/database/QueryUtils.java index d0a424d..66edd1c 100644 --- a/src/database/QueryUtils.java +++ b/src/database/QueryUtils.java @@ -12,128 +12,145 @@ import java.sql.SQLException; */ public class QueryUtils { - /** - * Create an insert tweet. - * - * @return A valid database query. - */ - public static String insertProfile() { - return "INSERT INTO twitteruser (userid,displayname,timezone,tweetcount," - + "followercount,followedcount,location,tweetname,createdat,language) " - + "SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM twitteruser WHERE userid= ? )"; - } + public final static String insertProfile + = buildQuery("twitteruser", new String[]{"userid"}, + "userid", "displayname", "timezone", "tweetcount", "followercount", + "followedcount", "location", "tweetname", "createdat", "language"); - public static String insertTweet() { - return "INSERT INTO tweet (tweetid,createdat,favcount,retweetcount,text," - + "coordinates,language,retweetid,replyid,place) " - + "SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM tweet WHERE tweetid=? )"; - } + public final static String insertTweet + = buildQuery("tweet", new String[]{"tweetid"}, + "tweetid", "createdat", "favcount", "retweetcount", "text", + "coordinates", "language", "retweetid", "replyid", "place", + "userid"); - public static String insertHash() { - return "INSERT INTO hashtag (tweetid, hashtag) " - + "SELECT ?, ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM hashtag WHERE tweetid=? and hashtag =? )"; - } + public final static String insertHash + = buildQuery("hashtag", null, "tweetid", "hashtag"); - public static String insertUrl() { - return "INSERT INTO url (tweetid, url) " - + "SELECT ?, ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM url WHERE tweetid=? and url =? )"; - } + // TODO: split url to userUrl and tweetUrl + public final static String insertUrl + = buildQuery("url", null, "tweetid", "url"); - public static String insertMentions() { - return "INSERT INTO mentionsuser (tweetid, userid) " - + "SELECT ?, ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM mentionsuser WHERE tweetid=? and userid =? )"; - } + public final static String insertMentions + = buildQuery("mentionsuser", null, "tweetid", "userid"); - public static String insertPosted() { - return "INSERT INTO ispostedby (tweetid,userid) " - + "SELECT ? , ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM ispostedby WHERE tweetid= ? )"; - } + public final static String insertBrand + = buildQuery("mentionsbrand", null, "tweetid", "brand"); + + /** + * Builds an upsert query for a table and keys. + * + * @param table Table to insert or update the data. + * @param primaryKeys The primary keys for the table or null if all keys + * form the primary key. + * @param keys The keys to be inserted or updated. + * @return A SQL query containing named parameters. + */ + private static String buildQuery(String table, String[] primaryKeys, + String... keys) { + String sep = ""; + String fields = ""; // a, b, c + String values = ""; // :a, :b, :c + String set_values = ""; // a = nv.a, b = nv.b, c = nv.c + String pkey_matches = ""; + for (String key : keys) { + fields += sep + key; + values += sep + ":" + key; + // "u" is "table that gets updated", "nv" is "new values" + set_values += sep + key + " = nv." + key; + sep = ", "; + } + sep = ""; + // assume that all fields form the primary key if pks are missing + if (primaryKeys == null) { + primaryKeys = keys; + } + for (String pkey : primaryKeys) { + pkey_matches = sep + "u." + pkey + " = nv." + pkey; + sep = " AND "; + } - public static String insertBrand() { - return "INSERT INTO mentionsbrand (tweetid,brand) " - + "SELECT ? , ? " - + "WHERE NOT EXISTS " - + "(SELECT * FROM mentionsbrand WHERE tweetid = ? AND brand = ?)"; + String sql; + // CTE that holds new values + sql = "WITH nv (" + fields + ") AS (\n"; + sql += " VALUES (" + values + ")\n"; + sql += ")\n"; + // CTE that tries to update the table with new values (PSQL extension) + sql += ", upsert AS (\n"; + sql += " UPDATE " + table + " SET " + set_values + " FROM nv\n"; + sql += " WHERE " + pkey_matches + " RETURNING 1\n"; + sql += ")\n"; + // if nothing got updated, insert new entry + sql += "INSERT INTO " + table + " (" + fields + ")\n"; + sql += "SELECT " + fields + " FROM nv\n"; + sql += "WHERE NOT EXISTS (SELECT 1 FROM upsert)"; + return sql; } - public static void setInsertParams(PreparedStatement tweetStatement, - PreparedStatement profileStatement, - PreparedStatement postedStatement, + public static void setInsertParams(NamedPreparedStatement tweetStatement, + NamedPreparedStatement profileStatement, Tweet tweet) throws SQLException { - tweetStatement.setLong(1, tweet.id); - tweetStatement.setString(2, tweet.created_at); - tweetStatement.setLong(3, tweet.favorite_count); - tweetStatement.setLong(4, tweet.retweet_count); - tweetStatement.setString(5, tweet.text); - tweetStatement.setString(6, tweet.coordinates); - tweetStatement.setString(7, tweet.lang); + tweetStatement.setLong("id", tweet.id); + tweetStatement.setString("createdat", tweet.created_at); + tweetStatement.setLong("favoritecount", tweet.favorite_count); + tweetStatement.setLong("retweetcount", tweet.retweet_count); + tweetStatement.setString("text", tweet.text); + if (tweet.coordinates != null) { + float[] coords = tweet.coordinates.coordinates; + String coords_str = String.format("%f,%f", coords[0], coords[1]); + tweetStatement.setString("coordinates", coords_str); + } else { + tweetStatement.setString("coordinates", null); + } + tweetStatement.setString("language", tweet.lang); if (tweet.retweeted_status != null) { - tweetStatement.setLong(8, tweet.retweeted_status.id); + tweetStatement.setLong("retweetid", tweet.retweeted_status.id); + } else { + tweetStatement.setLong("retweetid", 0); + } + tweetStatement.setLong("replyid", tweet.in_reply_to_user_id); + // TODO: place is not a string... + if (tweet.place != null) { + tweetStatement.setString("place", tweet.place.full_name + " " + tweet.place.country); } else { - tweetStatement.setLong(8, 0); + tweetStatement.setString("place", null); } - tweetStatement.setLong(9, tweet.in_reply_to_user_id); - tweetStatement.setString(10, tweet.place); - tweetStatement.setLong(11, tweet.id); User twuser = tweet.user; - profileStatement.setLong(1, twuser.id); - profileStatement.setString(2, twuser.name); - profileStatement.setString(3, twuser.time_zone); - profileStatement.setLong(4, twuser.statuses_count); - profileStatement.setLong(5, twuser.followers_count); - profileStatement.setLong(6, twuser.friends_count); - profileStatement.setString(7, twuser.location); - profileStatement.setString(8, twuser.screen_name); - profileStatement.setString(9, twuser.created_at); - profileStatement.setString(10, twuser.lang); - profileStatement.setLong(11, twuser.id); - postedStatement.setLong(1, tweet.id); - postedStatement.setLong(2, twuser.id); - postedStatement.setLong(3, tweet.id); + tweetStatement.setLong("userid", twuser.id); + profileStatement.setLong("userid", twuser.id); + profileStatement.setString("displayname", twuser.name); + profileStatement.setString("timezone", twuser.time_zone); + profileStatement.setLong("tweetcount", twuser.statuses_count); + profileStatement.setLong("followercount", twuser.followers_count); + profileStatement.setLong("followedcount", twuser.friends_count); + profileStatement.setString("location", twuser.location); + profileStatement.setString("tweetname", twuser.screen_name); + profileStatement.setString("createdat", twuser.created_at); + profileStatement.setString("language", twuser.lang); } - public static void setInsertBrandParams(PreparedStatement brandStatement, + public static void setInsertBrandParams(NamedPreparedStatement brandStmt, long id, String brand) throws SQLException { - brandStatement.setLong(1, id); - brandStatement.setString(2, brand); - brandStatement.setLong(3, id); - brandStatement.setString(4, brand); + brandStmt.setLong("tweetid", id); + brandStmt.setString("brand", brand); + // TODO: rating (positive) } - public static void setInsertHashParams(PreparedStatement hashStatement, + public static void setInsertHashParams(NamedPreparedStatement hashStmt, long id, String text) throws SQLException { - hashStatement.setLong(1, id); - hashStatement.setString(2, text); - hashStatement.setLong(3, id); - hashStatement.setString(4, text); + hashStmt.setLong("tweetid", id); + hashStmt.setString("hashtag", text); } - public static void setInsertUrlParams(PreparedStatement UrlStatement, + public static void setInsertUrlParams(NamedPreparedStatement urlStmt, long id, String text) throws SQLException { - UrlStatement.setLong(1, id); - UrlStatement.setString(2, text); - UrlStatement.setLong(3, id); - UrlStatement.setString(4, text); + urlStmt.setLong("tweetid", id); + urlStmt.setString("url", text); } - public static void setInsertMentionsParams(PreparedStatement UrlStatement, + public static void setInsertMentionsParams(NamedPreparedStatement mentStmt, long id, long userid) throws SQLException { - UrlStatement.setLong(1, id); - UrlStatement.setLong(2, userid); - UrlStatement.setLong(3, id); - UrlStatement.setLong(4, userid); + mentStmt.setLong("tweetid", id); + mentStmt.setLong("userid", userid); } } diff --git a/src/io/TweetReader.java b/src/io/TweetReader.java index bf89a2c..8968b84 100644 --- a/src/io/TweetReader.java +++ b/src/io/TweetReader.java @@ -40,7 +40,7 @@ public class TweetReader implements ITweetReader { try { tweet = gson.fromJson(line, Tweet.class); } catch (JsonSyntaxException ex) { - // TODO: handle something? + debugTweet(line, ex); throw ex; } } @@ -54,4 +54,9 @@ public class TweetReader implements ITweetReader { } catch (IOException ex) { } } + + private void debugTweet(String line, JsonSyntaxException ex) { + System.err.println("Faulty line: " + line); + ex.printStackTrace(); + } } diff --git a/src/main/DataFiller.java b/src/main/DataFiller.java index a65a032..10ed774 100644 --- a/src/main/DataFiller.java +++ b/src/main/DataFiller.java @@ -1,9 +1,9 @@ package main; import data.Tweet; -import database.DBConnection; +import database.NamedPreparedStatement; import database.QueryUtils; -import java.sql.PreparedStatement; +import java.sql.Connection; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; @@ -20,58 +20,52 @@ public class DataFiller { /** * The main database connection to fill. */ - private final DBConnection m_connection; + private final Connection m_connection; /** * A single insert tweet that can be used. */ - private PreparedStatement m_insertTweet; + private final NamedPreparedStatement m_insertTweet; /** * A single insert profiles that can be used. */ - private PreparedStatement m_insertProfile; - /** - * A single insert ispostedby that can be used. - */ - private PreparedStatement m_insertPosted; + private final NamedPreparedStatement m_insertProfile; /** * A single insert brand that can be used. */ - private PreparedStatement m_insertBrand; + private final NamedPreparedStatement m_insertBrand; /** * A single insert hashtag that can be used. */ - private PreparedStatement m_insertHash; + private final NamedPreparedStatement m_insertHash; /** * A single insert url that can be used. */ - private PreparedStatement m_insertUrl; + private final NamedPreparedStatement m_insertUrl; /** * A single insert url that can be used. */ - private PreparedStatement m_insertMentions; + private final NamedPreparedStatement m_insertMentions; /** * Create the datafiller object. * * @param connection The database connection to use. */ - public DataFiller(DBConnection connection) { + public DataFiller(Connection connection) { try { m_connection = connection; - m_insertTweet = m_connection.create(QueryUtils.insertTweet()); - m_insertProfile = m_connection.create(QueryUtils.insertProfile()); - m_insertPosted = m_connection.create(QueryUtils.insertPosted()); - m_insertBrand = m_connection.create(QueryUtils.insertBrand()); - m_insertHash = m_connection.create(QueryUtils.insertHash()); - m_insertUrl = m_connection.create(QueryUtils.insertUrl()); - m_insertMentions = m_connection.create(QueryUtils.insertMentions()); - + m_insertTweet = new NamedPreparedStatement(m_connection, QueryUtils.insertTweet); + m_insertProfile = new NamedPreparedStatement(m_connection, QueryUtils.insertProfile); + m_insertBrand = new NamedPreparedStatement(m_connection, QueryUtils.insertBrand); + m_insertHash = new NamedPreparedStatement(m_connection, QueryUtils.insertHash); + m_insertUrl = new NamedPreparedStatement(m_connection, QueryUtils.insertUrl); + m_insertMentions = new NamedPreparedStatement(m_connection, QueryUtils.insertMentions); } catch (SQLException ex) { throw new RuntimeException(ex.getMessage()); } @@ -81,25 +75,24 @@ public class DataFiller { try { for (Tweet.Hashtag hashtag : tweet.entities.hashtags) { QueryUtils.setInsertHashParams(m_insertHash, tweet.id, hashtag.text); - m_insertHash.executeUpdate(); + m_insertHash.getStmt().executeUpdate(); } for (Tweet.Url url : tweet.entities.urls) { QueryUtils.setInsertHashParams(m_insertUrl, tweet.id, url.expanded_url); - m_insertUrl.executeUpdate(); + m_insertUrl.getStmt().executeUpdate(); } for (Tweet.Mention mention : tweet.entities.user_mentions) { QueryUtils.setInsertMentionsParams(m_insertMentions, tweet.id, mention.id); - m_insertMentions.executeUpdate(); + m_insertMentions.getStmt().executeUpdate(); } - QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, m_insertPosted, tweet); - m_insertTweet.executeUpdate(); - m_insertProfile.executeUpdate(); - m_insertPosted.executeUpdate(); + QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, tweet); + m_insertTweet.getStmt().executeUpdate(); + m_insertProfile.getStmt().executeUpdate(); List<String> brands = getBrands(tweet); for (String brand : brands) { QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand); - m_insertBrand.executeUpdate(); + m_insertBrand.getStmt().executeUpdate(); } } catch (SQLException ex) { Logger.getLogger(DataFiller.class.getName()).log(Level.SEVERE, null, ex); diff --git a/src/main/Main.java b/src/main/Main.java index 67032bf..b5df23f 100644 --- a/src/main/Main.java +++ b/src/main/Main.java @@ -1,13 +1,16 @@ package main; -import com.google.gson.JsonIOException; import com.google.gson.JsonSyntaxException; import data.Tweet; -import database.DBConnection; +import database.ConnectionBuilder; import io.FileTweetReader; import io.ITweetReader; import io.TweetReader; import java.io.IOException; +import java.sql.Connection; +import java.sql.SQLException; +import java.util.logging.Level; +import java.util.logging.Logger; /** * The main class. @@ -20,30 +23,60 @@ public class Main { * @param args the global arguments to pass to the program. */ public static void main(String[] args) { + Main main; try { - Main main = new Main(args); + main = new Main(args); } catch (IllegalArgumentException ex) { System.err.println(ex.getMessage()); System.exit(1); + return; } + main.run(); } - private String m_hostaddress; - private String m_filename; + private final ConnectionBuilder cb; + /** + * Whether the database should be contacted or not. + */ + private boolean skipDb; public Main(String[] args) { + // default connection properties + cb = new ConnectionBuilder() + .setServerName("localhost") + .setUsername("postgres") + .setPassword("2IOC02") + .setDbName("Twitter"); + skipDb = false; + /* parse the global options. */ parseGlobalOptions(args); + } - if (m_hostaddress == null) { - throw new IllegalArgumentException("Missing --dbhost to specify the hostaddress."); + private void printTweets(ITweetReader reader) throws IOException { + Tweet tweet; + long tweetNo = 1; + while ((tweet = reader.getTweet()) != null) { + System.out.println("/*" + tweetNo++ + "*/ " + tweet); } + } - DBConnection connection = new DBConnection(m_hostaddress, "5432", "Twitter", "postgres", "2IOC02"); - /* create the object that fills the database */ - DataFiller filler = new DataFiller(connection); + private void tweetsToDb(ITweetReader reader) throws IOException { + Tweet tweet; + try (Connection connection = cb.create()) { + /* create the object that fills the database */ + DataFiller filler = new DataFiller(connection); + while ((tweet = reader.getTweet()) != null) { + filler.processTweet(tweet); + } + } catch (SQLException ex) { + Logger.getLogger(Main.class.getName()).log(Level.SEVERE, + "DB error", ex); + } + } + public void run() { ITweetReader reader = null; try { if (m_filename == null) { @@ -51,10 +84,10 @@ public class Main { } else { reader = new FileTweetReader(m_filename); } - - Tweet tweet; - while ((tweet = reader.getTweet()) != null) { - filler.processTweet(tweet); + if (skipDb) { + printTweets(reader); + } else { + tweetsToDb(reader); } } catch (JsonSyntaxException ex) { System.err.println("Got an invalid tweet: " + ex); @@ -65,42 +98,43 @@ public class Main { reader.close(); } } - - connection.close(); - - System.out.print("exit succesfull."); } - private void parseGlobalOptions(String[] args) { + private void parseGlobalOptions(String[] args) + throws IllegalArgumentException { /* parse global options */ for (int i = 0; i < args.length; i++) { if ("--help".equals(args[i])) { printHelp(); + System.exit(0); } else if ("--dbhost".equals(args[i])) { - m_hostaddress = getParam(args, ++i); + cb.setServerName(getArg(args, ++i, "--dbhost")); + } else if ("--dbuser".equals(args[i])) { + cb.setUsername(getArg(args, ++i, "--dbuser")); + } else if ("--dbpass".equals(args[i])) { + cb.setPassword(getArg(args, ++i, "--dbpass")); + } else if ("--dbname".equals(args[i])) { + cb.setDbName(getArg(args, ++i, "--dbname")); + } else if ("--skipdb".equals(args[i])) { + skipDb = true; } else if (args[i].startsWith("-")) { throw new IllegalArgumentException("Invalid option: " + args[i]); } else { /* This should be the filename */ - m_filename = getParam(args, i); + m_filename = args[i]; } } - - if (args.length == 0) { - throw new IllegalArgumentException("No parameters specified, see --help for usage info."); - } } /** * Read an extra option for a command. */ - private String getParam(String[] args, Integer index) { - if (index + 1 <= args.length) { - index++; - return args[index - 1]; - } else { - throw new IllegalArgumentException("An extra option was missing."); + private String getArg(String[] params, int index, String name) { + if (index >= params.length) { + System.err.println("Missing argument for parameter " + name); + System.exit(1); } + return params[index]; } /** @@ -113,11 +147,16 @@ public class Main { } private final static String[] HELP = { + "Usage: java -jar DataFiller.jar [options] [tweets-file]", + "", "Global options:", - " --help Print this help text.", - " --dbhost <ip> Specify the database host ipaddress.", - " --file <tweet-filename> Specify the tweet file to read from.", - " ", - " If no --file was specified, program reads from standard input." + " --help Print this help text.", + " --dbhost HOST Database host (defaults to 'localhost')", + " --dbuser USER Database username (defaults to 'postgres')", + " --dbpass PASS Database password (defaults to '2IOC02')", + " --dbname NAME Database name (defaults to 'Twitter')", + " --skipdb Do not contact the database at all, just print data.", + "", + "If no tweets file is given, data will be read from standard input." }; } diff --git a/test/data/ValidatingJsonDeserializerTest.java b/test/data/ValidatingJsonDeserializerTest.java index ef19cd3..b51631a 100644 --- a/test/data/ValidatingJsonDeserializerTest.java +++ b/test/data/ValidatingJsonDeserializerTest.java @@ -3,9 +3,12 @@ package data; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonArray; +import com.google.gson.JsonElement; import com.google.gson.JsonNull; import com.google.gson.JsonObject; import com.google.gson.JsonParseException; +import com.google.gson.JsonPrimitive; +import org.apache.commons.lang3.StringUtils; import org.junit.Test; import static org.junit.Assert.*; @@ -358,8 +361,8 @@ public class ValidatingJsonDeserializerTest { tweet.addProperty("in_reply_to_user_id", 4); tweet.addProperty("created_at", "X"); tweet.addProperty("favorite_count", 4); - tweet.addProperty("place", "X"); - tweet.addProperty("coordinates", "X"); + tweet.add("place", JsonNull.INSTANCE); + tweet.add("coordinates", JsonNull.INSTANCE); tweet.addProperty("text", "X"); tweet.add("retweeted_status", JsonNull.INSTANCE); // Tweet object JsonObject entities = new JsonObject(); @@ -384,13 +387,34 @@ public class ValidatingJsonDeserializerTest { obj.remove(prop); } + private void addProperty(JsonObject obj, JsonElement val, String... names) { + String prop; + for (int i = 0; i < names.length - 1; i++) { + prop = names[i]; + if (!obj.has(prop)) { + obj.add(prop, new JsonObject()); + } + obj = obj.getAsJsonObject(prop); + } + prop = names[names.length - 1]; + obj.add(prop, val); + } + private void checkImpairedTweet(String... names) { JsonObject tweet = buildMinimalTweet(buildMinimalUser()); removeProperty(tweet, names); - String prop = names[names.length - 1]; + String prop = StringUtils.join(names, "."); checkTweetFail(tweet, "Missing field: " + prop); } + private void checkImpairedUser(String... names) { + JsonObject user = buildMinimalUser(); + removeProperty(user, names); + JsonObject tweet = buildMinimalTweet(user); + String prop = StringUtils.join(names, "."); + checkTweetFail(tweet, "Missing field: user." + prop); + } + private void checkTweetFail(JsonObject tweet, String exp) { Gson gson = new Gson(); System.out.println("Checking for failure: " + exp); @@ -419,7 +443,55 @@ public class ValidatingJsonDeserializerTest { @Test public void testTweet() { + checkImpairedTweet("id"); + checkImpairedTweet("lang"); + checkImpairedTweet("created_at"); + checkImpairedTweet("favorite_count"); checkImpairedTweet("text"); + checkImpairedTweet("retweet_count"); + checkImpairedTweet("user"); + } + + @Test + public void testTweetNulls() { + // place can be null + JsonObject tweet = buildMinimalTweet(buildMinimalUser()); + removeProperty(tweet, "place"); + checkTweetPass(tweet); + + tweet = buildMinimalTweet(buildMinimalUser()); + removeProperty(tweet, "in_reply_to_user_id"); + checkTweetPass(tweet); + + tweet = buildMinimalTweet(buildMinimalUser()); + removeProperty(tweet, "coordinates"); + checkTweetPass(tweet); + } + + @Test + public void testTweetCoordinates() { + JsonObject tweet = buildMinimalTweet(buildMinimalUser()); + addProperty(tweet, new JsonPrimitive("X"), "coordinates"); + checkTweetFail(tweet, "Expected object: coordinates"); + + JsonObject coords = new JsonObject(); + // overwrite coordinates with object + addProperty(tweet, coords, "coordinates"); + checkTweetFail(tweet, "Missing field: coordinates.coordinates"); + + // set coordinates.coordinates + JsonArray coordsFloat = new JsonArray(); + coords.add("coordinates", coordsFloat); + checkTweetFail(tweet, "Array smaller than 2: coordinates.coordinates"); + + coordsFloat.add(new JsonPrimitive(1.0f)); + checkTweetFail(tweet, "Array smaller than 2: coordinates.coordinates"); + + coordsFloat.add(new JsonPrimitive(1.0f)); + checkTweetPass(tweet); + + coordsFloat.add(new JsonPrimitive(1.0f)); + checkTweetFail(tweet, "Array larger than 2: coordinates.coordinates"); } @Test @@ -430,9 +502,94 @@ public class ValidatingJsonDeserializerTest { } @Test + public void testUser() { + checkImpairedUser("id"); + checkImpairedUser("name"); + checkImpairedUser("statuses_count"); + checkImpairedUser("followers_count"); + checkImpairedUser("friends_count"); + checkImpairedUser("location"); + checkImpairedUser("screen_name"); + checkImpairedUser("created_at"); + checkImpairedUser("lang"); + } + + @Test + public void testNullUserEntities() { + // entities can be null + JsonObject tweet = buildMinimalTweet(buildMinimalUser()); + removeProperty(tweet, "user", "entities"); + checkTweetPass(tweet); + + tweet = buildMinimalTweet(buildMinimalUser()); + removeProperty(tweet, "user", "time_zone"); + checkTweetPass(tweet); + } + + @Test public void testUserEntities() { - checkImpairedTweet("user", "entities"); checkImpairedTweet("user", "entities", "url"); checkImpairedTweet("user", "entities", "url", "urls"); } + + @Test + public void testRetweetedStatus() { + JsonObject tweet = buildMinimalTweet(buildMinimalUser()); + JsonObject rtUser = buildMinimalUser(); + String screen_name = "Somebody"; + rtUser.addProperty("screen_name", screen_name); + JsonObject retweet = buildMinimalTweet(rtUser); + tweet.add("retweeted_status", retweet); + Tweet parsedTweet = checkTweetPass(tweet); + assertNotNull(parsedTweet.retweeted_status); + assertEquals(screen_name, parsedTweet.retweeted_status.user.screen_name); + } + + @Test + public void testRetweetedMissingText() { + JsonObject tweet = buildMinimalTweet(buildMinimalUser()); + JsonObject retweet = buildMinimalTweet(buildMinimalUser()); + removeProperty(retweet, "text"); + tweet.add("retweeted_status", retweet); + checkTweetFail(tweet, "Missing field: retweeted_status.text"); + } + + @Test + public void testNullPlace() { + JsonObject tweet = buildMinimalTweet(buildMinimalUser()); + removeProperty(tweet, "place"); + tweet.add("place", null); + checkTweetPass(tweet); + } + + @Test + public void testWrongType() { + JsonObject tweet = buildMinimalTweet(buildMinimalUser()); + addProperty(tweet, new JsonPrimitive(1), "text"); + checkTweetFail(tweet, "Expected string: text"); + } + + @Test + public void testWrongTypeArray() { + JsonObject tweet = buildMinimalTweet(buildMinimalUser()); + addProperty(tweet, new JsonPrimitive(1), "entities", "urls"); + checkTweetFail(tweet, "Expected array: entities.urls"); + } + + @Test + public void testWrongTypeObject() { + JsonObject tweet = buildMinimalTweet(buildMinimalUser()); + addProperty(tweet, new JsonPrimitive(1), "entities"); + checkTweetFail(tweet, "Expected object: entities"); + } + + @Test + public void testNotATweetObject() { + try { + DebuggingJsonDeserializer.tryValidate("{}", Tweet.class); + fail("DebuggingJsonDeserializer must also check for type."); + } catch (JsonParseException ex) { + assertEquals("Missing field: id", ex.getMessage()); + } + } } |