summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/commons-lang3-3.3.2.jarbin0 -> 412740 bytes
-rw-r--r--nbproject/project.properties4
-rw-r--r--src/data/DebuggingJsonDeserializer.java41
-rw-r--r--src/data/Tweet.java27
-rw-r--r--src/data/User.java2
-rw-r--r--src/data/ValidatingJsonDeserializer.java88
-rw-r--r--src/database/ConnectionBuilder.java65
-rw-r--r--src/database/DBConnection.java59
-rw-r--r--src/database/NamedPreparedStatement.java69
-rw-r--r--src/database/QueryUtils.java211
-rw-r--r--src/io/TweetReader.java7
-rw-r--r--src/main/DataFiller.java53
-rw-r--r--src/main/Main.java111
-rw-r--r--test/data/ValidatingJsonDeserializerTest.java165
14 files changed, 654 insertions, 248 deletions
diff --git a/lib/commons-lang3-3.3.2.jar b/lib/commons-lang3-3.3.2.jar
new file mode 100644
index 0000000..bb06979
--- /dev/null
+++ b/lib/commons-lang3-3.3.2.jar
Binary files differ
diff --git a/nbproject/project.properties b/nbproject/project.properties
index d738523..f6acd32 100644
--- a/nbproject/project.properties
+++ b/nbproject/project.properties
@@ -29,13 +29,15 @@ dist.jar=${dist.dir}/Datafiller.jar
dist.javadoc.dir=${dist.dir}/javadoc
endorsed.classpath=
excludes=
+file.reference.commons-lang3-3.3.2.jar=lib/commons-lang3-3.3.2.jar
file.reference.gson-2.2.4.jar=lib/gson-2.2.4.jar
file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar
includes=**
jar.compress=false
javac.classpath=\
${file.reference.postgresql-9.3-1101.jdbc41.jar}:\
- ${file.reference.gson-2.2.4.jar}
+ ${file.reference.gson-2.2.4.jar}:\
+ ${file.reference.commons-lang3-3.3.2.jar}
# Space-separated list of extra javac options
javac.compilerargs=
javac.deprecation=false
diff --git a/src/data/DebuggingJsonDeserializer.java b/src/data/DebuggingJsonDeserializer.java
new file mode 100644
index 0000000..1b00d4d
--- /dev/null
+++ b/src/data/DebuggingJsonDeserializer.java
@@ -0,0 +1,41 @@
+package data;
+
+import com.google.gson.GsonBuilder;
+import com.google.gson.JsonDeserializationContext;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonParseException;
+import java.lang.reflect.Type;
+
+/**
+ * Validates the deserialization of a JSON string. The main motivation of this
+ * class is to print the property name of an object that throws an
+ * IllegalStateException because a string was expected, but a different type got
+ * returned.
+ *
+ * @author Peter Wu
+ */
+public class DebuggingJsonDeserializer extends ValidatingJsonDeserializer {
+
+ private final Class overriddenClass;
+
+ private DebuggingJsonDeserializer(Class overriddenClass) {
+ this.overriddenClass = overriddenClass;
+ }
+
+ @Override
+ public Object deserialize(JsonElement je, Type type,
+ JsonDeserializationContext jdc) throws JsonParseException {
+ checkObject("", je, overriddenClass);
+ return null;
+ }
+
+ public static void tryValidate(String json, Class cls) {
+ new GsonBuilder()
+ .registerTypeAdapter(Dummy.class, new DebuggingJsonDeserializer(cls))
+ .create()
+ .fromJson(json, Dummy.class);
+ }
+
+ private class Dummy {
+ }
+}
diff --git a/src/data/Tweet.java b/src/data/Tweet.java
index 577b620..3caae6a 100644
--- a/src/data/Tweet.java
+++ b/src/data/Tweet.java
@@ -10,13 +10,19 @@ public class Tweet {
public long id;
public String lang;
+ @ValidatingJsonDeserializer.Nullable
public long in_reply_to_user_id;
public String created_at;
public long favorite_count;
- public String place;
- public String coordinates;
+ @ValidatingJsonDeserializer.Nullable
+ @ValidatingJsonDeserializer.Validator
+ public Place place;
+ @ValidatingJsonDeserializer.Nullable
+ @ValidatingJsonDeserializer.Validator
+ public Coordinates coordinates;
public String text;
@ValidatingJsonDeserializer.Nullable
+ @ValidatingJsonDeserializer.Validator
public Tweet retweeted_status;
@ValidatingJsonDeserializer.Validator
public Entities entities;
@@ -30,6 +36,23 @@ public class Tweet {
return gson.toJson(this);
}
+ public static class Place {
+
+ //public String id; // "a5b6bdd8008412b1"
+ //public String name; // "Danbury"
+ //public String country_code; // "US"
+ public String country; // "United States"
+ //public String url; // "https://api.twitter.com/1.1/geo/id/a5b6bdd8008412b1.json"
+ public String full_name; // "Danbury, CT"
+ }
+
+ public static class Coordinates {
+
+ //public String type; // always "Point"?
+ @ValidatingJsonDeserializer.ArrayValidator(minLen = 2, maxLen = 2)
+ public float[] coordinates; // e.g. [-73.49513755, 41.43286284]
+ }
+
public static class Entities {
@ValidatingJsonDeserializer.Validator
diff --git a/src/data/User.java b/src/data/User.java
index 390d986..91b5e07 100644
--- a/src/data/User.java
+++ b/src/data/User.java
@@ -7,6 +7,7 @@ public class User {
public long id;
public String name;
+ @ValidatingJsonDeserializer.Nullable
public String time_zone;
public long statuses_count;
public long followers_count;
@@ -14,6 +15,7 @@ public class User {
public String location;
public String screen_name;
public String created_at;
+ @ValidatingJsonDeserializer.Nullable
@ValidatingJsonDeserializer.Validator
public Entities entities;
public String lang;
diff --git a/src/data/ValidatingJsonDeserializer.java b/src/data/ValidatingJsonDeserializer.java
index 8711f59..d5a8dbc 100644
--- a/src/data/ValidatingJsonDeserializer.java
+++ b/src/data/ValidatingJsonDeserializer.java
@@ -1,7 +1,6 @@
package data;
import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
import com.google.gson.JsonArray;
import com.google.gson.JsonDeserializationContext;
import com.google.gson.JsonDeserializer;
@@ -25,46 +24,83 @@ public class ValidatingJsonDeserializer<T> implements JsonDeserializer<T> {
@Override
public T deserialize(JsonElement je, Type type,
JsonDeserializationContext jdc) throws JsonParseException {
- T obj = new Gson().fromJson(je, type);
- checkObject(je, obj.getClass());
+ T obj;
+ try {
+ obj = new Gson().fromJson(je, type);
+ } catch (JsonParseException jpe) {
+ DebuggingJsonDeserializer.tryValidate(je.toString(), (Class) type);
+ throw new JsonParseException("Debugger could not find a bug", jpe);
+ }
+ checkObject("", je, obj.getClass());
return obj;
}
- private void checkObject(JsonElement je, Class type) {
- if (!je.isJsonObject()) {
- throw new JsonParseException("Expected object: " + type.getName());
- }
+ void checkObject(String path, JsonElement je, Class type)
+ throws JsonParseException {
JsonObject jsonObj = je.getAsJsonObject();
for (Field f : type.getDeclaredFields()) {
- System.err.println("verif " + f.getName());
- if (!jsonObj.has(f.getName())) {
+ JsonElement val = jsonObj.get(f.getName());
+ if (!jsonObj.has(f.getName()) || val.isJsonNull()) {
if (f.getAnnotation(Nullable.class) != null) {
// null allowed, skip
continue;
}
- throw new JsonParseException("Missing field: " + f.getName());
+ throw new JsonParseException("Missing field: " + path + f.getName());
}
- tryValidateProperty(jsonObj.get(f.getName()), f);
+ if (f.getType().equals(String.class)) {
+ if (!val.isJsonPrimitive() || !val.getAsJsonPrimitive().isString()) {
+ throw new JsonParseException("Expected string: " + path + f.getName());
+ }
+ }
+ tryValidateProperty(path, val, f);
// TODO: validate type?
}
}
- private void tryValidateProperty(JsonElement je, Field f) {
+ private void tryValidateProperty(String path, JsonElement je, Field f)
+ throws JsonParseException {
+ Class<?> type = f.getType();
// assume that this annotation is only applied to objects
Validator v = f.getAnnotation(Validator.class);
+ ArrayValidator av = f.getAnnotation(ArrayValidator.class);
+ path += f.getName();
+ if (av != null) {
+ if (!type.isArray()) {
+ throw new RuntimeException("Invalid " + av.getClass().getName()
+ + " + annotation for " + path);
+ }
+ if (!je.isJsonArray()) {
+ throw new JsonParseException("Expected array: " + path);
+ }
+ JsonArray ja = je.getAsJsonArray();
+ int minLen = av.minLen(), maxLen = av.maxLen();
+ if (minLen >= 0 && ja.size() < minLen) {
+ throw new JsonParseException("Array smaller than "
+ + minLen + ": " + path);
+ }
+ if (maxLen >= 0 && ja.size() > maxLen) {
+ throw new JsonParseException("Array larger than "
+ + maxLen + ": " + path);
+ }
+ }
if (v != null) {
- Class<?> type = f.getType();
if (type.isArray()) {
+ // the class expects an array, so the value must have one too.
if (!je.isJsonArray()) {
- throw new JsonParseException("Not an array: " + f.getName());
+ throw new JsonParseException("Expected array: " + path);
}
JsonArray ja = je.getAsJsonArray();
type = type.getComponentType();
+ // for each array element, check if the object is valid.
for (JsonElement arr_je : ja) {
- checkObject(arr_je, type);
+ checkObject(path + ".", arr_je, type);
}
} else {
- checkObject(je, type);
+ // not an array, assume a verifiable object
+ if (!je.isJsonObject()) {
+ throw new JsonParseException("Expected object: " + path);
+ }
+ checkObject(path + ".", je, type);
}
}
}
@@ -74,9 +110,25 @@ public class ValidatingJsonDeserializer<T> implements JsonDeserializer<T> {
*/
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.FIELD)
- public @interface Validator {
+ public @interface ArrayValidator {
+
+ /**
+ * @return Minimal length for array types (-1 if not checked).
+ */
+ int minLen() default -1;
- Class deserializer() default ValidatingJsonDeserializer.class;
+ /**
+ * @return Maximum length for array types (-1 if not checked).
+ */
+ int maxLen() default -1;
+ }
+
+ /**
+ * Marks a member as object that should be validated too.
+ */
+ @Retention(RetentionPolicy.RUNTIME)
+ @Target(ElementType.FIELD)
+ public @interface Validator {
}
/**
diff --git a/src/database/ConnectionBuilder.java b/src/database/ConnectionBuilder.java
new file mode 100644
index 0000000..74e5c33
--- /dev/null
+++ b/src/database/ConnectionBuilder.java
@@ -0,0 +1,65 @@
+package database;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.SQLException;
+
+/**
+ * Builder for connection.
+ *
+ * @author Peter Wu
+ */
+public class ConnectionBuilder {
+
+ private String dbms;
+ private String serverName;
+ private int port;
+ private String dbName;
+ private String username;
+ private String password;
+
+ /**
+ * Sets up a ConnectionBuilder for PostgreSQL and serverName localhost. The
+ * username, password and database name must still be supplied.
+ */
+ public ConnectionBuilder() {
+ dbms = "postgresql";
+ serverName = "localhost";
+ port = 5432;
+ }
+
+ public ConnectionBuilder setDbms(String dbms) {
+ this.dbms = dbms;
+ return this;
+ }
+
+ public ConnectionBuilder setServerName(String serverName) {
+ this.serverName = serverName;
+ return this;
+ }
+
+ public ConnectionBuilder setPort(int port) {
+ this.port = port;
+ return this;
+ }
+
+ public ConnectionBuilder setDbName(String dbName) {
+ this.dbName = dbName;
+ return this;
+ }
+
+ public ConnectionBuilder setUsername(String username) {
+ this.username = username;
+ return this;
+ }
+
+ public ConnectionBuilder setPassword(String password) {
+ this.password = password;
+ return this;
+ }
+
+ public Connection create() throws SQLException {
+ String url = "jdbc:" + dbms + "://" + serverName + ":" + port + "/" + dbName;
+ return DriverManager.getConnection(url, username, password);
+ }
+}
diff --git a/src/database/DBConnection.java b/src/database/DBConnection.java
deleted file mode 100644
index 330c2a5..0000000
--- a/src/database/DBConnection.java
+++ /dev/null
@@ -1,59 +0,0 @@
-package database;
-
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.SQLException;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-
-/**
- * Create a persistent database connection.
- *
- * @author Maurice Laveaux
- */
-public class DBConnection {
-
- /* The interface to the postgresql database connection. */
- private Connection m_connection;
-
- public DBConnection(final String hostaddress,
- final String port,
- final String databasename,
- final String username,
- final String password) {
-
- String url = "jdbc:postgresql://" + hostaddress + ":" + port + "/" + databasename;
-
- try {
- m_connection = DriverManager.getConnection(url, username, password);
- } catch (SQLException ex) {
- //TODO: retry when db connection fails or something.
- throw new RuntimeException("cannot connect to host: " + url);
- }
- }
-
- /**
- * prepares a statement.
- *
- * @param query The query to prepare.
- * @return A prepared statement.
- */
- public PreparedStatement create(final String query) throws SQLException {
- return m_connection.prepareStatement(query);
- }
-
- /**
- * Closes the connection if it exists.
- */
- public void close() {
- if (m_connection != null) {
- try {
- m_connection.close();
- } catch (SQLException ex) {
- /* TODO: what to do here else. */
- Logger.getLogger(DBConnection.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- }
-}
diff --git a/src/database/NamedPreparedStatement.java b/src/database/NamedPreparedStatement.java
new file mode 100644
index 0000000..c98edac
--- /dev/null
+++ b/src/database/NamedPreparedStatement.java
@@ -0,0 +1,69 @@
+package database;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Allows a prepared statement to contain named parameters instead of a question
+ * mark position marker.
+ *
+ * @author Peter Wu
+ */
+public class NamedPreparedStatement {
+
+ private final List<String> fields;
+ private final PreparedStatement stmt;
+
+ public NamedPreparedStatement(Connection conn, String query) throws SQLException {
+ fields = new ArrayList<>();
+ Pattern pattern = Pattern.compile(":\\w+");
+ Matcher matcher = pattern.matcher(query);
+ while (matcher.find()) {
+ fields.add(matcher.group());
+ }
+ String sql = query.replaceAll(pattern.pattern(), "");
+ stmt = conn.prepareStatement(sql);
+ }
+
+ private List<Integer> getParamIndices(String fieldName) {
+ List<Integer> indices = new ArrayList<>();
+ int index = 0;
+ for (String name : fields) {
+ ++index;
+ if (name.equals(fieldName)) {
+ indices.add(index);
+ }
+ }
+ if (indices.isEmpty()) {
+ throw new RuntimeException("Missing " + fieldName + " in query!");
+ }
+ return indices;
+ }
+
+ public void setLong(String name, long l) throws SQLException {
+ for (int paramIndex : getParamIndices(name)) {
+ stmt.setLong(paramIndex, l);
+ }
+ }
+
+ public void setString(String name, String str) throws SQLException {
+ for (int paramIndex : getParamIndices(name)) {
+ stmt.setString(paramIndex, str);
+ }
+ }
+
+ public void setString(String name, int i) throws SQLException {
+ for (int paramIndex : getParamIndices(name)) {
+ stmt.setInt(paramIndex, i);
+ }
+ }
+
+ public PreparedStatement getStmt() {
+ return stmt;
+ }
+}
diff --git a/src/database/QueryUtils.java b/src/database/QueryUtils.java
index d0a424d..66edd1c 100644
--- a/src/database/QueryUtils.java
+++ b/src/database/QueryUtils.java
@@ -12,128 +12,145 @@ import java.sql.SQLException;
*/
public class QueryUtils {
- /**
- * Create an insert tweet.
- *
- * @return A valid database query.
- */
- public static String insertProfile() {
- return "INSERT INTO twitteruser (userid,displayname,timezone,tweetcount,"
- + "followercount,followedcount,location,tweetname,createdat,language) "
- + "SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM twitteruser WHERE userid= ? )";
- }
+ public final static String insertProfile
+ = buildQuery("twitteruser", new String[]{"userid"},
+ "userid", "displayname", "timezone", "tweetcount", "followercount",
+ "followedcount", "location", "tweetname", "createdat", "language");
- public static String insertTweet() {
- return "INSERT INTO tweet (tweetid,createdat,favcount,retweetcount,text,"
- + "coordinates,language,retweetid,replyid,place) "
- + "SELECT ?, ?, ?, ?, ?, ?, ?, ?, ?, ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM tweet WHERE tweetid=? )";
- }
+ public final static String insertTweet
+ = buildQuery("tweet", new String[]{"tweetid"},
+ "tweetid", "createdat", "favcount", "retweetcount", "text",
+ "coordinates", "language", "retweetid", "replyid", "place",
+ "userid");
- public static String insertHash() {
- return "INSERT INTO hashtag (tweetid, hashtag) "
- + "SELECT ?, ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM hashtag WHERE tweetid=? and hashtag =? )";
- }
+ public final static String insertHash
+ = buildQuery("hashtag", null, "tweetid", "hashtag");
- public static String insertUrl() {
- return "INSERT INTO url (tweetid, url) "
- + "SELECT ?, ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM url WHERE tweetid=? and url =? )";
- }
+ // TODO: split url to userUrl and tweetUrl
+ public final static String insertUrl
+ = buildQuery("url", null, "tweetid", "url");
- public static String insertMentions() {
- return "INSERT INTO mentionsuser (tweetid, userid) "
- + "SELECT ?, ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM mentionsuser WHERE tweetid=? and userid =? )";
- }
+ public final static String insertMentions
+ = buildQuery("mentionsuser", null, "tweetid", "userid");
- public static String insertPosted() {
- return "INSERT INTO ispostedby (tweetid,userid) "
- + "SELECT ? , ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM ispostedby WHERE tweetid= ? )";
- }
+ public final static String insertBrand
+ = buildQuery("mentionsbrand", null, "tweetid", "brand");
+
+ /**
+ * Builds an upsert query for a table and keys.
+ *
+ * @param table Table to insert or update the data.
+ * @param primaryKeys The primary keys for the table or null if all keys
+ * form the primary key.
+ * @param keys The keys to be inserted or updated.
+ * @return A SQL query containing named parameters.
+ */
+ private static String buildQuery(String table, String[] primaryKeys,
+ String... keys) {
+ String sep = "";
+ String fields = ""; // a, b, c
+ String values = ""; // :a, :b, :c
+ String set_values = ""; // a = nv.a, b = nv.b, c = nv.c
+ String pkey_matches = "";
+ for (String key : keys) {
+ fields += sep + key;
+ values += sep + ":" + key;
+ // "u" is "table that gets updated", "nv" is "new values"
+ set_values += sep + key + " = nv." + key;
+ sep = ", ";
+ }
+ sep = "";
+ // assume that all fields form the primary key if pks are missing
+ if (primaryKeys == null) {
+ primaryKeys = keys;
+ }
+ for (String pkey : primaryKeys) {
+ pkey_matches = sep + "u." + pkey + " = nv." + pkey;
+ sep = " AND ";
+ }
- public static String insertBrand() {
- return "INSERT INTO mentionsbrand (tweetid,brand) "
- + "SELECT ? , ? "
- + "WHERE NOT EXISTS "
- + "(SELECT * FROM mentionsbrand WHERE tweetid = ? AND brand = ?)";
+ String sql;
+ // CTE that holds new values
+ sql = "WITH nv (" + fields + ") AS (\n";
+ sql += " VALUES (" + values + ")\n";
+ sql += ")\n";
+ // CTE that tries to update the table with new values (PSQL extension)
+ sql += ", upsert AS (\n";
+ sql += " UPDATE " + table + " SET " + set_values + " FROM nv\n";
+ sql += " WHERE " + pkey_matches + " RETURNING 1\n";
+ sql += ")\n";
+ // if nothing got updated, insert new entry
+ sql += "INSERT INTO " + table + " (" + fields + ")\n";
+ sql += "SELECT " + fields + " FROM nv\n";
+ sql += "WHERE NOT EXISTS (SELECT 1 FROM upsert)";
+ return sql;
}
- public static void setInsertParams(PreparedStatement tweetStatement,
- PreparedStatement profileStatement,
- PreparedStatement postedStatement,
+ public static void setInsertParams(NamedPreparedStatement tweetStatement,
+ NamedPreparedStatement profileStatement,
Tweet tweet) throws SQLException {
- tweetStatement.setLong(1, tweet.id);
- tweetStatement.setString(2, tweet.created_at);
- tweetStatement.setLong(3, tweet.favorite_count);
- tweetStatement.setLong(4, tweet.retweet_count);
- tweetStatement.setString(5, tweet.text);
- tweetStatement.setString(6, tweet.coordinates);
- tweetStatement.setString(7, tweet.lang);
+ tweetStatement.setLong("id", tweet.id);
+ tweetStatement.setString("createdat", tweet.created_at);
+ tweetStatement.setLong("favoritecount", tweet.favorite_count);
+ tweetStatement.setLong("retweetcount", tweet.retweet_count);
+ tweetStatement.setString("text", tweet.text);
+ if (tweet.coordinates != null) {
+ float[] coords = tweet.coordinates.coordinates;
+ String coords_str = String.format("%f,%f", coords[0], coords[1]);
+ tweetStatement.setString("coordinates", coords_str);
+ } else {
+ tweetStatement.setString("coordinates", null);
+ }
+ tweetStatement.setString("language", tweet.lang);
if (tweet.retweeted_status != null) {
- tweetStatement.setLong(8, tweet.retweeted_status.id);
+ tweetStatement.setLong("retweetid", tweet.retweeted_status.id);
+ } else {
+ tweetStatement.setLong("retweetid", 0);
+ }
+ tweetStatement.setLong("replyid", tweet.in_reply_to_user_id);
+ // TODO: place is not a string...
+ if (tweet.place != null) {
+ tweetStatement.setString("place", tweet.place.full_name + " " + tweet.place.country);
} else {
- tweetStatement.setLong(8, 0);
+ tweetStatement.setString("place", null);
}
- tweetStatement.setLong(9, tweet.in_reply_to_user_id);
- tweetStatement.setString(10, tweet.place);
- tweetStatement.setLong(11, tweet.id);
User twuser = tweet.user;
- profileStatement.setLong(1, twuser.id);
- profileStatement.setString(2, twuser.name);
- profileStatement.setString(3, twuser.time_zone);
- profileStatement.setLong(4, twuser.statuses_count);
- profileStatement.setLong(5, twuser.followers_count);
- profileStatement.setLong(6, twuser.friends_count);
- profileStatement.setString(7, twuser.location);
- profileStatement.setString(8, twuser.screen_name);
- profileStatement.setString(9, twuser.created_at);
- profileStatement.setString(10, twuser.lang);
- profileStatement.setLong(11, twuser.id);
- postedStatement.setLong(1, tweet.id);
- postedStatement.setLong(2, twuser.id);
- postedStatement.setLong(3, tweet.id);
+ tweetStatement.setLong("userid", twuser.id);
+ profileStatement.setLong("userid", twuser.id);
+ profileStatement.setString("displayname", twuser.name);
+ profileStatement.setString("timezone", twuser.time_zone);
+ profileStatement.setLong("tweetcount", twuser.statuses_count);
+ profileStatement.setLong("followercount", twuser.followers_count);
+ profileStatement.setLong("followedcount", twuser.friends_count);
+ profileStatement.setString("location", twuser.location);
+ profileStatement.setString("tweetname", twuser.screen_name);
+ profileStatement.setString("createdat", twuser.created_at);
+ profileStatement.setString("language", twuser.lang);
}
- public static void setInsertBrandParams(PreparedStatement brandStatement,
+ public static void setInsertBrandParams(NamedPreparedStatement brandStmt,
long id, String brand) throws SQLException {
- brandStatement.setLong(1, id);
- brandStatement.setString(2, brand);
- brandStatement.setLong(3, id);
- brandStatement.setString(4, brand);
+ brandStmt.setLong("tweetid", id);
+ brandStmt.setString("brand", brand);
+ // TODO: rating (positive)
}
- public static void setInsertHashParams(PreparedStatement hashStatement,
+ public static void setInsertHashParams(NamedPreparedStatement hashStmt,
long id, String text) throws SQLException {
- hashStatement.setLong(1, id);
- hashStatement.setString(2, text);
- hashStatement.setLong(3, id);
- hashStatement.setString(4, text);
+ hashStmt.setLong("tweetid", id);
+ hashStmt.setString("hashtag", text);
}
- public static void setInsertUrlParams(PreparedStatement UrlStatement,
+ public static void setInsertUrlParams(NamedPreparedStatement urlStmt,
long id, String text) throws SQLException {
- UrlStatement.setLong(1, id);
- UrlStatement.setString(2, text);
- UrlStatement.setLong(3, id);
- UrlStatement.setString(4, text);
+ urlStmt.setLong("tweetid", id);
+ urlStmt.setString("url", text);
}
- public static void setInsertMentionsParams(PreparedStatement UrlStatement,
+ public static void setInsertMentionsParams(NamedPreparedStatement mentStmt,
long id, long userid) throws SQLException {
- UrlStatement.setLong(1, id);
- UrlStatement.setLong(2, userid);
- UrlStatement.setLong(3, id);
- UrlStatement.setLong(4, userid);
+ mentStmt.setLong("tweetid", id);
+ mentStmt.setLong("userid", userid);
}
}
diff --git a/src/io/TweetReader.java b/src/io/TweetReader.java
index bf89a2c..8968b84 100644
--- a/src/io/TweetReader.java
+++ b/src/io/TweetReader.java
@@ -40,7 +40,7 @@ public class TweetReader implements ITweetReader {
try {
tweet = gson.fromJson(line, Tweet.class);
} catch (JsonSyntaxException ex) {
- // TODO: handle something?
+ debugTweet(line, ex);
throw ex;
}
}
@@ -54,4 +54,9 @@ public class TweetReader implements ITweetReader {
} catch (IOException ex) {
}
}
+
+ private void debugTweet(String line, JsonSyntaxException ex) {
+ System.err.println("Faulty line: " + line);
+ ex.printStackTrace();
+ }
}
diff --git a/src/main/DataFiller.java b/src/main/DataFiller.java
index a65a032..10ed774 100644
--- a/src/main/DataFiller.java
+++ b/src/main/DataFiller.java
@@ -1,9 +1,9 @@
package main;
import data.Tweet;
-import database.DBConnection;
+import database.NamedPreparedStatement;
import database.QueryUtils;
-import java.sql.PreparedStatement;
+import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
@@ -20,58 +20,52 @@ public class DataFiller {
/**
* The main database connection to fill.
*/
- private final DBConnection m_connection;
+ private final Connection m_connection;
/**
* A single insert tweet that can be used.
*/
- private PreparedStatement m_insertTweet;
+ private final NamedPreparedStatement m_insertTweet;
/**
* A single insert profiles that can be used.
*/
- private PreparedStatement m_insertProfile;
- /**
- * A single insert ispostedby that can be used.
- */
- private PreparedStatement m_insertPosted;
+ private final NamedPreparedStatement m_insertProfile;
/**
* A single insert brand that can be used.
*/
- private PreparedStatement m_insertBrand;
+ private final NamedPreparedStatement m_insertBrand;
/**
* A single insert hashtag that can be used.
*/
- private PreparedStatement m_insertHash;
+ private final NamedPreparedStatement m_insertHash;
/**
* A single insert url that can be used.
*/
- private PreparedStatement m_insertUrl;
+ private final NamedPreparedStatement m_insertUrl;
/**
* A single insert url that can be used.
*/
- private PreparedStatement m_insertMentions;
+ private final NamedPreparedStatement m_insertMentions;
/**
* Create the datafiller object.
*
* @param connection The database connection to use.
*/
- public DataFiller(DBConnection connection) {
+ public DataFiller(Connection connection) {
try {
m_connection = connection;
- m_insertTweet = m_connection.create(QueryUtils.insertTweet());
- m_insertProfile = m_connection.create(QueryUtils.insertProfile());
- m_insertPosted = m_connection.create(QueryUtils.insertPosted());
- m_insertBrand = m_connection.create(QueryUtils.insertBrand());
- m_insertHash = m_connection.create(QueryUtils.insertHash());
- m_insertUrl = m_connection.create(QueryUtils.insertUrl());
- m_insertMentions = m_connection.create(QueryUtils.insertMentions());
-
+ m_insertTweet = new NamedPreparedStatement(m_connection, QueryUtils.insertTweet);
+ m_insertProfile = new NamedPreparedStatement(m_connection, QueryUtils.insertProfile);
+ m_insertBrand = new NamedPreparedStatement(m_connection, QueryUtils.insertBrand);
+ m_insertHash = new NamedPreparedStatement(m_connection, QueryUtils.insertHash);
+ m_insertUrl = new NamedPreparedStatement(m_connection, QueryUtils.insertUrl);
+ m_insertMentions = new NamedPreparedStatement(m_connection, QueryUtils.insertMentions);
} catch (SQLException ex) {
throw new RuntimeException(ex.getMessage());
}
@@ -81,25 +75,24 @@ public class DataFiller {
try {
for (Tweet.Hashtag hashtag : tweet.entities.hashtags) {
QueryUtils.setInsertHashParams(m_insertHash, tweet.id, hashtag.text);
- m_insertHash.executeUpdate();
+ m_insertHash.getStmt().executeUpdate();
}
for (Tweet.Url url : tweet.entities.urls) {
QueryUtils.setInsertHashParams(m_insertUrl, tweet.id, url.expanded_url);
- m_insertUrl.executeUpdate();
+ m_insertUrl.getStmt().executeUpdate();
}
for (Tweet.Mention mention : tweet.entities.user_mentions) {
QueryUtils.setInsertMentionsParams(m_insertMentions, tweet.id, mention.id);
- m_insertMentions.executeUpdate();
+ m_insertMentions.getStmt().executeUpdate();
}
- QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, m_insertPosted, tweet);
- m_insertTweet.executeUpdate();
- m_insertProfile.executeUpdate();
- m_insertPosted.executeUpdate();
+ QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, tweet);
+ m_insertTweet.getStmt().executeUpdate();
+ m_insertProfile.getStmt().executeUpdate();
List<String> brands = getBrands(tweet);
for (String brand : brands) {
QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand);
- m_insertBrand.executeUpdate();
+ m_insertBrand.getStmt().executeUpdate();
}
} catch (SQLException ex) {
Logger.getLogger(DataFiller.class.getName()).log(Level.SEVERE, null, ex);
diff --git a/src/main/Main.java b/src/main/Main.java
index 67032bf..b5df23f 100644
--- a/src/main/Main.java
+++ b/src/main/Main.java
@@ -1,13 +1,16 @@
package main;
-import com.google.gson.JsonIOException;
import com.google.gson.JsonSyntaxException;
import data.Tweet;
-import database.DBConnection;
+import database.ConnectionBuilder;
import io.FileTweetReader;
import io.ITweetReader;
import io.TweetReader;
import java.io.IOException;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
/**
* The main class.
@@ -20,30 +23,60 @@ public class Main {
* @param args the global arguments to pass to the program.
*/
public static void main(String[] args) {
+ Main main;
try {
- Main main = new Main(args);
+ main = new Main(args);
} catch (IllegalArgumentException ex) {
System.err.println(ex.getMessage());
System.exit(1);
+ return;
}
+ main.run();
}
- private String m_hostaddress;
-
private String m_filename;
+ private final ConnectionBuilder cb;
+ /**
+ * Whether the database should be contacted or not.
+ */
+ private boolean skipDb;
public Main(String[] args) {
+ // default connection properties
+ cb = new ConnectionBuilder()
+ .setServerName("localhost")
+ .setUsername("postgres")
+ .setPassword("2IOC02")
+ .setDbName("Twitter");
+ skipDb = false;
+
/* parse the global options. */
parseGlobalOptions(args);
+ }
- if (m_hostaddress == null) {
- throw new IllegalArgumentException("Missing --dbhost to specify the hostaddress.");
+ private void printTweets(ITweetReader reader) throws IOException {
+ Tweet tweet;
+ long tweetNo = 1;
+ while ((tweet = reader.getTweet()) != null) {
+ System.out.println("/*" + tweetNo++ + "*/ " + tweet);
}
+ }
- DBConnection connection = new DBConnection(m_hostaddress, "5432", "Twitter", "postgres", "2IOC02");
- /* create the object that fills the database */
- DataFiller filler = new DataFiller(connection);
+ private void tweetsToDb(ITweetReader reader) throws IOException {
+ Tweet tweet;
+ try (Connection connection = cb.create()) {
+ /* create the object that fills the database */
+ DataFiller filler = new DataFiller(connection);
+ while ((tweet = reader.getTweet()) != null) {
+ filler.processTweet(tweet);
+ }
+ } catch (SQLException ex) {
+ Logger.getLogger(Main.class.getName()).log(Level.SEVERE,
+ "DB error", ex);
+ }
+ }
+ public void run() {
ITweetReader reader = null;
try {
if (m_filename == null) {
@@ -51,10 +84,10 @@ public class Main {
} else {
reader = new FileTweetReader(m_filename);
}
-
- Tweet tweet;
- while ((tweet = reader.getTweet()) != null) {
- filler.processTweet(tweet);
+ if (skipDb) {
+ printTweets(reader);
+ } else {
+ tweetsToDb(reader);
}
} catch (JsonSyntaxException ex) {
System.err.println("Got an invalid tweet: " + ex);
@@ -65,42 +98,43 @@ public class Main {
reader.close();
}
}
-
- connection.close();
-
- System.out.print("exit succesfull.");
}
- private void parseGlobalOptions(String[] args) {
+ private void parseGlobalOptions(String[] args)
+ throws IllegalArgumentException {
/* parse global options */
for (int i = 0; i < args.length; i++) {
if ("--help".equals(args[i])) {
printHelp();
+ System.exit(0);
} else if ("--dbhost".equals(args[i])) {
- m_hostaddress = getParam(args, ++i);
+ cb.setServerName(getArg(args, ++i, "--dbhost"));
+ } else if ("--dbuser".equals(args[i])) {
+ cb.setUsername(getArg(args, ++i, "--dbuser"));
+ } else if ("--dbpass".equals(args[i])) {
+ cb.setPassword(getArg(args, ++i, "--dbpass"));
+ } else if ("--dbname".equals(args[i])) {
+ cb.setDbName(getArg(args, ++i, "--dbname"));
+ } else if ("--skipdb".equals(args[i])) {
+ skipDb = true;
} else if (args[i].startsWith("-")) {
throw new IllegalArgumentException("Invalid option: " + args[i]);
} else {
/* This should be the filename */
- m_filename = getParam(args, i);
+ m_filename = args[i];
}
}
-
- if (args.length == 0) {
- throw new IllegalArgumentException("No parameters specified, see --help for usage info.");
- }
}
/**
* Read an extra option for a command.
*/
- private String getParam(String[] args, Integer index) {
- if (index + 1 <= args.length) {
- index++;
- return args[index - 1];
- } else {
- throw new IllegalArgumentException("An extra option was missing.");
+ private String getArg(String[] params, int index, String name) {
+ if (index >= params.length) {
+ System.err.println("Missing argument for parameter " + name);
+ System.exit(1);
}
+ return params[index];
}
/**
@@ -113,11 +147,16 @@ public class Main {
}
private final static String[] HELP = {
+ "Usage: java -jar DataFiller.jar [options] [tweets-file]",
+ "",
"Global options:",
- " --help Print this help text.",
- " --dbhost <ip> Specify the database host ipaddress.",
- " --file <tweet-filename> Specify the tweet file to read from.",
- " ",
- " If no --file was specified, program reads from standard input."
+ " --help Print this help text.",
+ " --dbhost HOST Database host (defaults to 'localhost')",
+ " --dbuser USER Database username (defaults to 'postgres')",
+ " --dbpass PASS Database password (defaults to '2IOC02')",
+ " --dbname NAME Database name (defaults to 'Twitter')",
+ " --skipdb Do not contact the database at all, just print data.",
+ "",
+ "If no tweets file is given, data will be read from standard input."
};
}
diff --git a/test/data/ValidatingJsonDeserializerTest.java b/test/data/ValidatingJsonDeserializerTest.java
index ef19cd3..b51631a 100644
--- a/test/data/ValidatingJsonDeserializerTest.java
+++ b/test/data/ValidatingJsonDeserializerTest.java
@@ -3,9 +3,12 @@ package data;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
import com.google.gson.JsonNull;
import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
+import com.google.gson.JsonPrimitive;
+import org.apache.commons.lang3.StringUtils;
import org.junit.Test;
import static org.junit.Assert.*;
@@ -358,8 +361,8 @@ public class ValidatingJsonDeserializerTest {
tweet.addProperty("in_reply_to_user_id", 4);
tweet.addProperty("created_at", "X");
tweet.addProperty("favorite_count", 4);
- tweet.addProperty("place", "X");
- tweet.addProperty("coordinates", "X");
+ tweet.add("place", JsonNull.INSTANCE);
+ tweet.add("coordinates", JsonNull.INSTANCE);
tweet.addProperty("text", "X");
tweet.add("retweeted_status", JsonNull.INSTANCE); // Tweet object
JsonObject entities = new JsonObject();
@@ -384,13 +387,34 @@ public class ValidatingJsonDeserializerTest {
obj.remove(prop);
}
+ private void addProperty(JsonObject obj, JsonElement val, String... names) {
+ String prop;
+ for (int i = 0; i < names.length - 1; i++) {
+ prop = names[i];
+ if (!obj.has(prop)) {
+ obj.add(prop, new JsonObject());
+ }
+ obj = obj.getAsJsonObject(prop);
+ }
+ prop = names[names.length - 1];
+ obj.add(prop, val);
+ }
+
private void checkImpairedTweet(String... names) {
JsonObject tweet = buildMinimalTweet(buildMinimalUser());
removeProperty(tweet, names);
- String prop = names[names.length - 1];
+ String prop = StringUtils.join(names, ".");
checkTweetFail(tweet, "Missing field: " + prop);
}
+ private void checkImpairedUser(String... names) {
+ JsonObject user = buildMinimalUser();
+ removeProperty(user, names);
+ JsonObject tweet = buildMinimalTweet(user);
+ String prop = StringUtils.join(names, ".");
+ checkTweetFail(tweet, "Missing field: user." + prop);
+ }
+
private void checkTweetFail(JsonObject tweet, String exp) {
Gson gson = new Gson();
System.out.println("Checking for failure: " + exp);
@@ -419,7 +443,55 @@ public class ValidatingJsonDeserializerTest {
@Test
public void testTweet() {
+ checkImpairedTweet("id");
+ checkImpairedTweet("lang");
+ checkImpairedTweet("created_at");
+ checkImpairedTweet("favorite_count");
checkImpairedTweet("text");
+ checkImpairedTweet("retweet_count");
+ checkImpairedTweet("user");
+ }
+
+ @Test
+ public void testTweetNulls() {
+ // place can be null
+ JsonObject tweet = buildMinimalTweet(buildMinimalUser());
+ removeProperty(tweet, "place");
+ checkTweetPass(tweet);
+
+ tweet = buildMinimalTweet(buildMinimalUser());
+ removeProperty(tweet, "in_reply_to_user_id");
+ checkTweetPass(tweet);
+
+ tweet = buildMinimalTweet(buildMinimalUser());
+ removeProperty(tweet, "coordinates");
+ checkTweetPass(tweet);
+ }
+
+ @Test
+ public void testTweetCoordinates() {
+ JsonObject tweet = buildMinimalTweet(buildMinimalUser());
+ addProperty(tweet, new JsonPrimitive("X"), "coordinates");
+ checkTweetFail(tweet, "Expected object: coordinates");
+
+ JsonObject coords = new JsonObject();
+ // overwrite coordinates with object
+ addProperty(tweet, coords, "coordinates");
+ checkTweetFail(tweet, "Missing field: coordinates.coordinates");
+
+ // set coordinates.coordinates
+ JsonArray coordsFloat = new JsonArray();
+ coords.add("coordinates", coordsFloat);
+ checkTweetFail(tweet, "Array smaller than 2: coordinates.coordinates");
+
+ coordsFloat.add(new JsonPrimitive(1.0f));
+ checkTweetFail(tweet, "Array smaller than 2: coordinates.coordinates");
+
+ coordsFloat.add(new JsonPrimitive(1.0f));
+ checkTweetPass(tweet);
+
+ coordsFloat.add(new JsonPrimitive(1.0f));
+ checkTweetFail(tweet, "Array larger than 2: coordinates.coordinates");
}
@Test
@@ -430,9 +502,94 @@ public class ValidatingJsonDeserializerTest {
}
@Test
+ public void testUser() {
+ checkImpairedUser("id");
+ checkImpairedUser("name");
+ checkImpairedUser("statuses_count");
+ checkImpairedUser("followers_count");
+ checkImpairedUser("friends_count");
+ checkImpairedUser("location");
+ checkImpairedUser("screen_name");
+ checkImpairedUser("created_at");
+ checkImpairedUser("lang");
+ }
+
+ @Test
+ public void testNullUserEntities() {
+ // entities can be null
+ JsonObject tweet = buildMinimalTweet(buildMinimalUser());
+ removeProperty(tweet, "user", "entities");
+ checkTweetPass(tweet);
+
+ tweet = buildMinimalTweet(buildMinimalUser());
+ removeProperty(tweet, "user", "time_zone");
+ checkTweetPass(tweet);
+ }
+
+ @Test
public void testUserEntities() {
- checkImpairedTweet("user", "entities");
checkImpairedTweet("user", "entities", "url");
checkImpairedTweet("user", "entities", "url", "urls");
}
+
+ @Test
+ public void testRetweetedStatus() {
+ JsonObject tweet = buildMinimalTweet(buildMinimalUser());
+ JsonObject rtUser = buildMinimalUser();
+ String screen_name = "Somebody";
+ rtUser.addProperty("screen_name", screen_name);
+ JsonObject retweet = buildMinimalTweet(rtUser);
+ tweet.add("retweeted_status", retweet);
+ Tweet parsedTweet = checkTweetPass(tweet);
+ assertNotNull(parsedTweet.retweeted_status);
+ assertEquals(screen_name, parsedTweet.retweeted_status.user.screen_name);
+ }
+
+ @Test
+ public void testRetweetedMissingText() {
+ JsonObject tweet = buildMinimalTweet(buildMinimalUser());
+ JsonObject retweet = buildMinimalTweet(buildMinimalUser());
+ removeProperty(retweet, "text");
+ tweet.add("retweeted_status", retweet);
+ checkTweetFail(tweet, "Missing field: retweeted_status.text");
+ }
+
+ @Test
+ public void testNullPlace() {
+ JsonObject tweet = buildMinimalTweet(buildMinimalUser());
+ removeProperty(tweet, "place");
+ tweet.add("place", null);
+ checkTweetPass(tweet);
+ }
+
+ @Test
+ public void testWrongType() {
+ JsonObject tweet = buildMinimalTweet(buildMinimalUser());
+ addProperty(tweet, new JsonPrimitive(1), "text");
+ checkTweetFail(tweet, "Expected string: text");
+ }
+
+ @Test
+ public void testWrongTypeArray() {
+ JsonObject tweet = buildMinimalTweet(buildMinimalUser());
+ addProperty(tweet, new JsonPrimitive(1), "entities", "urls");
+ checkTweetFail(tweet, "Expected array: entities.urls");
+ }
+
+ @Test
+ public void testWrongTypeObject() {
+ JsonObject tweet = buildMinimalTweet(buildMinimalUser());
+ addProperty(tweet, new JsonPrimitive(1), "entities");
+ checkTweetFail(tweet, "Expected object: entities");
+ }
+
+ @Test
+ public void testNotATweetObject() {
+ try {
+ DebuggingJsonDeserializer.tryValidate("{}", Tweet.class);
+ fail("DebuggingJsonDeserializer must also check for type.");
+ } catch (JsonParseException ex) {
+ assertEquals("Missing field: id", ex.getMessage());
+ }
+ }
}