summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2014-05-08 19:05:36 +0200
committerPeter Wu <peter@lekensteyn.nl>2014-05-08 19:05:36 +0200
commit9fe9d1b848156928a0d3efde4fff80a78f1bf9e1 (patch)
treee1e987f71ecb90d513e994b9cc4f3aa6c3656bdb /src
parent5f892ac1e0fe6a21a00808cfb0afae2a2c2071dc (diff)
downloadDatafiller-9fe9d1b848156928a0d3efde4fff80a78f1bf9e1.tar.gz
Replace JSON by GSON, adding extra validations
Also change reader method, tweets are not received via an observed but by submitting from the caller. Added TODO WTF here and there, formatted with Alt + Shift + F.
Diffstat (limited to 'src')
-rw-r--r--src/database/QueryUtils.java117
-rw-r--r--src/io/FileTweetReader.java16
-rw-r--r--src/io/ITweetReader.java16
-rw-r--r--src/io/TweetReader.java57
-rw-r--r--src/main/DataFiller.java107
-rw-r--r--src/main/Main.java82
6 files changed, 248 insertions, 147 deletions
diff --git a/src/database/QueryUtils.java b/src/database/QueryUtils.java
index 88a9b6d..d0a424d 100644
--- a/src/database/QueryUtils.java
+++ b/src/database/QueryUtils.java
@@ -1,9 +1,9 @@
package database;
+import data.Tweet;
+import data.User;
import java.sql.PreparedStatement;
import java.sql.SQLException;
-import org.json.JSONException;
-import org.json.JSONObject;
/**
* Utilities to create queries.
@@ -32,19 +32,21 @@ public class QueryUtils {
+ "WHERE NOT EXISTS "
+ "(SELECT * FROM tweet WHERE tweetid=? )";
}
-
+
public static String insertHash() {
return "INSERT INTO hashtag (tweetid, hashtag) "
+ "SELECT ?, ? "
+ "WHERE NOT EXISTS "
+ "(SELECT * FROM hashtag WHERE tweetid=? and hashtag =? )";
}
+
public static String insertUrl() {
return "INSERT INTO url (tweetid, url) "
+ "SELECT ?, ? "
+ "WHERE NOT EXISTS "
+ "(SELECT * FROM url WHERE tweetid=? and url =? )";
}
+
public static String insertMentions() {
return "INSERT INTO mentionsuser (tweetid, userid) "
+ "SELECT ?, ? "
@@ -52,87 +54,86 @@ public class QueryUtils {
+ "(SELECT * FROM mentionsuser WHERE tweetid=? and userid =? )";
}
- public static String insertPosted(){
+ public static String insertPosted() {
return "INSERT INTO ispostedby (tweetid,userid) "
+ "SELECT ? , ? "
+ "WHERE NOT EXISTS "
+ "(SELECT * FROM ispostedby WHERE tweetid= ? )";
}
- public static String insertBrand(){
+
+ public static String insertBrand() {
return "INSERT INTO mentionsbrand (tweetid,brand) "
+ "SELECT ? , ? "
+ "WHERE NOT EXISTS "
+ "(SELECT * FROM mentionsbrand WHERE tweetid = ? AND brand = ?)";
}
-
-
- public static void setInsertParams(PreparedStatement tweetStatement, PreparedStatement profileStatement, PreparedStatement postedStatement, JSONObject tweet) throws JSONException, SQLException {
- tweetStatement.setLong( 1, tweet.getLong( "id"));
- tweetStatement.setString(2, tweet.getString("created_at"));
- tweetStatement.setLong( 3, tweet.getLong( "favorite_count"));
- tweetStatement.setLong( 4, tweet.getLong( "retweet_count"));
- tweetStatement.setString(5, tweet.getString("text"));
- tweetStatement.setString(6, tweet.getString("coordinates"));
- tweetStatement.setString(7, tweet.getString("lang"));
- Long id= 0l;
- try{
- id=tweet.getJSONObject("retweeted_status").getLong("id");
- } catch(Exception e){}
- tweetStatement.setLong(8,id);
- id= 0l;
- try{
- id=tweet.getJSONObject("in_reply_to_user_id").getLong("id");
- } catch(Exception e){}
- tweetStatement.setLong(9,id);
- tweetStatement.setString(10,tweet.getString("place"));
- tweetStatement.setLong(11, tweet.getLong("id"));
- JSONObject twuser= tweet.getJSONObject("user");
- profileStatement.setLong(1, twuser.getLong("id"));
- profileStatement.setString(2, twuser.getString("name"));
- profileStatement.setString(3, twuser.getString("time_zone"));
- profileStatement.setLong(4, twuser.getLong("statuses_count"));
- profileStatement.setLong(5, twuser.getLong("followers_count"));
- profileStatement.setLong(6, twuser.getLong("friends_count"));
- profileStatement.setString(7, twuser.getString("location"));
- profileStatement.setString(8, twuser.getString("screen_name"));
- profileStatement.setString(9, twuser.getString("created_at"));
- profileStatement.setString(10, twuser.getString("lang"));
- profileStatement.setLong(11, twuser.getLong("id"));
-
- postedStatement.setLong(1, tweet.getLong("id"));
- postedStatement.setLong(2, twuser.getLong("id"));
- postedStatement.setLong(3, tweet.getLong("id"));
-
+ public static void setInsertParams(PreparedStatement tweetStatement,
+ PreparedStatement profileStatement,
+ PreparedStatement postedStatement,
+ Tweet tweet) throws SQLException {
+ tweetStatement.setLong(1, tweet.id);
+ tweetStatement.setString(2, tweet.created_at);
+ tweetStatement.setLong(3, tweet.favorite_count);
+ tweetStatement.setLong(4, tweet.retweet_count);
+ tweetStatement.setString(5, tweet.text);
+ tweetStatement.setString(6, tweet.coordinates);
+ tweetStatement.setString(7, tweet.lang);
+ if (tweet.retweeted_status != null) {
+ tweetStatement.setLong(8, tweet.retweeted_status.id);
+ } else {
+ tweetStatement.setLong(8, 0);
+ }
+ tweetStatement.setLong(9, tweet.in_reply_to_user_id);
+ tweetStatement.setString(10, tweet.place);
+ tweetStatement.setLong(11, tweet.id);
+
+ User twuser = tweet.user;
+ profileStatement.setLong(1, twuser.id);
+ profileStatement.setString(2, twuser.name);
+ profileStatement.setString(3, twuser.time_zone);
+ profileStatement.setLong(4, twuser.statuses_count);
+ profileStatement.setLong(5, twuser.followers_count);
+ profileStatement.setLong(6, twuser.friends_count);
+ profileStatement.setString(7, twuser.location);
+ profileStatement.setString(8, twuser.screen_name);
+ profileStatement.setString(9, twuser.created_at);
+ profileStatement.setString(10, twuser.lang);
+ profileStatement.setLong(11, twuser.id);
+ postedStatement.setLong(1, tweet.id);
+ postedStatement.setLong(2, twuser.id);
+ postedStatement.setLong(3, tweet.id);
}
-
- public static void setInsertBrandParams(PreparedStatement brandStatement, Long id, String brand) throws JSONException, SQLException {
+
+ public static void setInsertBrandParams(PreparedStatement brandStatement,
+ long id, String brand) throws SQLException {
brandStatement.setLong(1, id);
brandStatement.setString(2, brand);
brandStatement.setLong(3, id);
brandStatement.setString(4, brand);
-
- }
- public static void setInsertHashParams(PreparedStatement hashStatement, Long id, String text) throws JSONException, SQLException {
+ }
+
+ public static void setInsertHashParams(PreparedStatement hashStatement,
+ long id, String text) throws SQLException {
hashStatement.setLong(1, id);
hashStatement.setString(2, text);
hashStatement.setLong(3, id);
hashStatement.setString(4, text);
-
- }
- public static void setInsertUrlParams(PreparedStatement UrlStatement, Long id, String text) throws JSONException, SQLException {
+ }
+
+ public static void setInsertUrlParams(PreparedStatement UrlStatement,
+ long id, String text) throws SQLException {
UrlStatement.setLong(1, id);
UrlStatement.setString(2, text);
UrlStatement.setLong(3, id);
UrlStatement.setString(4, text);
-
- }
- public static void setInsertMentionsParams(PreparedStatement UrlStatement, Long id, Long userid) throws JSONException, SQLException {
+ }
+
+ public static void setInsertMentionsParams(PreparedStatement UrlStatement,
+ long id, long userid) throws SQLException {
UrlStatement.setLong(1, id);
UrlStatement.setLong(2, userid);
UrlStatement.setLong(3, id);
UrlStatement.setLong(4, userid);
-
- }
-
+ }
}
diff --git a/src/io/FileTweetReader.java b/src/io/FileTweetReader.java
new file mode 100644
index 0000000..216cc26
--- /dev/null
+++ b/src/io/FileTweetReader.java
@@ -0,0 +1,16 @@
+package io;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+
+/**
+ * Reads tweets from file.
+ *
+ * @author Peter Wu
+ */
+public class FileTweetReader extends TweetReader {
+
+ public FileTweetReader(String filename) throws FileNotFoundException {
+ super(new FileInputStream(filename));
+ }
+}
diff --git a/src/io/ITweetReader.java b/src/io/ITweetReader.java
new file mode 100644
index 0000000..38dafbd
--- /dev/null
+++ b/src/io/ITweetReader.java
@@ -0,0 +1,16 @@
+package io;
+
+import data.Tweet;
+import java.io.IOException;
+
+/**
+ * Allow you to gather tweet object from some file.
+ *
+ * @author Peter Wu
+ */
+public interface ITweetReader {
+
+ public Tweet getTweet() throws IOException;
+
+ public void close();
+}
diff --git a/src/io/TweetReader.java b/src/io/TweetReader.java
new file mode 100644
index 0000000..d82978e
--- /dev/null
+++ b/src/io/TweetReader.java
@@ -0,0 +1,57 @@
+package io;
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import com.google.gson.JsonSyntaxException;
+import data.Tweet;
+import data.ValidatingJsonDeserializer;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+
+/**
+ * Reads tweets from an InputStream.
+ *
+ * @author Peter Wu
+ */
+public class TweetReader implements ITweetReader {
+
+ private final InputStream is;
+ private final BufferedReader reader;
+ private final Gson gson;
+
+ public TweetReader(InputStream is) {
+ if (is == null) {
+ throw new NullPointerException();
+ }
+ this.is = is;
+ reader = new BufferedReader(new InputStreamReader(is));
+ GsonBuilder gsonBuilder = new GsonBuilder();
+ ValidatingJsonDeserializer.addValidation(gsonBuilder, Tweet.class);
+ gson = gsonBuilder.create();
+ }
+
+ @Override
+ public Tweet getTweet() throws IOException {
+ String line = reader.readLine();
+ Tweet tweet = null;
+ if (line != null) {
+ try {
+ tweet = gson.fromJson(line, Tweet.class);
+ } catch (JsonSyntaxException ex) {
+ // TODO: handle something?
+ throw ex;
+ }
+ }
+ return tweet;
+ }
+
+ @Override
+ public void close() {
+ try {
+ is.close();
+ } catch (IOException ex) {
+ }
+ }
+}
diff --git a/src/main/DataFiller.java b/src/main/DataFiller.java
index c9a5811..a65a032 100644
--- a/src/main/DataFiller.java
+++ b/src/main/DataFiller.java
@@ -1,21 +1,21 @@
package main;
+import data.Tweet;
import database.DBConnection;
import database.QueryUtils;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
+import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
-import org.json.JSONException;
-import org.json.JSONObject;
/**
* Process that incoming tweets and fill the database.
*
* @author Maurice Laveaux
*/
-public class DataFiller implements ResultListener {
+public class DataFiller {
/**
* The main database connection to fill.
@@ -26,7 +26,7 @@ public class DataFiller implements ResultListener {
* A single insert tweet that can be used.
*/
private PreparedStatement m_insertTweet;
-
+
/**
* A single insert profiles that can be used.
*/
@@ -35,7 +35,7 @@ public class DataFiller implements ResultListener {
* A single insert ispostedby that can be used.
*/
private PreparedStatement m_insertPosted;
-
+
/**
* A single insert brand that can be used.
*/
@@ -63,7 +63,7 @@ public class DataFiller implements ResultListener {
*/
public DataFiller(DBConnection connection) {
try {
- m_connection = connection;
+ m_connection = connection;
m_insertTweet = m_connection.create(QueryUtils.insertTweet());
m_insertProfile = m_connection.create(QueryUtils.insertProfile());
m_insertPosted = m_connection.create(QueryUtils.insertPosted());
@@ -71,79 +71,68 @@ public class DataFiller implements ResultListener {
m_insertHash = m_connection.create(QueryUtils.insertHash());
m_insertUrl = m_connection.create(QueryUtils.insertUrl());
m_insertMentions = m_connection.create(QueryUtils.insertMentions());
-
-
-
+
} catch (SQLException ex) {
throw new RuntimeException(ex.getMessage());
}
}
- @Override
- public void tweetReceived(JSONObject tweet) {
+ public void processTweet(Tweet tweet) {
try {
-
- for(int i=0;i<tweet.getJSONObject("entities").getJSONArray("hashtags").length();i++){
- String text = tweet.getJSONObject("entities").getJSONArray("hashtags").getJSONObject(i).getString("text");
- QueryUtils.setInsertHashParams(m_insertHash, tweet.getLong("id"), text);
+ for (Tweet.Hashtag hashtag : tweet.entities.hashtags) {
+ QueryUtils.setInsertHashParams(m_insertHash, tweet.id, hashtag.text);
m_insertHash.executeUpdate();
}
- for(int i=0;i<tweet.getJSONObject("entities").getJSONArray("urls").length();i++){
- String text = tweet.getJSONObject("entities").getJSONArray("urls").getJSONObject(i).getString("expanded_url");
- QueryUtils.setInsertHashParams(m_insertUrl, tweet.getLong("id"), text);
+ for (Tweet.Url url : tweet.entities.urls) {
+ QueryUtils.setInsertHashParams(m_insertUrl, tweet.id, url.expanded_url);
m_insertUrl.executeUpdate();
}
- for(int i=0;i<tweet.getJSONObject("entities").getJSONArray("user_mentions").length();i++){
- Long id = tweet.getJSONObject("entities").getJSONArray("user_mentions").getJSONObject(i).getLong("id");
- QueryUtils.setInsertMentionsParams(m_insertMentions, tweet.getLong("id"), id);
+ for (Tweet.Mention mention : tweet.entities.user_mentions) {
+ QueryUtils.setInsertMentionsParams(m_insertMentions, tweet.id, mention.id);
m_insertMentions.executeUpdate();
}
-
- QueryUtils.setInsertParams(m_insertTweet,m_insertProfile,m_insertPosted, tweet);
+
+ QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, m_insertPosted, tweet);
m_insertTweet.executeUpdate();
m_insertProfile.executeUpdate();
m_insertPosted.executeUpdate();
- ArrayList<String> brands=getBrands(tweet);
- for(String brand : brands){
- QueryUtils.setInsertBrandParams(m_insertBrand, tweet.getLong("id"), brand);
+ List<String> brands = getBrands(tweet);
+ for (String brand : brands) {
+ QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand);
m_insertBrand.executeUpdate();
}
- } catch (SQLException | JSONException ex) {
+ } catch (SQLException ex) {
Logger.getLogger(DataFiller.class.getName()).log(Level.SEVERE, null, ex);
}
}
- ArrayList<String> getBrands(JSONObject tweet){
- ArrayList<String> result= new ArrayList<String>();
- String text=null;
- try {
- text = tweet.getString("text");
- text = text.toLowerCase();
- } catch (JSONException ex) {
-
+
+ ArrayList<String> getBrands(Tweet tweet) {
+ ArrayList<String> result = new ArrayList<>();
+ String text = tweet.text.toLowerCase();
+ if (text.contains("samsung") || text.contains("galaxy")) {
+ result.add("Samsung");
}
- if(text.contains("samsung")||text.contains("galaxy")){
- result.add("Samsung");
- }
- if (text.contains("htc")||text.contains("one")){
- result.add("HTC");
- }
- if (text.contains("apple")||text.contains("iphone")){
- result.add("Apple");
- }
- if (text.contains("sony")||text.contains("xperia")){
- result.add("Sony");
- }
- if (text.contains("huawei")||text.contains("ascend")){
- result.add("Huawei");
- }
- if (text.contains("lg")){
- result.add("LG");
- }
-
- if(result.isEmpty()) {
- result.add("geen");
- System.out.println(text);
- }
- return result;
+ if (text.contains("htc") || text.contains("one")) {
+ result.add("HTC");
+ }
+ if (text.contains("apple") || text.contains("iphone")) {
+ result.add("Apple");
+ }
+ if (text.contains("sony") || text.contains("xperia")) {
+ result.add("Sony");
}
+ if (text.contains("huawei") || text.contains("ascend")) {
+ result.add("Huawei");
+ }
+ if (text.contains("lg")) {
+ result.add("LG");
+ }
+
+ // TODO: WTF IS THIS PILE OF SHIT?!
+ if (result.isEmpty()) {
+ result.add("geen");
+ System.out.println(text);
+ }
+ return result;
+ }
}
diff --git a/src/main/Main.java b/src/main/Main.java
index f4f644b..5a8e137 100644
--- a/src/main/Main.java
+++ b/src/main/Main.java
@@ -1,9 +1,13 @@
package main;
+import com.google.gson.JsonIOException;
+import com.google.gson.JsonSyntaxException;
+import data.Tweet;
import database.DBConnection;
-import io.DataReader;
-import io.InputReader;
-import java.util.Arrays;
+import io.FileTweetReader;
+import io.ITweetReader;
+import io.TweetReader;
+import java.io.IOException;
/**
* The main class.
@@ -12,6 +16,7 @@ public class Main {
/**
* The main method of the application.
+ *
* @param args the global arguments to pass to the program.
*/
public static void main(String[] args) {
@@ -22,43 +27,56 @@ public class Main {
System.exit(1);
}
}
-
+
private String m_hostaddress;
-
+
private String m_filename;
-
+
public Main(String[] args) {
/* parse the global options. */
parseGlobalOptions(args);
-
+
if (m_hostaddress == null) {
throw new IllegalArgumentException("Missing --dbhost to specify the hostaddress.");
- }
-
+ }
+
DBConnection connection = new DBConnection(m_hostaddress, "5432", "Twitter", "postgres", "2IOC02");
- /* create the object that fills the database */
+ /* create the object that fills the database */
DataFiller filler = new DataFiller(connection);
-
- if (m_filename == null) {
- InputReader reader = new InputReader(filler);
-
- reader.startLoop();
- } else {
- DataReader reader = new DataReader(m_filename, filler);
- reader.startloop();
+
+ ITweetReader reader = null;
+ try {
+ if (m_filename == null) {
+ reader = new TweetReader(System.in);
+ } else {
+ reader = new FileTweetReader(m_filename);
+ }
+
+ Tweet tweet;
+ while ((tweet = reader.getTweet()) != null) {
+ filler.processTweet(tweet);
+ }
+ } catch (JsonSyntaxException ex) {
+ System.err.println("Got an invalid tweet: " + ex);
+ } catch (IOException ex) {
+ System.err.println("Cannot open tweets: " + ex);
+ } finally {
+ if (reader != null) {
+ reader.close();
+ }
}
-
+
connection.close();
-
+
System.out.print("exit succesfull.");
}
-
+
private void parseGlobalOptions(String[] args) {
/* parse global options */
for (int i = 0; i < args.length; i++) {
if ("--help".equals(args[i])) {
printHelp();
- } else if("--dbhost".equals(args[i])) {
+ } else if ("--dbhost".equals(args[i])) {
m_hostaddress = getParam(args, ++i);
} else if (args[i].startsWith("-")) {
throw new IllegalArgumentException("Invalid option: " + args[i]);
@@ -67,29 +85,33 @@ public class Main {
m_filename = getParam(args, i);
}
}
-
+
if (args.length == 0) {
- throw new IllegalArgumentException("No parameters specified, see --help for usage info.");
+ throw new IllegalArgumentException("No parameters specified, see --help for usage info.");
}
}
-
- /** Read an extra option for a command. */
+
+ /**
+ * Read an extra option for a command.
+ */
private String getParam(String[] args, Integer index) {
if (index + 1 <= args.length) {
- index++;
+ index++;
return args[index - 1];
} else {
throw new IllegalArgumentException("An extra option was missing.");
}
}
-
- /** Print some useful help messages. */
+
+ /**
+ * Print some useful help messages.
+ */
private void printHelp() {
for (String line : HELP) {
System.out.println(line);
}
}
-
+
private final static String[] HELP = {
"Global options:",
" --help Print this help text.",