diff options
-rw-r--r-- | lib/json.jar | bin | 93396 -> 0 bytes | |||
-rw-r--r-- | nbproject/project.properties | 2 | ||||
-rw-r--r-- | src/database/QueryUtils.java | 117 | ||||
-rw-r--r-- | src/io/FileTweetReader.java | 16 | ||||
-rw-r--r-- | src/io/ITweetReader.java | 16 | ||||
-rw-r--r-- | src/io/TweetReader.java | 57 | ||||
-rw-r--r-- | src/main/DataFiller.java | 107 | ||||
-rw-r--r-- | src/main/Main.java | 82 |
8 files changed, 248 insertions, 149 deletions
diff --git a/lib/json.jar b/lib/json.jar Binary files differdeleted file mode 100644 index 5a93e51..0000000 --- a/lib/json.jar +++ /dev/null diff --git a/nbproject/project.properties b/nbproject/project.properties index 5ed1af4..95e5bfe 100644 --- a/nbproject/project.properties +++ b/nbproject/project.properties @@ -30,12 +30,10 @@ dist.javadoc.dir=${dist.dir}/javadoc endorsed.classpath= excludes= file.reference.gson-2.2.4.jar=lib/gson-2.2.4.jar -file.reference.json.jar=lib/json.jar file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar includes=** jar.compress=false javac.classpath=\ - ${file.reference.json.jar}:\ ${file.reference.postgresql-9.3-1101.jdbc41.jar}:\ ${file.reference.gson-2.2.4.jar} # Space-separated list of extra javac options diff --git a/src/database/QueryUtils.java b/src/database/QueryUtils.java index 88a9b6d..d0a424d 100644 --- a/src/database/QueryUtils.java +++ b/src/database/QueryUtils.java @@ -1,9 +1,9 @@ package database; +import data.Tweet; +import data.User; import java.sql.PreparedStatement; import java.sql.SQLException; -import org.json.JSONException; -import org.json.JSONObject; /** * Utilities to create queries. @@ -32,19 +32,21 @@ public class QueryUtils { + "WHERE NOT EXISTS " + "(SELECT * FROM tweet WHERE tweetid=? )"; } - + public static String insertHash() { return "INSERT INTO hashtag (tweetid, hashtag) " + "SELECT ?, ? " + "WHERE NOT EXISTS " + "(SELECT * FROM hashtag WHERE tweetid=? and hashtag =? )"; } + public static String insertUrl() { return "INSERT INTO url (tweetid, url) " + "SELECT ?, ? " + "WHERE NOT EXISTS " + "(SELECT * FROM url WHERE tweetid=? and url =? )"; } + public static String insertMentions() { return "INSERT INTO mentionsuser (tweetid, userid) " + "SELECT ?, ? " @@ -52,87 +54,86 @@ public class QueryUtils { + "(SELECT * FROM mentionsuser WHERE tweetid=? and userid =? )"; } - public static String insertPosted(){ + public static String insertPosted() { return "INSERT INTO ispostedby (tweetid,userid) " + "SELECT ? , ? " + "WHERE NOT EXISTS " + "(SELECT * FROM ispostedby WHERE tweetid= ? )"; } - public static String insertBrand(){ + + public static String insertBrand() { return "INSERT INTO mentionsbrand (tweetid,brand) " + "SELECT ? , ? " + "WHERE NOT EXISTS " + "(SELECT * FROM mentionsbrand WHERE tweetid = ? AND brand = ?)"; } - - - public static void setInsertParams(PreparedStatement tweetStatement, PreparedStatement profileStatement, PreparedStatement postedStatement, JSONObject tweet) throws JSONException, SQLException { - tweetStatement.setLong( 1, tweet.getLong( "id")); - tweetStatement.setString(2, tweet.getString("created_at")); - tweetStatement.setLong( 3, tweet.getLong( "favorite_count")); - tweetStatement.setLong( 4, tweet.getLong( "retweet_count")); - tweetStatement.setString(5, tweet.getString("text")); - tweetStatement.setString(6, tweet.getString("coordinates")); - tweetStatement.setString(7, tweet.getString("lang")); - Long id= 0l; - try{ - id=tweet.getJSONObject("retweeted_status").getLong("id"); - } catch(Exception e){} - tweetStatement.setLong(8,id); - id= 0l; - try{ - id=tweet.getJSONObject("in_reply_to_user_id").getLong("id"); - } catch(Exception e){} - tweetStatement.setLong(9,id); - tweetStatement.setString(10,tweet.getString("place")); - tweetStatement.setLong(11, tweet.getLong("id")); - JSONObject twuser= tweet.getJSONObject("user"); - profileStatement.setLong(1, twuser.getLong("id")); - profileStatement.setString(2, twuser.getString("name")); - profileStatement.setString(3, twuser.getString("time_zone")); - profileStatement.setLong(4, twuser.getLong("statuses_count")); - profileStatement.setLong(5, twuser.getLong("followers_count")); - profileStatement.setLong(6, twuser.getLong("friends_count")); - profileStatement.setString(7, twuser.getString("location")); - profileStatement.setString(8, twuser.getString("screen_name")); - profileStatement.setString(9, twuser.getString("created_at")); - profileStatement.setString(10, twuser.getString("lang")); - profileStatement.setLong(11, twuser.getLong("id")); - - postedStatement.setLong(1, tweet.getLong("id")); - postedStatement.setLong(2, twuser.getLong("id")); - postedStatement.setLong(3, tweet.getLong("id")); - + public static void setInsertParams(PreparedStatement tweetStatement, + PreparedStatement profileStatement, + PreparedStatement postedStatement, + Tweet tweet) throws SQLException { + tweetStatement.setLong(1, tweet.id); + tweetStatement.setString(2, tweet.created_at); + tweetStatement.setLong(3, tweet.favorite_count); + tweetStatement.setLong(4, tweet.retweet_count); + tweetStatement.setString(5, tweet.text); + tweetStatement.setString(6, tweet.coordinates); + tweetStatement.setString(7, tweet.lang); + if (tweet.retweeted_status != null) { + tweetStatement.setLong(8, tweet.retweeted_status.id); + } else { + tweetStatement.setLong(8, 0); + } + tweetStatement.setLong(9, tweet.in_reply_to_user_id); + tweetStatement.setString(10, tweet.place); + tweetStatement.setLong(11, tweet.id); + + User twuser = tweet.user; + profileStatement.setLong(1, twuser.id); + profileStatement.setString(2, twuser.name); + profileStatement.setString(3, twuser.time_zone); + profileStatement.setLong(4, twuser.statuses_count); + profileStatement.setLong(5, twuser.followers_count); + profileStatement.setLong(6, twuser.friends_count); + profileStatement.setString(7, twuser.location); + profileStatement.setString(8, twuser.screen_name); + profileStatement.setString(9, twuser.created_at); + profileStatement.setString(10, twuser.lang); + profileStatement.setLong(11, twuser.id); + postedStatement.setLong(1, tweet.id); + postedStatement.setLong(2, twuser.id); + postedStatement.setLong(3, tweet.id); } - - public static void setInsertBrandParams(PreparedStatement brandStatement, Long id, String brand) throws JSONException, SQLException { + + public static void setInsertBrandParams(PreparedStatement brandStatement, + long id, String brand) throws SQLException { brandStatement.setLong(1, id); brandStatement.setString(2, brand); brandStatement.setLong(3, id); brandStatement.setString(4, brand); - - } - public static void setInsertHashParams(PreparedStatement hashStatement, Long id, String text) throws JSONException, SQLException { + } + + public static void setInsertHashParams(PreparedStatement hashStatement, + long id, String text) throws SQLException { hashStatement.setLong(1, id); hashStatement.setString(2, text); hashStatement.setLong(3, id); hashStatement.setString(4, text); - - } - public static void setInsertUrlParams(PreparedStatement UrlStatement, Long id, String text) throws JSONException, SQLException { + } + + public static void setInsertUrlParams(PreparedStatement UrlStatement, + long id, String text) throws SQLException { UrlStatement.setLong(1, id); UrlStatement.setString(2, text); UrlStatement.setLong(3, id); UrlStatement.setString(4, text); - - } - public static void setInsertMentionsParams(PreparedStatement UrlStatement, Long id, Long userid) throws JSONException, SQLException { + } + + public static void setInsertMentionsParams(PreparedStatement UrlStatement, + long id, long userid) throws SQLException { UrlStatement.setLong(1, id); UrlStatement.setLong(2, userid); UrlStatement.setLong(3, id); UrlStatement.setLong(4, userid); - - } - + } } diff --git a/src/io/FileTweetReader.java b/src/io/FileTweetReader.java new file mode 100644 index 0000000..216cc26 --- /dev/null +++ b/src/io/FileTweetReader.java @@ -0,0 +1,16 @@ +package io; + +import java.io.FileInputStream; +import java.io.FileNotFoundException; + +/** + * Reads tweets from file. + * + * @author Peter Wu + */ +public class FileTweetReader extends TweetReader { + + public FileTweetReader(String filename) throws FileNotFoundException { + super(new FileInputStream(filename)); + } +} diff --git a/src/io/ITweetReader.java b/src/io/ITweetReader.java new file mode 100644 index 0000000..38dafbd --- /dev/null +++ b/src/io/ITweetReader.java @@ -0,0 +1,16 @@ +package io; + +import data.Tweet; +import java.io.IOException; + +/** + * Allow you to gather tweet object from some file. + * + * @author Peter Wu + */ +public interface ITweetReader { + + public Tweet getTweet() throws IOException; + + public void close(); +} diff --git a/src/io/TweetReader.java b/src/io/TweetReader.java new file mode 100644 index 0000000..d82978e --- /dev/null +++ b/src/io/TweetReader.java @@ -0,0 +1,57 @@ +package io; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.JsonSyntaxException; +import data.Tweet; +import data.ValidatingJsonDeserializer; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; + +/** + * Reads tweets from an InputStream. + * + * @author Peter Wu + */ +public class TweetReader implements ITweetReader { + + private final InputStream is; + private final BufferedReader reader; + private final Gson gson; + + public TweetReader(InputStream is) { + if (is == null) { + throw new NullPointerException(); + } + this.is = is; + reader = new BufferedReader(new InputStreamReader(is)); + GsonBuilder gsonBuilder = new GsonBuilder(); + ValidatingJsonDeserializer.addValidation(gsonBuilder, Tweet.class); + gson = gsonBuilder.create(); + } + + @Override + public Tweet getTweet() throws IOException { + String line = reader.readLine(); + Tweet tweet = null; + if (line != null) { + try { + tweet = gson.fromJson(line, Tweet.class); + } catch (JsonSyntaxException ex) { + // TODO: handle something? + throw ex; + } + } + return tweet; + } + + @Override + public void close() { + try { + is.close(); + } catch (IOException ex) { + } + } +} diff --git a/src/main/DataFiller.java b/src/main/DataFiller.java index c9a5811..a65a032 100644 --- a/src/main/DataFiller.java +++ b/src/main/DataFiller.java @@ -1,21 +1,21 @@ package main; +import data.Tweet; import database.DBConnection; import database.QueryUtils; import java.sql.PreparedStatement; import java.sql.SQLException; import java.util.ArrayList; +import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -import org.json.JSONException; -import org.json.JSONObject; /** * Process that incoming tweets and fill the database. * * @author Maurice Laveaux */ -public class DataFiller implements ResultListener { +public class DataFiller { /** * The main database connection to fill. @@ -26,7 +26,7 @@ public class DataFiller implements ResultListener { * A single insert tweet that can be used. */ private PreparedStatement m_insertTweet; - + /** * A single insert profiles that can be used. */ @@ -35,7 +35,7 @@ public class DataFiller implements ResultListener { * A single insert ispostedby that can be used. */ private PreparedStatement m_insertPosted; - + /** * A single insert brand that can be used. */ @@ -63,7 +63,7 @@ public class DataFiller implements ResultListener { */ public DataFiller(DBConnection connection) { try { - m_connection = connection; + m_connection = connection; m_insertTweet = m_connection.create(QueryUtils.insertTweet()); m_insertProfile = m_connection.create(QueryUtils.insertProfile()); m_insertPosted = m_connection.create(QueryUtils.insertPosted()); @@ -71,79 +71,68 @@ public class DataFiller implements ResultListener { m_insertHash = m_connection.create(QueryUtils.insertHash()); m_insertUrl = m_connection.create(QueryUtils.insertUrl()); m_insertMentions = m_connection.create(QueryUtils.insertMentions()); - - - + } catch (SQLException ex) { throw new RuntimeException(ex.getMessage()); } } - @Override - public void tweetReceived(JSONObject tweet) { + public void processTweet(Tweet tweet) { try { - - for(int i=0;i<tweet.getJSONObject("entities").getJSONArray("hashtags").length();i++){ - String text = tweet.getJSONObject("entities").getJSONArray("hashtags").getJSONObject(i).getString("text"); - QueryUtils.setInsertHashParams(m_insertHash, tweet.getLong("id"), text); + for (Tweet.Hashtag hashtag : tweet.entities.hashtags) { + QueryUtils.setInsertHashParams(m_insertHash, tweet.id, hashtag.text); m_insertHash.executeUpdate(); } - for(int i=0;i<tweet.getJSONObject("entities").getJSONArray("urls").length();i++){ - String text = tweet.getJSONObject("entities").getJSONArray("urls").getJSONObject(i).getString("expanded_url"); - QueryUtils.setInsertHashParams(m_insertUrl, tweet.getLong("id"), text); + for (Tweet.Url url : tweet.entities.urls) { + QueryUtils.setInsertHashParams(m_insertUrl, tweet.id, url.expanded_url); m_insertUrl.executeUpdate(); } - for(int i=0;i<tweet.getJSONObject("entities").getJSONArray("user_mentions").length();i++){ - Long id = tweet.getJSONObject("entities").getJSONArray("user_mentions").getJSONObject(i).getLong("id"); - QueryUtils.setInsertMentionsParams(m_insertMentions, tweet.getLong("id"), id); + for (Tweet.Mention mention : tweet.entities.user_mentions) { + QueryUtils.setInsertMentionsParams(m_insertMentions, tweet.id, mention.id); m_insertMentions.executeUpdate(); } - - QueryUtils.setInsertParams(m_insertTweet,m_insertProfile,m_insertPosted, tweet); + + QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, m_insertPosted, tweet); m_insertTweet.executeUpdate(); m_insertProfile.executeUpdate(); m_insertPosted.executeUpdate(); - ArrayList<String> brands=getBrands(tweet); - for(String brand : brands){ - QueryUtils.setInsertBrandParams(m_insertBrand, tweet.getLong("id"), brand); + List<String> brands = getBrands(tweet); + for (String brand : brands) { + QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand); m_insertBrand.executeUpdate(); } - } catch (SQLException | JSONException ex) { + } catch (SQLException ex) { Logger.getLogger(DataFiller.class.getName()).log(Level.SEVERE, null, ex); } } - ArrayList<String> getBrands(JSONObject tweet){ - ArrayList<String> result= new ArrayList<String>(); - String text=null; - try { - text = tweet.getString("text"); - text = text.toLowerCase(); - } catch (JSONException ex) { - + + ArrayList<String> getBrands(Tweet tweet) { + ArrayList<String> result = new ArrayList<>(); + String text = tweet.text.toLowerCase(); + if (text.contains("samsung") || text.contains("galaxy")) { + result.add("Samsung"); } - if(text.contains("samsung")||text.contains("galaxy")){ - result.add("Samsung"); - } - if (text.contains("htc")||text.contains("one")){ - result.add("HTC"); - } - if (text.contains("apple")||text.contains("iphone")){ - result.add("Apple"); - } - if (text.contains("sony")||text.contains("xperia")){ - result.add("Sony"); - } - if (text.contains("huawei")||text.contains("ascend")){ - result.add("Huawei"); - } - if (text.contains("lg")){ - result.add("LG"); - } - - if(result.isEmpty()) { - result.add("geen"); - System.out.println(text); - } - return result; + if (text.contains("htc") || text.contains("one")) { + result.add("HTC"); + } + if (text.contains("apple") || text.contains("iphone")) { + result.add("Apple"); + } + if (text.contains("sony") || text.contains("xperia")) { + result.add("Sony"); } + if (text.contains("huawei") || text.contains("ascend")) { + result.add("Huawei"); + } + if (text.contains("lg")) { + result.add("LG"); + } + + // TODO: WTF IS THIS PILE OF SHIT?! + if (result.isEmpty()) { + result.add("geen"); + System.out.println(text); + } + return result; + } } diff --git a/src/main/Main.java b/src/main/Main.java index f4f644b..5a8e137 100644 --- a/src/main/Main.java +++ b/src/main/Main.java @@ -1,9 +1,13 @@ package main; +import com.google.gson.JsonIOException; +import com.google.gson.JsonSyntaxException; +import data.Tweet; import database.DBConnection; -import io.DataReader; -import io.InputReader; -import java.util.Arrays; +import io.FileTweetReader; +import io.ITweetReader; +import io.TweetReader; +import java.io.IOException; /** * The main class. @@ -12,6 +16,7 @@ public class Main { /** * The main method of the application. + * * @param args the global arguments to pass to the program. */ public static void main(String[] args) { @@ -22,43 +27,56 @@ public class Main { System.exit(1); } } - + private String m_hostaddress; - + private String m_filename; - + public Main(String[] args) { /* parse the global options. */ parseGlobalOptions(args); - + if (m_hostaddress == null) { throw new IllegalArgumentException("Missing --dbhost to specify the hostaddress."); - } - + } + DBConnection connection = new DBConnection(m_hostaddress, "5432", "Twitter", "postgres", "2IOC02"); - /* create the object that fills the database */ + /* create the object that fills the database */ DataFiller filler = new DataFiller(connection); - - if (m_filename == null) { - InputReader reader = new InputReader(filler); - - reader.startLoop(); - } else { - DataReader reader = new DataReader(m_filename, filler); - reader.startloop(); + + ITweetReader reader = null; + try { + if (m_filename == null) { + reader = new TweetReader(System.in); + } else { + reader = new FileTweetReader(m_filename); + } + + Tweet tweet; + while ((tweet = reader.getTweet()) != null) { + filler.processTweet(tweet); + } + } catch (JsonSyntaxException ex) { + System.err.println("Got an invalid tweet: " + ex); + } catch (IOException ex) { + System.err.println("Cannot open tweets: " + ex); + } finally { + if (reader != null) { + reader.close(); + } } - + connection.close(); - + System.out.print("exit succesfull."); } - + private void parseGlobalOptions(String[] args) { /* parse global options */ for (int i = 0; i < args.length; i++) { if ("--help".equals(args[i])) { printHelp(); - } else if("--dbhost".equals(args[i])) { + } else if ("--dbhost".equals(args[i])) { m_hostaddress = getParam(args, ++i); } else if (args[i].startsWith("-")) { throw new IllegalArgumentException("Invalid option: " + args[i]); @@ -67,29 +85,33 @@ public class Main { m_filename = getParam(args, i); } } - + if (args.length == 0) { - throw new IllegalArgumentException("No parameters specified, see --help for usage info."); + throw new IllegalArgumentException("No parameters specified, see --help for usage info."); } } - - /** Read an extra option for a command. */ + + /** + * Read an extra option for a command. + */ private String getParam(String[] args, Integer index) { if (index + 1 <= args.length) { - index++; + index++; return args[index - 1]; } else { throw new IllegalArgumentException("An extra option was missing."); } } - - /** Print some useful help messages. */ + + /** + * Print some useful help messages. + */ private void printHelp() { for (String line : HELP) { System.out.println(line); } } - + private final static String[] HELP = { "Global options:", " --help Print this help text.", |