package main; import data.Tweet; import data.User; import database.NamedPreparedStatement; import database.QueryUtils; import java.sql.Connection; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; /** * Process that incoming tweets and fill the database. * * @author Maurice Laveaux */ public class DataFiller { /** * The main database connection to fill. */ private final Connection m_connection; private final NamedPreparedStatement m_insertTweet; private final NamedPreparedStatement m_insertProfile; private final NamedPreparedStatement m_insertBrand; private final NamedPreparedStatement m_insertHash; private final NamedPreparedStatement m_insertTweetUrl; private final NamedPreparedStatement m_insertUserUrl; private final NamedPreparedStatement m_insertMentions; /** * Create the datafiller object. * * @param connection The database connection to use. * @throws java.sql.SQLException on error preparing the database connection. */ public DataFiller(Connection connection) throws SQLException { m_connection = connection; m_insertTweet = new NamedPreparedStatement(m_connection, QueryUtils.insertTweet); m_insertProfile = new NamedPreparedStatement(m_connection, QueryUtils.insertProfile); m_insertBrand = new NamedPreparedStatement(m_connection, QueryUtils.insertBrand); m_insertHash = new NamedPreparedStatement(m_connection, QueryUtils.insertHash); m_insertTweetUrl = new NamedPreparedStatement(m_connection, QueryUtils.insertTweetUrl); m_insertUserUrl = new NamedPreparedStatement(m_connection, QueryUtils.insertUserUrl); m_insertMentions = new NamedPreparedStatement(m_connection, QueryUtils.insertMentions); } /** * Replaces three HTML entities ('&', '<', '>') by their normal * forms. Later, this might also strip useless characters. * * @param text The tweet message. * @return A sanitized form of the tweet text. */ private String sanitizeTweetText(String text) { // sample tweets, 12k contained '&', 2.6k '>' and 300 '<'. // Note: time_zone, description, expanded_url, location are unaffected. return text .replaceAll("<", "<") .replaceAll(">", ">") .replaceAll("&", "&"); } public void processTweet(Tweet tweet) throws SQLException { // process retweets first because of the foreign key. if (tweet.retweeted_status != null) { processTweet(tweet.retweeted_status); } String text = sanitizeTweetText(tweet.text); // ensure that the user and tweet are known before adding relations QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, tweet, text); m_insertProfile.executeUpdate(); m_insertTweet.executeUpdate(); for (Tweet.Hashtag hashtag : tweet.entities.hashtags) { m_insertHash.setLong("tweetid", tweet.id); m_insertHash.setString("hashtag", hashtag.text); m_insertHash.executeUpdate(); } for (Tweet.Url url : tweet.entities.urls) { m_insertTweetUrl.setLong("tweetid", tweet.id); m_insertTweetUrl.setString("url", url.expanded_url); m_insertTweetUrl.executeUpdate(); } for (Tweet.Mention mention : tweet.entities.user_mentions) { m_insertMentions.setLong("tweetid", tweet.id); m_insertMentions.setLong("userid", mention.id); m_insertMentions.executeUpdate(); } User user = tweet.user; if (user.entities != null) { for (Tweet.Url url : tweet.entities.urls) { m_insertUserUrl.setLong("userid", user.id); m_insertUserUrl.setString("url", url.expanded_url); m_insertUserUrl.executeUpdate(); } } // determine the user's perception of the brand List brands = getBrands(tweet); for (String brand : brands) { QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand); m_insertBrand.executeUpdate(); } } ArrayList getBrands(Tweet tweet) { ArrayList result = new ArrayList<>(); String text = tweet.text.toLowerCase(); if (text.contains("samsung") || text.contains("galaxy")) { result.add("Samsung"); } if (text.contains("htc") || text.contains("one")) { result.add("HTC"); } if (text.contains("apple") || text.contains("iphone")) { result.add("Apple"); } if (text.contains("sony") || text.contains("xperia")) { result.add("Sony"); } if (text.contains("huawei") || text.contains("ascend")) { result.add("Huawei"); } if (text.contains("lg")) { result.add("LG"); } // TODO: WTF IS THIS PILE OF SHIT?! return result; } }