From ed029928c6a74a7e3a625d1f2ada16212c00511d Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Wed, 7 May 2014 11:12:42 +0200 Subject: Get rid of separate profiles These are always available from the tweets themselves... --- src/io/CompressableDataWriter.java | 5 ++--- src/io/DataWriter.java | 35 +++++-------------------------- src/io/StreamImpl.java | 9 -------- src/main/TweetCounter.java | 7 ++----- src/main/TweetShell.java | 14 ++----------- src/provider/CompositeResultListener.java | 7 ------- src/provider/ResultListener.java | 7 ------- src/utils/Configuration.java | 7 +------ 8 files changed, 12 insertions(+), 79 deletions(-) diff --git a/src/io/CompressableDataWriter.java b/src/io/CompressableDataWriter.java index 84b2150..32531d1 100644 --- a/src/io/CompressableDataWriter.java +++ b/src/io/CompressableDataWriter.java @@ -32,15 +32,14 @@ public class CompressableDataWriter extends DataWriter { * written to the compressed file. After successful conversion, the * uncompressed file is removed. * - * @param profilesName The base filename to store user profiles. * @param tweetsName The base filename to store tweets. * @param convertAndRemoveUncompressed True if non-empty uncompressed files * should be converted to compressed ones. * @throws IOException */ - public CompressableDataWriter(String profilesName, String tweetsName, + public CompressableDataWriter(String tweetsName, boolean convertAndRemoveUncompressed) throws IOException { - super(profilesName, tweetsName); + super(tweetsName); this.convertAndRemoveUncompressed = convertAndRemoveUncompressed; } diff --git a/src/io/DataWriter.java b/src/io/DataWriter.java index 578c3a7..ec7a30e 100644 --- a/src/io/DataWriter.java +++ b/src/io/DataWriter.java @@ -20,7 +20,7 @@ import org.json.JSONObject; import provider.ResultListener; /** - * This class writes the JSON objects for tweets and users to (separate) files. + * This class writes the JSON objects for tweets to files. * * @author Maurice Laveaux */ @@ -31,34 +31,19 @@ public class DataWriter implements ResultListener, Closeable { */ private final Store m_tweet; - /** - * the writer for the profile stream. - */ - private final Store m_profile; - /** * the buffer of tweet ids that already exist. */ private final Set m_tweetIdSet; - /** - * the buffer of profile ids that already exist. - */ - private final Set m_profileIdSet; - - public final static String CFG_PROFILE_FILENAME = "profiles-filename"; public final static String CFG_TWEETS_FILENAME = "tweets-filename"; /** - * Creates an instance, specifying the profile and file names to which - * tweets and users are written. + * Creates an instance, specifying the file to store tweets. * - * @param profilesName The file to write the profiles to. * @param tweetsName The file to write the tweets to. */ - public DataWriter(final String profilesName, final String tweetsName) { - m_profile = getStore(profilesName); - m_profileIdSet = new HashSet<>(); + public DataWriter(final String tweetsName) { m_tweet = getStore(tweetsName); m_tweetIdSet = new HashSet<>(); } @@ -70,13 +55,10 @@ public class DataWriter implements ResultListener, Closeable { */ public void open() throws IOException { try { - readIds(m_profileIdSet, m_profile); readIds(m_tweetIdSet, m_tweet); // open after reading input to prevent simultaneous r/w access - m_profile.open(); m_tweet.open(); } catch (IOException ex) { - m_profile.close(); m_tweet.close(); throw ex; } @@ -92,14 +74,9 @@ public class DataWriter implements ResultListener, Closeable { return new SimpleFileStore(filename); } + @Override public void close() { m_tweet.close(); - m_profile.close(); - } - - @Override - public void profileGenerated(JSONObject obj) { - writeObject(obj, m_profile.getOutputStream(), m_profileIdSet); } @Override @@ -108,8 +85,7 @@ public class DataWriter implements ResultListener, Closeable { } /** - * Read the current existing tweetName and profileName filenames and fill - * the existing id set. + * Read tweet IDs from file to avoid storing duplicate tweets later. * * @param is An input stream that provides JSON objects. * @return The set of ids, may be empty if the fill does not exist. @@ -156,7 +132,6 @@ public class DataWriter implements ResultListener, Closeable { long id = obj.getLong("id"); if (!idSet.contains(id)) { - // Write a single profile into the profile file. try { output.write((obj.toString() + "\n").getBytes(Charsets.UTF_8)); idSet.add(id); diff --git a/src/io/StreamImpl.java b/src/io/StreamImpl.java index 749be1c..21beae5 100644 --- a/src/io/StreamImpl.java +++ b/src/io/StreamImpl.java @@ -249,15 +249,6 @@ public class StreamImpl implements Stream { } private void processObject(JSONObject obj) { - try { - JSONObject user = obj.getJSONObject("user"); - resultListener.profileGenerated(user); - } catch (JSONException ex) { - // should not happen because the worker inserts tweets (which - // assumes that a tweet has a user member). - Logger.getLogger(getClass().getName()) - .severe("Expected a user in a tweet!"); - } synchronized (resultListenerSync) { if (resultListener != null) { resultListener.tweetGenerated(obj); diff --git a/src/main/TweetCounter.java b/src/main/TweetCounter.java index 4c348b3..e922c5c 100644 --- a/src/main/TweetCounter.java +++ b/src/main/TweetCounter.java @@ -28,12 +28,9 @@ public class TweetCounter implements ResultListener { @Override public void tweetGenerated(JSONObject obj) { tweetCount++; - } - - @Override - public void profileGenerated(JSONObject obj) { try { - String screen_name = obj.getString("screen_name"); + JSONObject userObj = obj.getJSONObject("user"); + String screen_name = userObj.getString("screen_name"); users.add(screen_name); } catch (JSONException ex) { LOGGER.log(Level.WARNING, "Profile is missing data", ex); diff --git a/src/main/TweetShell.java b/src/main/TweetShell.java index 608a15a..fae4125 100644 --- a/src/main/TweetShell.java +++ b/src/main/TweetShell.java @@ -97,15 +97,6 @@ public class TweetShell implements TwitterApi.PinSupplier { } } - @Override - public void profileGenerated(JSONObject obj) { - try { - System.out.println("Got user: " + obj.getString("name")); - } catch (JSONException ex) { - getLogger().log(Level.SEVERE, "Failed to parse user", ex); - } - } - private Logger getLogger() { return Logger.getLogger(getClass().getName()); } @@ -429,7 +420,6 @@ public class TweetShell implements TwitterApi.PinSupplier { if (DataWriter.class.isAssignableFrom(rlCls)) { Configuration config = Configuration.getConfig(); - String profilesFilename = config.getProperty(DataWriter.CFG_PROFILE_FILENAME); String tweetsFilename = config.getProperty(DataWriter.CFG_TWEETS_FILENAME); try { DataWriter dw; @@ -437,10 +427,10 @@ public class TweetShell implements TwitterApi.PinSupplier { // compressed stream, convert by default (removing orig) boolean convertUncompressed = Boolean.parseBoolean( config.getProperty("convert-uncompressed", "true")); - dw = new CompressableDataWriter(profilesFilename, + dw = new CompressableDataWriter( tweetsFilename, convertUncompressed); } else { - dw = new DataWriter(profilesFilename, tweetsFilename); + dw = new DataWriter(tweetsFilename); } dw.open(); resultListeners.register(dw); diff --git a/src/provider/CompositeResultListener.java b/src/provider/CompositeResultListener.java index bc8ad74..dc4a754 100644 --- a/src/provider/CompositeResultListener.java +++ b/src/provider/CompositeResultListener.java @@ -47,13 +47,6 @@ public class CompositeResultListener implements ResultListener, Closeable { } } - @Override - public void profileGenerated(JSONObject obj) { - for (ResultListener rl : listeners) { - rl.profileGenerated(obj); - } - } - @Override public void close() { for (ResultListener rl : listeners) { diff --git a/src/provider/ResultListener.java b/src/provider/ResultListener.java index 87bc786..6fd5e47 100644 --- a/src/provider/ResultListener.java +++ b/src/provider/ResultListener.java @@ -13,11 +13,4 @@ public interface ResultListener { * @param obj A single JSON object. */ public void tweetGenerated(JSONObject obj); - - /** - * This method is called when a new profile is provided. - * - * @param obj A single JSON object - */ - public void profileGenerated(JSONObject obj); } diff --git a/src/utils/Configuration.java b/src/utils/Configuration.java index 1e810c3..f737706 100644 --- a/src/utils/Configuration.java +++ b/src/utils/Configuration.java @@ -25,7 +25,6 @@ public class Configuration { public static final String ACCESS_TOKEN_URL = "https://twitter.com/oauth/access_token"; public static final String DEFAULT_TWEETS_FILENAME = "tweets.txt"; - public static final String DEFAULT_PROFILE_FILENAME = "profiles.txt"; private final Properties properties; private final File storeFile; @@ -50,11 +49,7 @@ public class Configuration { Properties defs = new Properties(); // set default preferences as needed - String profilesFilename = defs.getProperty(DataWriter.CFG_PROFILE_FILENAME, "profiles.txt"); - String tweetsFilename = defs.getProperty(DataWriter.CFG_TWEETS_FILENAME, "tweets.txt"); - - defs.setProperty(DataWriter.CFG_PROFILE_FILENAME, profilesFilename); - defs.setProperty(DataWriter.CFG_TWEETS_FILENAME, tweetsFilename); + defs.setProperty(DataWriter.CFG_TWEETS_FILENAME, "tweets.txt"); return defs; } -- cgit v1.2.1