summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Peter Wu <peter@lekensteyn.nl> 2014-05-07 11:12:42 +0200
committer: Peter Wu <peter@lekensteyn.nl> 2014-05-07 11:12:42 +0200
commit: ed029928c6a74a7e3a625d1f2ada16212c00511d (patch)
tree: d751f6d0647d40fb58107fcc88474263bf32a0c2
parent: d56af15b3e343930f6674868c1d9be8a48f002ff (diff)
download: TwitterDataAnalytics-ed029928c6a74a7e3a625d1f2ada16212c00511d.tar.gz
Get rid of separate profiles
These are always available from the tweets themselves...
-rw-r--r-- src/io/CompressableDataWriter.java 5
-rw-r--r-- src/io/DataWriter.java 35
-rw-r--r-- src/io/StreamImpl.java 9
-rw-r--r-- src/main/TweetCounter.java 7
-rw-r--r-- src/main/TweetShell.java 14
-rw-r--r-- src/provider/CompositeResultListener.java 7
-rw-r--r-- src/provider/ResultListener.java 7
-rw-r--r-- src/utils/Configuration.java 7
8 files changed, 12 insertions, 79 deletions
diff --git a/src/io/CompressableDataWriter.java b/src/io/CompressableDataWriter.java
index 84b2150..32531d1 100644
--- a/src/io/CompressableDataWriter.java
+++ b/src/io/CompressableDataWriter.java
@@ -32,15 +32,14 @@ public class CompressableDataWriter extends DataWriter {
* written to the compressed file. After successful conversion, the
* uncompressed file is removed.
*
- * @param profilesName The base filename to store user profiles.
* @param tweetsName The base filename to store tweets.
* @param convertAndRemoveUncompressed True if non-empty uncompressed files
* should be converted to compressed ones.
* @throws IOException
*/
- public CompressableDataWriter(String profilesName, String tweetsName,
+ public CompressableDataWriter(String tweetsName,
boolean convertAndRemoveUncompressed) throws IOException {
- super(profilesName, tweetsName);
+ super(tweetsName);
this.convertAndRemoveUncompressed = convertAndRemoveUncompressed;
}
diff --git a/src/io/DataWriter.java b/src/io/DataWriter.java
index 578c3a7..ec7a30e 100644
--- a/src/io/DataWriter.java
+++ b/src/io/DataWriter.java
@@ -20,7 +20,7 @@ import org.json.JSONObject;
import provider.ResultListener;
/**
- * This class writes the JSON objects for tweets and users to (separate) files.
+ * This class writes the JSON objects for tweets to files.
*
* @author Maurice Laveaux
*/
@@ -32,33 +32,18 @@ public class DataWriter implements ResultListener, Closeable {
private final Store m_tweet;
/**
- * the writer for the profile stream.
- */
- private final Store m_profile;
-
- /**
* the buffer of tweet ids that already exist.
*/
private final Set<Long> m_tweetIdSet;
- /**
- * the buffer of profile ids that already exist.
- */
- private final Set<Long> m_profileIdSet;
-
- public final static String CFG_PROFILE_FILENAME = "profiles-filename";
public final static String CFG_TWEETS_FILENAME = "tweets-filename";
/**
- * Creates an instance, specifying the profile and file names to which
- * tweets and users are written.
+ * Creates an instance, specifying the file to store tweets.
*
- * @param profilesName The file to write the profiles to.
* @param tweetsName The file to write the tweets to.
*/
- public DataWriter(final String profilesName, final String tweetsName) {
- m_profile = getStore(profilesName);
- m_profileIdSet = new HashSet<>();
+ public DataWriter(final String tweetsName) {
m_tweet = getStore(tweetsName);
m_tweetIdSet = new HashSet<>();
}
@@ -70,13 +55,10 @@ public class DataWriter implements ResultListener, Closeable {
*/
public void open() throws IOException {
try {
- readIds(m_profileIdSet, m_profile);
readIds(m_tweetIdSet, m_tweet);
// open after reading input to prevent simultaneous r/w access
- m_profile.open();
m_tweet.open();
} catch (IOException ex) {
- m_profile.close();
m_tweet.close();
throw ex;
}
@@ -92,14 +74,9 @@ public class DataWriter implements ResultListener, Closeable {
return new SimpleFileStore(filename);
}
+ @Override
public void close() {
m_tweet.close();
- m_profile.close();
- }
-
- @Override
- public void profileGenerated(JSONObject obj) {
- writeObject(obj, m_profile.getOutputStream(), m_profileIdSet);
}
@Override
@@ -108,8 +85,7 @@ public class DataWriter implements ResultListener, Closeable {
}
/**
- * Read the current existing tweetName and profileName filenames and fill
- * the existing id set.
+ * Read tweet IDs from file to avoid storing duplicate tweets later.
*
* @param is An input stream that provides JSON objects.
* @return The set of ids, may be empty if the fill does not exist.
@@ -156,7 +132,6 @@ public class DataWriter implements ResultListener, Closeable {
long id = obj.getLong("id");
if (!idSet.contains(id)) {
- // Write a single profile into the profile file.
try {
output.write((obj.toString() + "\n").getBytes(Charsets.UTF_8));
idSet.add(id);
diff --git a/src/io/StreamImpl.java b/src/io/StreamImpl.java
index 749be1c..21beae5 100644
--- a/src/io/StreamImpl.java
+++ b/src/io/StreamImpl.java
@@ -249,15 +249,6 @@ public class StreamImpl implements Stream {
}
private void processObject(JSONObject obj) {
- try {
- JSONObject user = obj.getJSONObject("user");
- resultListener.profileGenerated(user);
- } catch (JSONException ex) {
- // should not happen because the worker inserts tweets (which
- // assumes that a tweet has a user member).
- Logger.getLogger(getClass().getName())
- .severe("Expected a user in a tweet!");
- }
synchronized (resultListenerSync) {
if (resultListener != null) {
resultListener.tweetGenerated(obj);
diff --git a/src/main/TweetCounter.java b/src/main/TweetCounter.java
index 4c348b3..e922c5c 100644
--- a/src/main/TweetCounter.java
+++ b/src/main/TweetCounter.java
@@ -28,12 +28,9 @@ public class TweetCounter implements ResultListener {
@Override
public void tweetGenerated(JSONObject obj) {
tweetCount++;
- }
-
- @Override
- public void profileGenerated(JSONObject obj) {
try {
- String screen_name = obj.getString("screen_name");
+ JSONObject userObj = obj.getJSONObject("user");
+ String screen_name = userObj.getString("screen_name");
users.add(screen_name);
} catch (JSONException ex) {
LOGGER.log(Level.WARNING, "Profile is missing data", ex);
diff --git a/src/main/TweetShell.java b/src/main/TweetShell.java
index 608a15a..fae4125 100644
--- a/src/main/TweetShell.java
+++ b/src/main/TweetShell.java
@@ -97,15 +97,6 @@ public class TweetShell implements TwitterApi.PinSupplier {
}
}
- @Override
- public void profileGenerated(JSONObject obj) {
- try {
- System.out.println("Got user: " + obj.getString("name"));
- } catch (JSONException ex) {
- getLogger().log(Level.SEVERE, "Failed to parse user", ex);
- }
- }
-
private Logger getLogger() {
return Logger.getLogger(getClass().getName());
}
@@ -429,7 +420,6 @@ public class TweetShell implements TwitterApi.PinSupplier {
if (DataWriter.class.isAssignableFrom(rlCls)) {
Configuration config = Configuration.getConfig();
- String profilesFilename = config.getProperty(DataWriter.CFG_PROFILE_FILENAME);
String tweetsFilename = config.getProperty(DataWriter.CFG_TWEETS_FILENAME);
try {
DataWriter dw;
@@ -437,10 +427,10 @@ public class TweetShell implements TwitterApi.PinSupplier {
// compressed stream, convert by default (removing orig)
boolean convertUncompressed = Boolean.parseBoolean(
config.getProperty("convert-uncompressed", "true"));
- dw = new CompressableDataWriter(profilesFilename,
+ dw = new CompressableDataWriter(
tweetsFilename, convertUncompressed);
} else {
- dw = new DataWriter(profilesFilename, tweetsFilename);
+ dw = new DataWriter(tweetsFilename);
}
dw.open();
resultListeners.register(dw);
diff --git a/src/provider/CompositeResultListener.java b/src/provider/CompositeResultListener.java
index bc8ad74..dc4a754 100644
--- a/src/provider/CompositeResultListener.java
+++ b/src/provider/CompositeResultListener.java
@@ -48,13 +48,6 @@ public class CompositeResultListener implements ResultListener, Closeable {
}
@Override
- public void profileGenerated(JSONObject obj) {
- for (ResultListener rl : listeners) {
- rl.profileGenerated(obj);
- }
- }
-
- @Override
public void close() {
for (ResultListener rl : listeners) {
if (rl instanceof Closeable) {
diff --git a/src/provider/ResultListener.java b/src/provider/ResultListener.java
index 87bc786..6fd5e47 100644
--- a/src/provider/ResultListener.java
+++ b/src/provider/ResultListener.java
@@ -13,11 +13,4 @@ public interface ResultListener {
* @param obj A single JSON object.
*/
public void tweetGenerated(JSONObject obj);
-
- /**
- * This method is called when a new profile is provided.
- *
- * @param obj A single JSON object
- */
- public void profileGenerated(JSONObject obj);
}
diff --git a/src/utils/Configuration.java b/src/utils/Configuration.java
index 1e810c3..f737706 100644
--- a/src/utils/Configuration.java
+++ b/src/utils/Configuration.java
@@ -25,7 +25,6 @@ public class Configuration {
public static final String ACCESS_TOKEN_URL = "https://twitter.com/oauth/access_token";
public static final String DEFAULT_TWEETS_FILENAME = "tweets.txt";
- public static final String DEFAULT_PROFILE_FILENAME = "profiles.txt";
private final Properties properties;
private final File storeFile;
@@ -50,11 +49,7 @@ public class Configuration {
Properties defs = new Properties();
// set default preferences as needed
- String profilesFilename = defs.getProperty(DataWriter.CFG_PROFILE_FILENAME, "profiles.txt");
- String tweetsFilename = defs.getProperty(DataWriter.CFG_TWEETS_FILENAME, "tweets.txt");
-
- defs.setProperty(DataWriter.CFG_PROFILE_FILENAME, profilesFilename);
- defs.setProperty(DataWriter.CFG_TWEETS_FILENAME, tweetsFilename);
+ defs.setProperty(DataWriter.CFG_TWEETS_FILENAME, "tweets.txt");
return defs;
}