summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Peter Wu <peter@lekensteyn.nl> 2014-05-02 17:31:12 +0200
committer: Peter Wu <peter@lekensteyn.nl> 2014-05-02 17:31:12 +0200
commit: 2287f5d537cc327b3e1d513ddec2e229ece21d24 (patch)
tree: 1ccdfb5e4e5b57ebd7e8992c7085f3c0f87adbd7
parent: 42bc583b80535affa5b1235d4c534984a738c958 (diff)
download: TwitterDataAnalytics-2287f5d537cc327b3e1d513ddec2e229ece21d24.tar.gz
Make DataWriter more generic
Prepare for compressed file formats; this requires that the data writer can handle compressed files. In theory, an implementation could even write data over the network instead of to a file.
-rw-r--r--src/io/DataWriter.java51
1 files changed, 40 insertions, 11 deletions
diff --git a/src/io/DataWriter.java b/src/io/DataWriter.java
index 42d69f8..841b27d 100644
--- a/src/io/DataWriter.java
+++ b/src/io/DataWriter.java
@@ -1,20 +1,23 @@
package io;
-import java.io.File;
+import java.io.FileInputStream;
import java.io.FileNotFoundException;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
+import org.apache.commons.io.Charsets;
import org.json.JSONException;
import org.json.JSONObject;
import provider.ResultListener;
/**
- * This class writes the output data into seperate files.
+ * This class writes the JSON objects for tweets and users to (separate) files.
*
* @author Maurice Laveaux
*/
@@ -23,12 +26,12 @@ public class DataWriter implements ResultListener {
/**
* The writer for the tweet stream.
*/
- private final FileWriter m_tweetWriter;
+ private final OutputStream m_tweetWriter;
/**
* the writer for the profile stream.
*/
- private final FileWriter m_profileWriter;
+ private final OutputStream m_profileWriter;
/**
* the buffer of tweet ids that already exist.
@@ -53,10 +56,34 @@ public class DataWriter implements ResultListener {
public DataWriter(final String profilesName, final String tweetsName)
throws IOException {
m_profileIdSet = readIds(profilesName);
- m_profileWriter = new FileWriter(profilesName, true);
+ m_profileWriter = getFileWriter(profilesName);
m_tweetIdSet = readIds(tweetsName);
- m_tweetWriter = new FileWriter(tweetsName, true);
+ m_tweetWriter = getFileWriter(tweetsName);
+ }
+
+ /**
+ * Given a filename, return a suitable input stream.
+ *
+ * @param tweetsName File name.
+ * @return A stream from which JSON objects can be read (one per line).
+ * @throws IOException
+ */
+ protected InputStream getFileReader(String tweetsName)
+ throws IOException {
+ return new FileInputStream(tweetsName);
+ }
+
+ /**
+ * Given a filename, return a suitable output stream.
+ *
+ * @param tweetsName File name.
+ * @return A stream to which JSON objects can be written (one per line).
+ * @throws IOException
+ */
+ protected OutputStream getFileWriter(String tweetsName)
+ throws IOException {
+ return new FileOutputStream(tweetsName, true);
}
public void close() {
@@ -88,7 +115,8 @@ public class DataWriter implements ResultListener {
private Set readIds(String filename) throws IOException {
Set<Long> idSet = new HashSet<>();
try {
- Scanner reader = new Scanner(new File(filename));
+ InputStream is = getFileReader(filename);
+ Scanner reader = new Scanner(is);
// parse each line into a JSONObject, read the id and add it to
// the set of ids.
while (reader.hasNext()) {
@@ -109,17 +137,18 @@ public class DataWriter implements ResultListener {
* Writes the JSONObject to a writer and update the idSet.
*
* @param obj The object to write.
- * @param writer The writer object to append the object to.
+ * @param output The stream to write objects to.
* @param idSet The id set to add the obj id to.
*/
- private void writeObject(JSONObject obj, FileWriter writer, Set<Long> idSet) {
+ private void writeObject(JSONObject obj, OutputStream output,
+ Set<Long> idSet) {
try {
long id = obj.getLong("id");
if (!idSet.contains(id)) {
// Write a single profile into the profile file.
try {
- writer.write(obj.toString() + "\n");
+ output.write((obj.toString() + "\n").getBytes(Charsets.UTF_8));
idSet.add(id);
} catch (IOException ex) {
getLogger().log(Level.WARNING, "Cannot write to file", ex);