summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/io/DataWriter.java51
1 files changed, 40 insertions, 11 deletions
diff --git a/src/io/DataWriter.java b/src/io/DataWriter.java
index 42d69f8..841b27d 100644
--- a/src/io/DataWriter.java
+++ b/src/io/DataWriter.java
@@ -1,20 +1,23 @@
package io;
-import java.io.File;
+import java.io.FileInputStream;
import java.io.FileNotFoundException;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
import java.util.HashSet;
import java.util.Scanner;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
+import org.apache.commons.io.Charsets;
import org.json.JSONException;
import org.json.JSONObject;
import provider.ResultListener;
/**
- * This class writes the output data into seperate files.
+ * This class writes the JSON objects for tweets and users to (separate) files.
*
* @author Maurice Laveaux
*/
@@ -23,12 +26,12 @@ public class DataWriter implements ResultListener {
/**
* The writer for the tweet stream.
*/
- private final FileWriter m_tweetWriter;
+ private final OutputStream m_tweetWriter;
/**
* the writer for the profile stream.
*/
- private final FileWriter m_profileWriter;
+ private final OutputStream m_profileWriter;
/**
* the buffer of tweet ids that already exist.
@@ -53,10 +56,34 @@ public class DataWriter implements ResultListener {
public DataWriter(final String profilesName, final String tweetsName)
throws IOException {
m_profileIdSet = readIds(profilesName);
- m_profileWriter = new FileWriter(profilesName, true);
+ m_profileWriter = getFileWriter(profilesName);
m_tweetIdSet = readIds(tweetsName);
- m_tweetWriter = new FileWriter(tweetsName, true);
+ m_tweetWriter = getFileWriter(tweetsName);
+ }
+
+ /**
+ * Opens the named file and returns a FileInputStream over its contents.
+ *
+ * @param tweetsName Name of the file to open (not only the tweets file: readIds passes its own filename argument).
+ * @return A stream from which JSON objects can be read (one per line).
+ * @throws IOException If the file cannot be opened for reading.
+ */
+ protected InputStream getFileReader(String tweetsName)
+ throws IOException {
+ return new FileInputStream(tweetsName);
+ }
+
+ /**
+ * Opens the named file in append mode and returns a FileOutputStream to it.
+ *
+ * @param tweetsName Name of the file to open (the constructor passes both the profiles and the tweets file names).
+ * @return A stream to which JSON objects can be written (one per line).
+ * @throws IOException If the file cannot be opened for writing.
+ */
+ protected OutputStream getFileWriter(String tweetsName)
+ throws IOException {
+ return new FileOutputStream(tweetsName, true);
}
public void close() {
@@ -88,7 +115,8 @@ public class DataWriter implements ResultListener {
private Set readIds(String filename) throws IOException {
Set<Long> idSet = new HashSet<>();
try {
- Scanner reader = new Scanner(new File(filename));
+ InputStream is = getFileReader(filename);
+ Scanner reader = new Scanner(is);
// parse each line into a JSONObject, read the id and add it to
// the set of ids.
while (reader.hasNext()) {
@@ -109,17 +137,18 @@ public class DataWriter implements ResultListener {
* Writes the JSONObject to a writer and update the idSet.
*
* @param obj The object to write.
- * @param writer The writer object to append the object to.
+ * @param output The stream to write objects to.
* @param idSet The id set to add the obj id to.
*/
- private void writeObject(JSONObject obj, FileWriter writer, Set<Long> idSet) {
+ private void writeObject(JSONObject obj, OutputStream output,
+ Set<Long> idSet) {
try {
long id = obj.getLong("id");
if (!idSet.contains(id)) {
// Write a single profile into the profile file.
try {
- writer.write(obj.toString() + "\n");
+ output.write((obj.toString() + "\n").getBytes(Charsets.UTF_8));
idSet.add(id);
} catch (IOException ex) {
getLogger().log(Level.WARNING, "Cannot write to file", ex);