diff options
-rw-r--r-- | src/io/DataWriter.java | 51 |
1 files changed, 40 insertions, 11 deletions
diff --git a/src/io/DataWriter.java b/src/io/DataWriter.java
index 42d69f8..841b27d 100644
--- a/src/io/DataWriter.java
+++ b/src/io/DataWriter.java
@@ -1,20 +1,23 @@
 package io;
 
-import java.io.File;
+import java.io.FileInputStream;
 import java.io.FileNotFoundException;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.util.HashSet;
 import java.util.Scanner;
 import java.util.Set;
 import java.util.logging.Level;
 import java.util.logging.Logger;
+import org.apache.commons.io.Charsets;
 import org.json.JSONException;
 import org.json.JSONObject;
 import provider.ResultListener;
 
 /**
- * This class writes the output data into seperate files.
+ * This class writes the JSON objects for tweets and users to (separate) files.
  *
  * @author Maurice Laveaux
  */
@@ -23,12 +26,12 @@ public class DataWriter implements ResultListener {
     /**
      * The writer for the tweet stream.
      */
-    private final FileWriter m_tweetWriter;
+    private final OutputStream m_tweetWriter;
 
     /**
      * the writer for the profile stream.
      */
-    private final FileWriter m_profileWriter;
+    private final OutputStream m_profileWriter;
 
     /**
      * the buffer of tweet ids that already exist.
@@ -53,10 +56,34 @@ public class DataWriter implements ResultListener {
     public DataWriter(final String profilesName, final String tweetsName)
             throws IOException {
         m_profileIdSet = readIds(profilesName);
-        m_profileWriter = new FileWriter(profilesName, true);
+        m_profileWriter = getFileWriter(profilesName);
 
         m_tweetIdSet = readIds(tweetsName);
-        m_tweetWriter = new FileWriter(tweetsName, true);
+        m_tweetWriter = getFileWriter(tweetsName);
+    }
+
+    /**
+     * Given a filename, return a suitable input stream.
+     *
+     * @param tweetsName File name.
+     * @return A stream from which JSON objects can be read (one per line).
+     * @throws IOException
+     */
+    protected InputStream getFileReader(String tweetsName)
+            throws IOException {
+        return new FileInputStream(tweetsName);
+    }
+
+    /**
+     * Given a filename, return a suitable output stream.
+     *
+     * @param tweetsName File name.
+     * @return A stream to which JSON objects can be written (one per line).
+     * @throws IOException
+     */
+    protected OutputStream getFileWriter(String tweetsName)
+            throws IOException {
+        return new FileOutputStream(tweetsName, true);
     }
 
     public void close() {
@@ -88,7 +115,8 @@ public class DataWriter implements ResultListener {
     private Set readIds(String filename) throws IOException {
         Set<Long> idSet = new HashSet<>();
         try {
-            Scanner reader = new Scanner(new File(filename));
+            InputStream is = getFileReader(filename);
+            Scanner reader = new Scanner(is);
             // parse each line into a JSONObject, read the id and add it to
             // the set of ids.
             while (reader.hasNext()) {
@@ -109,17 +137,18 @@ public class DataWriter implements ResultListener {
      * Writes the JSONObject to a writer and update the idSet.
      *
      * @param obj The object to write.
-     * @param writer The writer object to append the object to.
+     * @param output The stream to write objects to.
      * @param idSet The id set to add the obj id to.
      */
-    private void writeObject(JSONObject obj, FileWriter writer, Set<Long> idSet) {
+    private void writeObject(JSONObject obj, OutputStream output,
+            Set<Long> idSet) {
         try {
             long id = obj.getLong("id");
 
             if (!idSet.contains(id)) {
                 // Write a single profile into the profile file.
                 try {
-                    writer.write(obj.toString() + "\n");
+                    output.write((obj.toString() + "\n").getBytes(Charsets.UTF_8));
                     idSet.add(id);
                 } catch (IOException ex) {
                     getLogger().log(Level.WARNING, "Cannot write to file", ex);