summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2014-05-02 19:20:32 +0200
committerPeter Wu <peter@lekensteyn.nl>2014-05-02 19:20:32 +0200
commit4244f2d82536bad4ac2b4d5306daf84378828b60 (patch)
tree926a8beb29bbb74486f118824a906b818e758e78
parent3d5a702a1a830314847ad3f13b121e7160bccbc9 (diff)
downloadTwitterDataAnalytics-4244f2d82536bad4ac2b4d5306daf84378828b60.tar.gz
Support compressed files
-rw-r--r--src/io/CompressableDataWriter.java88
-rw-r--r--src/main/TweetShell.java13
2 files changed, 99 insertions, 2 deletions
diff --git a/src/io/CompressableDataWriter.java b/src/io/CompressableDataWriter.java
new file mode 100644
index 0000000..68bb789
--- /dev/null
+++ b/src/io/CompressableDataWriter.java
@@ -0,0 +1,88 @@
+package io;
+
+import java.io.BufferedInputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+import org.apache.commons.io.IOUtils;
+
+/**
+ * A DataWriter which can handle gzip-compressed data files.
+ *
+ * Every store is created as a {@link CompressedStore}, which inspects an
+ * already existing file to decide whether new data must be appended as gzip
+ * or as plain bytes.
+ *
+ * @author Peter Wu
+ */
+public class CompressableDataWriter extends DataWriter {
+
+    /**
+     * If true, then the output will always be a gzip-compressed stream
+     * (possibly converting the input). Otherwise, if the input was not
+     * compressed, then neither will the output be compressed.
+     *
+     * NOTE: the conversion is not implemented yet (see the TODO in
+     * CompressedStore), the flag is only stored.
+     */
+    private final boolean convertUncompressed;
+
+    /**
+     * Creates a writer whose stores are gzip-aware.
+     *
+     * @param profilesName name handed to the DataWriter constructor for the
+     * profiles data.
+     * @param tweetsName name handed to the DataWriter constructor for the
+     * tweets data.
+     * @param convertUncompressed whether uncompressed input should be
+     * converted to gzip (currently only stored, not acted upon).
+     * @throws IOException propagated from the DataWriter constructor.
+     */
+    public CompressableDataWriter(String profilesName, String tweetsName,
+            boolean convertUncompressed) throws IOException {
+        super(profilesName, tweetsName);
+        this.convertUncompressed = convertUncompressed;
+    }
+
+    /**
+     * Supplies a gzip-aware store for the given file.
+     *
+     * @param filename name handed to the CompressedStore constructor.
+     * @return a new CompressedStore.
+     * @throws IOException if the store cannot be set up.
+     */
+    @Override
+    protected Store getStore(String filename) throws IOException {
+        return new CompressedStore(filename);
+    }
+
+    /**
+     * A Store that appends through a GZIPOutputStream when its file does not
+     * exist yet or already starts with the GZIP magic, and appends plain bytes
+     * otherwise.
+     */
+    class CompressedStore extends Store {
+
+        /**
+         * Whether data in the file is (to be) gzip-compressed. Decided once in
+         * the constructor and used for both writing and reading.
+         */
+        private boolean compressable = false;
+
+        /**
+         * Detects the format of an existing file and opens the output stream
+         * in append mode.
+         *
+         * @param filename name handed to the Store constructor.
+         * @throws IOException if the output stream cannot be opened.
+         */
+        CompressedStore(String filename) throws IOException {
+            super(filename);
+            BufferedInputStream bis = null;
+            try {
+                bis = new BufferedInputStream(new FileInputStream(getFileName()));
+                // file found containing magic? OK, gzip writable!
+                compressable = isCompressed(bis);
+                // TODO: convert uncompressed
+            } catch (FileNotFoundException ex) {
+                // file not found? Then we are free to write.
+                compressable = true;
+            } finally {
+                IOUtils.closeQuietly(bis);
+            }
+            // now prepare a compressed output stream if possible; the file
+            // is opened in append mode so existing data is kept.
+            FileOutputStream fos = new FileOutputStream(getFileName(), true);
+            if (compressable) {
+                try {
+                    os = new GZIPOutputStream(fos);
+                } catch (IOException ex) {
+                    // the gzip wrapper could not be created: do not leak
+                    // the file handle that was just opened.
+                    IOUtils.closeQuietly(fos);
+                    throw ex;
+                }
+            } else {
+                os = fos;
+            }
+        }
+
+        /**
+         * Checks whether the stream starts with the two-byte GZIP magic.
+         *
+         * @param bis stream to inspect; up to two bytes are consumed from it.
+         * @return true if two bytes could be read and they equal
+         * {@link GZIPInputStream#GZIP_MAGIC}; false otherwise, including when
+         * reading fails.
+         */
+        protected boolean isCompressed(BufferedInputStream bis) {
+            try {
+                byte[] header = new byte[2];
+                if (bis.read(header, 0, header.length) == 2) {
+                    // The magic is compared as a little-endian 16-bit value.
+                    // Java bytes are signed, so each byte must be masked:
+                    // without the mask 0x8b sign-extends to a negative int
+                    // and the comparison could never succeed.
+                    int magic = ((header[1] & 0xff) << 8) | (header[0] & 0xff);
+                    return magic == GZIPInputStream.GZIP_MAGIC;
+                }
+            } catch (IOException ignored) {
+                // header is unreadable: report "not compressed" so that the
+                // caller does not treat the file as gzip.
+            }
+            return false;
+        }
+
+        /**
+         * Opens the file for reading, decompressing it when the store is in
+         * gzip mode.
+         *
+         * @return a fresh stream over the file; the caller must close it.
+         * @throws IOException if the file cannot be opened or, in gzip mode,
+         * the gzip stream cannot be set up.
+         */
+        @Override
+        public InputStream getInputStream() throws IOException {
+            FileInputStream fis = new FileInputStream(getFileName());
+            if (!compressable) {
+                return fis;
+            }
+            try {
+                return new GZIPInputStream(fis);
+            } catch (IOException ex) {
+                // the gzip wrapper could not be created: do not leak the
+                // file handle that was just opened.
+                IOUtils.closeQuietly(fis);
+                throw ex;
+            }
+        }
+    }
+}
diff --git a/src/main/TweetShell.java b/src/main/TweetShell.java
index 1f99999..a0cfec6 100644
--- a/src/main/TweetShell.java
+++ b/src/main/TweetShell.java
@@ -1,5 +1,6 @@
package main;
+import io.CompressableDataWriter;
import io.DataWriter;
import io.OAuthRequester;
import io.StreamImpl;
@@ -320,6 +321,7 @@ public class TweetShell implements TwitterApi.PinSupplier {
private ClassEnabledTracker<ResultListener> getPossibleTargets() {
Map<String, Class<? extends ResultListener>> targets = new TreeMap<>();
targets.put("file", DataWriter.class);
+ targets.put("cfile", CompressableDataWriter.class);
targets.put("shell", StreamHandler.class);
ClassEnabledTracker<ResultListener> targetFoo = new ClassEnabledTracker<>(targets);
@@ -376,13 +378,20 @@ public class TweetShell implements TwitterApi.PinSupplier {
return false;
}
- if (rlCls == DataWriter.class) {
+ if (DataWriter.class.isAssignableFrom(rlCls)) {
Configuration config = Configuration.getConfig();
String profilesFilename = config.getProperty(DataWriter.CFG_PROFILE_FILENAME);
String tweetsFilename = config.getProperty(DataWriter.CFG_TWEETS_FILENAME);
try {
- DataWriter dw = new DataWriter(profilesFilename, tweetsFilename);
+ DataWriter dw;
+ if (CompressableDataWriter.class.isAssignableFrom(rlCls)) {
+ // compressed stream, do not convert uncompressed input
+ dw = new CompressableDataWriter(profilesFilename,
+ tweetsFilename, false);
+ } else {
+ dw = new DataWriter(profilesFilename, tweetsFilename);
+ }
resultListeners.register(dw);
// save the changes to the config.
config.save();