diff options
author | Peter Wu <peter@lekensteyn.nl> | 2014-05-02 19:20:32 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2014-05-02 19:20:32 +0200 |
commit | 4244f2d82536bad4ac2b4d5306daf84378828b60 (patch) | |
tree | 926a8beb29bbb74486f118824a906b818e758e78 /src/io/CompressableDataWriter.java | |
parent | 3d5a702a1a830314847ad3f13b121e7160bccbc9 (diff) | |
download | TwitterDataAnalytics-4244f2d82536bad4ac2b4d5306daf84378828b60.tar.gz |
Support compressed files
Diffstat (limited to 'src/io/CompressableDataWriter.java')
-rw-r--r-- | src/io/CompressableDataWriter.java | 88 |
1 files changed, 88 insertions, 0 deletions
diff --git a/src/io/CompressableDataWriter.java b/src/io/CompressableDataWriter.java new file mode 100644 index 0000000..68bb789 --- /dev/null +++ b/src/io/CompressableDataWriter.java @@ -0,0 +1,88 @@ +package io; + +import java.io.BufferedInputStream; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; +import org.apache.commons.io.IOUtils; + +/** + * A DataWriter which can handle compressed data. + * + * @author Peter Wu + */ +public class CompressableDataWriter extends DataWriter { + + /** + * If true, then the output will always be a gzip-compressed stream + * (possibly converting the input). Otherwise, if the input was not + * compressed. then neither will the output be compressed. + */ + private final boolean convertUncompressed; + + public CompressableDataWriter(String profilesName, String tweetsName, + boolean convertUncompressed) throws IOException { + super(profilesName, tweetsName); + this.convertUncompressed = convertUncompressed; + } + + @Override + protected Store getStore(String filename) throws IOException { + return new CompressedStore(filename); + } + + class CompressedStore extends Store { + + private boolean compressable = false; + + CompressedStore(String filename) throws IOException { + super(filename); + BufferedInputStream bis = null; + try { + bis = new BufferedInputStream(new FileInputStream(getFileName())); + // file found containing magic? OK, gzip writable! + compressable = isCompressed(bis); + // TODO: convert uncompressed + } catch (FileNotFoundException ex) { + // file not found? Then we are free to write. + compressable = true; + } finally { + IOUtils.closeQuietly(bis); + } + // now prepare a compressed output stream if possible... + os = new FileOutputStream(getFileName(), true); + if (compressable) { + os = new GZIPOutputStream(os); + } + } + + protected boolean isCompressed(BufferedInputStream bis) { + try { + // file can be opened, check for GZIP magic to see whether it + // is compressed or not + byte[] header = new byte[2]; + if (bis.read(header, 0, header.length) == 2) { + int magic = (header[1] << 8) | header[0]; + return magic == GZIPInputStream.GZIP_MAGIC; + } + } catch (IOException ex) { + // file is too small, do not overwrite. + } + return false; + } + + @Override + public InputStream getInputStream() throws IOException { + FileInputStream fis = new FileInputStream(getFileName()); + if (compressable) { + return new GZIPInputStream(fis); + } else { + return fis; + } + } + } +} |