From 4244f2d82536bad4ac2b4d5306daf84378828b60 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Fri, 2 May 2014 19:20:32 +0200 Subject: Support compressed files --- src/io/CompressableDataWriter.java | 88 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 src/io/CompressableDataWriter.java (limited to 'src/io') diff --git a/src/io/CompressableDataWriter.java b/src/io/CompressableDataWriter.java new file mode 100644 index 0000000..68bb789 --- /dev/null +++ b/src/io/CompressableDataWriter.java @@ -0,0 +1,88 @@ +package io; + +import java.io.BufferedInputStream; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; +import org.apache.commons.io.IOUtils; + +/** + * A DataWriter which can handle compressed data. + * + * @author Peter Wu + */ +public class CompressableDataWriter extends DataWriter { + + /** + * If true, then the output will always be a gzip-compressed stream + * (possibly converting the input). Otherwise, if the input was not + * compressed. then neither will the output be compressed. + */ + private final boolean convertUncompressed; + + public CompressableDataWriter(String profilesName, String tweetsName, + boolean convertUncompressed) throws IOException { + super(profilesName, tweetsName); + this.convertUncompressed = convertUncompressed; + } + + @Override + protected Store getStore(String filename) throws IOException { + return new CompressedStore(filename); + } + + class CompressedStore extends Store { + + private boolean compressable = false; + + CompressedStore(String filename) throws IOException { + super(filename); + BufferedInputStream bis = null; + try { + bis = new BufferedInputStream(new FileInputStream(getFileName())); + // file found containing magic? OK, gzip writable! + compressable = isCompressed(bis); + // TODO: convert uncompressed + } catch (FileNotFoundException ex) { + // file not found? Then we are free to write. + compressable = true; + } finally { + IOUtils.closeQuietly(bis); + } + // now prepare a compressed output stream if possible... + os = new FileOutputStream(getFileName(), true); + if (compressable) { + os = new GZIPOutputStream(os); + } + } + + protected boolean isCompressed(BufferedInputStream bis) { + try { + // file can be opened, check for GZIP magic to see whether it + // is compressed or not + byte[] header = new byte[2]; + if (bis.read(header, 0, header.length) == 2) { + int magic = (header[1] << 8) | header[0]; + return magic == GZIPInputStream.GZIP_MAGIC; + } + } catch (IOException ex) { + // file is too small, do not overwrite. + } + return false; + } + + @Override + public InputStream getInputStream() throws IOException { + FileInputStream fis = new FileInputStream(getFileName()); + if (compressable) { + return new GZIPInputStream(fis); + } else { + return fis; + } + } + } +} -- cgit v1.2.1