package io; import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.logging.Logger; import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import org.apache.commons.io.IOUtils; /** * A DataWriter which can handle compressed data. * * @author Peter Wu */ public class CompressableDataWriter extends DataWriter { /** * If true, then any uncompressed files will be converted to the compressed * one, removing the uncompressed original. */ private final boolean convertAndRemoveUncompressed; /** * A DataWriter that writes compressed files. Compressed files will get a * ".gz" extension. If both the compressed and uncompressed files exist and * conversion is requested, then the contents of the uncompressed file is * written to the compressed file. After successful conversion, the * uncompressed file is removed. * * @param tweetsName The base filename to store tweets. * @param convertAndRemoveUncompressed True if non-empty uncompressed files * should be converted to compressed ones. * @throws IOException */ public CompressableDataWriter(String tweetsName, boolean convertAndRemoveUncompressed) throws IOException { super(tweetsName); this.convertAndRemoveUncompressed = convertAndRemoveUncompressed; } @Override protected Store getStore(String filename) { return new CompressedStore(filename); } class CompressedStore extends Store { private static final String FILE_EXT = ".gz"; CompressedStore(String filename) { super(filename); } private String getFileNameGz() { String filename = getFileName(); if (!getFileName().endsWith(FILE_EXT)) { filename += FILE_EXT; } return filename; } @Override public void open() throws IOException { if (os != null) { // already open, don't bother. return; } // throws FileNotFoundException if the dirs do not exist... os = new FileOutputStream(getFileNameGz(), true); try { os = new GZIPOutputStream(os); // try to convert uncompressed files if necessary if (convertAndRemoveUncompressed) { if (!getFileName().equals(getFileNameGz())) { doConvertUncompressed(getFileName(), getFileNameGz()); } } } catch (IOException ex) { IOUtils.closeQuietly(os); os = null; throw ex; } } private void doConvertUncompressed(String filename, String filenameGz) throws IOException { BufferedInputStream bis = null; try { // original, uncompressed file. File bigFile = new File(filename); bis = new BufferedInputStream(new FileInputStream(bigFile)); long origSize = bigFile.length(); long origSizeGz = new File(filenameGz).length(); // start converting the uncompressed file IOUtils.copy(bis, os); // flush all data to file to have a more correct size ((GZIPOutputStream) os).finish(); os.flush(); // done, now try to remove the big blob and print some stats. IOUtils.closeQuietly(bis); if (!bigFile.delete()) { getLogger().info(filename + ": converted file, " + "but cannot remove the old, uncompressed file."); } // calculate effective size of newly compressed part long newSize = new File(filenameGz).length(); newSize -= origSizeGz; printStats(filename, origSize, newSize); } catch (FileNotFoundException ex) { // file not found? No problem, nothing to convert then. } finally { IOUtils.closeQuietly(bis); } } private void printStats(String filename, long origSize, long newSize) { String msg = "Succesfully converted " + filename + ". "; msg += "Uncompressed size: " + origSize + "; " + "compressed size: " + newSize + "."; if (newSize > 0 && origSize > 0) { long bytesSaved = origSize - newSize; long perc = 100 * -bytesSaved / origSize; if (bytesSaved > 0) { msg += " Saved " + bytesSaved + " bytes (" + perc + "%). "; } } getLogger().info(msg); } private Logger getLogger() { return Logger.getLogger(this.getClass().getName()); } @Override public InputStream getInputStream() throws IOException { FileInputStream fis = new FileInputStream(getFileNameGz()); try { return new GZIPInputStream(fis); } catch (IOException ex) { IOUtils.closeQuietly(fis); throw ex; } } } }