package io;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.io.IOUtils;

/**
 * A DataWriter which can handle compressed data.
 *
 * @author Peter Wu
 */
public class CompressableDataWriter extends DataWriter {

    /**
     * If true, then the output will always be a gzip-compressed stream
     * (possibly converting the input). Otherwise, if the input was not
     * compressed, then neither will the output be compressed.
     */
    private final boolean convertUncompressed;

    /**
     * Creates a writer whose stores transparently handle gzip files.
     *
     * @param profilesName passed unchanged to the {@code DataWriter}
     * constructor
     * @param tweetsName passed unchanged to the {@code DataWriter}
     * constructor
     * @param convertUncompressed whether an existing uncompressed file should
     * be converted to gzip when its store is opened
     * @throws IOException if the superclass constructor fails
     */
    public CompressableDataWriter(String profilesName, String tweetsName,
            boolean convertUncompressed) throws IOException {
        super(profilesName, tweetsName);
        this.convertUncompressed = convertUncompressed;
    }

    /**
     * Supplies a {@link CompressedStore} for the given file.
     *
     * @param filename name of the file backing the store
     * @return a new store that is aware of gzip compression
     */
    @Override
    protected Store getStore(String filename) {
        return new CompressedStore(filename);
    }

    /**
     * A store which writes (and reads) gzip data when the backing file is
     * gzip-compressed, does not exist yet, or was successfully converted.
     */
    class CompressedStore extends Store {

        /**
         * Minimum prefix length accepted by
         * {@link File#createTempFile(String, String, File)}.
         */
        private static final int MIN_TEMP_PREFIX_LENGTH = 3;

        /**
         * True when data in the backing file is (to be) gzip-compressed.
         */
        private boolean compressable = false;

        CompressedStore(String filename) {
            super(filename);
        }

        /**
         * Inspects the backing file and opens the output stream in append
         * mode, wrapping it in a gzip stream when the file is compressed,
         * missing, or has just been converted.
         *
         * @throws IOException if the output stream cannot be opened
         */
        @Override
        public void open() throws IOException {
            boolean existingUncompressed = false;
            BufferedInputStream bis = null;
            try {
                bis = new BufferedInputStream(
                        new FileInputStream(getFileName()));
                // file found containing magic? OK, gzip writable!
                compressable = isCompressed(bis);
                existingUncompressed = !compressable;
            } catch (FileNotFoundException ex) {
                // file not found? Then we are free to write.
                compressable = true;
            } finally {
                // Release the handle before a possible conversion: replacing
                // a file that is still open fails on some platforms.
                IOUtils.closeQuietly(bis);
            }

            if (existingUncompressed) {
                if (CompressableDataWriter.this.convertUncompressed) {
                    tryConvertToGzip();
                } else {
                    getLogger().info(getFileName() + ": not compressed and "
                            + "won't be compressed either.");
                }
            }

            // now prepare a compressed output stream if possible...
            os = new FileOutputStream(getFileName(), true);
            if (compressable) {
                os = new GZIPOutputStream(os);
            }
        }

        /**
         * Checks whether the stream starts with the two-byte gzip magic
         * number. The stream is advanced by the bytes that were read.
         *
         * @param bis stream positioned at the start of the file
         * @return true if the first two bytes equal
         * {@link GZIPInputStream#GZIP_MAGIC}; false if they differ, if fewer
         * than two bytes are available, or if reading fails
         */
        protected boolean isCompressed(BufferedInputStream bis) {
            try {
                byte[] header = new byte[2];
                int filled = 0;
                // a single read may legally return fewer bytes than requested
                while (filled < header.length) {
                    int n = bis.read(header, filled, header.length - filled);
                    if (n < 0) {
                        // file is too small, do not overwrite.
                        return false;
                    }
                    filled += n;
                }
                // Mask to avoid sign extension: 0x8b is negative as a byte
                // and would otherwise never compare equal to the magic.
                int magic = ((header[1] & 0xff) << 8) | (header[0] & 0xff);
                return magic == GZIPInputStream.GZIP_MAGIC;
            } catch (IOException ignored) {
                // unreadable header: treat the file as not compressed.
            }
            return false;
        }

        /**
         * Attempts to replace the uncompressed backing file with a
         * gzip-compressed copy. On success {@code compressable} becomes true;
         * on an I/O failure the problem is logged, the original file is left
         * in place and the temporary file is removed.
         */
        private void tryConvertToGzip() {
            // How to convert:
            // 1. Create new compressed file in the same directory
            // 2. Copy the old contents into it through a gzip stream
            // 3. Move the new file over the old one
            File origFile = new File(getFileName());
            File newTmpFile = null;
            try {
                // Resolve against the absolute path so that a bare relative
                // name still yields the directory of the original file.
                File dir = origFile.getAbsoluteFile().getParentFile();
                newTmpFile = File.createTempFile(tempPrefix(origFile),
                        "origFile", dir);
                try (InputStream in = new FileInputStream(origFile);
                        OutputStream fileOut = new FileOutputStream(newTmpFile);
                        OutputStream gzOs = new GZIPOutputStream(fileOut)) {
                    // now compress data, hopefully we have enough time and disk
                    IOUtils.copy(in, gzOs);
                }
                // compression ready! Don't delete the result!
                Files.move(newTmpFile.toPath(), origFile.toPath(),
                        StandardCopyOption.REPLACE_EXISTING);
                newTmpFile = null;
                compressable = true;
            } catch (IOException ex) {
                // failed to convert (disk space too low? Not compressed?)
                getLogger().log(Level.INFO, getFileName() + ": Unable to "
                        + "convert an uncompressed file", ex);
            } finally {
                if (newTmpFile != null) {
                    newTmpFile.delete();
                }
            }
        }

        /**
         * Builds a temp-file prefix from the bare file name, padded so that
         * it satisfies the minimum length required by
         * {@link File#createTempFile(String, String, File)}.
         */
        private String tempPrefix(File file) {
            StringBuilder prefix = new StringBuilder(file.getName());
            while (prefix.length() < MIN_TEMP_PREFIX_LENGTH) {
                prefix.append('_');
            }
            return prefix.toString();
        }

        private Logger getLogger() {
            return Logger.getLogger(this.getClass().getName());
        }

        /**
         * Opens the backing file for reading, decompressing on the fly when
         * the store is in compressed mode.
         *
         * @return a stream over the (decompressed) file contents
         * @throws IOException if the file cannot be opened or, in compressed
         * mode, the gzip header cannot be read
         */
        @Override
        public InputStream getInputStream() throws IOException {
            FileInputStream fis = new FileInputStream(getFileName());
            if (!compressable) {
                return fis;
            }
            try {
                return new GZIPInputStream(fis);
            } catch (IOException ex) {
                // do not leak the file handle when the header is invalid
                IOUtils.closeQuietly(fis);
                throw ex;
            }
        }
    }
}