summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Peter Wu <peter@lekensteyn.nl> 2014-05-03 00:07:47 +0200
committer Peter Wu <peter@lekensteyn.nl> 2014-05-03 00:07:47 +0200
commit 651d400a4f7cca02fe07d06b2d6b6bcc1de09bd9 (patch)
tree 1cd6b671f044ad017a1502fd620d00ac0e8eb511
parent 4bd53349774beecbf235f1589a3d9168b04a17f5 (diff)
download TwitterDataAnalytics-651d400a4f7cca02fe07d06b2d6b6bcc1de09bd9.tar.gz
More compression fixes
* Fix magic check (bytes are read as signed numbers...).
* Assume that a file can be compressed if the file is too small.
* Try to convert a file only after the stream used for gzip detection has been closed.
-rw-r--r-- src/io/CompressableDataWriter.java 21
1 files changed, 12 insertions, 9 deletions
diff --git a/src/io/CompressableDataWriter.java b/src/io/CompressableDataWriter.java
index 7460c96..a3b069f 100644
--- a/src/io/CompressableDataWriter.java
+++ b/src/io/CompressableDataWriter.java
@@ -56,20 +56,20 @@ public class CompressableDataWriter extends DataWriter {
bis = new BufferedInputStream(new FileInputStream(getFileName()));
// file found containing magic? OK, gzip writable!
compressable = isCompressed(bis);
- if (!compressable) {
- if (CompressableDataWriter.this.convertUncompressed) {
- tryConvertToGzip();
- } else {
- getLogger().info(getFileName() + ": not compressed and "
- + "won't be compressed either.");
- }
- }
} catch (FileNotFoundException ex) {
// file not found? Then we are free to write.
compressable = true;
} finally {
IOUtils.closeQuietly(bis);
}
+ if (!compressable) {
+ if (CompressableDataWriter.this.convertUncompressed) {
+ tryConvertToGzip();
+ } else {
+ getLogger().info(getFileName() + ": not compressed and "
+ + "won't be compressed either.");
+ }
+ }
// now prepare a compressed output stream if possible...
os = new FileOutputStream(getFileName(), true);
if (compressable) {
@@ -83,9 +83,12 @@ public class CompressableDataWriter extends DataWriter {
// is compressed or not
byte[] header = new byte[2];
if (bis.read(header, 0, header.length) == 2) {
- int magic = (header[1] << 8) | header[0];
+ int magic = ((header[1] & 0xFF) << 8) | (header[0] & 0xFF);
return magic == GZIPInputStream.GZIP_MAGIC;
}
+ // file is too small, it is likely empty or contains a single
+ // newline or other junk.
+ return true;
} catch (IOException ex) {
// file is too small, do not overwrite.
}