summaryrefslogtreecommitdiff
path: root/src/io/CompressableDataWriter.java
blob: d81d80bb638c5109f3035b80093494befe787a12 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
package io;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.io.IOUtils;

/**
 * A DataWriter which can handle compressed data.
 *
 * <p>Every store handed out by {@link #getStore(String)} inspects its file
 * when it is opened: a missing, empty or gzip-compressed file is appended to
 * as a gzip stream, while any other file is either converted to gzip first or
 * appended to uncompressed, depending on {@code convertUncompressed}.
 *
 * @author Peter Wu
 */
public class CompressableDataWriter extends DataWriter {

    /**
     * If true, then the output will always be a gzip-compressed stream
     * (possibly converting the input). Otherwise, if the input was not
     * compressed, then neither will the output be compressed.
     */
    private final boolean convertUncompressed;

    /**
     * Creates a writer whose stores are gzip-aware.
     *
     * @param profilesName passed through unchanged to the DataWriter
     * constructor
     * @param tweetsName passed through unchanged to the DataWriter
     * constructor
     * @param convertUncompressed whether an existing uncompressed file should
     * be converted to gzip when its store is opened
     * @throws IOException if the DataWriter constructor fails
     */
    public CompressableDataWriter(String profilesName, String tweetsName,
            boolean convertUncompressed) throws IOException {
        super(profilesName, tweetsName);
        this.convertUncompressed = convertUncompressed;
    }

    /**
     * @param filename name of the file backing the store
     * @return a new {@link CompressedStore} for the given file
     */
    @Override
    protected Store getStore(String filename) {
        return new CompressedStore(filename);
    }

    /**
     * A store which reads and appends gzip data whenever the backing file
     * allows it, and falls back to plain data otherwise.
     */
    class CompressedStore extends Store {

        /**
         * Whether the backing file is (to be) treated as a gzip stream. It is
         * determined by {@link #open()} and consulted by
         * {@link #getInputStream()}.
         */
        private boolean compressable = false;

        CompressedStore(String filename) {
            super(filename);
        }

        /**
         * Determines whether the backing file can be written as gzip
         * (converting it first when requested) and then opens the output
         * stream in append mode.
         *
         * @throws IOException if the file cannot be opened for appending or
         * the gzip header cannot be written
         */
        @Override
        public void open() throws IOException {
            BufferedInputStream bis = null;
            try {
                bis = new BufferedInputStream(new FileInputStream(getFileName()));
                // file found containing magic? OK, gzip writable!
                compressable = isCompressed(bis);
            } catch (FileNotFoundException ex) {
                // file not found? Then we are free to write.
                compressable = true;
            } finally {
                IOUtils.closeQuietly(bis);
            }
            if (!compressable) {
                if (CompressableDataWriter.this.convertUncompressed) {
                    tryConvertToGzip();
                } else {
                    getLogger().info(getFileName() + ": not compressed and "
                            + "won't be compressed either.");
                }
            }
            // now prepare a compressed output stream if possible...
            FileOutputStream fos = new FileOutputStream(getFileName(), true);
            if (compressable) {
                try {
                    // the constructor writes the gzip header and may fail
                    os = new GZIPOutputStream(fos);
                } catch (IOException ex) {
                    // do not leak the file handle when wrapping fails
                    try {
                        fos.close();
                    } catch (IOException closeEx) {
                        ex.addSuppressed(closeEx);
                    }
                    throw ex;
                }
            } else {
                os = fos;
            }
        }

        /**
         * Checks whether gzip data may be appended to the file behind the
         * given stream. Up to two bytes are consumed from the stream.
         *
         * @param bis stream positioned at the start of the file
         * @return true if the stream starts with the gzip magic number or is
         * completely empty; false if it holds other data or cannot be read
         */
        protected boolean isCompressed(BufferedInputStream bis) {
            try {
                // file can be opened, check for GZIP magic to see whether it
                // is compressed or not
                byte[] header = new byte[2];
                int filled = 0;
                // a single read may return fewer bytes than requested
                while (filled < header.length) {
                    int n = bis.read(header, filled, header.length - filled);
                    if (n < 0) {
                        break;
                    }
                    filled += n;
                }
                if (filled == header.length) {
                    int magic = ((header[1] & 0xFF) << 8) | (header[0] & 0xFF);
                    return magic == GZIPInputStream.GZIP_MAGIC;
                }
                // An empty file can safely start a gzip stream. A file with a
                // single byte of junk cannot: appending gzip data after it
                // would yield a file that GZIPInputStream refuses to read.
                return filled == 0;
            } catch (IOException ex) {
                // unable to read the header, do not treat it as gzip.
            }
            return false;
        }

        /**
         * Tries to replace the uncompressed file by a gzip-compressed copy.
         * On success the store is marked as compressable; on failure the
         * original file is left untouched and the problem is only logged.
         */
        private void tryConvertToGzip() {
            // How to convert:
            // 1. Create a temporary file in the same dir as the original
            // 2. Write a gzip-compressed copy of the original into it
            // 3. Move the temporary file over the original, replacing it
            File origFile = new File(getFileName());
            File newTmpFile = null;
            try {
                long origSize = origFile.length();
                try (FileInputStream fis = new FileInputStream(origFile)) {
                    // Resolve the directory via the absolute path so that a
                    // bare relative name does not end up in the system temp
                    // directory. The ".gz" part keeps the prefix at the
                    // minimum of three characters createTempFile insists on.
                    File targetDir = origFile.getAbsoluteFile().getParentFile();
                    newTmpFile = File.createTempFile(origFile.getName() + ".gz",
                            ".tmp", targetDir);
                    // now compress data, hopefully we have enough time and disk
                    try (FileOutputStream fos = new FileOutputStream(newTmpFile);
                            OutputStream gzOs = new GZIPOutputStream(fos)) {
                        IOUtils.copy(fis, gzOs);
                    }
                }
                long newSize = newTmpFile.length();

                // compression ready! Don't delete the result!
                Files.move(newTmpFile.toPath(), origFile.toPath(),
                        StandardCopyOption.REPLACE_EXISTING);
                newTmpFile = null;
                compressable = true;
                // show some compression stats
                printStats(origSize, newSize);
            } catch (IOException ex) {
                // failed to convert (disk space too low? Not compressed?)
                getLogger().log(Level.INFO, getFileName() + ": Unable to "
                        + "convert an uncompressed file", ex);
            } finally {
                // still set? Then the conversion failed halfway, clean up.
                if (newTmpFile != null) {
                    newTmpFile.delete();
                }
            }
        }

        /**
         * Logs the result of a successful conversion.
         *
         * @param origSize size in bytes of the uncompressed file
         * @param newSize size in bytes of the compressed file
         */
        private void printStats(long origSize, long newSize) {
            String msg = "Succesfully converted " + getFileName() + ". ";
            msg += "Uncompressed size: " + origSize + "; "
                    + "compressed size: " + newSize + ".";
            if (newSize > 0 && origSize > 0) {
                long bytesSaved = origSize - newSize;
                // relative size change, hence negative when space was saved
                long perc = 100 * -bytesSaved / origSize;
                if (bytesSaved > 0) {
                    msg += " Saved " + bytesSaved + " bytes (" + perc + "%). ";
                }
            }
            getLogger().info(msg);
        }

        /**
         * @return the logger named after the runtime class of this store
         */
        private Logger getLogger() {
            return Logger.getLogger(this.getClass().getName());
        }

        /**
         * Opens the backing file for reading, decompressing it on the fly if
         * the store is marked as compressable.
         *
         * @return a stream yielding the uncompressed contents of the file
         * @throws IOException if the file cannot be opened or, for a
         * compressable store, does not start with a valid gzip header
         */
        @Override
        public InputStream getInputStream() throws IOException {
            FileInputStream fis = new FileInputStream(getFileName());
            if (!compressable) {
                return fis;
            }
            try {
                // the constructor reads the gzip header and may fail
                return new GZIPInputStream(fis);
            } catch (IOException ex) {
                // do not leak the file handle when the header is invalid
                try {
                    fis.close();
                } catch (IOException closeEx) {
                    ex.addSuppressed(closeEx);
                }
                throw ex;
            }
        }
    }
}