summaryrefslogtreecommitdiff
path: root/src/io/CompressableDataWriter.java
blob: 26a6c26416f1b9c9bf475ea208bda0a35989d7b7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
package io;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.io.IOUtils;

/**
 * A DataWriter which can handle compressed data.
 *
 * Each backing file is probed for the gzip magic number when its store is
 * opened. Files that are gzip-compressed (or do not exist yet) are written
 * through a {@link GZIPOutputStream}; other files are written as-is unless
 * conversion of uncompressed files was requested.
 *
 * @author Peter Wu
 */
public class CompressableDataWriter extends DataWriter {

    /**
     * If true, then the output will always be a gzip-compressed stream
     * (possibly converting the input). Otherwise, if the input was not
     * compressed, then neither will the output be compressed.
     */
    // NOTE(review): if the superclass constructor already opens stores, this
    // field still holds its default (false) at that point -- confirm.
    private final boolean convertUncompressed;

    /**
     * @param profilesName file name passed on to the superclass.
     * @param tweetsName file name passed on to the superclass.
     * @param convertUncompressed whether existing uncompressed files should
     * be converted to gzip when they are opened.
     * @throws IOException if the superclass constructor fails.
     */
    public CompressableDataWriter(String profilesName, String tweetsName,
            boolean convertUncompressed) throws IOException {
        super(profilesName, tweetsName);
        this.convertUncompressed = convertUncompressed;
    }

    /**
     * @return a store for {@code filename} that is aware of gzip compression.
     */
    @Override
    protected Store getStore(String filename) {
        return new CompressedStore(filename);
    }

    /**
     * A store that transparently reads and appends gzip-compressed data when
     * the underlying file is (or can be made) gzip-compressed.
     */
    class CompressedStore extends Store {

        /**
         * True if data must be read and written through gzip streams. It is
         * determined by {@link #open()}.
         */
        private boolean compressable = false;

        CompressedStore(String filename) {
            super(filename);
        }

        /**
         * Detects whether the file is compressed, optionally converts it to
         * gzip and then opens the output stream in append mode.
         *
         * @throws IOException if the output stream cannot be opened.
         */
        @Override
        public void open() throws IOException {
            boolean fileExists = true;
            BufferedInputStream bis = null;
            try {
                bis = new BufferedInputStream(new FileInputStream(getFileName()));
                // file found containing magic? OK, gzip writable!
                compressable = isCompressed(bis);
            } catch (FileNotFoundException ex) {
                // file not found? Then we are free to write.
                compressable = true;
                fileExists = false;
            } finally {
                IOUtils.closeQuietly(bis);
            }
            // Convert only after the probing stream is closed: an open handle
            // prevents renaming or deleting the file on some platforms.
            if (fileExists && !compressable
                    && CompressableDataWriter.this.convertUncompressed) {
                tryConvertToGzip();
            }
            // now prepare a compressed output stream if possible...
            os = new FileOutputStream(getFileName(), true);
            if (compressable) {
                os = new GZIPOutputStream(os);
            }
        }

        /**
         * Checks whether the stream starts with the gzip magic number.
         *
         * @param bis stream positioned at the start of the file; two bytes
         * are consumed from it.
         * @return true if the first two bytes equal
         * {@link GZIPInputStream#GZIP_MAGIC}, false if they do not, if fewer
         * than two bytes are available or if reading fails.
         */
        protected boolean isCompressed(BufferedInputStream bis) {
            try {
                // file can be opened, check for GZIP magic to see whether it
                // is compressed or not
                byte[] header = new byte[2];
                if (bis.read(header, 0, header.length) == header.length) {
                    // Bytes are signed in Java; mask them before combining or
                    // the 0x8b byte sign-extends and the magic never matches.
                    int magic = ((header[1] & 0xff) << 8) | (header[0] & 0xff);
                    return magic == GZIPInputStream.GZIP_MAGIC;
                }
            } catch (IOException ex) {
                // unreadable header: treat as not compressed so that the
                // file is not handled as gzip data.
            }
            return false;
        }

        /**
         * Attempts to replace the uncompressed file by a gzip-compressed copy.
         * On success {@code compressable} is set to true. Failures are logged
         * and leave {@code compressable} unchanged.
         */
        private void tryConvertToGzip() {
            // How to convert:
            // 1. Create new compressed file in same dir
            // 2. rename new file to old
            // 3. if (2) fails, delete old file and rename again
            File newTmpFile = null;
            FileInputStream fis = null;
            OutputStream gzOut = null;
            boolean deleteTmp = true;
            try {
                File origFile = new File(getFileName());
                fis = new FileInputStream(origFile);
                // Resolve the parent via the absolute path: for a bare
                // relative name getParentFile() is null, which would put the
                // temporary file in the system temp directory where a rename
                // back (possibly across file systems) may fail.
                File dir = origFile.getAbsoluteFile().getParentFile();
                // The prefix must be at least three characters long and must
                // not contain directory parts, so do not use the full name.
                newTmpFile = File.createTempFile("gz-" + origFile.getName(),
                        ".tmp", dir);
                gzOut = new GZIPOutputStream(new FileOutputStream(newTmpFile));

                // now compress data, hopefully we have enough time and disk
                IOUtils.copy(fis, gzOut);
                gzOut.close();
                fis.close();

                // compression ready! Don't delete the result!
                deleteTmp = false;
                // first try overwriting the file if possible on this platform.
                if (newTmpFile.renameTo(origFile)) {
                    compressable = true;
                } else if (!origFile.delete()) {
                    // that didn't work (Windows?) and the original cannot be
                    // removed either: keep the original, drop the copy.
                    deleteTmp = true;
                    getLogger().warning("Cannot rename compressed nor "
                            + "delete uncompressed " + getFileName());
                } else if (!newTmpFile.renameTo(origFile)) {
                    // The only remaining copy of the data is the temporary
                    // file, so report where it can be found.
                    getLogger().warning("Uncompressed file is deleted, but "
                            + " compressed file cannot be renamed, sorry."
                            + " Compressed data kept in " + newTmpFile);
                } else {
                    compressable = true;
                }
            } catch (IOException ex) {
                // failed to convert (disk space too low? Not compressed?)
                getLogger().log(Level.INFO, "Cannot compress/decompress", ex);
            } finally {
                IOUtils.closeQuietly(fis);
                IOUtils.closeQuietly(gzOut);
                if (newTmpFile != null && deleteTmp) {
                    newTmpFile.delete();
                }
            }
        }

        private Logger getLogger() {
            return Logger.getLogger(this.getClass().getName());
        }

        /**
         * Opens the file for reading, decompressing it on the fly if this
         * store is marked as compressed.
         *
         * @return a new stream that must be closed by the caller.
         * @throws IOException if the file cannot be opened or does not start
         * with a valid gzip header while it is expected to be compressed.
         */
        @Override
        public InputStream getInputStream() throws IOException {
            FileInputStream fis = new FileInputStream(getFileName());
            if (!compressable) {
                return fis;
            }
            try {
                return new GZIPInputStream(fis);
            } catch (IOException ex) {
                // The gzip header is read in the constructor; do not leak the
                // file handle if that fails.
                IOUtils.closeQuietly(fis);
                throw ex;
            }
        }
    }
}