summaryrefslogtreecommitdiff
path: root/src/io/CompressableDataWriter.java
blob: 84b21500f3d4c81d612781b3c2ae641ee38a0b18 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
package io;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.logging.Logger;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.io.IOUtils;

/**
 * A DataWriter which can handle compressed data.
 *
 * Data is stored gzip-compressed in files carrying a ".gz" extension. The
 * files are opened in append mode, so a data file may consist of several
 * concatenated gzip members.
 *
 * @author Peter Wu
 */
public class CompressableDataWriter extends DataWriter {

    /**
     * If true, then any uncompressed files will be converted to the compressed
     * one, removing the uncompressed original.
     */
    private final boolean convertAndRemoveUncompressed;

    /**
     * A DataWriter that writes compressed files. Compressed files will get a
     * ".gz" extension. If conversion is requested and an uncompressed file
     * exists, then the contents of the uncompressed file are appended to the
     * compressed file. After successful conversion, the uncompressed file is
     * removed.
     *
     * @param profilesName The base filename to store user profiles.
     * @param tweetsName The base filename to store tweets.
     * @param convertAndRemoveUncompressed True if uncompressed files should be
     * converted to compressed ones.
     * @throws IOException propagated from the DataWriter constructor.
     */
    public CompressableDataWriter(String profilesName, String tweetsName,
            boolean convertAndRemoveUncompressed) throws IOException {
        super(profilesName, tweetsName);
        this.convertAndRemoveUncompressed = convertAndRemoveUncompressed;
    }

    /**
     * Creates the store backing a data file; this writer always uses a
     * gzip-compressed store.
     *
     * @param filename The (base) name of the data file.
     * @return A new CompressedStore for the given file name.
     */
    @Override
    protected Store getStore(String filename) {
        return new CompressedStore(filename);
    }

    /**
     * A Store which reads and writes gzip-compressed data.
     */
    class CompressedStore extends Store {

        /**
         * Extension carried by all compressed files.
         */
        private static final String FILE_EXT = ".gz";

        CompressedStore(String filename) {
            super(filename);
        }

        /**
         * @return The store's file name, with ".gz" appended unless the name
         * already ends with that extension.
         */
        private String getFileNameGz() {
            String filename = getFileName();
            if (!filename.endsWith(FILE_EXT)) {
                filename += FILE_EXT;
            }
            return filename;
        }

        /**
         * Opens the compressed file for appending. If conversion was requested
         * and the uncompressed file has a different name than the compressed
         * one, the uncompressed file is converted first.
         *
         * After this method returns normally, {@code os} is a fresh
         * GZIPOutputStream that accepts writes. If anything fails, the file
         * is closed and {@code os} is reset to null.
         *
         * @throws IOException if the file cannot be opened (for example
         * because the directories do not exist) or the conversion fails.
         */
        @Override
        public void open() throws IOException {
            // throws FileNotFoundException if the dirs do not exist...
            FileOutputStream fos = new FileOutputStream(getFileNameGz(), true);
            // publish the raw stream immediately so that the error path below
            // always has something to close
            os = fos;
            try {
                // try to convert uncompressed files if necessary
                if (convertAndRemoveUncompressed
                        && !getFileName().equals(getFileNameGz())) {
                    doConvertUncompressed(getFileName(), getFileNameGz(), fos);
                }

                // Create the stream used for regular writes only AFTER the
                // conversion: the conversion has to finish() its gzip stream
                // to get all data on disk, and a finished gzip stream rejects
                // any further write.
                os = new GZIPOutputStream(fos);
            } catch (IOException ex) {
                IOUtils.closeQuietly(os);
                os = null;
                throw ex;
            }
        }

        /**
         * Appends the contents of the uncompressed file to the compressed file
         * as a separate gzip member, removes the uncompressed file and logs
         * some statistics. Does nothing if the uncompressed file cannot be
         * opened for reading (e.g. because it does not exist).
         *
         * @param filename Name of the uncompressed file.
         * @param filenameGz Name of the compressed file (used for statistics).
         * @param rawOut The open, uncompressed stream to the compressed file.
         * It is written to, but never closed by this method.
         * @throws IOException if reading or writing fails.
         */
        private void doConvertUncompressed(String filename, String filenameGz,
                FileOutputStream rawOut) throws IOException {
            // original, uncompressed file.
            File bigFile = new File(filename);
            BufferedInputStream bis;
            try {
                bis = new BufferedInputStream(new FileInputStream(bigFile));
            } catch (FileNotFoundException ignored) {
                // file not found? No problem, nothing to convert then.
                return;
            }

            try {
                long origSize = bigFile.length();
                long origSizeGz = new File(filenameGz).length();

                // Convert into a dedicated gzip member. The stream is
                // deliberately not closed, as that would close rawOut too.
                GZIPOutputStream gzOut = new GZIPOutputStream(rawOut);
                IOUtils.copy(bis, gzOut);
                // write all pending data and the gzip trailer to the file to
                // have a more correct size
                gzOut.finish();
                gzOut.flush();

                // done, now try to remove the big blob and print some stats.
                // The input must be closed before the file is deleted.
                IOUtils.closeQuietly(bis);
                if (!bigFile.delete()) {
                    getLogger().info(filename + ": converted file, "
                            + "but cannot remove the old, uncompressed file.");
                }
                // calculate effective size of newly compressed part
                long newSize = new File(filenameGz).length() - origSizeGz;
                printStats(filename, origSize, newSize);
            } finally {
                // no-op after a successful conversion, cleanup on failure
                IOUtils.closeQuietly(bis);
            }
        }

        /**
         * Logs the result of a conversion.
         *
         * @param filename Name of the converted (uncompressed) file.
         * @param origSize Size of the uncompressed data in bytes.
         * @param newSize Size of the newly written compressed data in bytes.
         */
        private void printStats(String filename, long origSize, long newSize) {
            String msg = "Succesfully converted " + filename + ". ";
            msg += "Uncompressed size: " + origSize + "; "
                    + "compressed size: " + newSize + ".";
            if (newSize > 0 && origSize > 0) {
                long bytesSaved = origSize - newSize;
                // relative size change, hence negative when bytes were saved
                long perc = 100 * -bytesSaved / origSize;
                if (bytesSaved > 0) {
                    msg += " Saved " + bytesSaved + " bytes (" + perc + "%). ";
                }
            }
            getLogger().info(msg);
        }

        /**
         * @return The logger named after this object's runtime class.
         */
        private Logger getLogger() {
            return Logger.getLogger(this.getClass().getName());
        }

        /**
         * Opens the compressed file for reading.
         *
         * @return A stream yielding the decompressed file contents.
         * @throws IOException if the file cannot be opened or the gzip header
         * cannot be read; the file is closed again in the latter case.
         */
        @Override
        public InputStream getInputStream() throws IOException {
            FileInputStream fis = new FileInputStream(getFileNameGz());
            try {
                return new GZIPInputStream(fis);
            } catch (IOException ex) {
                IOUtils.closeQuietly(fis);
                throw ex;
            }
        }
    }
}