blob: 5c48e2850aadb3188c869d948459fa6a388d1123 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
package Chapter4.classification.bayes;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import com.google.gson.JsonObject;
import com.google.gson.JsonStreamParser;
/**
 * Command-line driver that loads the "text" field of every JSON object in a
 * file and repeatedly trains a {@link NaiveBayesSentimentClassifier} using a
 * randomized k-fold split of those texts.
 *
 * <p>The evaluation step against the held-out bucket is not implemented in
 * this class; only the training half of each fold is performed.
 */
public class NBCxv {

    /** Number of times the whole cross-validation procedure is repeated. */
    private static final int NUM_REPEATS = 3;

    /** Number of buckets (folds) the texts are split into on each repeat. */
    private static final int NUM_FOLDS = 5;

    /**
     * Reads the input file and runs the repeated cross-validation training.
     *
     * @param args optional; {@code args[0]} is the path of the JSON file to
     *             read. Defaults to {@code "owsemoticons.json"} when absent.
     */
    public static void main(String[] args) {
        String filename = args.length >= 1 ? args[0] : "owsemoticons.json";
        ArrayList<String> allTexts = new ArrayList<String>();

        // try-with-resources guarantees the file handle is released even if
        // parsing fails part-way through the stream.
        try (FileReader reader = new FileReader(filename)) {
            // The file is a stream of JSON objects; collect each one's "text".
            JsonStreamParser parser = new JsonStreamParser(reader);
            while (parser.hasNext()) {
                JsonObject elem = parser.next().getAsJsonObject();
                allTexts.add(elem.get("text").getAsString());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        // Without any texts every bucket would be empty, so there is nothing
        // meaningful to train on.
        if (allTexts.isEmpty()) {
            System.err.println("No texts loaded from " + filename
                    + "; nothing to cross-validate.");
            return;
        }

        // Do NUM_FOLDS-fold cross validation NUM_REPEATS times.
        for (int repeat = 0; repeat < NUM_REPEATS; repeat++) {
            // Fresh buckets for every repeat so each one gets its own split.
            Map<Integer, ArrayList<String>> buckets =
                    new HashMap<Integer, ArrayList<String>>();
            for (int b = 0; b < NUM_FOLDS; b++) {
                buckets.put(b, new ArrayList<String>());
            }

            // Each text is assigned to a uniformly random bucket, so bucket
            // sizes are only approximately equal.
            for (String text : allTexts) {
                int bucketIdx = (int) (Math.random() * NUM_FOLDS);
                buckets.get(bucketIdx).add(text);
            }

            for (int held = 0; held < NUM_FOLDS; held++) {
                // Train on every bucket except the held-out one.
                NaiveBayesSentimentClassifier nbsc = new NaiveBayesSentimentClassifier();
                for (int k = 0; k < NUM_FOLDS; k++) {
                    if (k != held) {
                        nbsc.trainInstances(buckets.get(k));
                    }
                }
                // TODO: test with bucket `held` (evaluation is not implemented here).
            }
        }
    }
}
|