diff options
author | Peter Wu <peter@lekensteyn.nl> | 2014-05-15 16:08:11 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2014-05-15 16:09:25 +0200 |
commit | 879deb56f0666e66e0075df510f6a779373243d5 (patch) | |
tree | 39456dabfb14f6b11b878faa13d6ab28902523ef /src | |
parent | ef264519937f41a728fbba068121946be03aa985 (diff) | |
download | Goldfarmer-879deb56f0666e66e0075df510f6a779373243d5.tar.gz |
Lowercase bigrams and unigrams
Diffstat (limited to 'src')
-rw-r--r-- | src/main/Analyzor.java | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 0fe880a..00e3442 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -41,7 +41,8 @@ public class Analyzor {
Scanner biScanner = new Scanner("bigrams-pmilexicon.txt");) {
//Fill the map of unigrams
while (uniScanner.hasNext()) {
- unimap.put(uniScanner.next(), uniScanner.nextDouble());
+ String words = uniScanner.next();
+ unimap.put(words.toLowerCase(), uniScanner.nextDouble());
if (uniScanner.hasNextLine()) {
uniScanner.nextLine();
}
@@ -49,7 +50,8 @@ public class Analyzor {
//fill the map of bigrams
while (biScanner.hasNext()) {
- bimap.put(biScanner.next() + " " + biScanner.next(), biScanner.nextDouble());
+ String words = biScanner.next() + " " + biScanner.next();
+ bimap.put(words.toLowerCase(), biScanner.nextDouble());
if (biScanner.hasNextLine()) {
biScanner.nextLine();
}
|