From 879deb56f0666e66e0075df510f6a779373243d5 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Thu, 15 May 2014 16:08:11 +0200 Subject: Lowercase bigrams and unigrams --- src/main/Analyzor.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index 0fe880a..00e3442 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -41,7 +41,8 @@ public class Analyzor { Scanner biScanner = new Scanner("bigrams-pmilexicon.txt");) { //Fill the map of unigrams while (uniScanner.hasNext()) { - unimap.put(uniScanner.next(), uniScanner.nextDouble()); + String words = uniScanner.next(); + unimap.put(words.toLowerCase(), uniScanner.nextDouble()); if (uniScanner.hasNextLine()) { uniScanner.nextLine(); } @@ -49,7 +50,8 @@ public class Analyzor { //fill the map of bigrams while (biScanner.hasNext()) { - bimap.put(biScanner.next() + " " + biScanner.next(), biScanner.nextDouble()); + String words = biScanner.next() + " " + biScanner.next(); + bimap.put(words.toLowerCase(), biScanner.nextDouble()); if (biScanner.hasNextLine()) { biScanner.nextLine(); } -- cgit v1.2.1