summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Peter Wu <peter@lekensteyn.nl> 2015-03-30 16:52:17 +0200
committer: Peter Wu <peter@lekensteyn.nl> 2015-03-30 16:52:17 +0200
commit: f28f52b09dcbe1d9c2416f31920f8844fc10e3a3 (patch)
tree: 2d6f39ebb902cc3c1960564b1323ce5b3dcea858
parent: 8f57be0fbb66ef25254e949c577562d71912082a (diff)
download: assignment4-f28f52b09dcbe1d9c2416f31920f8844fc10e3a3.tar.gz
Reformat code (apply consistent coding style)
-rw-r--r-- spellchecker/src/ConfusionMatrixReader.java 52
-rw-r--r-- spellchecker/src/CorpusReader.java 93
-rw-r--r-- spellchecker/src/SpellChecker.java 45
-rw-r--r-- spellchecker/src/SpellCorrector.java 55
4 files changed, 113 insertions, 132 deletions
diff --git a/spellchecker/src/ConfusionMatrixReader.java b/spellchecker/src/ConfusionMatrixReader.java
index b75da47..c9e79ab 100644
--- a/spellchecker/src/ConfusionMatrixReader.java
+++ b/spellchecker/src/ConfusionMatrixReader.java
@@ -1,5 +1,4 @@
-
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
@@ -10,58 +9,55 @@ import java.util.logging.Level;
import java.util.logging.Logger;
public class ConfusionMatrixReader {
-
+
final static String DATAFILE_LOC = "confusion_matrix.txt";
- final private HashMap<String,Integer> confusionMatrix = new HashMap<>();
- final private HashMap<String,Integer> countMatrix = new HashMap<>();
- public ConfusionMatrixReader()
- {
+ final private HashMap<String, Integer> confusionMatrix = new HashMap<>();
+ final private HashMap<String, Integer> countMatrix = new HashMap<>();
+
+ public ConfusionMatrixReader() {
try {
readConfusionMatrix();
} catch (Exception ex) {
Logger.getLogger(ConfusionMatrixReader.class.getName()).log(Level.SEVERE, null, ex);
}
}
-
- private void readConfusionMatrix()
- throws FileNotFoundException, IOException
- {
+
+ private void readConfusionMatrix()
+ throws FileNotFoundException, IOException {
FileInputStream fis;
fis = new FileInputStream(DATAFILE_LOC);
BufferedReader in = new BufferedReader(new InputStreamReader(fis));
-
- while( in.ready() )
- {
+
+ while (in.ready()) {
String line = in.readLine();
int space = line.lastIndexOf(' ');
- String keys = line.substring(0,space);
+ String keys = line.substring(0, space);
try {
- int count = Integer.parseInt(line.substring(space+1));
+ int count = Integer.parseInt(line.substring(space + 1));
confusionMatrix.put(keys, count);
- String key = keys.substring(0,keys.indexOf('|'));
+ String key = keys.substring(0, keys.indexOf('|'));
Integer value = countMatrix.get(key);
- if (value==null) {
+ if (value == null) {
value = 0;
}
- countMatrix.put(key, value+count);
- } catch(NumberFormatException e) {
- System.err.println("problems with string <"+line+">");
+ countMatrix.put(key, value + count);
+ } catch (NumberFormatException e) {
+ System.err.println("problems with string <" + line + ">");
}
}
}
-
+
/**
- * Returns the count for the pair <error>|<correct> in the confusion
- * matrix, e.g. "c|ct" is 36
- *
+ * Returns the count for the pair <error>|<correct> in the confusion matrix,
+ * e.g. "c|ct" is 36
+ *
* @param error
* @param correct
* @return
*/
- public int getConfusionCount(String error, String correct)
- {
- Integer count = confusionMatrix.get(error+"|"+correct);
- return count==null?0:count;
+ public int getConfusionCount(String error, String correct) {
+ Integer count = confusionMatrix.get(error + "|" + correct);
+ return count == null ? 0 : count;
}
}
diff --git a/spellchecker/src/CorpusReader.java b/spellchecker/src/CorpusReader.java
index e771e87..1392654 100644
--- a/spellchecker/src/CorpusReader.java
+++ b/spellchecker/src/CorpusReader.java
@@ -1,3 +1,4 @@
+
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
@@ -7,41 +8,36 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
-public class CorpusReader
-{
+public class CorpusReader {
+
final static String CNTFILE_LOC = "samplecnt.txt";
final static String VOCFILE_LOC = "samplevoc.txt";
-
- private HashMap<String,Integer> ngrams;
+
+ private HashMap<String, Integer> ngrams;
private Set<String> vocabulary;
-
- public CorpusReader() throws IOException
- {
+
+ public CorpusReader() throws IOException {
readNGrams();
readVocabulary();
}
-
+
/**
* Returns the n-gram count of <NGram> in the file
- *
- *
+ *
+ *
* @param nGram : space-separated list of words, e.g. "adopted by him"
- * @return 0 if <NGram> cannot be found,
- * otherwise count of <NGram> in file
+ * @return 0 if <NGram> cannot be found, otherwise count of <NGram> in file
*/
- public int getNGramCount(String nGram) throws NumberFormatException
- {
- if(nGram == null || nGram.length() == 0)
- {
+ public int getNGramCount(String nGram) throws NumberFormatException {
+ if (nGram == null || nGram.length() == 0) {
throw new IllegalArgumentException("NGram must be non-empty.");
}
Integer value = ngrams.get(nGram);
- return value==null?0:value;
+ return value == null ? 0 : value;
}
-
- private void readNGrams() throws
- FileNotFoundException, IOException, NumberFormatException
- {
+
+ private void readNGrams() throws
+ FileNotFoundException, IOException, NumberFormatException {
ngrams = new HashMap<>();
FileInputStream fis;
@@ -65,61 +61,54 @@ public class CorpusReader
}
}
}
-
-
+
private void readVocabulary() throws FileNotFoundException, IOException {
vocabulary = new HashSet<>();
-
+
FileInputStream fis = new FileInputStream(VOCFILE_LOC);
BufferedReader in = new BufferedReader(new InputStreamReader(fis));
-
- while(in.ready())
- {
+
+ while (in.ready()) {
String line = in.readLine();
vocabulary.add(line);
}
}
-
+
/**
* Returns the size of the number of unique words in the dataset
- *
+ *
* @return the size of the number of unique words in the dataset
*/
- public int getVocabularySize()
- {
+ public int getVocabularySize() {
return vocabulary.size();
}
-
+
/**
* Returns the subset of words in set that are in the vocabulary
- *
+ *
* @param set
- * @return
+ * @return
*/
- public HashSet<String> inVocabulary(Set<String> set)
- {
+ public HashSet<String> inVocabulary(Set<String> set) {
HashSet<String> h = new HashSet<>(set);
h.retainAll(vocabulary);
return h;
}
-
- public boolean inVocabulary(String word)
- {
- return vocabulary.contains(word);
- }
-
- public double getSmoothedCount(String NGram)
- {
- if(NGram == null || NGram.length() == 0)
- {
+
+ public boolean inVocabulary(String word) {
+ return vocabulary.contains(word);
+ }
+
+ public double getSmoothedCount(String NGram) {
+ if (NGram == null || NGram.length() == 0) {
throw new IllegalArgumentException("NGram must be non-empty.");
}
-
+
double smoothedCount = 0.0;
-
- /** ADD CODE HERE **/
-
-
- return smoothedCount;
+
+ /**
+ * ADD CODE HERE *
+ */
+ return smoothedCount;
}
}
diff --git a/spellchecker/src/SpellChecker.java b/spellchecker/src/SpellChecker.java
index cdabf92..110061a 100644
--- a/spellchecker/src/SpellChecker.java
+++ b/spellchecker/src/SpellChecker.java
@@ -1,18 +1,15 @@
-
import java.io.IOException;
import java.util.Scanner;
-
public class SpellChecker {
/**
* @param args the command line arguments
*/
- public static void main(String[] args)
- {
+ public static void main(String[] args) {
boolean inPeach = false; // set this to true if you submit to peach!!!
-
+
try {
CorpusReader cr = new CorpusReader();
ConfusionMatrixReader cmr = new ConfusionMatrixReader();
@@ -27,24 +24,24 @@ public class SpellChecker {
ex.printStackTrace();
}
}
-
- static void nonPeachTest(SpellCorrector sc) throws IOException {
- String[] sentences = {
- "at the hme locations there were traces of water"
- };
-
- for(String s0: sentences) {
- System.out.println("Input : " + s0);
- String result=sc.correctPhrase(s0);
- System.out.println("Answer: " +result);
- System.out.println();
- }
+
+ static void nonPeachTest(SpellCorrector sc) throws IOException {
+ String[] sentences = {
+ "at the hme locations there were traces of water"
+ };
+
+ for (String s0 : sentences) {
+ System.out.println("Input : " + s0);
+ String result = sc.correctPhrase(s0);
+ System.out.println("Answer: " + result);
+ System.out.println();
+ }
}
-
+
static void peachTest(SpellCorrector sc) throws IOException {
- Scanner input = new Scanner(System.in);
-
- String sentence = input.nextLine();
- System.out.println("Answer: " + sc.correctPhrase(sentence));
- }
-}
\ No newline at end of file
+ Scanner input = new Scanner(System.in);
+
+ String sentence = input.nextLine();
+ System.out.println("Answer: " + sc.correctPhrase(sentence));
+ }
+}
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 03c6ed4..c8451bf 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -1,47 +1,46 @@
+
import java.util.HashSet;
public class SpellCorrector {
+
final private CorpusReader cr;
final private ConfusionMatrixReader cmr;
-
+
final char[] ALPHABET = "abcdefghijklmnopqrstuvwxyz'".toCharArray();
-
-
- public SpellCorrector(CorpusReader cr, ConfusionMatrixReader cmr)
- {
+
+ public SpellCorrector(CorpusReader cr, ConfusionMatrixReader cmr) {
this.cr = cr;
this.cmr = cmr;
}
-
- public String correctPhrase(String phrase)
- {
- if(phrase == null || phrase.length() == 0)
- {
+
+ public String correctPhrase(String phrase) {
+ if (phrase == null || phrase.length() == 0) {
throw new IllegalArgumentException("phrase must be non-empty.");
}
-
+
String[] words = phrase.split(" ");
String finalSuggestion = "";
-
- /** CODE TO BE ADDED **/
-
+
+ /**
+ * CODE TO BE ADDED *
+ */
return finalSuggestion.trim();
}
-
- public double calculateChannelModelProbability(String suggested, String incorrect)
- {
- /** CODE TO BE ADDED **/
-
+
+ public double calculateChannelModelProbability(String suggested, String incorrect) {
+ /**
+ * CODE TO BE ADDED *
+ */
+
return 0.0;
}
-
-
- public HashSet<String> getCandidateWords(String word)
- {
+
+ public HashSet<String> getCandidateWords(String word) {
HashSet<String> ListOfWords = new HashSet<String>();
-
- /** CODE TO BE ADDED **/
-
+
+ /**
+ * CODE TO BE ADDED *
+ */
return cr.inVocabulary(ListOfWords);
- }
-}
\ No newline at end of file
+ }
+}