summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2015-04-02 16:02:47 +0200
committerPeter Wu <peter@lekensteyn.nl>2015-04-02 16:02:47 +0200
commit719cdadfdee023fe742ba76cf5b404602519497d (patch)
tree73428ce761f308774773de914c83b7775160238b
parent1fda5521c53e7db8c5ef3111fca9b92c05e6ad4f (diff)
downloadassignment4-719cdadfdee023fe742ba76cf5b404602519497d.tar.gz
Remove broken greedy approach, fix corrections limit
-rw-r--r--spellchecker/src/SpellCorrector.java56
1 files changed, 18 insertions, 38 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 8cbc8ca..bdfc8e1 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -14,11 +14,17 @@ public class SpellCorrector {
* is for unigrams, the second for bigrams, etc.
*/
private final static double[] LAMBDAS = new double[]{.25, .75};
+
/**
* The language model probability for uncorrected words.
*/
private final static double LM_PROBABILITY_UNMODIFIED = .95;
+ /**
+ * The maximum number of misspelled words to look for.
+ */
+ private final static int MAX_TYPOS = 2;
+
private final boolean DEBUG_SCORE = System.getenv("DEBUG_SCORE") != null;
public SpellCorrector(CorpusReader cr, ConfusionMatrixReader cmr) {
@@ -27,41 +33,13 @@ public class SpellCorrector {
}
/**
- * Broken greedy approach, not used.
- *
- * @param words
- */
- private void greedyApproach(String[] words) {
- SentenceRater rater = new SentenceRater(words);
-
- // Try to change 1 word and see whether the suggestions are OK.
- for (int attempt = 0; attempt < 3; attempt++) {
- for (int i = 0; i < words.length; i++) {
- final int word_index = i;
- String old_word = words[word_index];
- // try to find a better suggestion for this word.
- Map<String, Double> candidates = getCandidateWords(old_word);
- candidates.forEach((word, channel_probability) -> {
- rater.tryWord(word_index, word, channel_probability);
- });
- };
- if (rater.hasBetterSuggestion()) {
- rater.saveSuggestion();
- // TODO: make this nicer
- words = rater.getBestSentence();
- System.err.println("Suggest: " + String.join(" ", words));
- } else {
- System.err.println("No suggestion found.");
- break;
- }
- }
- }
-
- /**
+ * Tries to find up to {@code maxTypos} number of misspelled words.
*
- * @param level The number of words that have been modified so far.
+ * @param maxTypos The maximum number of look erroneous words for.
+ * @return The resulting rater containing probabilities and the corrected
+ * sentence or null if no suggestions are found.
*/
- private SentenceRater nonGreedyApproach(String[] words, int level) {
+ private SentenceRater findCorrected(String[] words, int maxTypos) {
SentenceRater rater = new SentenceRater(words);
// find best word
@@ -78,12 +56,13 @@ public class SpellCorrector {
// if a better word was found, use the change
if (rater.hasBetterSuggestion()) {
rater.saveSuggestion();
- // if there are less than 3 typos, hunt for more typos
- if (level < 3) {
+ // If some other errors are still possible, hunt for those!
+ if (maxTypos > 1) {
SentenceRater subResult;
- subResult = nonGreedyApproach(rater.words.clone(), level + 1);
- // check if other typos found
+ subResult = findCorrected(rater.getBestSentence(), maxTypos - 1);
+ // Did it find a corrected word?
if (subResult != null) {
+ // Yes it did. Is this the best one so far?
if (subResult.evaluateWord(-1) > rater.evaluateWord(-1)) {
System.err.println("Subresult is better!");
return subResult;
@@ -107,7 +86,8 @@ public class SpellCorrector {
String[] words = phrase.split(" ");
- SentenceRater rater = nonGreedyApproach(words, 0);
+ SentenceRater rater = findCorrected(words, MAX_TYPOS);
+ // if a better sentence is found, use it.
if (rater != null) {
words = rater.getBestSentence();
}