diff options
author | Peter Wu <peter@lekensteyn.nl> | 2015-04-03 16:10:33 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2015-04-03 16:10:33 +0200 |
commit | 1db72a38d9d051ef3f72ebc983d50cf0e14c62fd (patch) | |
tree | c1eaecdcdf417bf6256eb52113d6687b08b70ad6 /spellchecker | |
parent | 6d0e2e299835eca671951120e72ad5898866839f (diff) | |
download | assignment4-1db72a38d9d051ef3f72ebc983d50cf0e14c62fd.tar.gz |
Save all candidate modifications in one place
Also improve the debug output to print the difference in score, and cache
the evaluation result (the sentence score).
Behavioral difference: now the best score is determined by the whole
sentence instead of one word. This means that multiple faulty words
will always result in a rating of 0....
Diffstat (limited to 'spellchecker')
-rw-r--r-- | spellchecker/src/SpellCorrector.java | 90 |
1 file changed, 48 insertions, 42 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java index 2bc788b..53255fd 100644 --- a/spellchecker/src/SpellCorrector.java +++ b/spellchecker/src/SpellCorrector.java @@ -68,7 +68,7 @@ public class SpellCorrector { // Did it find a corrected word? if (subResult != null) { // Yes it did. Is this the best one so far? - if (subResult.evaluateWord(-1) > rater.evaluateWord(-1)) { + if (subResult.getBestScore() > rater.getBestScore()) { System.err.println("Subresult is better!"); return subResult; } @@ -238,7 +238,7 @@ public class SpellCorrector { */ private final boolean[] words_readonly; - private double best_sentence_probability; + private double sentence_probability; private WordModification best_modification; public SentenceRater(String[] words) { @@ -249,13 +249,22 @@ public class SpellCorrector { word_likelihoods[i] = getWordLikelihood(i, words[i], LM_PROBABILITY_UNMODIFIED); } - // determine the rating of the current sentence without ignoring - // words. - best_sentence_probability = evaluateWord(-1); + sentence_probability = combineProbabilities(word_likelihoods); debugScore(); } /** + * Calculates the probability of a sentence as a whole. + */ + private double combineProbabilities(double[] probabilities) { + double p = 1; + for (double score : probabilities) { + p *= score; + } + return p; + } + + /** * Calculates the probability that the word {@code word} is valid at * position {@code index}. */ @@ -336,27 +345,21 @@ public class SpellCorrector { */ public void tryWord(int index, String word, double channel_probability) { double score, p; + double[] scores; - // find score and see how it affects the whole sentence. 
+ scores = word_likelihoods.clone(); score = getWordLikelihood(index, word, channel_probability); - p = score * evaluateWord(index); + scores[index] = score; - if (p > best_sentence_probability) { - best_sentence_probability = p; - best_modification = new WordModification(index, word, score); - } - } + // group the effects of this modifications for tracking. + WordModification effect = new WordModification(index, word, scores); - private double evaluateWord(int ignoreIndex) { - double p = 1; - // calculate the probability of the combination of all word - // probabilities. - for (int i = 0; i < words.length; i++) { - if (ignoreIndex != i) { - p *= word_likelihoods[i]; - } + if ((best_modification != null + && effect.probability > best_modification.probability) + || effect.probability > sentence_probability) { + System.err.println("found better word!" + word + " p=" + effect.probability); + best_modification = effect; } - return p; } /** @@ -364,13 +367,19 @@ public class SpellCorrector { */ public String[] getBestSentence() { String[] new_words = words.clone(); - if (best_modification != null) { - new_words[best_modification.index] = best_modification.word; - } + assert best_modification == null : "Call saveSuggestion() first"; return new_words; } /** + * Returns the score of the current accepted sentence. + */ + public double getBestScore() { + assert best_modification == null : "Call saveSuggestion() first"; + return sentence_probability; + } + + /** * Returns true if it is likely that a word in the sentence can be * corrected. 
*/ @@ -388,11 +397,13 @@ public class SpellCorrector { if (index >= 0) { System.err.println("Word: " + old_word + " -> " + words[index]); System.err.println("Word score : " + old_score - + " -> " + word_likelihoods[index]); + + " -> " + word_likelihoods[index] + + " (" + (word_likelihoods[index] - old_score) + ")"); System.err.println("Phrase evaluation: " + old_evaluation - + " -> " + evaluateWord(-1)); + + " -> " + sentence_probability + + " (" + (sentence_probability - old_evaluation) + ")"); } else { - System.err.println("Phrase evaluation: " + evaluateWord(-1)); + System.err.println("Phrase evaluation: " + sentence_probability); } for (int i = 0; i < words.length; i++) { System.err.println(String.format("%28s %s", words[i], word_likelihoods[i])); @@ -407,23 +418,16 @@ public class SpellCorrector { public void saveSuggestion() { int index = best_modification.index; String word = best_modification.word; - double score = best_modification.score; + double[] scores = best_modification.scores; // for debugging String old_word = words[index]; double old_score = word_likelihoods[index]; - double old_evaluation = 0; - - if (DEBUG_SCORE) { - // possibly expensive, only calculate it for debug - old_evaluation = evaluateWord(-1); - } + double old_evaluation = sentence_probability; - // save the word and its associated score - assert word_likelihoods[index] < score : - "The score should only get better for word " + word - + ". 
Change: " + word_likelihoods[index] + " -> " + score; + // save the word and the affected scores words[index] = word; - word_likelihoods[index] = score; + System.arraycopy(word_likelihoods, 0, scores, 0, words.length); + sentence_probability = best_modification.probability; if (DEBUG_SCORE) { debugScore(index, old_word, old_score, old_evaluation); @@ -447,12 +451,14 @@ public class SpellCorrector { private final int index; private final String word; - private final double score; + private final double[] scores; + private final double probability; - public WordModification(int index, String word, double score) { + public WordModification(int index, String word, double[] scores) { this.index = index; this.word = word; - this.score = score; + this.scores = scores; + this.probability = combineProbabilities(scores); } } } |