summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Peter Wu <peter@lekensteyn.nl> 2015-04-03 16:10:33 +0200
committer: Peter Wu <peter@lekensteyn.nl> 2015-04-03 16:10:33 +0200
commit: 1db72a38d9d051ef3f72ebc983d50cf0e14c62fd (patch)
tree: c1eaecdcdf417bf6256eb52113d6687b08b70ad6
parent: 6d0e2e299835eca671951120e72ad5898866839f (diff)
download: assignment4-1db72a38d9d051ef3f72ebc983d50cf0e14c62fd.tar.gz
Save all candidate modifications in one place
Also improve debugging to print the difference in score, and cache the evaluation result (the sentence score). Behavioral difference: the best score is now determined by the whole sentence instead of by a single word. This means that multiple faulty words will always result in a rating of 0...
-rw-r--r-- spellchecker/src/SpellCorrector.java 90
1 files changed, 48 insertions, 42 deletions
diff --git a/spellchecker/src/SpellCorrector.java b/spellchecker/src/SpellCorrector.java
index 2bc788b..53255fd 100644
--- a/spellchecker/src/SpellCorrector.java
+++ b/spellchecker/src/SpellCorrector.java
@@ -68,7 +68,7 @@ public class SpellCorrector {
// Did it find a corrected word?
if (subResult != null) {
// Yes it did. Is this the best one so far?
- if (subResult.evaluateWord(-1) > rater.evaluateWord(-1)) {
+ if (subResult.getBestScore() > rater.getBestScore()) {
System.err.println("Subresult is better!");
return subResult;
}
@@ -238,7 +238,7 @@ public class SpellCorrector {
*/
private final boolean[] words_readonly;
- private double best_sentence_probability;
+ private double sentence_probability;
private WordModification best_modification;
public SentenceRater(String[] words) {
@@ -249,13 +249,22 @@ public class SpellCorrector {
word_likelihoods[i] = getWordLikelihood(i, words[i],
LM_PROBABILITY_UNMODIFIED);
}
- // determine the rating of the current sentence without ignoring
- // words.
- best_sentence_probability = evaluateWord(-1);
+ sentence_probability = combineProbabilities(word_likelihoods);
debugScore();
}
/**
+ * Calculates the probability of a sentence as a whole.
+ */
+ private double combineProbabilities(double[] probabilities) {
+ double p = 1;
+ for (double score : probabilities) {
+ p *= score;
+ }
+ return p;
+ }
+
+ /**
* Calculates the probability that the word {@code word} is valid at
* position {@code index}.
*/
@@ -336,27 +345,21 @@ public class SpellCorrector {
*/
public void tryWord(int index, String word, double channel_probability) {
double score, p;
+ double[] scores;
- // find score and see how it affects the whole sentence.
+ scores = word_likelihoods.clone();
score = getWordLikelihood(index, word, channel_probability);
- p = score * evaluateWord(index);
+ scores[index] = score;
- if (p > best_sentence_probability) {
- best_sentence_probability = p;
- best_modification = new WordModification(index, word, score);
- }
- }
+ // group the effects of this modification for tracking.
+ WordModification effect = new WordModification(index, word, scores);
- private double evaluateWord(int ignoreIndex) {
- double p = 1;
- // calculate the probability of the combination of all word
- // probabilities.
- for (int i = 0; i < words.length; i++) {
- if (ignoreIndex != i) {
- p *= word_likelihoods[i];
- }
+ if ((best_modification != null
+ && effect.probability > best_modification.probability)
+ || effect.probability > sentence_probability) {
+ System.err.println("found better word!" + word + " p=" + effect.probability);
+ best_modification = effect;
}
- return p;
}
/**
@@ -364,13 +367,19 @@ public class SpellCorrector {
*/
public String[] getBestSentence() {
String[] new_words = words.clone();
- if (best_modification != null) {
- new_words[best_modification.index] = best_modification.word;
- }
+ assert best_modification == null : "Call saveSuggestion() first";
return new_words;
}
/**
+ * Returns the score of the current accepted sentence.
+ */
+ public double getBestScore() {
+ assert best_modification == null : "Call saveSuggestion() first";
+ return sentence_probability;
+ }
+
+ /**
* Returns true if it is likely that a word in the sentence can be
* corrected.
*/
@@ -388,11 +397,13 @@ public class SpellCorrector {
if (index >= 0) {
System.err.println("Word: " + old_word + " -> " + words[index]);
System.err.println("Word score : " + old_score
- + " -> " + word_likelihoods[index]);
+ + " -> " + word_likelihoods[index]
+ + " (" + (word_likelihoods[index] - old_score) + ")");
System.err.println("Phrase evaluation: " + old_evaluation
- + " -> " + evaluateWord(-1));
+ + " -> " + sentence_probability
+ + " (" + (sentence_probability - old_evaluation) + ")");
} else {
- System.err.println("Phrase evaluation: " + evaluateWord(-1));
+ System.err.println("Phrase evaluation: " + sentence_probability);
}
for (int i = 0; i < words.length; i++) {
System.err.println(String.format("%28s %s", words[i], word_likelihoods[i]));
@@ -407,23 +418,16 @@ public class SpellCorrector {
public void saveSuggestion() {
int index = best_modification.index;
String word = best_modification.word;
- double score = best_modification.score;
+ double[] scores = best_modification.scores;
// for debugging
String old_word = words[index];
double old_score = word_likelihoods[index];
- double old_evaluation = 0;
-
- if (DEBUG_SCORE) {
- // possibly expensive, only calculate it for debug
- old_evaluation = evaluateWord(-1);
- }
+ double old_evaluation = sentence_probability;
- // save the word and its associated score
- assert word_likelihoods[index] < score :
- "The score should only get better for word " + word
- + ". Change: " + word_likelihoods[index] + " -> " + score;
+ // save the word and the affected scores
words[index] = word;
- word_likelihoods[index] = score;
+ System.arraycopy(word_likelihoods, 0, scores, 0, words.length);
+ sentence_probability = best_modification.probability;
if (DEBUG_SCORE) {
debugScore(index, old_word, old_score, old_evaluation);
@@ -447,12 +451,14 @@ public class SpellCorrector {
private final int index;
private final String word;
- private final double score;
+ private final double[] scores;
+ private final double probability;
- public WordModification(int index, String word, double score) {
+ public WordModification(int index, String word, double[] scores) {
this.index = index;
this.word = word;
- this.score = score;
+ this.scores = scores;
+ this.probability = combineProbabilities(scores);
}
}
}