diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/analysis/BrandChecker.java | 140 |
1 files changed, 123 insertions, 17 deletions
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.Set;

/**
 * Finds the brands that are mentioned in a text.
 *
 * The rules are read from a file with one rule per line, in the form
 * {@code [brand words] - [blacklisted words]}. The part after the '-' is
 * optional. A rule matches a text when every brand word occurs in the text
 * and none of the blacklisted words does. Rules and texts are compared in
 * lower case, so the reported brand names are lower case as well.
 *
 * @author Maurice Laveaux
 */
public class BrandChecker {

    /**
     * The rules that determine the brands, in the order of the rule file.
     */
    private final List<BrandRule> ruleset = new ArrayList<>();

    /**
     * @param filename The filename that contains all the rules.
     * @throws IllegalArgumentException if the file cannot be found or if it
     * contains a malformed rule.
     */
    public BrandChecker(final String filename) {
        try {
            readFile(filename);
        } catch (FileNotFoundException ex) {
            // Keep the original exception as cause so the reason is not lost.
            throw new IllegalArgumentException("file named " + filename + " not found.", ex);
        }
    }

    /**
     * Get the brands that are in some text.
     *
     * @param text Any text, may be null.
     * @return The lower case brand names of all rules that match the text, in
     * rule file order. The list is empty (never null) when nothing matches or
     * when the text is null.
     */
    public List<String> getBrands(String text) {
        List<String> brands = new ArrayList<>();
        if (text == null) {
            return brands;
        }

        // Use the same locale as the rules, otherwise e.g. a Turkish default
        // locale lower cases 'I' differently and the words no longer match.
        Set<String> words = new HashSet<>(tokenize(text.toLowerCase(Locale.ENGLISH)));

        for (BrandRule rule : ruleset) {
            if (rule.analyze(words)) {
                brands.add(rule.getBrand());
            }
        }

        return brands;
    }

    /**
     * Reads the rule file and adds a rule for every non-blank line.
     *
     * @param filename The filename that contains all the rules.
     * @throws FileNotFoundException if the file cannot be opened.
     */
    private void readFile(final String filename) throws FileNotFoundException {
        InputStream inFile = new FileInputStream(filename);

        // Closing the scanner also closes the underlying stream.
        try (Scanner scanner = new Scanner(inFile)) {
            while (scanner.hasNextLine()) {
                parseRule(scanner.nextLine().toLowerCase(Locale.ENGLISH));
            }
        }
    }

    /**
     * Parses a single line of the rule file and adds the resulting rule.
     *
     * @param line A lower case line of the rule file; blank lines are skipped.
     * @throws IllegalArgumentException if the line contains a '-' without text
     * on both sides of it, or if it has no brand words.
     */
    private void parseRule(String line) {
        if (line.trim().isEmpty()) {
            // A whitespace-only line would otherwise yield a rule without
            // required words, which matches every text.
            return;
        }

        String brand = line.trim();
        List<String> blacklist = new ArrayList<>();

        if (line.contains("-")) {
            // positive and negative.
            String[] parts = line.split("-");
            if (parts.length < 2) {
                throw new IllegalArgumentException("Brand rule contained '-' but not two parts.");
            }

            brand = parts[0].trim();
            blacklist = tokenize(parts[1]);
        }

        List<String> names = tokenize(brand);
        if (names.isEmpty()) {
            throw new IllegalArgumentException("Brand rule has no brand name: " + line);
        }

        ruleset.add(new BrandRule(brand, names, blacklist));
    }

    /**
     * Splits a text on whitespace and drops the empty tokens that
     * {@link String#split(String)} produces for blank or indented input.
     *
     * @param text The text to split.
     * @return The words of the text, possibly empty.
     */
    private static List<String> tokenize(String text) {
        List<String> tokens = new ArrayList<>();
        for (String token : text.trim().split("\\s+")) {
            if (!token.isEmpty()) {
                tokens.add(token);
            }
        }
        return tokens;
    }

    /**
     * A single brand with the words that identify it and the words that rule
     * it out.
     */
    private static final class BrandRule {

        /**
         * The words that should all be in the text.
         */
        private final Set<String> names;

        /**
         * A blacklist of words that must not be in the text.
         */
        private final Set<String> blacklist;

        /**
         * The brand name of this rule.
         */
        private final String brand;

        /**
         * @param brandname The brand of this rule.
         * @param names The words that all have to occur in a text.
         * @param blacklist The blacklisted words.
         */
        BrandRule(final String brandname, final List<String> names, final List<String> blacklist) {
            this.brand = brandname;
            this.names = new HashSet<>(names);
            this.blacklist = new HashSet<>(blacklist);
        }

        /**
         * Analyzes if this rule holds for some text.
         *
         * @param words The distinct words of the text.
         * @return true when no blacklisted word and every brand word occurs
         * in the words, regardless of how often a word is repeated.
         */
        boolean analyze(Set<String> words) {
            for (String banned : blacklist) {
                if (words.contains(banned)) {
                    return false;
                }
            }

            return words.containsAll(names);
        }

        String getBrand() {
            return brand;
        }
    }
}