summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author S129778 <S129778@S129778.campus.tue.nl> 2014-05-19 11:18:32 +0200
committer S129778 <S129778@S129778.campus.tue.nl> 2014-05-19 11:18:32 +0200
commit 8dac76108aeee3c155d695897156d624ce88960c (patch)
tree f66759e695b7ddfa19fcb6cc1eaed5cb07f34276
parent adb75e8dd80edbd3ad4035a7540af8b060fa5dff (diff)
parent b1bbd92fdcf9f52d38852b6c5ae802cc401ff712 (diff)
download Goldfarmer-8dac76108aeee3c155d695897156d624ce88960c.tar.gz
Merge origin/master
-rwxr-xr-x brandrules.txt 102
-rw-r--r-- src/analysis/BrandChecker.java 91
-rw-r--r-- test/analysis/BrandCheckerTest.java 9
3 files changed, 128 insertions, 74 deletions
diff --git a/brandrules.txt b/brandrules.txt
index ae7d6f9..f89cddd 100755
--- a/brandrules.txt
+++ b/brandrules.txt
@@ -1,54 +1,58 @@
-galaxy s5 - tablet tab
-galaxy s4 - tablet tab
-galaxy s3
-galaxy K zoom
-galaxy note
-samsung note
-samsung zoom
+galaxy - galaxy - s5,s4,s3,zoom,note
+galaxy s5 - galaxy,s5
+galaxy s4 - galaxy,s4
+galaxy s3 - galaxy,s3
+galaxy K-Zoom - galaxy,k,zoom
+galaxy note - galaxy,note
-iphone 5
-iphone 5c
-iphone 5s
-iphone 4
-iphone 4s
+iphone - iphone - 4,4s,5,5s,5c
+iphone 4 - iphone,4
+iphone 4s - iphone,4s
+iphone 5 - iphone,5
+iphone 5s - iphone,5s
+iphone 5c - iphone,5c
-huawei ascend
-huawei p6
-huawei p7
-huawei mini
-huawei y300
-huawei y530
-huawei mate
-huawei g700
-huawei g510
-huawei g6
-huawei g525
+huawei - huawei - ascend,p6,p7,mini,y300,y530,mate,g700,g510,g6,g525
+huawei ascend - huawei,ascend
+huawei p6 - huawei,p6
+huawei p7 - huawei,p7
+huawei mini - huawei,mini
+huawei y300 - huawei,y300
+huawei y530 - huawei,y530
+huawei mate - huawei,mate
+huawei g700 - huawei,g700
+huawei g510 - huawei,g510
+huawei g6 - huawei,g6
+huawei g525 - huawei,g525
-sony xperia
-sony L
-sony E1
-sony Z
-sony Z1
-sony Z2
-sony compact
-sony ZR
-sony M
+sony - sony - xperia,L,E1,Z,Z1,Z2,compact,ZR,M
+sony xperia - sony,xperia
+sony L - sony,L
+sony E1 - sony,E1
+sony Z - sony,Z
+sony Z1 - sony,Z1
+sony Z2 - sony,Z2
+sony compact - sony,compact
+sony ZR - sony,ZR
+sony M - sony,M
-HTC one - phone
-HTC M8
-HTC mini
-HTC desire
-HTC X dual
-HTC SV
+HTC - htc - one,m8,mini,desire,dual,x,sv
+HTC one - htc,one
+HTC M8 - htc,m8
+HTC mini - htc,mini
+HTC desire - htc,desire
+HTC X dual - htc,x,dual
+HTC SV - htc,sv
-LG Nexus 5
-LG G2
-LG L70
-LG L90
-LG L40
-LG G flex
-LG mini
-LG L9
-LG L7
-LG L5
-LG L3
+LG - lg - nexus,g2,l70,l90,flex,mini,l9,l7,l5,l3
+LG Nexus 5 - nexus,5
+LG G2 - lg,g2
+LG L70 - lg,l70
+LG L90 - lg,l90
+LG L40 - lg,l40
+LG G flex - lg,g,flex
+LG mini - lg,mini
+LG L9 - lg,l9
+LG L7 - lg,l7
+LG L5 - lg,l5
+LG L3 - lg,l3
diff --git a/src/analysis/BrandChecker.java b/src/analysis/BrandChecker.java
index 740424c..5bde55b 100644
--- a/src/analysis/BrandChecker.java
+++ b/src/analysis/BrandChecker.java
@@ -19,6 +19,7 @@ import java.io.FileNotFoundException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
@@ -57,6 +58,7 @@ public class BrandChecker {
* @return The list of brands that are contained in this text or null.
*/
public List<String> getBrands(String text) {
+ text = removePunct(text);
String[] words = text.toLowerCase().split("\\s+");
List<String> brands = new ArrayList();
@@ -70,6 +72,9 @@ public class BrandChecker {
return brands;
}
+ /**
+ * Reads the file and parses the rules, which are added to the ruleset.
+ */
private void readFile(final String filename) throws FileNotFoundException {
InputStream inFile = new FileInputStream(filename);
Scanner readFile = new Scanner(inFile);
@@ -81,29 +86,52 @@ public class BrandChecker {
}
}
+ /**
+ * Parses the line and adds the BrandRule to the ruleset.
+ */
private void parseRule(String line) {
if (line.isEmpty()) {
return;
}
-
+
if (!line.contains("-")) {
- // only positive search entries.
- String[] sequence = line.split("\\s+");
- String[] blacklist = {""};
- ruleset.add(new BrandRule(line, sequence, blacklist));
+ System.err.println("illformatted rule: " + line + ", missing -");
} else {
String[] parts = line.split("-");
// positive and negative.
if (parts.length < 2) {
- throw new IllegalArgumentException("Brand rule contained '-' but not two parts.");
+ System.err.println("illformatted rule: " + line + ", missing <name> - <positive>.");
+ return;
}
- String[] sequence = parts[0].trim().split("\\s+");
- String[] blacklist = parts[1].trim().split("\\s+");
+ if (parts.length > 4) {
+ System.err.println("illformatted rule: " + line + ", forth part with - was given thus will be ignored.");
+ }
- ruleset.add(new BrandRule(parts[0].trim(), sequence, blacklist));
+ // Read the <name> line.
+ String name = parts[0].trim();
+
+ // Read the positive words.
+ String positive = parts[1].replaceAll(" ","");
+ String[] sequence = positive.split(",");
+
+ if (parts.length == 3) {
+ String negative = parts[2].replaceAll(" ", "");
+ String[] blacklist = negative.split(",");
+ ruleset.add(new BrandRule(name, sequence, blacklist));
+ } else {
+ ruleset.add(new BrandRule(name, sequence, null));
+ }
}
+ }
+ /**
+ * Removes punctuation and urls.
+ */
+ private String removePunct(String text) {
+ //text = text.replaceAll("https?://\\S*", "");
+ text = text.replaceAll(",", " ");
+ return text;
}
private class BrandRule {
@@ -111,8 +139,8 @@ public class BrandChecker {
/**
* The words that should be in the text.
*/
- private final ArrayList<String> names;
-
+ private final HashMap<String, Boolean> names;
+
/**
* A blacklist of words that are not interesting.
*/
@@ -131,8 +159,16 @@ public class BrandChecker {
*/
public BrandRule(final String brandname, final String[] names, final String[] blacklist) {
this.brand = brandname;
- this.names = new ArrayList(Arrays.asList(names));
- this.blacklist = new HashSet(Arrays.asList(blacklist));
+ this.names = new HashMap();
+ if (blacklist != null) {
+ this.blacklist = new HashSet(Arrays.asList(blacklist));
+ } else {
+ this.blacklist = null;
+ }
+
+ for (String name : names) {
+ this.names.put(name, Boolean.FALSE);
+ }
}
/**
@@ -141,27 +177,36 @@ public class BrandChecker {
* @param words A list of words in a line.
*/
public boolean analyze(String[] words) {
+ reset();
+
int found = 0;
for (String word : words) {
- if (blacklist.contains(word)) {
- return false;
- }
-
- if (names.contains(word)) {
- found++;
+ if (blacklist != null) {
+ if (blacklist.contains(word)) {
+ return false;
+ }
}
- }
- if (found == names.size()) {
- return true;
+ if (names.containsKey(word)) {
+ if (names.get(word) == false) {
+ found++;
+ names.put(word, Boolean.TRUE);
+ }
+ }
}
- return false;
+ return found == names.size();
}
public String getBrand() {
return brand;
}
+
+ private void reset() {
+ for (String name : this.names.keySet()) {
+ this.names.put(name, Boolean.FALSE);
+ }
+ }
}
}
diff --git a/test/analysis/BrandCheckerTest.java b/test/analysis/BrandCheckerTest.java
index 06818ac..23d8445 100644
--- a/test/analysis/BrandCheckerTest.java
+++ b/test/analysis/BrandCheckerTest.java
@@ -74,13 +74,18 @@ public class BrandCheckerTest {
@Test
public void testMultiple() {
doTest("QBD - Black in Ear Earphones. 3.5mm Jack Plug for Apple iPod, "
- + "IPhone 4, 4S, 5, 5S, 5C, Ipad Air, Ipad Mini",
- new String[]{"iphone 4", "iphone 4s", "iphone 5S", "iphone 5c"});
+ + "IPhone 4, 4S, 5, 5S, 5C, Ipad Air, Ipad Mini",
+ new String[]{"iphone 4", "iphone 4s", "iphone 5s", "iphone 5c", "iphone 5"});
}
@Test
public void testBullshit() {
doTest("This applepie is delicious", new String[]{});
}
+
+ @Test
+ public void multipleBrands() {
+ doTest("This tweet contains both iphone 4s,galaxy s5 and iphone", new String[]{"iphone 4s","galaxy s5"});
+ }
}