summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authors123188 <s123188@S123188.campus.tue.nl>2014-06-11 14:02:06 +0200
committers123188 <s123188@S123188.campus.tue.nl>2014-06-11 14:02:06 +0200
commit7c501d799ace9ff2cf615a5e91b9040f2311f2c4 (patch)
tree649f838091d7b2e979457a35bd8c8432bec27217 /src
parent6f3574c5059a1edd73faa037dbb846507b5019ae (diff)
downloadGoldfarmer-7c501d799ace9ff2cf615a5e91b9040f2311f2c4.tar.gz
fixed.4
Diffstat (limited to 'src')
-rw-r--r--src/main/Analyzor.java47
1 files changed, 36 insertions, 11 deletions
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 819f7c9..05bc6b8 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -521,32 +521,57 @@ public class Analyzor {
String line;
String[] values;
+ Boolean printed;
+ HashSet<String> used;
//for every line
while(sc.hasNextLine()){
//get the values (and so the word)
line = sc.nextLine();
values = line.split(",");
+ printed = false;
//divide into categories
- if(toCategory.containsKey(values[0])){
- values[0] = toCategory.get(values[0]);
+ //substring
+ used = new HashSet<>();
+ for(String key : toCategory.keySet()){
+ if(values[0].contains(key) && !used.contains(toCategory.get(key))){
+ used.add(toCategory.get(key));
+ String[] newValues = values;
+ newValues[0] = toCategory.get(key);
+ //print it
+ writer.println(csvLine(newValues));
+ printed = true;
+ }
+ }
+ //exact word
+ if(toCategory.containsKey("#" + values[0])){
+ values[0] = toCategory.get("#" + values[0]);
}
//print it
- int length = values.length;
- int index = 0;
- writer.println("");
- for(String s : values){
- writer.print(s);
- if(!(index == length - 1)){
- writer.print(",");
- }
- index++;
+ if(!printed){
+ writer.println(csvLine(values));
}
}
writer.close();
}
+
+ String csvLine(String[] values){
+ int length = values.length;
+ int index = 0;
+ String result = "";
+
+ for(String s : values){
+ result += s;
+ if(!(index == length - 1)){
+ result += ",";
+ }
+ index++;
+ }
+
+ return result;
+ }
//replaces punctuation so it will be splitted
//also removes urls