summaryrefslogtreecommitdiff
path: root/src/main/Analyzor.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/main/Analyzor.java')
-rw-r--r--src/main/Analyzor.java50
1 files changed, 50 insertions, 0 deletions
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index 7b9def5..7685cdb 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -472,6 +472,56 @@ public class Analyzor {
writer.close();
}
+ /**
+  * Rewrites a CSV file so that the first column of every data row is replaced by its category.
+  * <p>
+  * The word-to-category mapping is read from {@code categories.txt} ({@code word,category} per
+  * line; lines without a second field are skipped). The result is written to
+  * {@code categorised.csv} in UTF-8; the header line is copied unchanged, and rows whose first
+  * column has no mapping are copied unchanged as well.
+  *
+  * @param file path of the CSV file to read
+  * @throws FileNotFoundException if an input file is missing or the output cannot be created
+  * @throws UnsupportedEncodingException if UTF-8 is unsupported (every JVM must support it)
+  */
+ void categorize(String file) throws FileNotFoundException, UnsupportedEncodingException{
+
+     //get the division in categories
+     HashMap<String,String> toCategory = new HashMap<>();
+     //try-with-resources: the scanner (and its stream) is closed even on failure
+     try (Scanner readFile = new Scanner(new FileInputStream("categories.txt"))) {
+         while (readFile.hasNextLine()) {
+             //split once per line instead of three times
+             String[] parts = readFile.nextLine().split(",");
+             if (parts.length > 1) {
+                 toCategory.put(parts[0], parts[1]);
+             }
+         }
+     }
+
+     //read the csv and write the categorised copy
+     try (Scanner sc = new Scanner(new File(file));
+             PrintWriter writer = new PrintWriter("categorised.csv", "UTF-8")) {
+         //copy the first line (an empty input simply produces an empty output)
+         if (sc.hasNextLine()) {
+             writer.println(sc.nextLine());
+         }
+
+         //for every line
+         while (sc.hasNextLine()) {
+             String line = sc.nextLine();
+             //only the first column (the word) changes; the rest is copied verbatim so
+             //trailing empty columns survive, which split(",") would silently drop
+             int comma = line.indexOf(',');
+             String word = comma < 0 ? line : line.substring(0, comma);
+             String rest = comma < 0 ? "" : line.substring(comma);
+             String category = toCategory.get(word);
+             //end every row with a newline so rows do not run together on one line
+             writer.println((category != null ? category : word) + rest);
+         }
+     }
+ }
+
//replaces punctuation so it will be splitted
//also removes urls
private String splitPunctToWords(String text) {