diff options
-rw-r--r-- | nbproject/project.properties | 162 | ||||
-rw-r--r-- | src/main/Analyzor.java | 78 | ||||
-rw-r--r-- | src/main/FarmShell.java | 4 |
3 files changed, 163 insertions, 81 deletions
diff --git a/nbproject/project.properties b/nbproject/project.properties index ab8ae05..b262ab6 100644 --- a/nbproject/project.properties +++ b/nbproject/project.properties @@ -1,81 +1,81 @@ -annotation.processing.enabled=true -annotation.processing.enabled.in.editor=false -annotation.processing.processors.list= -annotation.processing.run.all.processors=true -annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output -application.title=Goldfarmer -application.vendor=maurice -build.classes.dir=${build.dir}/classes -build.classes.excludes=**/*.java,**/*.form -# This directory is removed when the project is cleaned: -build.dir=build -build.generated.dir=${build.dir}/generated -build.generated.sources.dir=${build.dir}/generated-sources -# Only compile against the classpath explicitly listed here: -build.sysclasspath=ignore -build.test.classes.dir=${build.dir}/test/classes -build.test.results.dir=${build.dir}/test/results -# Uncomment to specify the preferred debugger connection transport: -#debug.transport=dt_socket -debug.classpath=\ - ${run.classpath} -debug.test.classpath=\ - ${run.test.classpath} -# Files in build.classes.dir which should be excluded from distribution jar -dist.archive.excludes= -# This directory is removed when the project is cleaned: -dist.dir=dist -dist.jar=${dist.dir}/Goldfarmer.jar -dist.javadoc.dir=${dist.dir}/javadoc -endorsed.classpath= -excludes= -file.reference.joda-time-2.3.jar=lib/joda-time-2.3.jar -file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar -includes=** -jar.compress=false -javac.classpath=\ - ${file.reference.joda-time-2.3.jar}:\ - ${file.reference.postgresql-9.3-1101.jdbc41.jar} -# Space-separated list of extra javac options -javac.compilerargs= -javac.deprecation=false -javac.processorpath=\ - ${javac.classpath} -javac.source=1.7 -javac.target=1.7 -javac.test.classpath=\ - ${javac.classpath}:\ - ${build.classes.dir}:\ - ${libs.junit_4.classpath} -javac.test.processorpath=\ - ${javac.test.classpath} -javadoc.additionalparam= -javadoc.author=false -javadoc.encoding=${source.encoding} -javadoc.noindex=false -javadoc.nonavbar=false -javadoc.notree=false -javadoc.private=false -javadoc.splitindex=true -javadoc.use=true -javadoc.version=false -javadoc.windowtitle= -main.class=main.Main -manifest.file=manifest.mf -meta.inf.dir=${src.dir}/META-INF -mkdist.disabled=false -platform.active=default_platform -project.licensePath=./nbproject/licenseheader.txt -run.classpath=\ - ${javac.classpath}:\ - ${build.classes.dir} -# Space-separated list of JVM arguments used when running the project. -# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value. -# To set system properties for unit tests define test-sys-prop.name=value: -run.jvmargs= -run.test.classpath=\ - ${javac.test.classpath}:\ - ${build.test.classes.dir} -source.encoding=UTF-8 -src.dir=src -test.src.dir=test +annotation.processing.enabled=true
+annotation.processing.enabled.in.editor=false
+annotation.processing.processors.list=
+annotation.processing.run.all.processors=true
+annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
+application.title=Goldfarmer
+application.vendor=maurice
+build.classes.dir=${build.dir}/classes
+build.classes.excludes=**/*.java,**/*.form
+# This directory is removed when the project is cleaned:
+build.dir=build
+build.generated.dir=${build.dir}/generated
+build.generated.sources.dir=${build.dir}/generated-sources
+# Only compile against the classpath explicitly listed here:
+build.sysclasspath=ignore
+build.test.classes.dir=${build.dir}/test/classes
+build.test.results.dir=${build.dir}/test/results
+# Uncomment to specify the preferred debugger connection transport:
+#debug.transport=dt_socket
+debug.classpath=\
+ ${run.classpath}
+debug.test.classpath=\
+ ${run.test.classpath}
+# Files in build.classes.dir which should be excluded from distribution jar
+dist.archive.excludes=
+# This directory is removed when the project is cleaned:
+dist.dir=dist
+dist.jar=${dist.dir}/Goldfarmer.jar
+dist.javadoc.dir=${dist.dir}/javadoc
+endorsed.classpath=
+excludes=
+file.reference.joda-time-2.3.jar=lib/joda-time-2.3.jar
+file.reference.postgresql-9.3-1101.jdbc41.jar=lib/postgresql-9.3-1101.jdbc41.jar
+includes=**
+jar.compress=false
+javac.classpath=\
+ ${file.reference.joda-time-2.3.jar}:\
+ ${file.reference.postgresql-9.3-1101.jdbc41.jar}
+# Space-separated list of extra javac options
+javac.compilerargs=
+javac.deprecation=false
+javac.processorpath=\
+ ${javac.classpath}
+javac.source=1.7
+javac.target=1.7
+javac.test.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}:\
+ ${libs.junit_4.classpath}
+javac.test.processorpath=\
+ ${javac.test.classpath}
+javadoc.additionalparam=
+javadoc.author=false
+javadoc.encoding=${source.encoding}
+javadoc.noindex=false
+javadoc.nonavbar=false
+javadoc.notree=false
+javadoc.private=false
+javadoc.splitindex=true
+javadoc.use=true
+javadoc.version=false
+javadoc.windowtitle=
+main.class=main.Main
+manifest.file=manifest.mf
+meta.inf.dir=${src.dir}/META-INF
+mkdist.disabled=false
+platform.active=default_platform
+project.licensePath=./nbproject/licenseheader.txt
+run.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}
+# Space-separated list of JVM arguments used when running the project.
+# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value.
+# To set system properties for unit tests define test-sys-prop.name=value:
+run.jvmargs=
+run.test.classpath=\
+ ${javac.test.classpath}:\
+ ${build.test.classes.dir}
+source.encoding=UTF-8
+src.dir=src
+test.src.dir=test
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index d0a2b31..128b078 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -566,6 +566,84 @@ public class Analyzor { } writer.close(); } + + void categorize(String file) throws FileNotFoundException, UnsupportedEncodingException{ + + //get the division in categories + InputStream inFile = new FileInputStream("categories.txt"); + Scanner readFile = new Scanner(inFile); + HashMap<String,String> toCategory = new HashMap<>(); + + while (readFile.hasNextLine()) { + String line = readFile.nextLine(); + if(line.split(",").length>1){ + for(String element:line.split(",")[1].split(" ")){ + toCategory.put(element, line.split(",")[0]); + } + } + } + + + //read the csv + Scanner sc = new Scanner(new File(file)); + + PrintWriter writer = new PrintWriter("categorised.csv", "UTF-8"); + //copy the first line + writer.println(sc.nextLine()); + + String line; + String[] values; + Boolean printed; + HashSet<String> used; + + //for every line + while(sc.hasNextLine()){ + //get the values (and so the word) + line = sc.nextLine(); + values = line.split(","); + printed = false; + + //divide into categories + //substring + used = new HashSet<>(); + for(String key : toCategory.keySet()){ + if(values[0].contains(key) && !used.contains(toCategory.get(key))){ + used.add(toCategory.get(key)); + String[] newValues = values; + newValues[0] = toCategory.get(key); + //print it + writer.println(csvLine(newValues)); + printed = true; + } + } + //exact word + if(toCategory.containsKey("#" + values[0])){ + values[0] = toCategory.get("#" + values[0]); + } + + //print it + if(!printed){ + writer.println(csvLine(values)); + } + } + writer.close(); + } + + String csvLine(String[] values){ + int length = values.length; + int index = 0; + String result = ""; + + for(String s : values){ + result += s; + if(!(index == length - 1)){ + result += ","; + } + index++; + } + + return result; + } //replaces punctuation so it will be splitted //also removes urls diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java index d624a71..61878d7 100644 --- a/src/main/FarmShell.java +++ b/src/main/FarmShell.java @@ -145,6 +145,9 @@ public class FarmShell { case newsspread: getAnalyzor().newsSpread(params[0]); break; + case categorize: + getAnalyzor().categorize(params[0]); + break; case getBrands: String query = params.length > 0 ? params[0].trim() : ""; String[] args = query.split("\\s+", 2); @@ -194,6 +197,7 @@ public class FarmShell { disco("makes a outputfile for disco", 1), posneg("makes a csv for a histogram for positive or negative tweets", 1), newsspread("makes a csv for disco to show a news spread process", 1), + categorize("categorizes words in csv (first column) as defined in categorize.txt", 1), exit("Returns to shell"), help("Get help"); |