summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Peter Wu <peter@lekensteyn.nl> 2014-05-15 15:34:21 +0200
committer Peter Wu <peter@lekensteyn.nl> 2014-05-15 16:09:25 +0200
commit ef264519937f41a728fbba068121946be03aa985 (patch)
tree 75bbd417c56e2b671739996fe55cbcbc6a9c2f36 /src
parent 8a5d104cfe02f83d4ff9256b06c21f1d0051a609 (diff)
download Goldfarmer-ef264519937f41a728fbba068121946be03aa985.tar.gz
Analyzer: cleanup, documentation, do not re-read lexicon file
Diffstat (limited to 'src')
-rw-r--r-- src/main/Analyzor.java 55
1 files changed, 24 insertions, 31 deletions
diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java
index cf01563..0fe880a 100644
--- a/src/main/Analyzor.java
+++ b/src/main/Analyzor.java
@@ -1,13 +1,7 @@
-/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */
package main;
import database.NamedPreparedStatement;
import database.QueryUtils;
-import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.Connection;
@@ -16,18 +10,15 @@ import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.Scanner;
-import java.util.logging.Level;
-import java.util.logging.Logger;
/**
- *
- * @author s123188
+ * Analyze tweets with a lexicon.
*/
public class Analyzor {
//maps for the lexicons
- HashMap<String, Double> unimap = new HashMap<String, Double>(); // Map for uni
- HashMap<String, Double> bimap = new HashMap<String, Double>(); // Map for bi
+ HashMap<String, Double> unimap = new HashMap<>(); // Map for uni
+ HashMap<String, Double> bimap = new HashMap<>(); // Map for bi
//the resultset of the query or the import
ResultSet data;
@@ -39,26 +30,29 @@ public class Analyzor {
//reads the lexicons
void readLexicon() throws FileNotFoundException {
-
- File uniFile = new File("unigrams-pmilexicon.txt"); // get uni
- File biFile = new File("bigrams-pmilexicon.txt"); // get bi
-
- Scanner uniScanner = new Scanner(uniFile);
- Scanner biScanner = new Scanner(biFile);
-
- //Fill the map of unigrams
- while (uniScanner.hasNext()) {
- unimap.put(uniScanner.next(), Double.parseDouble(uniScanner.next()));
- if (uniScanner.hasNextLine()) {
- uniScanner.nextLine();
- }
+ if (!unimap.isEmpty()) {
+ // unimap already has entries from an earlier call; do not read the files again.
+ return;
}
+ // Scanner(File) reads the file; Scanner(String) would scan the name itself (File is fully qualified: its import is gone).
+ // Unigram line (WS = whitespace): word <WS> rating <WS> ??? <WS> ??  -- columns after the rating are skipped.
+ // A bigram line has two WS-separated words instead of one.
+ try (Scanner uniScanner = new Scanner(new java.io.File("unigrams-pmilexicon.txt"));
+ Scanner biScanner = new Scanner(new java.io.File("bigrams-pmilexicon.txt"))) {
+ // Fill the map of unigrams; Double.parseDouble does not depend on the default locale (nextDouble does).
+ while (uniScanner.hasNext()) {
+ unimap.put(uniScanner.next(), Double.parseDouble(uniScanner.next()));
+ if (uniScanner.hasNextLine()) {
+ uniScanner.nextLine();
+ }
+ }
- //fill the map of bigrams
- while (biScanner.hasNext()) {
- bimap.put(biScanner.next() + " " + biScanner.next(), Double.parseDouble(biScanner.next()));
- if (biScanner.hasNextLine()) {
- biScanner.nextLine();
+ // Fill the map of bigrams; the key is the two words joined by a single space.
+ while (biScanner.hasNext()) {
+ bimap.put(biScanner.next() + " " + biScanner.next(), Double.parseDouble(biScanner.next()));
+ if (biScanner.hasNextLine()) {
+ biScanner.nextLine();
+ }
}
}
}
@@ -66,7 +60,6 @@ public class Analyzor {
//query the database
//fills the ResultSet data
void query(String query) throws SQLException {
-
PreparedStatement statement;
//make a connection to the database and execute the query
statement = connection.prepareStatement(query);