From 4f4c7cb2635c63a3eaf7941713e79ad6ae39e409 Mon Sep 17 00:00:00 2001 From: s123188 Date: Thu, 15 May 2014 14:17:47 +0200 Subject: added a method to set the pointer of the resultset before the first row --- src/main/Analyzor.java | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index 8f11186..050d5c3 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -76,12 +76,24 @@ public class Analyzor { } } + //sets the pointer before the first element of the dataset + void goToFirstRow(){ + try{ + data.beforeFirst(); + } + catch(SQLException ex){ + System.err.print("something went wrong with the dataset: SQLException"); + return; + } + catch(NullPointerException ex){ + System.err.print("dataset is null, try querying first"); + return; + } + } //analyzes the tweet on their positivity //this is just a base version void sentimentAnalysis() { - - - + //read the lexicons try{ readLexicon(); } @@ -89,6 +101,8 @@ public class Analyzor { System.out.println("could not find the lexicons, please try again"); return; } + //go to the start of te dataset + goToFirstRow(); Double value; String text; -- cgit v1.2.1 From 627b3dd7aa3597eb049f9996492198686dc138d2 Mon Sep 17 00:00:00 2001 From: s123188 Date: Thu, 15 May 2014 14:50:53 +0200 Subject: better exception handling (a.o. removed goToFirstRow), started with wordcloud --- src/main/Analyzor.java | 130 ++++++++++++++++++++++-------------------------- src/main/FarmShell.java | 5 +- 2 files changed, 63 insertions(+), 72 deletions(-) (limited to 'src') diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index 050d5c3..e893529 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -9,6 +9,7 @@ import database.NamedPreparedStatement; import database.QueryUtils; import java.io.File; import java.io.FileNotFoundException; +import java.io.IOException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; @@ -27,13 +28,13 @@ public class Analyzor { //maps for the lexicons HashMap unimap = new HashMap(); // Map for uni HashMap bimap = new HashMap(); // Map for bi - + //the resultset of the query or the import ResultSet data; Connection connection; - + //reads the lexicons - void readLexicon() throws FileNotFoundException{ + void readLexicon() throws FileNotFoundException { File uniFile = new File("unigrams-pmilexicon.txt"); // get uni File biFile = new File("bigrams-pmilexicon.txt"); // get bi @@ -57,88 +58,75 @@ public class Analyzor { } } } - + //query the database - //fills the ResultSet - void Query(String query){ - + //fills the ResultSet data + void Query(String query) throws SQLException { + PreparedStatement statement; - - try { - connection = Main.cb.create(); - statement = connection.prepareStatement(query); - data = statement.executeQuery(); - - - } - catch(SQLException ex){ - System.err.println("could not make a connection with the database"+ex); - } + //make a connection to the database and execute the query + connection = Main.cb.create(); + statement = connection.prepareStatement(query); + data = statement.executeQuery(); } - //sets the pointer before the first element of the dataset - void goToFirstRow(){ - try{ - data.beforeFirst(); - } - catch(SQLException ex){ - System.err.print("something went wrong with the dataset: SQLException"); - return; - } - catch(NullPointerException ex){ - System.err.print("dataset is null, try querying first"); - return; - } - } //analyzes the tweet on their positivity //this is just a base version - void sentimentAnalysis() { + void sentimentAnalysis() throws SQLException, IOException { //read the lexicons - try{ - readLexicon(); - } - catch(FileNotFoundException ex){ - System.out.println("could not find the lexicons, please try again"); + readLexicon(); + + //go to the start of te dataset + if (data == null) { + System.err.println("data is empty, try querying first"); return; } - //go to the start of te dataset - goToFirstRow(); - + data.beforeFirst(); + Double value; String text; - try { - //for all tuples - while (data.next()) { - //get the text - text = data.getString("text"); - // test is the tweet text you are going to analyze - String[] words = text.split("\\s+"); // text splitted into separate words - double positiverate = 0; // positive rating - - // Rate the text with unigrams - for (String word : words) { - value = unimap.get(word); - if(value != null){ - positiverate += unimap.get(word); - } + + //for all tuples + while (data.next()) { + //get the text + text = data.getString("text"); + // test is the tweet text you are going to analyze + String[] words = text.split("\\s+"); // text splitted into separate words + double positiverate = 0; // positive rating + + // Rate the text with unigrams + for (String word : words) { + value = unimap.get(word); + if (value != null) { + positiverate += unimap.get(word); } - // Rate the text with bigrams - for (int i = 0; i < words.length - 1; i++) { - String pair = words[i] + " " + words[i + 1]; - value = bimap.get(pair); - if (value != null) { - positiverate += bimap.get(pair); - } + } + // Rate the text with bigrams + for (int i = 0; i < words.length - 1; i++) { + String pair = words[i] + " " + words[i + 1]; + value = bimap.get(pair); + if (value != null) { + positiverate += bimap.get(pair); } - NamedPreparedStatement m_insertRating; - m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating); - QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"),data.getString("brand"), (int)(positiverate * 10)); - m_insertRating.executeUpdate(); - //don't print the rate - //System.out.println(text + ": " + (int) (positiverate * 10)); } - } catch (SQLException ex) { - System.err.println("text not found"); + //insert the rating into the database + NamedPreparedStatement m_insertRating; + m_insertRating = new NamedPreparedStatement(connection, QueryUtils.insertRating); + QueryUtils.setInsertParams(m_insertRating, data.getLong("tweetid"), data.getString("brand"), (int) (positiverate * 10)); + m_insertRating.executeUpdate(); + //don't print the rate + //System.out.println(text + ": " + (int) (positiverate * 10)); + } + } + + //makes a wordcloud of the tweets in the ResultSet data + void makeWordCloud() throws SQLException { + //go to the start of the ResultSet data + if (data == null) { + System.err.println("data is empty, try querying first"); + return; } + + data.beforeFirst(); } } diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java index daab973..55585c2 100644 --- a/src/main/FarmShell.java +++ b/src/main/FarmShell.java @@ -1,6 +1,7 @@ package main; import java.io.IOException; +import java.sql.SQLException; import java.util.Arrays; import java.util.NoSuchElementException; import java.util.Scanner; @@ -81,6 +82,8 @@ public class FarmShell { } catch (NoSuchElementException ex) { // thrown by the "exit" command to signal exit return false; + } catch (SQLException ex){ + System.err.println("such SQLException"); } // another satisfied customer, next! return true; @@ -131,7 +134,7 @@ public class FarmShell { "Available commands:" }; - private void execute(Command command, String[] params) throws IOException { + private void execute(Command command, String[] params) throws SQLException, IOException { if (params.length < command.getParamCount()) { throw new IllegalArgumentException("Expected " + command.getParamCount() + " parameters, got only " -- cgit v1.2.1 From d8162eb3e3d5b6008314cfdafd714c90632e0354 Mon Sep 17 00:00:00 2001 From: S129778 Date: Thu, 15 May 2014 15:01:29 +0200 Subject: punctuation in text --- src/main/Analyzor.java | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src') diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index e893529..4b30134 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -129,4 +129,12 @@ public class Analyzor { data.beforeFirst(); } + + private String replacePunct(String text) { + text = text.replaceAll("https?://\\S*", ""); + text = text.replaceAll("[!?):;\"']", " $0"); + text = text.replaceAll("[.,-](\\s|$)", " $0"); + text = text.replaceAll("\\s[(\"']", "$0 "); + return text; + } } -- cgit v1.2.1 From 87908edbd832f8e8d63b11159a6ec331592521ed Mon Sep 17 00:00:00 2001 From: s123188 Date: Thu, 15 May 2014 15:20:15 +0200 Subject: such SQLException --- src/main/Analyzor.java | 11 +++++++++++ src/main/FarmShell.java | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index 4b30134..1d0e8aa 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -90,6 +90,7 @@ public class Analyzor { while (data.next()) { //get the text text = data.getString("text"); + text = replacePunct(text); // test is the tweet text you are going to analyze String[] words = text.split("\\s+"); // text splitted into separate words double positiverate = 0; // positive rating @@ -126,10 +127,20 @@ public class Analyzor { System.err.println("data is empty, try querying first"); return; } + + //make the hashmap with the words and their frequency + HashMap wordcloud = new HashMap<>(); + //set the pointer at the start of the ResultSet data.beforeFirst(); + + while(data.next()){ + //get the text + + } } + //replaces punctuation so it will be splitted private String replacePunct(String text) { text = text.replaceAll("https?://\\S*", ""); text = text.replaceAll("[!?):;\"']", " $0"); diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java index 55585c2..d56c72a 100644 --- a/src/main/FarmShell.java +++ b/src/main/FarmShell.java @@ -83,7 +83,7 @@ public class FarmShell { // thrown by the "exit" command to signal exit return false; } catch (SQLException ex){ - System.err.println("such SQLException"); + System.err.println("such " + ex); } // another satisfied customer, next! return true; -- cgit v1.2.1 From c6a31e656283ce85cf47e539c767d312b6e84d50 Mon Sep 17 00:00:00 2001 From: s123188 Date: Thu, 15 May 2014 15:55:45 +0200 Subject: wordcloud implemented, still needs output tho --- src/main/Analyzor.java | 42 ++++++++++++++++++++++++++++++++++++------ src/main/FarmShell.java | 14 ++++++++------ 2 files changed, 44 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/main/Analyzor.java b/src/main/Analyzor.java index 1d0e8aa..51e080e 100644 --- a/src/main/Analyzor.java +++ b/src/main/Analyzor.java @@ -61,7 +61,7 @@ public class Analyzor { //query the database //fills the ResultSet data - void Query(String query) throws SQLException { + void query(String query) throws SQLException { PreparedStatement statement; //make a connection to the database and execute the query @@ -72,7 +72,10 @@ public class Analyzor { //analyzes the tweet on their positivity //this is just a base version - void sentimentAnalysis() throws SQLException, IOException { + void sentimentAnalysis(String query) throws SQLException, IOException { + + query(query); + //read the lexicons readLexicon(); @@ -121,7 +124,9 @@ public class Analyzor { } //makes a wordcloud of the tweets in the ResultSet data - void makeWordCloud() throws SQLException { + void makeWordCloud(String query) throws SQLException { + + query(query); //go to the start of the ResultSet data if (data == null) { System.err.println("data is empty, try querying first"); @@ -130,17 +135,34 @@ public class Analyzor { //make the hashmap with the words and their frequency HashMap wordcloud = new HashMap<>(); - - //set the pointer at the start of the ResultSet - data.beforeFirst(); + + String text; + String[] words; + Integer value; while(data.next()){ //get the text + text = data.getString("text"); + //remove punctuation, convert to lowercase and split on words + text = removePunct(text); + text = text.toLowerCase(); + words = text.split("\\s+"); + //count the words + for(String word : words){ + value = wordcloud.get(word); + if(value == null){ + wordcloud.put(word, 1); + } + else{ + wordcloud.put(word, value++); + } + } } } //replaces punctuation so it will be splitted + //also removes urls private String replacePunct(String text) { text = text.replaceAll("https?://\\S*", ""); text = text.replaceAll("[!?):;\"']", " $0"); @@ -148,4 +170,12 @@ public class Analyzor { text = text.replaceAll("\\s[(\"']", "$0 "); return text; } + + //removes punctuation + //also removes urls + private String removePunct(String text){ + text = text.replaceAll("https?://\\S*", ""); + text = text.replaceAll("[.,!?()-:;\"']", " "); + return text; + } } diff --git a/src/main/FarmShell.java b/src/main/FarmShell.java index d56c72a..93cb928 100644 --- a/src/main/FarmShell.java +++ b/src/main/FarmShell.java @@ -92,8 +92,9 @@ public class FarmShell { enum Command { query("make a query to the database; needed to do analysis", 1), - filterbots("marks all users as bot or not"), - sentiment("analyzes all tweets on positivity (about a brand)"), + filterbots("marks all users as bot or not", 1), + sentiment("analyzes all tweets on positivity (about a brand)", 1), + wordcloud("makes a wordcloud of the text of the tweets", 1), exit("Returns to shell"), help("Get help"); @@ -142,15 +143,16 @@ public class FarmShell { } switch (command) { case query: - //make a new Analyzor - analyzor = new Analyzor(); - analyzor.Query(params[0]); + System.err.println("isn't supported anymore, now enter query after analysis type"); break; case filterbots: System.out.println("not yet implemented"); break; case sentiment: - analyzor.sentimentAnalysis(); + analyzor.sentimentAnalysis(params[0]); + break; + case wordcloud: + analyzor.makeWordCloud(params[0]); break; case help: for (String line : HELP) { -- cgit v1.2.1