summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Maurice Laveaux <m.laveaux@student.tue.nl> 2014-06-02 11:53:07 +0200
committer: Maurice Laveaux <m.laveaux@student.tue.nl> 2014-06-02 11:53:07 +0200
commit: bd0bcc6344baff3a80568a15e60f03810ade5971 (patch)
tree: 5608251a95f6001de187225c5cef516b3388eb24
parent: 4578e3dab9f38f91dc8169ea7e99f452218dc858 (diff)
download: Datafiller-bd0bcc6344baff3a80568a15e60f03810ade5971.tar.gz
Removes null bytes from tweet text and unused brandchecker
-rw-r--r-- src/main/DataFiller.java 46
-rw-r--r-- src/main/Main.java 32
2 files changed, 32 insertions, 46 deletions
diff --git a/src/main/DataFiller.java b/src/main/DataFiller.java
index fa013aa..e6ac6f4 100644
--- a/src/main/DataFiller.java
+++ b/src/main/DataFiller.java
@@ -70,6 +70,13 @@ public class DataFiller {
processTweet(tweet.retweeted_status);
}
+ /**
+ * Replace all null bytes, non valid UTF-8 *
+ */
+ if (tweet.text.contains("\0")) {
+ tweet.text = tweet.text.replaceAll("\0", "");
+ }
+
try {
// assume that no previous transaction was started.
saveTweet(tweet);
@@ -83,6 +90,7 @@ public class DataFiller {
private void saveTweet(Tweet tweet) throws SQLException {
String text = sanitizeTweetText(tweet.text);
+
// ensure that the user and tweet are known before adding relations
QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, tweet, text);
m_insertProfile.executeUpdate();
@@ -115,37 +123,11 @@ public class DataFiller {
// determine the user's perception of the brand
/*
- List<String> brands = getBrands(tweet);
- for (String brand : brands) {
- QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand);
- m_insertBrand.executeUpdate();
- }
- */
- }
-
- ArrayList<String> getBrands(Tweet tweet) {
- ArrayList<String> result = new ArrayList<>();
- String text = tweet.text.toLowerCase();
- if (text.contains("samsung") || text.contains("galaxy")) {
- result.add("Samsung");
- }
- if (text.contains("htc") || text.contains("one")) {
- result.add("HTC");
- }
- if (text.contains("apple") || text.contains("iphone")) {
- result.add("Apple");
- }
- if (text.contains("sony") || text.contains("xperia")) {
- result.add("Sony");
- }
- if (text.contains("huawei") || text.contains("ascend")) {
- result.add("Huawei");
- }
- if (text.contains("lg")) {
- result.add("LG");
- }
-
- // TODO: WTF IS THIS PILE OF SHIT?!
- return result;
+ List<String> brands = getBrands(tweet);
+ for (String brand : brands) {
+ QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand);
+ m_insertBrand.executeUpdate();
+ }
+ */
}
}
diff --git a/src/main/Main.java b/src/main/Main.java
index 43e5619..7ea6bc3 100644
--- a/src/main/Main.java
+++ b/src/main/Main.java
@@ -117,23 +117,27 @@ public class Main implements Callable<Boolean> {
/* create the object that fills the database */
DataFiller filler = new DataFiller(connection);
while ((tweet = reader.getTweet()) != null) {
- filler.processTweet(tweet);
- ++tweetNo;
+ try {
+ filler.processTweet(tweet);
+ ++tweetNo;
+ } catch (JsonParseException ex) {
+ if (tweet != null) {
+ System.err.println("Faulty tweet " + tweetNo + ": " + tweet);
+ }
+ Logger.getLogger(Main.class.getName()).log(Level.SEVERE,
+ "Tweet read error", ex);
+ } catch (SQLException ex) {
+ if (tweet != null) {
+ System.err.println("Faulty tweet " + tweetNo + ": " + tweet);
+ }
+ Logger.getLogger(Main.class.getName()).log(Level.SEVERE,
+ "DB error", ex);
+ }
}
return true;
- } catch (JsonParseException | IOException ex) {
- if (tweet != null) {
- System.err.println("Faulty tweet " + tweetNo + ": " + tweet);
- }
- Logger.getLogger(Main.class.getName()).log(Level.SEVERE,
- "Tweet read error", ex);
} catch (SQLException ex) {
- if (tweet != null) {
- System.err.println("Faulty tweet " + tweetNo + ": " + tweet);
- }
- Logger.getLogger(Main.class.getName()).log(Level.SEVERE,
- "DB error", ex);
- }
+ Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
+ }
return false;
}