From bd0bcc6344baff3a80568a15e60f03810ade5971 Mon Sep 17 00:00:00 2001 From: Maurice Laveaux Date: Mon, 2 Jun 2014 11:53:07 +0200 Subject: Removes null bytes from tweet text and unused brandchecker --- src/main/DataFiller.java | 46 ++++++++++++++-------------------------------- src/main/Main.java | 32 ++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 46 deletions(-) diff --git a/src/main/DataFiller.java b/src/main/DataFiller.java index fa013aa..e6ac6f4 100644 --- a/src/main/DataFiller.java +++ b/src/main/DataFiller.java @@ -70,6 +70,13 @@ public class DataFiller { processTweet(tweet.retweeted_status); } + /** + * Replace all null bytes, non valid UTF-8 * + */ + if (tweet.text.contains("\0")) { + tweet.text = tweet.text.replaceAll("\0", ""); + } + try { // assume that no previous transaction was started. saveTweet(tweet); @@ -83,6 +90,7 @@ public class DataFiller { private void saveTweet(Tweet tweet) throws SQLException { String text = sanitizeTweetText(tweet.text); + // ensure that the user and tweet are known before adding relations QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, tweet, text); m_insertProfile.executeUpdate(); @@ -115,37 +123,11 @@ public class DataFiller { // determine the user's perception of the brand /* - List brands = getBrands(tweet); - for (String brand : brands) { - QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand); - m_insertBrand.executeUpdate(); - } - */ - } - - ArrayList getBrands(Tweet tweet) { - ArrayList result = new ArrayList<>(); - String text = tweet.text.toLowerCase(); - if (text.contains("samsung") || text.contains("galaxy")) { - result.add("Samsung"); - } - if (text.contains("htc") || text.contains("one")) { - result.add("HTC"); - } - if (text.contains("apple") || text.contains("iphone")) { - result.add("Apple"); - } - if (text.contains("sony") || text.contains("xperia")) { - result.add("Sony"); - } - if (text.contains("huawei") || text.contains("ascend")) { - result.add("Huawei"); - } - if (text.contains("lg")) { - result.add("LG"); - } - - // TODO: WTF IS THIS PILE OF SHIT?! - return result; + List brands = getBrands(tweet); + for (String brand : brands) { + QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand); + m_insertBrand.executeUpdate(); + } + */ } } diff --git a/src/main/Main.java b/src/main/Main.java index 43e5619..7ea6bc3 100644 --- a/src/main/Main.java +++ b/src/main/Main.java @@ -117,23 +117,27 @@ public class Main implements Callable { /* create the object that fills the database */ DataFiller filler = new DataFiller(connection); while ((tweet = reader.getTweet()) != null) { - filler.processTweet(tweet); - ++tweetNo; + try { + filler.processTweet(tweet); + ++tweetNo; + } catch (JsonParseException ex) { + if (tweet != null) { + System.err.println("Faulty tweet " + tweetNo + ": " + tweet); + } + Logger.getLogger(Main.class.getName()).log(Level.SEVERE, + "Tweet read error", ex); + } catch (SQLException ex) { + if (tweet != null) { + System.err.println("Faulty tweet " + tweetNo + ": " + tweet); + } + Logger.getLogger(Main.class.getName()).log(Level.SEVERE, + "DB error", ex); + } } return true; - } catch (JsonParseException | IOException ex) { - if (tweet != null) { - System.err.println("Faulty tweet " + tweetNo + ": " + tweet); - } - Logger.getLogger(Main.class.getName()).log(Level.SEVERE, - "Tweet read error", ex); } catch (SQLException ex) { - if (tweet != null) { - System.err.println("Faulty tweet " + tweetNo + ": " + tweet); - } - Logger.getLogger(Main.class.getName()).log(Level.SEVERE, - "DB error", ex); - } + Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex); + } return false; } -- cgit v1.2.1