diff options
author | Maurice Laveaux <m.laveaux@student.tue.nl> | 2014-06-02 11:53:07 +0200 |
---|---|---|
committer | Maurice Laveaux <m.laveaux@student.tue.nl> | 2014-06-02 11:53:07 +0200 |
commit | bd0bcc6344baff3a80568a15e60f03810ade5971 (patch) | |
tree | 5608251a95f6001de187225c5cef516b3388eb24 | |
parent | 4578e3dab9f38f91dc8169ea7e99f452218dc858 (diff) | |
download | Datafiller-bd0bcc6344baff3a80568a15e60f03810ade5971.tar.gz |
Removes null bytes from tweet text and removes the unused brand checker
-rw-r--r-- | src/main/DataFiller.java | 46 | ||||
-rw-r--r-- | src/main/Main.java | 32 |
2 files changed, 32 insertions, 46 deletions
diff --git a/src/main/DataFiller.java b/src/main/DataFiller.java index fa013aa..e6ac6f4 100644 --- a/src/main/DataFiller.java +++ b/src/main/DataFiller.java @@ -70,6 +70,13 @@ public class DataFiller { processTweet(tweet.retweeted_status); } + /** + * Replace all null bytes, non valid UTF-8 * + */ + if (tweet.text.contains("\0")) { + tweet.text = tweet.text.replaceAll("\0", ""); + } + try { // assume that no previous transaction was started. saveTweet(tweet); @@ -83,6 +90,7 @@ public class DataFiller { private void saveTweet(Tweet tweet) throws SQLException { String text = sanitizeTweetText(tweet.text); + // ensure that the user and tweet are known before adding relations QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, tweet, text); m_insertProfile.executeUpdate(); @@ -115,37 +123,11 @@ public class DataFiller { // determine the user's perception of the brand /* - List<String> brands = getBrands(tweet); - for (String brand : brands) { - QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand); - m_insertBrand.executeUpdate(); - } - */ - } - - ArrayList<String> getBrands(Tweet tweet) { - ArrayList<String> result = new ArrayList<>(); - String text = tweet.text.toLowerCase(); - if (text.contains("samsung") || text.contains("galaxy")) { - result.add("Samsung"); - } - if (text.contains("htc") || text.contains("one")) { - result.add("HTC"); - } - if (text.contains("apple") || text.contains("iphone")) { - result.add("Apple"); - } - if (text.contains("sony") || text.contains("xperia")) { - result.add("Sony"); - } - if (text.contains("huawei") || text.contains("ascend")) { - result.add("Huawei"); - } - if (text.contains("lg")) { - result.add("LG"); - } - - // TODO: WTF IS THIS PILE OF SHIT?! 
- return result; + List<String> brands = getBrands(tweet); + for (String brand : brands) { + QueryUtils.setInsertBrandParams(m_insertBrand, tweet.id, brand); + m_insertBrand.executeUpdate(); + } + */ } } diff --git a/src/main/Main.java b/src/main/Main.java index 43e5619..7ea6bc3 100644 --- a/src/main/Main.java +++ b/src/main/Main.java @@ -117,23 +117,27 @@ public class Main implements Callable<Boolean> { /* create the object that fills the database */ DataFiller filler = new DataFiller(connection); while ((tweet = reader.getTweet()) != null) { - filler.processTweet(tweet); - ++tweetNo; + try { + filler.processTweet(tweet); + ++tweetNo; + } catch (JsonParseException ex) { + if (tweet != null) { + System.err.println("Faulty tweet " + tweetNo + ": " + tweet); + } + Logger.getLogger(Main.class.getName()).log(Level.SEVERE, + "Tweet read error", ex); + } catch (SQLException ex) { + if (tweet != null) { + System.err.println("Faulty tweet " + tweetNo + ": " + tweet); + } + Logger.getLogger(Main.class.getName()).log(Level.SEVERE, + "DB error", ex); + } } return true; - } catch (JsonParseException | IOException ex) { - if (tweet != null) { - System.err.println("Faulty tweet " + tweetNo + ": " + tweet); - } - Logger.getLogger(Main.class.getName()).log(Level.SEVERE, - "Tweet read error", ex); } catch (SQLException ex) { - if (tweet != null) { - System.err.println("Faulty tweet " + tweetNo + ": " + tweet); - } - Logger.getLogger(Main.class.getName()).log(Level.SEVERE, - "DB error", ex); - } + Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex); + } return false; } |