From 91d7fce947edc8a766965bad41382c2e51178f9d Mon Sep 17 00:00:00 2001 From: Maurice Laveaux Date: Mon, 2 Jun 2014 17:21:44 +0200 Subject: Added --cat command, fixed null bytes in usernames --- src/database/QueryUtils.java | 2 +- src/main/DataFiller.java | 13 ++++++++++++- src/main/Main.java | 13 +++++++++++-- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/database/QueryUtils.java b/src/database/QueryUtils.java index 4af6c09..95e7e14 100644 --- a/src/database/QueryUtils.java +++ b/src/database/QueryUtils.java @@ -23,7 +23,7 @@ public class QueryUtils { "tweetid", "createdat::timestamptz", "favcount", "retweetcount", "text", "coordinates::point", "language", "retweetid", "replytweetid", "place", - "userid"); + "userid", "category"); public final static String insertHash = buildQuery("hashtag", null, "tweetid", "hashtag"); diff --git a/src/main/DataFiller.java b/src/main/DataFiller.java index e6ac6f4..a400975 100644 --- a/src/main/DataFiller.java +++ b/src/main/DataFiller.java @@ -29,13 +29,16 @@ public class DataFiller { private final NamedPreparedStatement m_insertUserUrl; private final NamedPreparedStatement m_insertMentions; + private final int m_category; + /** * Create the datafiller object. * * @param connection The database connection to use. * @throws java.sql.SQLException on error preparing the database connection. */ - public DataFiller(Connection connection) throws SQLException { + public DataFiller(Connection connection, int category) throws SQLException { + m_category = category; m_connection = connection; m_insertTweet = new NamedPreparedStatement(m_connection, QueryUtils.insertTweet); m_insertProfile = new NamedPreparedStatement(m_connection, QueryUtils.insertProfile); @@ -76,6 +79,13 @@ public class DataFiller { if (tweet.text.contains("\0")) { tweet.text = tweet.text.replaceAll("\0", ""); } + + User user = tweet.user; + + if (user.name.contains("\0")) { + user.name = user.name.replaceAll("\0", ""); + } + try { // assume that no previous transaction was started. @@ -92,6 +102,7 @@ public class DataFiller { String text = sanitizeTweetText(tweet.text); // ensure that the user and tweet are known before adding relations + m_insertTweet.setInt("category", m_category); QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, tweet, text); m_insertProfile.executeUpdate(); m_insertTweet.executeUpdate(); diff --git a/src/main/Main.java b/src/main/Main.java index 5338003..f85c5df 100644 --- a/src/main/Main.java +++ b/src/main/Main.java @@ -79,6 +79,7 @@ public class Main implements Callable { */ private boolean skipDb; private Integer statusInterval; + private int category; public Main(String[] args) { // default connection properties @@ -89,9 +90,14 @@ public class Main implements Callable { .setDbName("twitter"); skipDb = false; statusInterval = 2; - + category = 0; + /* parse the global options. */ parseGlobalOptions(args); + + if (category == 0) { + throw new IllegalArgumentException("Please provide the --cat CATEGORY option."); + } } /** @@ -115,7 +121,7 @@ public class Main implements Callable { try (Connection connection = cb.create()) { System.err.println("Connected, starting to read tweets."); /* create the object that fills the database */ - DataFiller filler = new DataFiller(connection); + DataFiller filler = new DataFiller(connection, category); while ((tweet = reader.getTweet()) != null) { filler.processTweet(tweet); ++tweetNo; @@ -196,6 +202,8 @@ public class Main implements Callable { skipDb = true; } else if ("--status".equals(args[i])) { statusInterval = Integer.valueOf(getArg(args, ++i, "--status")); + } else if ("--cat".equals(args[i])) { + category = Integer.valueOf(getArg(args, ++i, "--cat")); } else if (args[i].startsWith("-")) { throw new IllegalArgumentException("Invalid option: " + args[i]); } else { @@ -238,6 +246,7 @@ public class Main implements Callable { " --skipdb Do not contact the database at all, just print data.", " --status SECS The interval in which import status information", " should be printed, zero disables it (defaults to 2)", + " --cat CATEGORY Set the category of filled tweets", "", "If no tweets file is given, data will be read from standard input." }; -- cgit v1.2.1