summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Maurice Laveaux <m.laveaux@student.tue.nl> 2014-06-02 17:21:44 +0200
committer: Maurice Laveaux <m.laveaux@student.tue.nl> 2014-06-02 17:21:44 +0200
commit: 91d7fce947edc8a766965bad41382c2e51178f9d (patch)
tree: d8697ef34dd9d3f2b33d2ec64951d398aaabf50a
parent: 71cdb034f1f1218874d46cb57adac02d60793050 (diff)
download: Datafiller-91d7fce947edc8a766965bad41382c2e51178f9d.tar.gz
Added --cat command, fixed null bytes in usernames
-rw-r--r-- src/database/QueryUtils.java 2
-rw-r--r-- src/main/DataFiller.java 13
-rw-r--r-- src/main/Main.java 13
3 files changed, 24 insertions, 4 deletions
diff --git a/src/database/QueryUtils.java b/src/database/QueryUtils.java
index 4af6c09..95e7e14 100644
--- a/src/database/QueryUtils.java
+++ b/src/database/QueryUtils.java
@@ -23,7 +23,7 @@ public class QueryUtils {
"tweetid", "createdat::timestamptz", "favcount", "retweetcount",
"text", "coordinates::point",
"language", "retweetid", "replytweetid", "place",
- "userid");
+ "userid", "category");
public final static String insertHash
= buildQuery("hashtag", null, "tweetid", "hashtag");
diff --git a/src/main/DataFiller.java b/src/main/DataFiller.java
index e6ac6f4..a400975 100644
--- a/src/main/DataFiller.java
+++ b/src/main/DataFiller.java
@@ -29,13 +29,16 @@ public class DataFiller {
private final NamedPreparedStatement m_insertUserUrl;
private final NamedPreparedStatement m_insertMentions;
+ private final int m_category;
+
/**
* Create the datafiller object.
*
* @param connection The database connection to use.
* @throws java.sql.SQLException on error preparing the database connection.
*/
- public DataFiller(Connection connection) throws SQLException {
+ public DataFiller(Connection connection, int category) throws SQLException {
+ m_category = category;
m_connection = connection;
m_insertTweet = new NamedPreparedStatement(m_connection, QueryUtils.insertTweet);
m_insertProfile = new NamedPreparedStatement(m_connection, QueryUtils.insertProfile);
@@ -76,6 +79,13 @@ public class DataFiller {
if (tweet.text.contains("\0")) {
tweet.text = tweet.text.replaceAll("\0", "");
}
+
+ User user = tweet.user;
+
+ if (user.name.contains("\0")) {
+ user.name = user.name.replaceAll("\0", "");
+ }
+
try {
// assume that no previous transaction was started.
@@ -92,6 +102,7 @@ public class DataFiller {
String text = sanitizeTweetText(tweet.text);
// ensure that the user and tweet are known before adding relations
+ m_insertTweet.setInt("category", m_category);
QueryUtils.setInsertParams(m_insertTweet, m_insertProfile, tweet, text);
m_insertProfile.executeUpdate();
m_insertTweet.executeUpdate();
diff --git a/src/main/Main.java b/src/main/Main.java
index 5338003..f85c5df 100644
--- a/src/main/Main.java
+++ b/src/main/Main.java
@@ -79,6 +79,7 @@ public class Main implements Callable<Boolean> {
*/
private boolean skipDb;
private Integer statusInterval;
+ private int category;
public Main(String[] args) {
// default connection properties
@@ -89,9 +90,14 @@ public class Main implements Callable<Boolean> {
.setDbName("twitter");
skipDb = false;
statusInterval = 2;
-
+ category = 0;
+
/* parse the global options. */
parseGlobalOptions(args);
+
+ if (category == 0) {
+ throw new IllegalArgumentException("Please provide the --cat CATEGORY option.");
+ }
}
/**
@@ -115,7 +121,7 @@ public class Main implements Callable<Boolean> {
try (Connection connection = cb.create()) {
System.err.println("Connected, starting to read tweets.");
/* create the object that fills the database */
- DataFiller filler = new DataFiller(connection);
+ DataFiller filler = new DataFiller(connection, category);
while ((tweet = reader.getTweet()) != null) {
filler.processTweet(tweet);
++tweetNo;
@@ -196,6 +202,8 @@ public class Main implements Callable<Boolean> {
skipDb = true;
} else if ("--status".equals(args[i])) {
statusInterval = Integer.valueOf(getArg(args, ++i, "--status"));
+ } else if ("--cat".equals(args[i])) {
+ category = Integer.valueOf(getArg(args, ++i, "--cat"));
} else if (args[i].startsWith("-")) {
throw new IllegalArgumentException("Invalid option: " + args[i]);
} else {
@@ -238,6 +246,7 @@ public class Main implements Callable<Boolean> {
" --skipdb Do not contact the database at all, just print data.",
" --status SECS The interval in which import status information",
" should be printed, zero disables it (defaults to 2)",
+ " --cat CATEGORY Set the category of filled tweets",
"",
"If no tweets file is given, data will be read from standard input."
};