From 1a5cc150a9f4207eb439a6c2af72bc624702a8ad Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Tue, 20 May 2014 23:45:58 +0200 Subject: Add SQL that generates links.csv and users.csv To get a CSV with psql, run the following with ... replaced by SQL: COPY (...) TO STDOUT WITH CSV HEADER; --- query-links.sql | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 query-links.sql diff --git a/query-links.sql b/query-links.sql new file mode 100644 index 0000000..e455941 --- /dev/null +++ b/query-links.sql @@ -0,0 +1,29 @@ +/* +../run-psql csv "$(awk '/^)/{exit}p;/user''s/{p=1}' query-links.sql)" +*/ +WITH users AS ( + SELECT + userid, + tweetname as name, + COUNT(*) as tweetcount, + ROUND(RANDOM()) as isspam + FROM tweet JOIN twitteruser + USING (userid) + GROUP BY userid,tweetname + ORDER BY tweetCount DESC + --100k is effectively everything... + LIMIT 200000 + --OFFSET 1000 +) +SELECT + userid AS source, + replyid AS target, + COUNT(*) as value +FROM tweet s +WHERE + replyid <> 0 AND + userid <> replyid AND + (userid IN (SELECT userid FROM users) AND + replyid IN (SELECT userid FROM users)) +GROUP BY source, target +ORDER BY value DESC -- cgit v1.2.1