diff options
author | Peter Wu <peter@lekensteyn.nl> | 2014-05-20 23:45:58 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2014-05-20 23:45:58 +0200 |
commit | 1a5cc150a9f4207eb439a6c2af72bc624702a8ad (patch) | |
tree | 82f0bb7a3c7a4840ead832a1bbb216834cfe7167 /query-links.sql | |
parent | c5deea9a782602989c0c5a9004e53f617cc642d8 (diff) | |
download | d3viz-1a5cc150a9f4207eb439a6c2af72bc624702a8ad.tar.gz |
Add SQL that generates links.csv and users.csv
To get a CSV with psql, run the following with ... replaced by SQL:
COPY (...) TO STDOUT WITH CSV HEADER;
Diffstat (limited to 'query-links.sql')
-rw-r--r-- | query-links.sql | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/query-links.sql b/query-links.sql new file mode 100644 index 0000000..e455941 --- /dev/null +++ b/query-links.sql @@ -0,0 +1,29 @@ +/* +../run-psql csv "$(awk '/^)/{exit}p;/user''s/{p=1}' query-links.sql)" +*/ +WITH users AS ( + SELECT + userid, + tweetname as name, + COUNT(*) as tweetcount, + ROUND(RANDOM()) as isspam + FROM tweet JOIN twitteruser + USING (userid) + GROUP BY userid,tweetname + ORDER BY tweetCount DESC + --100k is effectively everything... + LIMIT 200000 + --OFFSET 1000 +) +SELECT + userid AS source, + replyid AS target, + COUNT(*) as value +FROM tweet s +WHERE + replyid <> 0 AND + userid <> replyid AND + (userid IN (SELECT userid FROM users) AND + replyid IN (SELECT userid FROM users)) +GROUP BY source, target +ORDER BY value DESC |