summaryrefslogtreecommitdiff
path: root/query-links.sql
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2014-05-20 23:45:58 +0200
committerPeter Wu <peter@lekensteyn.nl>2014-05-20 23:45:58 +0200
commit1a5cc150a9f4207eb439a6c2af72bc624702a8ad (patch)
tree82f0bb7a3c7a4840ead832a1bbb216834cfe7167 /query-links.sql
parentc5deea9a782602989c0c5a9004e53f617cc642d8 (diff)
downloadd3viz-1a5cc150a9f4207eb439a6c2af72bc624702a8ad.tar.gz
Add SQL that generates links.csv and users.csv
To get a CSV with psql, run the following with ... replaced by SQL: COPY (...) TO STDOUT WITH CSV HEADER;
Diffstat (limited to 'query-links.sql')
-rw-r--r--query-links.sql29
1 files changed, 29 insertions, 0 deletions
diff --git a/query-links.sql b/query-links.sql
new file mode 100644
index 0000000..e455941
--- /dev/null
+++ b/query-links.sql
@@ -0,0 +1,29 @@
+/*
+../run-psql csv "$(awk '/^)/{exit}p;/user''s/{p=1}' query-links.sql)"
+*/
+WITH users AS (
+ SELECT
+ userid,
+ tweetname as name,
+ COUNT(*) as tweetcount,
+ ROUND(RANDOM()) as isspam
+ FROM tweet JOIN twitteruser
+ USING (userid)
+ GROUP BY userid,tweetname
+ ORDER BY tweetCount DESC
+ --100k is effectively everything...
+ LIMIT 200000
+ --OFFSET 1000
+)
+SELECT
+ userid AS source,
+ replyid AS target,
+ COUNT(*) as value
+FROM tweet s
+WHERE
+ replyid <> 0 AND
+ userid <> replyid AND
+ (userid IN (SELECT userid FROM users) AND
+ replyid IN (SELECT userid FROM users))
+GROUP BY source, target
+ORDER BY value DESC