diff options
Diffstat (limited to 'query-links.sql')
-rw-r--r-- | query-links.sql | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/query-links.sql b/query-links.sql new file mode 100644 index 0000000..e455941 --- /dev/null +++ b/query-links.sql @@ -0,0 +1,29 @@ +/* +../run-psql csv "$(awk '/^)/{exit}p;/user''s/{p=1}' query-links.sql)" +*/ +WITH users AS ( + SELECT + userid, + tweetname as name, + COUNT(*) as tweetcount, + ROUND(RANDOM()) as isspam + FROM tweet JOIN twitteruser + USING (userid) + GROUP BY userid,tweetname + ORDER BY tweetCount DESC + --100k is effectively everything... + LIMIT 200000 + --OFFSET 1000 +) +SELECT + userid AS source, + replyid AS target, + COUNT(*) as value +FROM tweet s +WHERE + replyid <> 0 AND + userid <> replyid AND + (userid IN (SELECT userid FROM users) AND + replyid IN (SELECT userid FROM users)) +GROUP BY source, target +ORDER BY value DESC |