diff options
Diffstat (limited to 'query-links.sql')
-rw-r--r-- | query-links.sql | 22 |
1 files changed, 13 insertions, 9 deletions
diff --git a/query-links.sql b/query-links.sql index e455941..746a0fc 100644 --- a/query-links.sql +++ b/query-links.sql @@ -6,24 +6,28 @@ WITH users AS ( userid, tweetname as name, COUNT(*) as tweetcount, - ROUND(RANDOM()) as isspam - FROM tweet JOIN twitteruser + isspam AS isspam + --ROUND(RANDOM()) as isspam + FROM tweet t + JOIN twitteruser u USING (userid) - GROUP BY userid,tweetname + GROUP BY t.userid, u.userid ORDER BY tweetCount DESC --100k is effectively everything... LIMIT 200000 --OFFSET 1000 ) SELECT - userid AS source, - replyid AS target, + s.userid AS source, + r.userid AS target, COUNT(*) as value FROM tweet s +JOIN tweet r +ON s.replytweetid = r.tweetid WHERE - replyid <> 0 AND - userid <> replyid AND - (userid IN (SELECT userid FROM users) AND - replyid IN (SELECT userid FROM users)) + s.replytweetid <> 0 AND + s.userid <> r.userid AND + (s.userid IN (SELECT userid FROM users) AND + r.userid IN (SELECT userid FROM users)) GROUP BY source, target ORDER BY value DESC |