From bb111a9be2c80c1a3feab3770c2c38a5acfe2317 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Mon, 2 Jun 2014 18:50:24 +0200 Subject: Update for new scheme --- query-links.sql | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'query-links.sql') diff --git a/query-links.sql b/query-links.sql index e455941..746a0fc 100644 --- a/query-links.sql +++ b/query-links.sql @@ -6,24 +6,28 @@ WITH users AS ( userid, tweetname as name, COUNT(*) as tweetcount, - ROUND(RANDOM()) as isspam - FROM tweet JOIN twitteruser + isspam AS isspam + --ROUND(RANDOM()) as isspam + FROM tweet t + JOIN twitteruser u USING (userid) - GROUP BY userid,tweetname + GROUP BY t.userid, u.userid ORDER BY tweetCount DESC --100k is effectively everything... LIMIT 200000 --OFFSET 1000 ) SELECT - userid AS source, - replyid AS target, + s.userid AS source, + r.userid AS target, COUNT(*) as value FROM tweet s +JOIN tweet r +ON s.replytweetid = r.tweetid WHERE - replyid <> 0 AND - userid <> replyid AND - (userid IN (SELECT userid FROM users) AND - replyid IN (SELECT userid FROM users)) + s.replytweetid <> 0 AND + s.userid <> r.userid AND + (s.userid IN (SELECT userid FROM users) AND + r.userid IN (SELECT userid FROM users)) GROUP BY source, target ORDER BY value DESC -- cgit v1.2.1