blob: 746a0fc14da59f6ad511250b94633288fbefe934 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
/*
../run-psql csv "$(awk '/^)/{exit}p;/user''s/{p=1}' query-links.sql)"
*/
WITH users AS (
SELECT
userid,
tweetname as name,
COUNT(*) as tweetcount,
isspam AS isspam
--ROUND(RANDOM()) as isspam
FROM tweet t
JOIN twitteruser u
USING (userid)
GROUP BY t.userid, u.userid
ORDER BY tweetCount DESC
--100k is effectively everything...
LIMIT 200000
--OFFSET 1000
)
SELECT
s.userid AS source,
r.userid AS target,
COUNT(*) as value
FROM tweet s
JOIN tweet r
ON s.replytweetid = r.tweetid
WHERE
s.replytweetid <> 0 AND
s.userid <> r.userid AND
(s.userid IN (SELECT userid FROM users) AND
r.userid IN (SELECT userid FROM users))
GROUP BY source, target
ORDER BY value DESC
|