diff options
Diffstat (limited to 'preprocess.js')
-rw-r--r-- | preprocess.js | 18 |
1 files changed, 14 insertions, 4 deletions
diff --git a/preprocess.js b/preprocess.js
index 0bd13e5..7de2717 100644
--- a/preprocess.js
+++ b/preprocess.js
@@ -6,14 +6,14 @@
 'use strict';
 
 /* find user nodes and remove invalid edges */
-function filterEdges(data) {
+function filterEdges(data, ratelimit_max) {
     // map userID to nodes
     var users = {};
     data.nodes.forEach(function (user, i) {
         users[user.group] = user;
     });
 
-    var ratelimit_count = 0, ratelimit_max = 10;
+    var ratelimit_count = 0;
     function ratelimit() {
         return ++ratelimit_count <= ratelimit_max;
     }
@@ -37,7 +37,7 @@ function filterEdges(data) {
         }
         return !invalid;
     });
-    if (ratelimit_count > ratelimit_max) {
+    if (ratelimit_max > 0 && ratelimit_count > ratelimit_max) {
         console.log('Supressed', ratelimit_count, 'messages');
     }
 }
@@ -45,9 +45,19 @@ function filterEdges(data) {
 function preprocess(data, options) {
     console.log('Initial nodes count:', data.nodes.length);
     console.log('Initial edges count:', data.edges.length);
-    filterEdges(data);
+    filterEdges(data, 10);
     console.log('Valid edges count:', data.edges.length);
 
+    if (options.minTweetCount > 0) {
+        /* filter away users with almost no tweets */
+        data.nodes = data.nodes.filter(function (node) {
+            return node.tweetCount >= options.minTweetCount;
+        });
+        console.log('Nodes count (ignoring users with fewer than',
+            options.minTweetCount, 'tweets):', data.nodes.length);
+        filterEdges(data, 0);
+    }
+
     // find all related users by userID
     var hasRelations = {};
     data.edges.forEach(function (link) {
|