summary | refs | log | tree | commit | diff
path: root/preprocess.js
diff options
context:
space:
mode:
Diffstat (limited to 'preprocess.js')
-rw-r--r-- preprocess.js 18
1 files changed, 14 insertions, 4 deletions
diff --git a/preprocess.js b/preprocess.js
index 0bd13e5..7de2717 100644
--- a/preprocess.js
+++ b/preprocess.js
@@ -6,14 +6,14 @@
'use strict';
/* find user nodes and remove invalid edges */
-function filterEdges(data) {
+function filterEdges(data, ratelimit_max) {
// map userID to nodes
var users = {};
data.nodes.forEach(function (user, i) {
users[user.group] = user;
});
- var ratelimit_count = 0, ratelimit_max = 10;
+ var ratelimit_count = 0;
function ratelimit() {
return ++ratelimit_count <= ratelimit_max;
}
@@ -37,7 +37,7 @@ function filterEdges(data) {
}
return !invalid;
});
- if (ratelimit_count > ratelimit_max) {
+ if (ratelimit_max > 0 && ratelimit_count > ratelimit_max) {
console.log('Supressed', ratelimit_count, 'messages');
}
}
@@ -45,9 +45,19 @@ function filterEdges(data) {
function preprocess(data, options) {
console.log('Initial nodes count:', data.nodes.length);
console.log('Initial edges count:', data.edges.length);
- filterEdges(data);
+ filterEdges(data, 10);
console.log('Valid edges count:', data.edges.length);
+ if (options.minTweetCount > 0) {
+ /* filter away users with almost no tweets */
+ data.nodes = data.nodes.filter(function (node) {
+ return node.tweetCount >= options.minTweetCount;
+ });
+ console.log('Nodes count (ignoring users with fewer than',
+ options.minTweetCount, 'tweets):', data.nodes.length);
+ filterEdges(data, 0);
+ }
+
// find all related users by userID
var hasRelations = {};
data.edges.forEach(function (link) {