From d7964c9cc38ca171c31b945ef5d075ca7889fd21 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Wed, 21 May 2014 14:15:47 +0200 Subject: Option to ignore users with fewer than X tweets --- bubble.js | 5 ++++- preprocess.js | 18 ++++++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/bubble.js b/bubble.js index 773e481..81435e2 100644 --- a/bubble.js +++ b/bubble.js @@ -7,6 +7,8 @@ var I_GOT_MONEY = 0; // whether to ignore lonely users var KILL_LONERS = true; +// if positive, then users with fewer than this number of tweets will be ignored +var USER_MIN_TWEET_COUNT = 0; // if true, enable sticky nodes unless Ctrl is held. If false, stick only if // ctrl is held (the inverse). var STICKY_DEFAULT = true; @@ -138,7 +140,8 @@ function processData(data) { var infoPane = d3.select('#infobox'); preprocess(data, { - kill_loners: KILL_LONERS + kill_loners: KILL_LONERS, + minTweetCount: USER_MIN_TWEET_COUNT }); infoPane.select('.node-count').text(data.nodes.length); infoPane.select('.edge-count').text(data.edges.length); diff --git a/preprocess.js b/preprocess.js index 0bd13e5..7de2717 100644 --- a/preprocess.js +++ b/preprocess.js @@ -6,14 +6,14 @@ 'use strict'; /* find user nodes and remove invalid edges */ -function filterEdges(data) { +function filterEdges(data, ratelimit_max) { // map userID to nodes var users = {}; data.nodes.forEach(function (user, i) { users[user.group] = user; }); - var ratelimit_count = 0, ratelimit_max = 10; + var ratelimit_count = 0; function ratelimit() { return ++ratelimit_count <= ratelimit_max; } @@ -37,7 +37,7 @@ function filterEdges(data) { } return !invalid; }); - if (ratelimit_count > ratelimit_max) { + if (ratelimit_max > 0 && ratelimit_count > ratelimit_max) { console.log('Supressed', ratelimit_count, 'messages'); } } @@ -45,9 +45,19 @@ function filterEdges(data) { function preprocess(data, options) { console.log('Initial nodes count:', data.nodes.length); console.log('Initial edges count:', data.edges.length); - 
filterEdges(data); + filterEdges(data, 10); console.log('Valid edges count:', data.edges.length); + if (options.minTweetCount > 0) { + /* filter out users with fewer than options.minTweetCount tweets */ + data.nodes = data.nodes.filter(function (node) { + return node.tweetCount >= options.minTweetCount; + }); + console.log('Nodes count (ignoring users with fewer than', + options.minTweetCount, 'tweets):', data.nodes.length); + filterEdges(data, 0); + } + // find all related users by userID var hasRelations = {}; data.edges.forEach(function (link) { -- cgit v1.2.1