summaryrefslogtreecommitdiff
path: root/preprocess.js
diff options
context:
space:
mode:
author Peter Wu <peter@lekensteyn.nl> 2014-05-21 15:13:16 +0200
committer Peter Wu <peter@lekensteyn.nl> 2014-05-21 15:13:16 +0200
commit 0334aa5e0051b59ce4050fd306b26119466e2991 (patch)
tree 656de6f8ab686b3340abdd4dd86d196048d47ff3 /preprocess.js
parent 0d2a9c9fd6c16ced9c684354690cc62dfd3d58f0 (diff)
download d3viz-0334aa5e0051b59ce4050fd306b26119466e2991.tar.gz
Move scripts to subdir
Our scripts end up in js/, third-party libraries end up in lib/.
Diffstat (limited to 'preprocess.js')
-rw-r--r-- preprocess.js 98
1 files changed, 0 insertions, 98 deletions
diff --git a/preprocess.js b/preprocess.js
deleted file mode 100644
index d65c7db..0000000
--- a/preprocess.js
+++ /dev/null
@@ -1,98 +0,0 @@
-/* "Optimizes" nodes and edges by dropping uninteresting ones. (for example,
- * nodes with no edges).
- */
-/* jshint devel:true */
-
-'use strict';
-
/**
 * Drops edges that cannot be used: edges whose source or target does not
 * match the `group` of any node, and edges that link a user to itself.
 *
 * `data.edges` is replaced by the filtered array; `data.nodes` is only read.
 *
 * @param {{nodes: Array, edges: Array}} data - graph data. Each node is
 *     looked up by its `group` property; each edge is checked through its
 *     `source` and `target` properties.
 * @param {number} ratelimit_max - maximum number of warnings to print.
 *     Further warnings are counted but not printed; when the limit is
 *     positive and was exceeded, a one-line summary of the number of
 *     suppressed warnings is logged. With 0 nothing is printed at all.
 */
function filterEdges(data, ratelimit_max) {
    // Map userID to node. The object has no prototype so that IDs such as
    // "constructor" or "toString" cannot match inherited properties when
    // tested with `in` below.
    var users = Object.create(null);
    data.nodes.forEach(function (user) {
        users[user.group] = user;
    });

    var ratelimit_count = 0;
    // Counts every complaint, but only approves the first ratelimit_max.
    function ratelimit() {
        return ++ratelimit_count <= ratelimit_max;
    }

    // Prints one rate-limited warning about the edge at position `index`.
    function warn(reason, userId, index, link) {
        if (ratelimit()) {
            console.warn(reason, userId, 'at line', (index + 1), link);
        }
    }

    // Filter away invalid edges. All checks run for every edge, so a single
    // edge may produce more than one warning.
    data.edges = data.edges.filter(function (link, i) {
        var invalid = false;
        if (!(link.source in users)) {
            warn('Dropping invalid source user', link.source, i, link);
            invalid = true;
        }
        if (!(link.target in users)) {
            warn('Dropping invalid target user', link.target, i, link);
            invalid = true;
        }
        if (link.source === link.target) {
            warn('Dropping self-referencing user', link.target, i, link);
            invalid = true;
        }
        return !invalid;
    });

    if (ratelimit_max > 0 && ratelimit_count > ratelimit_max) {
        // Report only the warnings that were NOT printed.
        console.log('Suppressed', ratelimit_count - ratelimit_max, 'messages');
    }
}
-
/**
 * Prepares graph data for a force layout, modifying `data` in place:
 *
 *  1. drops invalid edges (via filterEdges);
 *  2. optionally drops users with fewer than `options.minTweetCount` tweets,
 *     together with the edges that referenced them;
 *  3. optionally drops users that no remaining edge refers to;
 *  4. rewrites `source`/`target` of every edge from user IDs to indices into
 *     `data.nodes` and records the neighbours of each node in its
 *     `relatedTo` (outgoing) and `relatedFrom` (incoming) arrays.
 *
 * Progress counters are written to the console.
 *
 * @param {{nodes: Array, edges: Array}} data - nodes are identified by
 *     `group` and carry `tweetCount`; edges carry `source` and `target`.
 * @param {{minTweetCount: (number|undefined), kill_loners: (boolean|undefined)}} [options]
 *     - both settings are optional; omitting `options` disables both filters.
 */
function preprocess(data, options) {
    // Tolerate a missing options object instead of throwing on first access.
    var opts = options || {};

    console.log('Initial nodes count:', data.nodes.length);
    console.log('Initial edges count:', data.edges.length);
    filterEdges(data, 10);
    console.log('Valid edges count:', data.edges.length);

    if (opts.minTweetCount > 0) {
        /* filter away users with almost no tweets */
        data.nodes = data.nodes.filter(function (node) {
            return node.tweetCount >= opts.minTweetCount;
        });
        console.log('Nodes count (ignoring users with fewer than',
            opts.minTweetCount, 'tweets):', data.nodes.length);
        // Edges of the users removed above now point nowhere; drop them
        // without printing a warning for each one (limit 0).
        filterEdges(data, 0);
    }

    if (opts.kill_loners) {
        // Collect every userID that still takes part in an edge. The object
        // has no prototype so that `in` only matches IDs stored here.
        var hasRelations = Object.create(null);
        data.edges.forEach(function (link) {
            hasRelations[link.target] = 1;
            hasRelations[link.source] = 1;
        });

        // NOTE: an earlier, disabled idea was to keep lonely users that have
        // at least 100 tweets; currently every loner is dropped.
        data.nodes = data.nodes.filter(function (d) {
            return d.group in hasRelations;
        });
        console.log('Nodes count (after dropping loners):', data.nodes.length);
    }

    // prepare data for force layout: map user IDs to indices
    var userIds_indices = Object.create(null);
    data.nodes.forEach(function (user, i) {
        userIds_indices[user.group] = i;
        // Make sure the adjacency lists exist before edges are pushed below;
        // lists that are already present are kept as they are.
        if (!Array.isArray(user.relatedTo)) {
            user.relatedTo = [];
        }
        if (!Array.isArray(user.relatedFrom)) {
            user.relatedFrom = [];
        }
    });
    console.log('UserID to index map:', userIds_indices);

    // change userID of relation edges to indices
    data.edges.forEach(function (link) {
        link.source = userIds_indices[link.source];
        link.target = userIds_indices[link.target];
        // for faster lookup, store neighboring nodes per node
        data.nodes[link.source].relatedTo.push(link.target);
        data.nodes[link.target].relatedFrom.push(link.source);
    });
}