summaryrefslogtreecommitdiff
path: root/src/Chapter4/util
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2014-04-23 12:22:20 +0200
committerPeter Wu <peter@lekensteyn.nl>2014-04-23 12:22:20 +0200
commit14d7547cd31c5be878e377a4a5370f604c8d59d4 (patch)
tree003840f1a21d39b07d45cd3112c38b6eed40e3ab /src/Chapter4/util
downloadTwitterDataAnalytics-14d7547cd31c5be878e377a4a5370f604c8d59d4.tar.gz
Initial commit
build.xml, etc. are modified a bit after opening in Netbeans 7.4.
Diffstat (limited to 'src/Chapter4/util')
-rw-r--r--src/Chapter4/util/BetweennessScorer.java25
-rw-r--r--src/Chapter4/util/EigenVectorScorer.java64
-rw-r--r--src/Chapter4/util/InDegreeScorer.java30
-rw-r--r--src/Chapter4/util/TweetFileProcessor.java76
-rw-r--r--src/Chapter4/util/TweetFileToGraph.java77
5 files changed, 272 insertions, 0 deletions
diff --git a/src/Chapter4/util/BetweennessScorer.java b/src/Chapter4/util/BetweennessScorer.java
new file mode 100644
index 0000000..0926d34
--- /dev/null
+++ b/src/Chapter4/util/BetweennessScorer.java
@@ -0,0 +1,25 @@
+package util;
+
+import GraphElements.RetweetEdge;
+import GraphElements.UserNode;
+import edu.uci.ics.jung.algorithms.scoring.BetweennessCentrality;
+import edu.uci.ics.jung.algorithms.scoring.VertexScorer;
+import edu.uci.ics.jung.algorithms.shortestpath.DijkstraShortestPath;
+import edu.uci.ics.jung.graph.Graph;
+import edu.uci.ics.jung.graph.Hypergraph;
+
+/**
+ * A JUNG vertex scorer that reports the betweenness centrality of each node.
+ *
+ * The scores are produced by JUNG's own {@link BetweennessCentrality}
+ * implementation; this class only adapts it to the retweet graph types.
+ */
+public class BetweennessScorer implements VertexScorer<UserNode, Double>{
+
+    // JUNG scorer that all score lookups are delegated to.
+    private final BetweennessCentrality<UserNode, RetweetEdge> centrality;
+
+    /**
+     * Initialize the scorer for the given graph.
+     * @param graph
+     *   The graph we wish to score. It must be a {@link Graph}, because
+     *   betweenness is defined over paths between pairs of vertices.
+     * @throws IllegalArgumentException
+     *   if {@code graph} is null or is not a {@link Graph}.
+     */
+    public BetweennessScorer(Hypergraph<UserNode, RetweetEdge> graph){
+        // Fail with a descriptive message instead of a bare ClassCastException.
+        if(!(graph instanceof Graph)){
+            throw new IllegalArgumentException(
+                    "BetweennessScorer requires a Graph, but got: "
+                    + (graph == null ? "null" : graph.getClass().getName()));
+        }
+        centrality = new BetweennessCentrality<UserNode, RetweetEdge>(
+                (Graph<UserNode, RetweetEdge>) graph);
+    }
+
+    /**
+     * @return The betweenness score JUNG reports for the vertex.
+     */
+    @Override
+    public Double getVertexScore(UserNode arg0) {
+        return centrality.getVertexScore(arg0);
+    }
+
+}
diff --git a/src/Chapter4/util/EigenVectorScorer.java b/src/Chapter4/util/EigenVectorScorer.java
new file mode 100644
index 0000000..da0c1a8
--- /dev/null
+++ b/src/Chapter4/util/EigenVectorScorer.java
@@ -0,0 +1,64 @@
+package Chapter4.util;
+
+import GraphElements.RetweetEdge;
+import GraphElements.UserNode;
+import cern.colt.matrix.DoubleMatrix2D;
+import cern.colt.matrix.impl.SparseDoubleMatrix2D;
+import cern.colt.matrix.linalg.EigenvalueDecomposition;
+import edu.uci.ics.jung.algorithms.scoring.VertexScorer;
+import edu.uci.ics.jung.graph.Hypergraph;
+
+/**
+ * This is a Jung Node Scorer that computes the Eigenvector Centrality for each node.
+ */
+public class EigenVectorScorer implements VertexScorer<UserNode, Double> {
+
+ private UserNode[] users;
+ private DoubleMatrix2D eigenVectors;
+ private int dominantEigenvectorIdx;
+
+ public EigenVectorScorer(Hypergraph<UserNode, RetweetEdge> graph){
+ users = new UserNode[graph.getVertexCount()];
+ graph.getVertices().toArray(users);
+
+ /* Step 1: Create the adjacency matrix.
+ *
+ * An adjacency matrix is a matrix with N users and N columns,
+ * where N is the number of nodes in the network.
+ * An entry in the matrix is 1 when node i connects to node j,
+ * and 0 otherwise.
+ */
+ SparseDoubleMatrix2D matrix = new SparseDoubleMatrix2D(users.length, users.length);
+ for(int i = 0; i < users.length; i++){
+ for(int j = 0; j < users.length; j++){
+ matrix.setQuick(i, j, graph.containsEdge(new RetweetEdge(users[i], users[j])) ? 1 : 0);
+ }
+ }
+
+ /* Step 2: Find the principle eigenvector.
+ * For more information on eigen-decomposition please see
+ * http://mathworld.wolfram.com/EigenDecomposition.html
+ */
+ EigenvalueDecomposition eig = new EigenvalueDecomposition(matrix);
+ DoubleMatrix2D eigenVals = eig.getD();
+ eigenVectors = eig.getV();
+
+ dominantEigenvectorIdx = 0;
+ for(int i = 1; i < eigenVals.columns(); i++){
+ if(eigenVals.getQuick(dominantEigenvectorIdx, dominantEigenvectorIdx) <
+ eigenVals.getQuick(i, i)){
+ dominantEigenvectorIdx = i;
+ }
+ }
+ }
+
+ public Double getVertexScore(UserNode arg0) {
+ for(int i = 0; i < users.length; i++){
+ if(users[i].equals(arg0)){
+ return Math.abs(eigenVectors.getQuick(i, dominantEigenvectorIdx));
+ }
+ }
+ return null;
+ }
+
+}
diff --git a/src/Chapter4/util/InDegreeScorer.java b/src/Chapter4/util/InDegreeScorer.java
new file mode 100644
index 0000000..014adc6
--- /dev/null
+++ b/src/Chapter4/util/InDegreeScorer.java
@@ -0,0 +1,30 @@
+package Chapter4.util;
+
+import edu.uci.ics.jung.algorithms.scoring.VertexScorer;
+import edu.uci.ics.jung.graph.Hypergraph;
+
+/**
+ * This is a Jung Node Scorer that computes the
+ * In-Degree Centrality of nodes.
+ */
+public class InDegreeScorer<T> implements VertexScorer<T, Double>{
+
+ //The graph representation in JUNG.
+ private Hypergraph<T, ?> graph;
+
+ /**
+ * Initialize the graph scorer.
+ * @param graph
+ * The graph we wish to score.
+ */
+ public InDegreeScorer(Hypergraph<T, ?> graph){
+ this.graph = graph;
+ }
+
+ /**
+ * @return The In-Degree Centrality of the vertex.
+ */
+ public Double getVertexScore(T node) {
+ return (double) graph.getInEdges(node).size();
+ }
+} \ No newline at end of file
diff --git a/src/Chapter4/util/TweetFileProcessor.java b/src/Chapter4/util/TweetFileProcessor.java
new file mode 100644
index 0000000..9b6b99c
--- /dev/null
+++ b/src/Chapter4/util/TweetFileProcessor.java
@@ -0,0 +1,76 @@
+package Chapter4.util;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.util.Iterator;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+/**
+ * Iterates over a file that holds one JSON-encoded tweet per line.
+ *
+ * The file is read as UTF-8, one line ahead of the caller. The underlying
+ * reader is closed as soon as the end of the file is reached or a read fails.
+ */
+public class TweetFileProcessor implements Iterator<JSONObject>{
+
+    private static final Logger LOGGER = Logger.getLogger(TweetFileProcessor.class.getName());
+
+    // Reader over the tweet file; stays null if the file could not be opened.
+    protected BufferedReader fileBuffer;
+    // True once there is no further line to hand out.
+    protected boolean endOfFile;
+    // The line that the next call to next() will parse.
+    protected String nextLine;
+
+    /**
+     * Opens the file and pre-reads its first line.
+     *
+     * Failures are logged rather than thrown; the iterator is then simply empty.
+     * @param f
+     *   The file to read tweets from.
+     */
+    public TweetFileProcessor(File f){
+        endOfFile = false;
+        try {
+            fileBuffer = new BufferedReader(
+                    new InputStreamReader(new FileInputStream(f), "UTF-8"));
+            nextLine = fileBuffer.readLine();
+            // An empty file yields no first line: there is nothing to iterate.
+            endOfFile = (nextLine == null);
+        } catch (IOException e) {
+            // Also covers FileNotFoundException and UnsupportedEncodingException.
+            LOGGER.log(Level.SEVERE, "Unable to read tweet file " + f, e);
+            endOfFile = true;
+        }
+        if(endOfFile){
+            closeReader();
+        }
+    }
+
+    /**
+     * @return true while at least one more line is available.
+     */
+    @Override
+    public boolean hasNext() {
+        return !endOfFile;
+    }
+
+    /**
+     * Parses the current line and advances to the following one.
+     *
+     * @return The parsed tweet, or null when the line is not valid JSON
+     *   (the parse error is logged); callers must handle null.
+     * @throws java.util.NoSuchElementException
+     *   if there are no more lines.
+     */
+    @Override
+    public JSONObject next() {
+        if(endOfFile){
+            throw new java.util.NoSuchElementException("No more tweets in file");
+        }
+        JSONObject obj = null;
+        try {
+            obj = new JSONObject(nextLine);
+        } catch (JSONException ex) {
+            LOGGER.log(Level.SEVERE, null, ex);
+        }
+        try {
+            nextLine = fileBuffer.readLine();
+        } catch (IOException e) {
+            // Stop iterating on a read failure; otherwise the same line
+            // would be handed out again on every subsequent call.
+            LOGGER.log(Level.SEVERE, "Error while reading tweet file", e);
+            nextLine = null;
+        }
+        if(nextLine == null){
+            endOfFile = true;
+            closeReader();
+        }
+        return obj;
+    }
+
+    /**
+     * Removal is not supported.
+     * @throws UnsupportedOperationException always.
+     */
+    @Override
+    public void remove() throws UnsupportedOperationException{
+        throw new UnsupportedOperationException();
+    }
+
+    // Closes the reader, if one was opened, logging any failure to do so.
+    private void closeReader(){
+        if(fileBuffer == null){
+            return;
+        }
+        try {
+            fileBuffer.close();
+        } catch (IOException e) {
+            LOGGER.log(Level.WARNING, "Unable to close tweet file", e);
+        }
+    }
+}
diff --git a/src/Chapter4/util/TweetFileToGraph.java b/src/Chapter4/util/TweetFileToGraph.java
new file mode 100644
index 0000000..6cf2e3a
--- /dev/null
+++ b/src/Chapter4/util/TweetFileToGraph.java
@@ -0,0 +1,77 @@
+package Chapter4.util;
+
+import java.io.File;
+
+import GraphElements.RetweetEdge;
+import GraphElements.UserNode;
+
+import edu.uci.ics.jung.graph.DirectedGraph;
+import edu.uci.ics.jung.graph.DirectedSparseGraph;
+import edu.uci.ics.jung.graph.util.EdgeType;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+/**
+ * Some basic functionality to convert files collected
+ * in Chapter 2 to JUNG graphs.
+ */
+public class TweetFileToGraph {
+
+    /**
+     * Builds a directed retweet network from a file of JSON tweets.
+     *
+     * Each retweet contributes an edge from the retweeting user to the user
+     * who was retweeted. The first retweet between a pair of users creates
+     * the edge; every later one increments that edge's retweet count.
+     * Lines that cannot be parsed, tweets without an author, and tweets that
+     * are not retweets are skipped.
+     *
+     * @param tweetFile
+     *   The file to read, one JSON tweet per line.
+     * @return The retweet graph; empty when no retweets were found.
+     */
+    public static DirectedGraph<UserNode, RetweetEdge> getRetweetNetwork(File tweetFile){
+
+        Logger logger = Logger.getLogger(TweetFileToGraph.class.getName());
+
+        TweetFileProcessor tfp = new TweetFileProcessor(tweetFile);
+        DirectedSparseGraph<UserNode, RetweetEdge> dsg = new DirectedSparseGraph<UserNode, RetweetEdge>();
+
+        while (tfp.hasNext()){
+            JSONObject tweet = tfp.next();
+            if(tweet == null){
+                // the line could not be parsed as JSON
+                continue;
+            }
+
+            //get the author
+            String user = null;
+            try {
+                user = tweet.getJSONObject("user").getString("screen_name");
+            } catch (JSONException ex) {
+                logger.log(Level.SEVERE, null, ex);
+            }
+            if(user == null){
+                continue;
+            }
+
+            //get the retweeted status; a missing one means the tweet is not
+            //a retweet, which is not a problem.
+            JSONObject retweet = tweet.optJSONObject("retweeted_status");
+            if(retweet == null){
+                continue;
+            }
+
+            try{
+                String retweetedUser = retweet.getJSONObject("user").getString("screen_name");
+
+                UserNode toUser = new UserNode(retweetedUser);
+                UserNode fromUser = new UserNode(user);
+
+                dsg.addVertex(toUser);
+                dsg.addVertex(fromUser);
+
+                //make an edge or increment the weight if it exists.
+                //The lookup goes by endpoints, so the edge that is tested
+                //for is the same one that gets incremented.
+                RetweetEdge existing = dsg.findEdge(fromUser, toUser);
+                if(existing != null){
+                    existing.incrementRTCount();
+                }
+                else{
+                    dsg.addEdge(new RetweetEdge(toUser, fromUser), fromUser, toUser, EdgeType.DIRECTED);
+                }
+            }
+            catch(JSONException ex){
+                //a retweet without a usable author is malformed; skip it.
+                logger.log(Level.WARNING, "Skipping retweet without a retweeted user", ex);
+            }
+        }
+
+        return dsg;
+    }
+}