diff options
author | Peter Wu <peter@lekensteyn.nl> | 2014-04-23 12:22:20 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2014-04-23 12:22:20 +0200 |
commit | 14d7547cd31c5be878e377a4a5370f604c8d59d4 (patch) | |
tree | 003840f1a21d39b07d45cd3112c38b6eed40e3ab /src/Chapter4/util | |
download | TwitterDataAnalytics-14d7547cd31c5be878e377a4a5370f604c8d59d4.tar.gz |
Initial commit
build.xml, etc. are modified a bit after opening in Netbeans 7.4.
Diffstat (limited to 'src/Chapter4/util')
-rw-r--r-- | src/Chapter4/util/BetweennessScorer.java | 25 | ||||
-rw-r--r-- | src/Chapter4/util/EigenVectorScorer.java | 64 | ||||
-rw-r--r-- | src/Chapter4/util/InDegreeScorer.java | 30 | ||||
-rw-r--r-- | src/Chapter4/util/TweetFileProcessor.java | 76 | ||||
-rw-r--r-- | src/Chapter4/util/TweetFileToGraph.java | 77 |
5 files changed, 272 insertions, 0 deletions
// ---------------------------------------------------------------------------
// diff --git a/src/Chapter4/util/BetweennessScorer.java b/src/Chapter4/util/BetweennessScorer.java
// new file mode 100644, index 0000000..0926d34
// ---------------------------------------------------------------------------
// NOTE(review): this file declares package "util" although it lives in
// src/Chapter4/util and the sibling classes declare "Chapter4.util". The
// declaration is kept because it is part of the class's fully-qualified name;
// confirm which one callers expect.
package util;

import GraphElements.RetweetEdge;
import GraphElements.UserNode;
import edu.uci.ics.jung.algorithms.scoring.BetweennessCentrality;
import edu.uci.ics.jung.algorithms.scoring.VertexScorer;
import edu.uci.ics.jung.algorithms.shortestpath.DijkstraShortestPath;
import edu.uci.ics.jung.graph.Graph;
import edu.uci.ics.jung.graph.Hypergraph;

/**
 * A JUNG vertex scorer that reports the betweenness centrality of each node.
 *
 * <p>The scores are produced by JUNG's own {@link BetweennessCentrality}
 * implementation; this class only adapts it to the constructor signature used
 * by the other scorers in this package.
 */
public class BetweennessScorer implements VertexScorer<UserNode, Double> {

    /** Library scorer that holds the betweenness values for the wrapped graph. */
    private final BetweennessCentrality<UserNode, RetweetEdge> centrality;

    /**
     * Builds the scorer for the given graph.
     *
     * @param graph
     *            the graph to score; it must actually be a JUNG {@link Graph}
     * @throws ClassCastException
     *             if {@code graph} is not a {@link Graph}
     */
    public BetweennessScorer(Hypergraph<UserNode, RetweetEdge> graph) {
        // The shortest-path based scoring is only defined on Graph, so the
        // Hypergraph handed in is narrowed exactly as the original code did.
        Graph<UserNode, RetweetEdge> asGraph = (Graph<UserNode, RetweetEdge>) graph;
        centrality = new BetweennessCentrality<UserNode, RetweetEdge>(asGraph);
    }

    /**
     * @param arg0
     *            the vertex to look up
     * @return the betweenness score JUNG reports for the vertex
     */
    @Override
    public Double getVertexScore(UserNode arg0) {
        return centrality.getVertexScore(arg0);
    }

}

// ---------------------------------------------------------------------------
// diff --git a/src/Chapter4/util/EigenVectorScorer.java b/src/Chapter4/util/EigenVectorScorer.java
// new file mode 100644, index 0000000..da0c1a8
// ---------------------------------------------------------------------------
package Chapter4.util;

import GraphElements.RetweetEdge;
import GraphElements.UserNode;
import cern.colt.matrix.DoubleMatrix2D;
import cern.colt.matrix.impl.SparseDoubleMatrix2D;
import cern.colt.matrix.linalg.EigenvalueDecomposition;
import edu.uci.ics.jung.algorithms.scoring.VertexScorer;
import edu.uci.ics.jung.graph.Hypergraph;
import java.util.HashMap;
import java.util.Map;

/**
 * This is a Jung Node Scorer that computes the Eigenvector Centrality for each node.
 */
public class EigenVectorScorer implements VertexScorer<UserNode, Double> {

    /** Vertices of the graph; position i corresponds to row/column i of the matrix. */
    private final UserNode[] users;
    /**
     * Maps each vertex to its position in {@link #users} so a score lookup does
     * not have to scan the array. Presumably safe because JUNG's graphs already
     * key vertices by equals/hashCode - confirm for UserNode.
     */
    private final Map<UserNode, Integer> rowOfUser;
    /** Eigenvector matrix of the adjacency matrix; {@code null} for an empty graph. */
    private final DoubleMatrix2D eigenVectors;
    /** Column of {@link #eigenVectors} belonging to the largest diagonal eigenvalue. */
    private final int dominantEigenvectorIdx;

    /**
     * Builds the adjacency matrix of the graph and decomposes it.
     *
     * @param graph
     *            the graph we wish to score
     */
    public EigenVectorScorer(Hypergraph<UserNode, RetweetEdge> graph) {
        users = new UserNode[graph.getVertexCount()];
        graph.getVertices().toArray(users);

        rowOfUser = new HashMap<UserNode, Integer>();
        for (int i = 0; i < users.length; i++) {
            // Keep the first position, matching the original first-match scan.
            if (!rowOfUser.containsKey(users[i])) {
                rowOfUser.put(users[i], i);
            }
        }

        DoubleMatrix2D vectors = null;
        int dominant = 0;

        // With no vertices there is nothing to score, so the decomposition is
        // skipped instead of being run on a 0x0 matrix.
        if (users.length > 0) {
            /* Step 1: Create the adjacency matrix.
             *
             * An adjacency matrix is a matrix with N rows and N columns,
             * where N is the number of nodes in the network.
             * Entry (i, j) is 1 when the graph contains the edge
             * new RetweetEdge(users[i], users[j]), and 0 otherwise.
             */
            SparseDoubleMatrix2D matrix = new SparseDoubleMatrix2D(users.length, users.length);
            for (int i = 0; i < users.length; i++) {
                for (int j = 0; j < users.length; j++) {
                    // A fresh sparse matrix is all zeros, so only the ones are written.
                    if (graph.containsEdge(new RetweetEdge(users[i], users[j]))) {
                        matrix.setQuick(i, j, 1);
                    }
                }
            }

            /* Step 2: Find the principal eigenvector.
             * For more information on eigen-decomposition please see
             * http://mathworld.wolfram.com/EigenDecomposition.html
             */
            EigenvalueDecomposition eig = new EigenvalueDecomposition(matrix);
            DoubleMatrix2D eigenVals = eig.getD();
            vectors = eig.getV();

            // The eigenvalues sit on the diagonal of D; remember the largest one.
            for (int i = 1; i < eigenVals.columns(); i++) {
                if (eigenVals.getQuick(dominant, dominant) < eigenVals.getQuick(i, i)) {
                    dominant = i;
                }
            }
        }

        eigenVectors = vectors;
        dominantEigenvectorIdx = dominant;
    }

    /**
     * @param arg0
     *            the vertex to look up
     * @return the absolute value of the vertex's component in the dominant
     *         eigenvector, or {@code null} when the vertex was not part of the
     *         graph this scorer was built from
     */
    @Override
    public Double getVertexScore(UserNode arg0) {
        Integer row = rowOfUser.get(arg0);
        if (row == null) {
            return null;
        }
        return Math.abs(eigenVectors.getQuick(row, dominantEigenvectorIdx));
    }

}

// ---------------------------------------------------------------------------
// diff --git a/src/Chapter4/util/InDegreeScorer.java b/src/Chapter4/util/InDegreeScorer.java
// new file mode 100644, index 0000000..014adc6
// ---------------------------------------------------------------------------
package Chapter4.util;

import edu.uci.ics.jung.algorithms.scoring.VertexScorer;
import edu.uci.ics.jung.graph.Hypergraph;
import java.util.Collection;

/**
 * This is a Jung Node Scorer that computes the
 * In-Degree Centrality of nodes.
 */
public class InDegreeScorer<T> implements VertexScorer<T, Double> {

    //The graph representation in JUNG.
    private final Hypergraph<T, ?> graph;

    /**
     * Initialize the graph scorer.
     * @param graph
     *          The graph we wish to score.
     */
    public InDegreeScorer(Hypergraph<T, ?> graph) {
        this.graph = graph;
    }

    /**
     * @param node
     *          The vertex to score.
     * @return The In-Degree Centrality of the vertex, i.e. the number of
     *         incoming edges the graph reports for it; 0.0 when the graph
     *         reports no edge collection at all for the vertex.
     */
    @Override
    public Double getVertexScore(T node) {
        // JUNG graph implementations may hand back null for a vertex they do
        // not contain; treat that as "no incoming edges" instead of failing.
        Collection<?> inEdges = graph.getInEdges(node);
        if (inEdges == null) {
            return 0.0;
        }
        return (double) inEdges.size();
    }
}
// \ No newline at end of file   (marker closing the preceding InDegreeScorer.java hunk)
// ---------------------------------------------------------------------------
// diff --git a/src/Chapter4/util/TweetFileProcessor.java b/src/Chapter4/util/TweetFileProcessor.java
// new file mode 100644, index 0000000..9b6b99c
// ---------------------------------------------------------------------------
package Chapter4.util;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * Iterates over a file that holds one JSON document per line, parsing each
 * line into a {@link JSONObject}.
 *
 * <p>The file is read as UTF-8. The underlying reader is closed as soon as the
 * last line has been consumed or a read error occurs; callers that stop
 * iterating early should call {@link #close()} themselves.
 */
public class TweetFileProcessor implements Iterator<JSONObject>, Closeable {

    private static final Logger LOGGER = Logger.getLogger(TweetFileProcessor.class.getName());

    /** Reader over the tweet file; {@code null} when the file could not be opened. */
    protected BufferedReader fileBuffer;
    /** {@code true} once there is no further line to hand out. */
    protected boolean endOfFile;
    /** The line that the next call to {@link #next()} will parse. */
    protected String nextLine;

    /**
     * Opens the file and pre-reads its first line.
     *
     * <p>Failures to open or read the file are logged and leave the iterator
     * empty; they are not propagated.
     *
     * @param f
     *            the file to read, one JSON object per line
     */
    public TweetFileProcessor(File f) {
        endOfFile = false;

        FileInputStream in = null;
        try {
            in = new FileInputStream(f);
            fileBuffer = new BufferedReader(new InputStreamReader(in, "UTF-8"));
        } catch (FileNotFoundException e) {
            LOGGER.log(Level.SEVERE, "Unable to open tweet file " + f, e);
            endOfFile = true;
            return;
        } catch (UnsupportedEncodingException e) {
            LOGGER.log(Level.SEVERE, "Unable to decode tweet file " + f, e);
            closeQuietly(in); // the stream was opened before the reader failed
            endOfFile = true;
            return;
        }

        // Also marks an empty file as exhausted, so hasNext() is false from the start.
        advance();
    }

    @Override
    public boolean hasNext() {
        return !endOfFile;
    }

    /**
     * Parses the current line and moves on to the following one.
     *
     * @return the parsed object, or {@code null} when the line is not valid
     *         JSON (the parse error is logged)
     * @throws NoSuchElementException
     *             if there is no line left to return
     */
    @Override
    public JSONObject next() {
        if (endOfFile) {
            throw new NoSuchElementException("No more lines in the tweet file");
        }

        JSONObject obj = null;
        try {
            obj = new JSONObject(nextLine);
        } catch (JSONException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
        }

        advance();
        return obj;
    }

    @Override
    public void remove() throws UnsupportedOperationException {
        throw new UnsupportedOperationException();
    }

    /**
     * Stops the iteration and releases the underlying reader. Calling it more
     * than once is harmless.
     */
    @Override
    public void close() {
        endOfFile = true;
        closeQuietly(fileBuffer);
    }

    /**
     * Reads the following line into {@link #nextLine}. Reaching the end of the
     * file or hitting a read error ends the iteration and closes the reader,
     * so a failing read can never make next() return the same line forever.
     */
    private void advance() {
        try {
            nextLine = fileBuffer.readLine();
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Error while reading the tweet file", e);
            nextLine = null;
        }
        if (nextLine == null) {
            close();
        }
    }

    /** Closes the resource if it is present, logging instead of propagating failures. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Error while closing the tweet file", e);
        }
    }
}
// diff   (first token of the next file's "diff --git" header, which continues below)
// ---------------------------------------------------------------------------
// (diff) --git a/src/Chapter4/util/TweetFileToGraph.java b/src/Chapter4/util/TweetFileToGraph.java
// new file mode 100644, index 0000000..6cf2e3a
// ---------------------------------------------------------------------------
package Chapter4.util;

import java.io.File;

import GraphElements.RetweetEdge;
import GraphElements.UserNode;

import edu.uci.ics.jung.graph.DirectedGraph;
import edu.uci.ics.jung.graph.DirectedSparseGraph;
import edu.uci.ics.jung.graph.util.EdgeType;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.json.JSONException;
import org.json.JSONObject;

/**
 * Some basic functionality to convert files collected
 * in Chapter 2 to JUNG graphs.
 */
public class TweetFileToGraph {

    /**
     * Builds the retweet network contained in a tweet file.
     *
     * <p>Every retweet adds a directed edge from the retweeting user to the
     * retweeted user. When that edge already exists its retweet count is
     * incremented instead. Lines that are not valid JSON, tweets without an
     * author and tweets that are not retweets are skipped.
     *
     * @param tweetFile
     *            file with one tweet (JSON object) per line
     * @return the directed retweet graph
     */
    public static DirectedGraph<UserNode, RetweetEdge> getRetweetNetwork(File tweetFile) {

        Logger logger = Logger.getLogger(TweetFileToGraph.class.getName());

        TweetFileProcessor tfp = new TweetFileProcessor(tweetFile);
        DirectedSparseGraph<UserNode, RetweetEdge> dsg = new DirectedSparseGraph<UserNode, RetweetEdge>();

        while (tfp.hasNext()) {
            JSONObject tweet = tfp.next();
            if (tweet == null) {
                // the line could not be parsed
                continue;
            }

            //get the author
            String user;
            try {
                user = tweet.getJSONObject("user").getString("screen_name");
            } catch (JSONException ex) {
                logger.log(Level.SEVERE, null, ex);
                continue;
            }

            //get the retweeted user
            JSONObject retweet = tweet.optJSONObject("retweeted_status");
            if (retweet == null) {
                //the tweet is not a retweet. this is not a problem.
                continue;
            }
            String retweetedUser;
            try {
                retweetedUser = retweet.getJSONObject("user").getString("screen_name");
            } catch (JSONException ex) {
                // A retweet without a usable original author cannot become an edge.
                logger.log(Level.WARNING, "Skipping retweet without a retweeted user", ex);
                continue;
            }

            //make an edge or increment the weight if it exists.
            UserNode toUser = new UserNode(retweetedUser);
            UserNode fromUser = new UserNode(user);

            dsg.addVertex(toUser);
            dsg.addVertex(fromUser);

            // Ask the graph for the edge between the two endpoints directly, so
            // the existence check and the lookup cannot disagree.
            RetweetEdge existing = dsg.findEdge(fromUser, toUser);
            if (existing != null) {
                existing.incrementRTCount();
            } else {
                dsg.addEdge(new RetweetEdge(toUser, fromUser), fromUser, toUser, EdgeType.DIRECTED);
            }
        }

        return dsg;
    }
}