Diffstat (limited to 'src/Chapter5')
-rw-r--r--  src/Chapter5/network/CreateD3Network.java  716
-rw-r--r--  src/Chapter5/network/ExtractUserTagNetwork.java  173
-rw-r--r--  src/Chapter5/support/DateInfo.java  30
-rw-r--r--  src/Chapter5/support/HashTagDS.java  18
-rw-r--r--  src/Chapter5/support/NetworkNode.java  49
-rw-r--r--  src/Chapter5/support/NodeIDComparator.java  32
-rw-r--r--  src/Chapter5/support/NodeSizeComparator.java  29
-rw-r--r--  src/Chapter5/support/ToNodeInfo.java  23
-rw-r--r--  src/Chapter5/support/Tweet.java  21
-rw-r--r--  src/Chapter5/text/EventSummaryExtractor.java  269
-rw-r--r--  src/Chapter5/text/ExtractTopKeywords.java  151
-rw-r--r--  src/Chapter5/trends/ControlChartExample.java  144
-rw-r--r--  src/Chapter5/trends/DateInfo.java  29
-rw-r--r--  src/Chapter5/trends/ExtractDatasetTrend.java  120
-rw-r--r--  src/Chapter5/trends/SparkLineExample.java  163
-rw-r--r--  src/Chapter5/trends/TCDateInfo.java  31
-rw-r--r--  src/Chapter5/trends/TrendComparisonExample.java  155
17 files changed, 0 insertions, 2153 deletions
diff --git a/src/Chapter5/network/CreateD3Network.java b/src/Chapter5/network/CreateD3Network.java
deleted file mode 100644
index d4c25af..0000000
--- a/src/Chapter5/network/CreateD3Network.java
+++ /dev/null
@@ -1,716 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter5.network;
-
-
-import Chapter5.support.HashTagDS;
-import Chapter5.support.NetworkNode;
-import Chapter5.support.NodeIDComparator;
-import Chapter5.support.NodeSizeComparator;
-import Chapter5.support.ToNodeInfo;
-import Chapter5.support.Tweet;
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-import utils.TextUtils;
-
-/**
- *
- * @author shamanth
- */
-public class CreateD3Network
-{
- static final String DEF_INFILENAME = "ows.json";
- private String RTPATTERN = "rt @[_a-zA-Z0-9]+";
- private final int DEFAULT_NODE_SIZE = 0;
-// private final int NODE_COUNT_LIMIT = 1;
- //private final String[] node_color_scheme = new String[]{"#FFFFD9","#EDF8B1","#C7E9B4","#7FCDBB","#41B6C4","#1D91C0","#225EA8","#253494","#081D58"};
- //private final String[] node_color_scheme = new String[]{"#A6BDDB","#74A9CF","#3690C0","#0570B0","#045A8D","#023858"};
-
- /**
- * Extracts the users who have been retweeted using the RTPATTERN
- * @param text
- * @return
- */
- public ArrayList<String> GetRTUsers(String text)
- {
- Pattern p = Pattern.compile(RTPATTERN, Pattern.CASE_INSENSITIVE);
- Matcher m = p.matcher(text);
- ArrayList<String> rtusers = new ArrayList<String>();
- while(m.find())
- {
- String nuser = text.substring(m.start(),m.end());
-            nuser = nuser.replaceAll("(?i)rt @", "");
-// nuser = nuser.replaceAll("RT @", "");
- rtusers.add(nuser.toLowerCase());
- }
- return rtusers;
- }
-
- /**
- * Identifies the category to which the tweet belongs. Each category is defined by a group of words/hashtags
- * @param tweet
- * @param usercategories
- * @return
- */
- public int GetCategory(String tweet, HashTagDS[] usercategories)
- {
- HashMap<Integer,Integer> categoryvotes = new HashMap<Integer,Integer>();
- tweet = tweet.toLowerCase();
- int i=0;
- for(HashTagDS cat:usercategories)
- {
-
- for(String s :cat.tags)
- {
- if(tweet.indexOf(s)!=-1)
- {
- if(categoryvotes.containsKey(i))
- {
- categoryvotes.put(i, categoryvotes.get(i)+1);
- }
- else
- {
- categoryvotes.put(i, 1);
- }
- }
- }
- i++;
- }
- Set<Integer> keyset = categoryvotes.keySet();
- int maxvote = 0;
- //by default the tweet will be in the first category
- int maxcategoryindex = 0;
- for(int key:keyset)
- {
- if(categoryvotes.get(key)>maxvote)
- {
- maxvote = categoryvotes.get(key);
- maxcategoryindex = key;
- }
- }
- return maxcategoryindex;
- }
-
- /**
- * Converts the input jsonobject containing category descriptions to an array for processing.
- * @param hashtagcoll JSONObject containing the list of hashtags, color, and the topic information
- * @return An array of hashtags
- */
- public HashTagDS[] ConvertJSONArrayToArray(JSONObject hashtagcoll)
- {
- HashTagDS[] hashtags = new HashTagDS[hashtagcoll.length()];
- int j=0;
- try{
- if(hashtagcoll!=null)
- {
- Iterator keyit = hashtagcoll.keys();
- while(keyit.hasNext())
- {
- HashTagDS ht = new HashTagDS();
-                    String groupkey = (String) keyit.next();
-                    JSONObject tags = (JSONObject) hashtagcoll.get(groupkey);
-                    ht.groupname = groupkey;
- ht.color = tags.getString("color");
- JSONArray tagjson = tags.getJSONArray("hts");
- ht.tags = new String[tagjson.length()];
- for(int i=0;i<tagjson.length();i++)
- {
- ht.tags[i] = tagjson.getString(i);
- }
- hashtags[j++] = ht;
- }
- }
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- return hashtags;
- }
-
- /**
-     * Identifies the category of a node based on the content of its tweets (each tweet can be assigned a category based on its text). A simple majority is sufficient to make this decision.
- * @param tnfs
- * @param hashtagarray
- * @return
- */
- public int GetMajorityTopicColor(NetworkNode tnfs,HashTagDS[] hashtagarray)
- {
- HashMap<Integer,Integer> catcount = new HashMap<Integer,Integer>();
-        //assign each of the node's tweets a category and pick the most frequent one as the node's category
- for(String tweet:tnfs.data)
- {
- int id = this.GetCategory(tweet, hashtagarray);
- if(catcount.containsKey(id))
- {
- catcount.put(id, catcount.get(id)+1);
- }
- else
- catcount.put(id, 1);
- }
- Set<Integer> keys = catcount.keySet();
- int maxcatID = -1;
- int maxcount = 0;
- for(int k:keys)
- {
- if(maxcatID==-1)
- {
- maxcatID = k;
- maxcount = catcount.get(k);
- }
- else
- {
- if(maxcount<catcount.get(k))
- {
- maxcount = catcount.get(k);
- maxcatID = k;
- }
- }
- }
- return maxcatID;
- }
-
- /**
- * Takes as input a JSON file and reads through the file sequentially to process and create a retweet network from the tweets.
- * @param inFilename
- * @param numNodeClasses
- * @param hashtags category info containing hashtags
- * @param num_nodes number of seed nodes to be included in the network
- * @return a JSONObject consisting of nodes and links of the network
- */
- public JSONObject ConvertTweetsToDiffusionPath(String inFilename,int numNodeClasses,
- JSONObject hashtags, int num_nodes)
- {
- HashMap<String,NetworkNode> userconnections = new HashMap<String,NetworkNode>();
-// HashMap<String,Integer> tweet_class_codes = new HashMap<String,Integer>();
-// int tweet_class_counter = 1;
- HashTagDS[] hashtagarray = ConvertJSONArrayToArray(hashtags);
- BufferedReader br = null;
- try{
- br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- JSONObject tweetobj;
- try {
- tweetobj = new JSONObject(temp);
- } catch (JSONException ex) {
- ex.printStackTrace();
- continue;
- }
- //Extract the tweet first
- Tweet t = new Tweet();
- String text="";
- try {
- text = TextUtils.GetCleanText(tweetobj.getString("text")).toLowerCase();
- } catch (JSONException ex) {
- ex.printStackTrace();
- continue;
- }
- //Check that the tweet matches at least one of the topics
- boolean groupmatch = false;
- for(HashTagDS ht:hashtagarray)
- {
- String[] tags = ht.tags;
- for(String tg:tags)
- {
- if(text.contains(tg))
- {
- groupmatch = true;
- break;
- }
- }
- if(groupmatch)
- {
- break;
- }
- }
- if(!groupmatch)
- {
- continue;
- }
- //
- ArrayList<String> fromusers = new ArrayList<String>();
- if(!tweetobj.isNull("retweeted_status"))
- {
- JSONObject rtstatus;
- try {
- rtstatus = tweetobj.getJSONObject("retweeted_status");
-                        if(!rtstatus.isNull("user"))
- {
- JSONObject rtuserobj = rtstatus.getJSONObject("user");
- try{
- fromusers.add(rtuserobj.get("screen_name").toString());
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- } catch (JSONException ex) {
- Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- else
- {
- //use the tweet text to retrieve the pattern "RT @username:"
- fromusers = GetRTUsers(text);
- }
- if(fromusers.isEmpty())
- {
- continue;
- }
-
- //identify the class values to be applied to all the nodes and
- //edges.
-// String prunedtext = TextUtils.RemoveTwitterElements(text);
-// Integer class_code = tweet_class_codes.get(prunedtext);
-// if(class_code==null)
-// {
-// class_code = tweet_class_counter;
-// tweet_class_codes.put(prunedtext, class_code); //set the unique id for this tweet
-// tweet_class_counter++;
-// }
- t.text = TextUtils.RemoveRTElements(text);
- if(!tweetobj.isNull("user"))
- {
- JSONObject userobj;
- try {
- userobj = tweetobj.getJSONObject("user");
- t.user = userobj.getString("screen_name").toLowerCase();
- } catch (JSONException ex) {
- Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
-// try {
-// t.pubdate = String.valueOf(tweetobj.get("timestamp"));
-// } catch (JSONException ex) {
-// Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex);
-// }
-                //determine the tweet's category before looking up its color
-                t.catID = GetCategory(text, hashtagarray);
-                t.catColor = hashtagarray[t.catID].color;
- //update the size of the from fromuser
- int cur_level = 0;
- for(int i=fromusers.size()-1;i>=0;i--)
- {
- String touser = "";
- if(i==0)
- {//if this is the last user in the retweet sequence then use the user of the tweet as the next link
- touser = t.user;
- }
- else
-                    { //if there are still users left in the retweet chain then use the next one as the link target
- touser = fromusers.get(i-1);
- }
- //don't add any selflinks
- if(fromusers.get(i).equals(touser))
- {
- continue;
- }
- NetworkNode fromuser = null;
- if(userconnections.containsKey(fromusers.get(i)))
- {
- //from node already exists simply add this new connection to it
- fromuser = userconnections.get(fromusers.get(i));
- }
- else
- {
- //the from user was not found. add the node
- fromuser = new NetworkNode();
- // fromuser.id = nodeid++;
- fromuser.username = fromusers.get(i);
- fromuser.tonodes = new ArrayList<ToNodeInfo>();
- fromuser.class_codes = new ArrayList<Integer>();
- fromuser.size = DEFAULT_NODE_SIZE;
- fromuser.level = cur_level;
- fromuser.data = new ArrayList<String>();
- fromuser.data.add(t.text);
- //fromuser.category = ;
- }
-// if(!fromuser.class_codes.contains(class_code))
-// {
-// //add the marker to from node if it does not have it already
-// fromuser.class_codes.add(class_code);
-// }
- //if to node is not in the list then create it
- NetworkNode tonode = null;
- if(!userconnections.containsKey(touser))
- {
- tonode = new NetworkNode();
- // System.out.println(touser+" "+nodeid);
- // tonode.id= nodeid++;
- tonode.username = touser;
- tonode.tonodes= new ArrayList<ToNodeInfo>();
- tonode.class_codes = new ArrayList<Integer>();
- tonode.catID = t.catID;
- tonode.catColor = t.catColor;
- tonode.size = DEFAULT_NODE_SIZE;
- tonode.data= new ArrayList<String>();
- tonode.data.add(t.text);
- tonode.level = cur_level+1;
- //add the classcode to the node if it doesn't already exist
-// if(!tonode.class_codes.contains(class_code))
-// {
-// tonode.class_codes.add(class_code);
-// }
- //add the touser info
- userconnections.put(touser, tonode);
- }
- else
- {
- tonode = userconnections.get(touser);
- tonode.data.add(t.text);
- if(tonode.level<cur_level+1)
- {
-                            tonode.level = cur_level+1;
- }
- //add the classcode to the node if it doesn't already exist
-// if(!tonode.class_codes.contains(class_code))
-// {
-// tonode.class_codes.add(class_code);
-// }
- }
- ToNodeInfo inf = new ToNodeInfo();
- inf.tonodeid = tonode.id;
- inf.text = t.text;
-// inf.date = t.pubdate;
-// inf.class_code = class_code;
- inf.tousername = touser;
- inf.catID = t.catID;
- inf.catColor = t.catColor;
- fromuser.tonodes.add(inf);
- //update from node size
- fromuser.size++;
- //add back updated fromuser
- userconnections.put(fromusers.get(i), fromuser);
- //update the level for next iteration
- cur_level++;
- }
- }
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }
- Set<String> keys = userconnections.keySet();
- ArrayList<NetworkNode> returnnodes = new ArrayList<NetworkNode>();
-        //it's +1 because nodes with size 0 are not going to be used to calculate the class
- int min = DEFAULT_NODE_SIZE+1;
- int max = DEFAULT_NODE_SIZE+1;
- for(String k:keys)
- {
- NetworkNode n = userconnections.get(k);
- int maxcat = GetMajorityTopicColor(n,hashtagarray);
- n.catID = maxcat;
- n.catColor = hashtagarray[maxcat].color;
- userconnections.put(k, n);
- //
-// if(n.size==0)
-// {//mark the node as a zero node
-// n.class_codes.add(-1);
-// }
-// else
-// {
- if(n.size>max)
- {
- max = n.size;
- }
- if(n.size<min)
- {
- min = n.size;
- }
-// }
- returnnodes.add(n);
- }
- //create node groups to assign unique colors to nodes in different Categories based upon the number of connections
- ArrayList<NetworkNode> nodes = ComputeGroupsSqrt(returnnodes, max, min, numNodeClasses);
- Collections.sort(nodes,Collections.reverseOrder(new NodeSizeComparator()));
- //select how many nodes to show.
- int nodes_to_visit = 0;
- if(nodes.size()>=num_nodes)
- {
- nodes_to_visit = num_nodes;
- }
- else
- {
- nodes_to_visit = nodes.size();
- }
-
- HashMap<String,NetworkNode> prunednodes = new HashMap<String,NetworkNode>();
- HashMap<String,Integer> nodeidlist = new HashMap<String,Integer>();
- int nodeid = 0; //node nodeid counter
- for(int k=0;k<nodes_to_visit;k++)
- {
- NetworkNode nd = nodes.get(k);
-// System.out.println("visiting node "+nd.username);
- nd.level = 0;
- HashMap<String,NetworkNode> rtnodes = GetNextHopConnections(userconnections,nd,new HashMap<String,NetworkNode>());
- Set<String> names = rtnodes.keySet();
- for(String n:names)
- {
- if(!prunednodes.containsKey(n))
- {
- NetworkNode newnode = rtnodes.get(n);
- if(newnode.size>0)
- {
- prunednodes.put(n, newnode);
- nodeidlist.put(n, nodeid++);
- }
- }
- }
- }
-
- /** We now have all the nodes of the network. compute their ids sequentially
- * and assign them to the respective nodes. Simultaneously compact the nodes
- * of the network to remove all nodes which have not been retweeted and are
- * of size 0
- */
-
- Set<String> allnodes = prunednodes.keySet();
-// System.out.println(prunednodes.size());
- ArrayList<NetworkNode> finalnodes = new ArrayList<NetworkNode>();
-// HashMap<Integer,ArrayList<Integer>> conninfo = new HashMap<Integer,ArrayList<Integer>>();
- for(String n:allnodes)
- {
- NetworkNode nd = prunednodes.get(n);
- nd.id = nodeidlist.get(nd.username);
- ArrayList<Integer> connids = new ArrayList<Integer>();
-// ArrayList<ToNodeInfo> compact_To_nodes = new ArrayList<ToNodeInfo>();
- int counter = 0;
- for(ToNodeInfo tnf: nd.tonodes)
- {
-                //only keep links to users present in the pruned node list; a user who was never retweeted ends the chain here
- if(nodeidlist.containsKey(tnf.tousername))
- {
- tnf.tonodeid = nodeidlist.get(tnf.tousername);
- connids.add(tnf.tonodeid);
- nd.tonodes.set(counter, tnf);
- counter++;
- }
- }
- finalnodes.add(nd);
- //store the connections to compute the clusterids later
-// if(!conninfo.containsKey(nd.id))
-// {
-// conninfo.put(nd.id, connids);
-// }
- }
- //generate the clusterids
-// ArrayList<Integer>[] clusterids = (ArrayList<Integer>[])new ArrayList[allnodes.size()];
-// Set<Integer> idkeys = conninfo.keySet();
-// for(int id:idkeys)
-// {
-// for(int x:conninfo.get(id))
-// {
-// if(clusterids[x]==null)
-// {
-// ArrayList<Integer> toclusterid = new ArrayList<Integer>();
-// toclusterid.add(id);
-// clusterids[x] = toclusterid;
-// }
-// else
-// {
-// ArrayList<Integer> toclusterid = clusterids[x];
-// if(!toclusterid.contains(id))
-// {
-// toclusterid.add(id);
-// clusterids[x] = toclusterid;
-// }
-// }
-// }
-// }
- //now create the final node list with the clusterids
-// for(String n:allnodes)
-// {
-// NetworkNode nd = prunednodes.get(n);
-// ArrayList<Integer> cids = clusterids[nd.id];
-// if(cids!=null)
-// {
-// int size = cids.size();
-// nd.clusterID = new int[size+1];
-// int counter=0;
-// nd.clusterID[counter++] = nd.id;
-// for(int c:cids)
-// {
-// nd.clusterID[counter++] = c;
-// }
-// }
- //System.out.println(nd.class_codes.toString());
-// finalnodes.add(nd);
-// }
- Collections.sort(finalnodes,new NodeIDComparator());
- System.out.println(finalnodes.size());
- for(NetworkNode node:finalnodes)
- {
- System.out.println(node.id+" "+node.username+" "+node.level+" "+node.size+" "+node.catColor+node.data.get(0));
- }
- return GetD3Structure(finalnodes);
- }
-
- /**
-     * Creates a D3 representation of the network, consisting of two JSONArrays: a set of nodes and a set of links between the nodes
- * @param finalnodes
- * @return
- */
- public JSONObject GetD3Structure(ArrayList<NetworkNode> finalnodes)
- {
- JSONObject alltweets = new JSONObject();
- try {
- JSONArray nodes = new JSONArray();
- JSONArray links = new JSONArray();
- for (NetworkNode node : finalnodes)
- {
- try {
- //create adjacencies
- JSONArray nodedata = new JSONArray();
- for (ToNodeInfo tnf : node.tonodes) {
- JSONObject jsadj = new JSONObject();
- jsadj.put("source", node.id);
- jsadj.put("target", tnf.tonodeid);
- //weight of the edge
- jsadj.put("value", 1);
- //class code is a unique id corresponding to the text
- jsadj.put("data", tnf.class_code);
- links.put(jsadj);
- //create a data object for the node
- JSONObject jsdata = new JSONObject();
- jsdata.put("tonodeid", tnf.tonodeid);
- jsdata.put("nodefrom", node.username);
- jsdata.put("nodeto", tnf.tousername);
- jsdata.put("tweet", tnf.text);
-// jsdata.put("pubtime", tnf.date);
- //class code for tweet to be used to filter
-// jsdata.put("classcode", tnf.class_code);
- nodedata.put(jsdata);
- }
- //add node
- JSONObject nd = new JSONObject();
- nd.put("name", node.username);
- nd.put("group", node.group);
- nd.put("id", node.id);
- nd.put("size", node.size);
- nd.put("catColor", node.catColor);
- nd.put("catID", node.catID);
- nd.put("data", nodedata);
- nd.put("level", node.level);
- //clusterids for the node
-// JSONArray cids = new JSONArray();
-// if (node.clusterID != null) {
-// for (int code : node.clusterID) {
-// cids.put(code);
-// }
-// } else {
-// cids.put(node.id);
-// }
-// nd.put("clusterids", cids);
- //classcodes for the node
-// JSONArray codes = new JSONArray();
-// for (int c : node.class_codes) {
-// codes.put(c);
-// }
-// nd.put("classcodes", codes);
- nodes.put(nd);
- } catch (JSONException ex) {
- ex.printStackTrace();
- }
- }
- alltweets.put("nodes", nodes);
- alltweets.put("links", links);
- } catch (JSONException ex) {
- Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex);
- }
- return alltweets;
- }
-
- /**
- * Recursively traverses the list of nodes to identify all nodes reachable from a starting node.
- * @param userconnections A map containing the usernames as keys and the node information as value
- * @param cur_node Node currently being processed.
- * @param newnodes A list of nodes which can be reached from the current node
- * @return A map of the usernames and the node information for all nodes reachable
- */
- public HashMap<String,NetworkNode> GetNextHopConnections(HashMap<String,NetworkNode> userconnections,NetworkNode cur_node,HashMap<String,NetworkNode> newnodes)
- {
- cur_node.level = cur_node.level+1;
- newnodes.put(cur_node.username,cur_node);
- for(int i=0;i<cur_node.tonodes.size();i++)
- {
- ToNodeInfo tnf = cur_node.tonodes.get(i);
- if(newnodes.containsKey(tnf.tousername))
- {
- continue;
- }
-
- HashMap<String,NetworkNode> rtnodes = GetNextHopConnections(userconnections, userconnections.get(tnf.tousername),newnodes);
- newnodes = rtnodes;
- }
- return newnodes;
- }
-
- /**
- * Divides a list of nodes into groups using the square root binning
-     * technique. If a node has size x and there are y groups in total, then the
-     * group of the node is computed as ceil((sqrt(x)/sqrt(max))*y)-1, where max is
- * the size of the largest node.
- * @param nodes A list of nodes
- * @param max The maximum size of a node
- * @param min The minimum size of a node
- * @param noofclasses Number of classes into which the nodes must be classified
- * @return A list of nodes along with their class
- */
- public ArrayList<NetworkNode> ComputeGroupsSqrt(ArrayList<NetworkNode> nodes, int max, int min, int noofclasses)
- {
- ArrayList<NetworkNode> finalnodes = new ArrayList<NetworkNode>();
- for(int i=0;i<nodes.size();i++)
- {
- NetworkNode node = nodes.get(i);
- int color_index = 0;
- if(node.size>0)
- {
- color_index = (int) Math.ceil(((double)Math.sqrt(node.size)/Math.sqrt(max))*noofclasses)-1;
-// node.size = color_index*6;
- }
- node.group = color_index;
- finalnodes.add(node);
- }
- return finalnodes;
- }
-
-
- //DEBUG use only
- public static void main(String[] args)
- {
- try {
- CreateD3Network cdn = new CreateD3Network();
- JSONObject jobj = new JSONObject();
- JSONObject obj = new JSONObject();
- obj.put("color", "#800000");
- JSONArray ja = new JSONArray();
- ja.put("zuccotti");
- obj.put("hts", ja);
- jobj.put("Group 1", obj);
- obj = new JSONObject();
- obj.put("color", "#0FFF00");
- ja = new JSONArray();
- ja.put("#nypd");
- obj.put("hts", ja);
- jobj.put("Group 2", obj);
- String filename = "D:\\Twitter Data Analytics\\Data\\testows.json";
- JSONObject nodes = cdn.ConvertTweetsToDiffusionPath(filename,7, jobj,5);
- } catch (JSONException ex) {
- ex.printStackTrace();
- }
- }
-}
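
The square-root binning performed by ComputeGroupsSqrt above can be exercised on its own. Below is a minimal, self-contained sketch of that formula; the node sizes and class count are invented for illustration and are not taken from the deleted code.

    // SqrtBinningDemo.java -- standalone illustration of the binning formula used above:
    // group = ceil((sqrt(size)/sqrt(max)) * numClasses) - 1, with nodes of size 0 left in group 0.
    public class SqrtBinningDemo
    {
        static int group(int size, int max, int numClasses)
        {
            if (size <= 0)
            {
                return 0;
            }
            return (int) Math.ceil((Math.sqrt(size) / Math.sqrt(max)) * numClasses) - 1;
        }

        public static void main(String[] args)
        {
            int max = 100;        //size of the largest node (invented)
            int numClasses = 7;   //number of node classes (invented)
            int[] sizes = {0, 1, 4, 25, 49, 100};
            for (int s : sizes)
            {
                System.out.println("size " + s + " -> group " + group(s, max, numClasses));
            }
        }
    }
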
diff --git a/src/Chapter5/network/ExtractUserTagNetwork.java b/src/Chapter5/network/ExtractUserTagNetwork.java
deleted file mode 100644
index 43ae680..0000000
--- a/src/Chapter5/network/ExtractUserTagNetwork.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.network;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class ExtractUserTagNetwork
-{
-
- static final String DEF_INFILENAME = "ows.json";
-
- /**
-     * Extracts a map of all the hashtags a user has used in their tweets, resulting in a bipartite user-hashtag network. The frequency of each tag is also returned in the form of a map.
-     * @param inFilename File containing a list of tweets as JSON objects
-     * @return A map with users as keys and, for each user, a map of the hashtags they use along with their frequencies
- */
- public HashMap<String,HashMap<String,Integer>> ExtractUserHashtagNetwork(String inFilename)
- {
- HashMap<String,HashMap<String,Integer>> usertagmap = new HashMap<String,HashMap<String,Integer>>();
- BufferedReader br = null;
- try{
- br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- try{
- JSONObject tweetobj = new JSONObject(temp);
- String text;
- String username;
- HashMap<String,Integer> tags = new HashMap<String,Integer>();
- if(!tweetobj.isNull("entities"))
- {
- JSONObject entities = tweetobj.getJSONObject("entities");
- JSONArray hashtags;
- try {
- hashtags = entities.getJSONArray("hashtags");
- for(int i=0;i<hashtags.length();i++)
- {
- JSONObject tag = hashtags.getJSONObject(i);
- String tg = tag.getString("text").toLowerCase();
- if(!tags.containsKey(tg))
- {
- tags.put(tg,1);
- }
- else
- {
- tags.put(tg, tags.get(tg)+1);
- }
- }
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- else
- if(!tweetobj.isNull("text"))
- {
- text = tweetobj.getString("text");
- tags = ExtractHashTags(text);
- }
- if(!tweetobj.isNull("user"))
- {
- JSONObject userobj = tweetobj.getJSONObject("user");
- username = "@"+userobj.getString("screen_name").toLowerCase();
- if(usertagmap.containsKey(username))
- {
- HashMap<String,Integer> usertags = usertagmap.get(username);
- Set<String> keys = tags.keySet();
- for(String k:keys)
- {
- if(usertags.containsKey(k))
- {
- usertags.put(k, usertags.get(k)+tags.get(k));
- }
- else
- {
- usertags.put(k, tags.get(k));
- }
- }
- usertagmap.put(username, usertags);
- }
- else
- {
- usertagmap.put(username, tags);
- }
- }
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }finally{
- try {
-                if(br!=null) br.close();
- } catch (IOException ex) {
- Logger.getLogger(ExtractUserTagNetwork.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- return usertagmap;
- }
-
- /**
- * Extracts all the hashtags mentioned in a tweet and creates a map with the frequency of their occurrence.
- * @param text
- * @return A map containing the hashtags as keys and their frequency as value
- */
- public HashMap<String,Integer> ExtractHashTags(String text)
- {
- Pattern p = Pattern.compile("#[a-zA-Z0-9]+");
- Matcher m = p.matcher(text);
- HashMap<String,Integer> tags = new HashMap<String,Integer>();
- while(m.find())
- {
- String tag = text.substring(m.start(),m.end()).toLowerCase();
- if(!tags.containsKey(tag))
- {
- tags.put(tag,1);
- }
- else
- {
- tags.put(tag, tags.get(tag)+1);
- }
- }
- return tags;
- }
-
- public static void main(String[] args)
- {
- ExtractUserTagNetwork eutn = new ExtractUserTagNetwork();
-
- String infilename = DEF_INFILENAME;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- }
- HashMap<String, HashMap<String,Integer>> usertagmap = eutn.ExtractUserHashtagNetwork(infilename);
- Set<String> keys = usertagmap.keySet();
- for(String key:keys)
- {
- System.out.println(key);
- HashMap<String,Integer> tags = usertagmap.get(key);
- Set<String> tagkeys = tags.keySet();
- for(String tag:tagkeys)
- {
- System.out.println(tag+","+tags.get(tag));
- }
- }
- }
-}
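
The hashtag-counting step above (ExtractHashTags) is easy to try in isolation. A minimal sketch follows; the sample tweet text is invented for illustration.

    import java.util.HashMap;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    //Standalone illustration of the hashtag-frequency step used above.
    public class HashTagCountDemo
    {
        public static void main(String[] args)
        {
            String text = "march to #ows now! #OWS #nypd on the scene";  //invented sample tweet
            Pattern p = Pattern.compile("#[a-zA-Z0-9]+");
            Matcher m = p.matcher(text);
            HashMap<String, Integer> tags = new HashMap<String, Integer>();
            while (m.find())
            {
                String tag = m.group().toLowerCase();
                Integer c = tags.get(tag);
                tags.put(tag, c == null ? 1 : c + 1);
            }
            System.out.println(tags);  //e.g. {#nypd=1, #ows=2}
        }
    }
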
diff --git a/src/Chapter5/support/DateInfo.java b/src/Chapter5/support/DateInfo.java
deleted file mode 100644
index 9a32d4c..0000000
--- a/src/Chapter5/support/DateInfo.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.support;
-
-import java.util.Date;
-import java.util.HashMap;
-
-public class DateInfo implements Comparable
-{
- public Date d;
- public HashMap<String,Integer> catcounts = new HashMap<String,Integer>();
-
- public int compareTo(Object o) {
- DateInfo temp = (DateInfo) o;
- if(temp.d.after(this.d))
- {
- return 1;
- }
- else
- if(temp.d.before(this.d))
- {
- return -1;
- }
- else
- {
- return 0;
- }
- }
-}
diff --git a/src/Chapter5/support/HashTagDS.java b/src/Chapter5/support/HashTagDS.java
deleted file mode 100644
index b338b6d..0000000
--- a/src/Chapter5/support/HashTagDS.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter5.support;
-
-/**
- *
- * @author shamanth
- */
-public class HashTagDS
-{
- public String groupname;
- public String[] tags;
- public String color;
-
-}
diff --git a/src/Chapter5/support/NetworkNode.java b/src/Chapter5/support/NetworkNode.java
deleted file mode 100644
index 4f662e8..0000000
--- a/src/Chapter5/support/NetworkNode.java
+++ /dev/null
@@ -1,49 +0,0 @@
-package Chapter5.support;
-
-
-import java.util.ArrayList;
-
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-/**
- *
- * @author shamanth
- */
-public class NetworkNode
-{
- public int id;
- public String username;
- public int size;
- public String catColor;
- public int group;
-// public int[] clusterID;
- public int catID;
-// public double lat;
-// public double lng;
- public ArrayList<String> data;
- public int level;
- public ArrayList<Integer> class_codes;
- public ArrayList<ToNodeInfo> tonodes;
-
- public NetworkNode Copy()
- {
- NetworkNode tempnode = new NetworkNode();
- tempnode.catColor = this.catColor;
- tempnode.id = this.id;
- tempnode.username= this.username;
- tempnode.size = this.size;
- tempnode.group = this.group;
-// tempnode.clusterID = this.clusterID;
- tempnode.catID = this.catID;
-// tempnode.lat = this.lat;
-// tempnode.lng = this.lng;
- tempnode.data = this.data;
-// tempnode.level = this.level;
- tempnode.class_codes = this.class_codes;
- tempnode.tonodes = this.tonodes;
- return tempnode;
- }
-}
diff --git a/src/Chapter5/support/NodeIDComparator.java b/src/Chapter5/support/NodeIDComparator.java
deleted file mode 100644
index 0b41ae7..0000000
--- a/src/Chapter5/support/NodeIDComparator.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter5.support;
-
-import java.util.Comparator;
-
-/**
- *
- * @author shamanth
- */
-public class NodeIDComparator implements Comparator
-{
-
- public int compare(Object o1, Object o2) {
- int id1 = ((NetworkNode) o1).id;
- int id2 = ((NetworkNode) o2).id;
- if(id1>id2)
- {
- return 1;
- }
- else
- if(id1<id2)
- return -1;
- else
- return 0;
- }
-
-
-}
diff --git a/src/Chapter5/support/NodeSizeComparator.java b/src/Chapter5/support/NodeSizeComparator.java
deleted file mode 100644
index 23ecb4e..0000000
--- a/src/Chapter5/support/NodeSizeComparator.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter5.support;
-import java.util.Comparator;
-
-/**
- *
- * @author shamanth
- */
-public class NodeSizeComparator implements Comparator
-{
- public int compare(Object o1, Object o2)
- {
- int size1 = ((NetworkNode) o1).size;
- int size2 = ((NetworkNode) o2).size;
- if(size1>size2)
- {
- return 1;
- }
- if(size1<size2)
- return -1;
- else
- return 0;
- }
-
-}
diff --git a/src/Chapter5/support/ToNodeInfo.java b/src/Chapter5/support/ToNodeInfo.java
deleted file mode 100644
index 725a10a..0000000
--- a/src/Chapter5/support/ToNodeInfo.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter5.support;
-
-/**
- *
- * @author shamanth
- */
-public class ToNodeInfo
-{
- public int tonodeid;
- public String text;
- public String tousername;
- public String date;
- public int class_code;
- public int catID;
- public String catColor;
- //this is the default direction invert option. If the library adds nodes to the adjacency then that should be set to true in the client side
-// public boolean direction = false;
-}
diff --git a/src/Chapter5/support/Tweet.java b/src/Chapter5/support/Tweet.java
deleted file mode 100644
index be53166..0000000
--- a/src/Chapter5/support/Tweet.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter5.support;
-
-/**
- *
- * @author shamanth
- */
-public class Tweet {
- public String text;
- public long id;
- public double lat;
- public double lng;
- public String pubdate;
- public String user;
- public int catID;
- public String catColor;
-}
diff --git a/src/Chapter5/text/EventSummaryExtractor.java b/src/Chapter5/text/EventSummaryExtractor.java
deleted file mode 100644
index e76f42e..0000000
--- a/src/Chapter5/text/EventSummaryExtractor.java
+++ /dev/null
@@ -1,269 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.text;
-
-import Chapter5.support.DateInfo;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class EventSummaryExtractor
-{
-
- final String DEF_INFILENAME = "ows.json";
- HashMap<String,ArrayList<String>> CATEGORIES = new HashMap<String,ArrayList<String>>();
- SimpleDateFormat twittersdm = new SimpleDateFormat("EEE MMM dd HH:mm:ss Z yyyy");
- SimpleDateFormat dayhoursdm = new SimpleDateFormat("yyyy-MM-dd:HH");
-// SimpleDateFormat daysdm = new SimpleDateFormat("MM/dd/yyyy");
- SimpleDateFormat hoursdm = new SimpleDateFormat("HH");
-
- /**
-     * Populates the CATEGORIES map with the keyword lists used to classify tweets.
- */
- public void InitializeCategories()
- {
- ArrayList<String> people = new ArrayList<String>();
- people.add("protesters");
- people.add("people");
- CATEGORIES.put("People",people);
- ArrayList<String> police = new ArrayList<String>();
- police.add("police");
- police.add("cops");
- police.add("nypd");
- police.add("raid");
- CATEGORIES.put("Police",police);
- ArrayList<String> media = new ArrayList<String>();
- media.add("press");
- media.add("news");
- media.add("media");
- CATEGORIES.put("Media",media);
- ArrayList<String> city = new ArrayList<String>();
- city.add("nyc");
- city.add("zucotti");
- city.add("park");
- CATEGORIES.put("Location",city);
- ArrayList<String> judiciary = new ArrayList<String>();
- judiciary.add("judge");
- judiciary.add("eviction");
- judiciary.add("order");
- judiciary.add("court");
- CATEGORIES.put("Judiciary", judiciary);
- }
-
- /**
-     * Counts, for each hour, the number of tweets matching each category and packs the counts, coordinates, and axis labels for plotting.
-     * @param filename File containing a list of tweets as JSON objects
-     * @return a JSONObject containing the axis labels, coordinates, and data values of the category trends
- */
- public JSONObject ExtractCategoryTrends(String filename)
- {
- JSONObject result = new JSONObject();
- try {
- BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8"));
- String temp = "";
- Set<String> catkeys = CATEGORIES.keySet();
- HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>();
- while((temp = br.readLine())!=null)
- {
- Date d = new Date();
- try {
- JSONObject jobj = new JSONObject(temp);
- //Published time
- if(!jobj.isNull("created_at"))
- {
- String time = "";
- try {
- time = jobj.getString("created_at");
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- if(time.isEmpty())
- {
- continue;
- }
- else
- {
- try {
- d = twittersdm.parse(time);
- } catch (ParseException ex) {
- continue;
- }
- }
- }
- else
- if(!jobj.isNull("timestamp"))
- {
- long time = new Date().getTime();
- try{
- time = jobj.getLong("timestamp");
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- d = new Date();
- d.setTime(time);
- }
- String datestr = dayhoursdm.format(d);
- String text = jobj.getString("text").toLowerCase();
-// System.out.println(text);
- for(String key:catkeys)
- {
- ArrayList<String> words = CATEGORIES.get(key);
- for(String word:words)
- {
- if(text.contains(word))
- {
- HashMap<String,Integer> categorycount = new HashMap<String,Integer>();
- if(datecount.containsKey(datestr))
- {
- categorycount = datecount.get(datestr);
- }
- if(categorycount.containsKey(key))
- {
- categorycount.put(key, categorycount.get(key)+1);
- }
- else
- {
- categorycount.put(key, 1);
- }
- //update the categorycount for the specific date
- datecount.put(datestr, categorycount);
- break;
- }
- }
- }
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- //sort the dates
- Set<String> datekeys = datecount.keySet();
- ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>();
- for(String date:datekeys)
- {
- Date d = null;
- try {
- d = dayhoursdm.parse(date);
- } catch (ParseException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- if(d!=null)
- {
- DateInfo info = new DateInfo();
- info.d = d;
- info.catcounts = datecount.get(date);
- dinfos.add(info);
- }
- }
- Collections.sort(dinfos, Collections.reverseOrder());
- try {
- result.put("axisxstep", dinfos.size()-1);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- try {
- result.put("axisystep", CATEGORIES.size()-1);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- JSONArray xcoordinates = new JSONArray();
- JSONArray ycoordinates = new JSONArray();
- //now add the data and the axis labels
- JSONArray axisxlabels = new JSONArray();
- JSONArray axisylabels = new JSONArray();
- JSONArray data = new JSONArray();
- for(String key:catkeys)
- {
- axisylabels.put(key);
- }
-            //counters to mark the indices of the values added to the data field: j indexes the date (x coordinate) and i indexes the category (y coordinate)
- int i=0,j=0;
-
- for(DateInfo date:dinfos)
- {
- String strdate = hoursdm.format(date.d);
- axisxlabels.put(strdate);
- HashMap<String,Integer> catcounts = date.catcounts;
- for(String key:catkeys)
- {
- xcoordinates.put(j);
- ycoordinates.put(i++);
- if(catcounts.containsKey(key))
- {
- data.put(catcounts.get(key));
- }
- else
- {
- data.put(0);
- }
- }
-                //reset the category counter as we move to the next date
- i=0;
- j++;
- }
- try {
- result.put("xcoordinates", xcoordinates);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- try {
- result.put("ycoordinates", ycoordinates);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- try {
- result.put("axisxlabels", axisxlabels);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- try {
- result.put("axisylabels", axisylabels);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- try {
- result.put("data", data);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- br.close();
- } catch (IOException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- return result;
- }
-
- public static void main(String[] args)
- {
- EventSummaryExtractor ese = new EventSummaryExtractor();
- String infilename = ese.DEF_INFILENAME;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- }
- ese.InitializeCategories();
- System.out.println(ese.ExtractCategoryTrends(infilename).toString());
- }
-}
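
ExtractCategoryTrends above packs the hourly category counts into parallel coordinate and data arrays for plotting. The sketch below builds the same layout for a hypothetical two-hour, two-category slice; the labels and counts are invented.

    import org.json.JSONArray;
    import org.json.JSONException;
    import org.json.JSONObject;

    //Builds the parallel-array layout produced by ExtractCategoryTrends: data[k] holds the
    //count for the cell at (xcoordinates[k], ycoordinates[k]), where x indexes the hour and
    //y indexes the category.
    public class CategoryTrendLayoutDemo
    {
        public static void main(String[] args) throws JSONException
        {
            String[] hours = {"14", "15"};        //x axis labels (hour of day), invented
            String[] cats = {"Police", "Media"};  //y axis labels (categories)
            int[][] counts = {{12, 3}, {7, 9}};   //counts[hour][category], invented

            JSONArray axisxlabels = new JSONArray();
            JSONArray axisylabels = new JSONArray();
            JSONArray xs = new JSONArray();
            JSONArray ys = new JSONArray();
            JSONArray data = new JSONArray();
            for (String h : hours)
            {
                axisxlabels.put(h);
            }
            for (String c : cats)
            {
                axisylabels.put(c);
            }
            for (int x = 0; x < hours.length; x++)
            {
                for (int y = 0; y < cats.length; y++)
                {
                    xs.put(x);
                    ys.put(y);
                    data.put(counts[x][y]);
                }
            }
            JSONObject result = new JSONObject();
            result.put("axisxstep", hours.length - 1);
            result.put("axisystep", cats.length - 1);
            result.put("axisxlabels", axisxlabels);
            result.put("axisylabels", axisylabels);
            result.put("xcoordinates", xs);
            result.put("ycoordinates", ys);
            result.put("data", data);
            System.out.println(result.toString());
        }
    }
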
diff --git a/src/Chapter5/text/ExtractTopKeywords.java b/src/Chapter5/text/ExtractTopKeywords.java
deleted file mode 100644
index 8ab412a..0000000
--- a/src/Chapter5/text/ExtractTopKeywords.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.text;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-import utils.Tags;
-import utils.TextUtils;
-
-public class ExtractTopKeywords
-{
-
- static final String DEF_INFILENAME = "ows.json";
- static final int DEF_K = 60;
-
- /**
- * Extracts the most frequently occurring keywords from the tweets by processing them sequentially. Stopwords are ignored.
- * @param inFilename File containing a list of tweets as JSON objects
- * @param K Count of the top keywords to return
- * @param ignoreHashtags If true, hashtags are not considered while counting the most frequent keywords
- * @param ignoreUsernames If true, usernames are not considered while counting the most frequent keywords
- * @param tu TextUtils object which handles the stopwords
-     * @return a JSONArray of JSONObjects. Each object contains two elements, "text" and "size", referring to the word and its frequency
- */
- public JSONArray GetTopKeywords(String inFilename, int K, boolean ignoreHashtags, boolean ignoreUsernames, TextUtils tu)
- {
- HashMap<String, Integer> words = new HashMap<String,Integer>();
- BufferedReader br = null;
- try{
- br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- try{
- JSONObject tweetobj = new JSONObject(temp);
- if(!tweetobj.isNull("text"))
- {
- String text = tweetobj.getString("text");
- //System.out.println(text);
- text = text.toLowerCase().replaceAll("\\s+", " ");
-                        /** Step 1: Tokenize tweets into individual words and count their frequency in the corpus.
- * Remove stop words and special characters. Ignore user names and hashtags if the user chooses to.
- */
- HashMap<String,Integer> tokens = tu.TokenizeText(text,ignoreHashtags,ignoreUsernames);
- Set<String> keys = tokens.keySet();
- for(String key:keys)
- {
- if(words.containsKey(key))
- {
- words.put(key, words.get(key)+tokens.get(key));
- }
- else
- {
- words.put(key, tokens.get(key));
- }
- }
- }
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }finally{
- try {
-                if(br!=null) br.close();
- } catch (IOException ex) {
- Logger.getLogger(ExtractTopKeywords.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- Set<String> keys = words.keySet();
- ArrayList<Tags> tags = new ArrayList<Tags>();
- for(String key:keys)
- {
- Tags tag = new Tags();
- tag.setKey(key);
- tag.setValue(words.get(key));
- tags.add(tag);
- }
- // Step 2: Sort the words in descending order of frequency
- Collections.sort(tags, Collections.reverseOrder());
- JSONArray cloudwords = new JSONArray();
- int numwords = K;
- if(tags.size()<numwords)
- {
- numwords = tags.size();
- }
- for(int i=0;i<numwords;i++)
- {
- JSONObject wordfreq = new JSONObject();
- Tags tag = tags.get(i);
- try{
- wordfreq.put("text", tag.getKey());
- wordfreq.put("size",tag.getValue());
- cloudwords.put(wordfreq);
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- return cloudwords;
- }
-
- public static void main(String[] args)
- {
- ExtractTopKeywords etk = new ExtractTopKeywords();
-
- //Initialize the TextUtils class which handles all the processing of text.
- TextUtils tu = new TextUtils();
- tu.LoadStopWords("C:/tweettracker/stopwords.txt");
- String infilename = DEF_INFILENAME;
- int K = DEF_K;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- if(args.length>=2&&!args[1].isEmpty())
- {
- try{
- K = Integer.parseInt(args[1]);
- }catch(NumberFormatException ex)
- {
- ex.printStackTrace();
- }
- }
- }
- System.out.println(etk.GetTopKeywords(infilename, K, false,true,tu));
- }
-
-}
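
GetTopKeywords above depends on utils.TextUtils and utils.Tags, which are not part of this diff. The simplified sketch below reproduces the count-and-sort flow with a plain whitespace tokenizer standing in for TextUtils; the sample tweets are invented and no stop-word removal is performed.

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    //Simplified stand-in for the keyword counting above: tokenize, count, sort by frequency,
    //and keep the top K words.
    public class TopKeywordsSketch
    {
        public static void main(String[] args)
        {
            String[] tweets = {"occupy wall street", "police clear the park", "occupy the park"};  //invented
            int K = 3;
            Map<String, Integer> counts = new HashMap<String, Integer>();
            for (String t : tweets)
            {
                for (String w : t.toLowerCase().split("\\s+"))
                {
                    Integer c = counts.get(w);
                    counts.put(w, c == null ? 1 : c + 1);
                }
            }
            List<Map.Entry<String, Integer>> entries = new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
            Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>()
            {
                public int compare(Map.Entry<String, Integer> a, Map.Entry<String, Integer> b)
                {
                    return b.getValue() - a.getValue();  //descending frequency
                }
            });
            for (int i = 0; i < Math.min(K, entries.size()); i++)
            {
                System.out.println(entries.get(i).getKey() + " " + entries.get(i).getValue());
            }
        }
    }
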
diff --git a/src/Chapter5/trends/ControlChartExample.java b/src/Chapter5/trends/ControlChartExample.java
deleted file mode 100644
index 2df814f..0000000
--- a/src/Chapter5/trends/ControlChartExample.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class ControlChartExample
-{
- static final String DEF_INFILENAME = "ows.json";
- static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm");
-
- public JSONArray GenerateDataTrend(String inFilename)
- {
- BufferedReader br = null;
- JSONArray result = new JSONArray();
- HashMap<String,Integer> datecount = new HashMap<String,Integer>();
- try{
- br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- try {
- JSONObject jobj = new JSONObject(temp);
- long timestamp = jobj.getLong("timestamp");
- Date d = new Date(timestamp);
- String strdate = SDM.format(d);
- if(datecount.containsKey(strdate))
- {
- datecount.put(strdate, datecount.get(strdate)+1);
- }
- else
- {
- datecount.put(strdate, 1);
- }
- } catch (JSONException ex) {
- Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>();
- Set<String> keys = datecount.keySet();
- for(String key:keys)
- {
- DateInfo dinfo = new DateInfo();
- try {
- dinfo.d = SDM.parse(key);
- } catch (ParseException ex) {
- ex.printStackTrace();
- continue;
- }
- dinfo.count = datecount.get(key);
- dinfos.add(dinfo);
- }
- double mean = this.GetMean(dinfos);
- double stddev = this.GetStandardDev(dinfos, mean);
- Collections.sort(dinfos);
- //Normalize the trend by subtracting the mean and dividing by standard deviation to get a distribution with 0 mean and a standard deviation of 1
- for(DateInfo dinfo:dinfos)
- {
- try{
- JSONObject jobj = new JSONObject();
- jobj.put("date", SDM.format(dinfo.d));
- jobj.put("count", (dinfo.count-mean)/stddev);
- jobj.put("mean", 0);
- jobj.put("stdev+3", 3);
- jobj.put("stdev-3", -3);
- result.put(jobj);
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }finally{
- try {
-                if(br!=null) br.close();
- } catch (IOException ex) {
- Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- return result;
- }
-
- public double GetStandardDev(ArrayList<DateInfo> dateinfos,double mean)
- {
- double intsum = 0;
- int numperiods = dateinfos.size();
- for(DateInfo dinfo:dateinfos)
- {
- intsum+=Math.pow((dinfo.count - mean),2);
- }
-// System.out.println(Math.sqrt((double)intsum/timePeriodCounts.size()));
- return Math.sqrt((double)intsum/numperiods);
- }
-
- public double GetMean(ArrayList<DateInfo> dateinfos)
- {
- int numperiods = dateinfos.size();
- int sum = 0;
- for(DateInfo dinfo:dateinfos)
- {
- sum +=dinfo.count;
- }
-// System.out.println((double)sum/numPeriods);
- return ((double)sum/numperiods);
- }
-
- public static void main(String[] args)
- {
- ControlChartExample cce = new ControlChartExample();
- String infilename = DEF_INFILENAME;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- }
- System.out.println(cce.GenerateDataTrend(infilename));
- }
-
-}
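
The control chart above reduces each per-minute count to (count - mean) / stddev and emits the +/-3 standard deviation control limits alongside it. The standalone sketch below works through the same arithmetic on invented counts and flags any point outside the limits.

    //Standalone illustration of the control-chart normalization used above.
    public class ControlChartMathDemo
    {
        public static void main(String[] args)
        {
            //invented per-minute tweet counts; the last value is a deliberate spike
            double[] counts = {40, 42, 38, 41, 39, 40, 41, 39, 42, 38, 200};
            double sum = 0;
            for (double c : counts)
            {
                sum += c;
            }
            double mean = sum / counts.length;
            double ss = 0;
            for (double c : counts)
            {
                ss += Math.pow(c - mean, 2);
            }
            double stddev = Math.sqrt(ss / counts.length);
            for (double c : counts)
            {
                double z = (c - mean) / stddev;
                System.out.println(c + " -> " + z + (Math.abs(z) > 3 ? "  (outside control limits)" : ""));
            }
        }
    }
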
diff --git a/src/Chapter5/trends/DateInfo.java b/src/Chapter5/trends/DateInfo.java
deleted file mode 100644
index 209f4a3..0000000
--- a/src/Chapter5/trends/DateInfo.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.util.Date;
-
-public class DateInfo implements Comparable
-{
- public Date d;
- public int count;
-
- public int compareTo(Object o) {
- DateInfo temp = (DateInfo) o;
- if(temp.d.after(this.d))
- {
- return -1;
- }
- else
- if(temp.d.before(this.d))
- {
- return 1;
- }
- else
- {
- return 0;
- }
- }
-}
diff --git a/src/Chapter5/trends/ExtractDatasetTrend.java b/src/Chapter5/trends/ExtractDatasetTrend.java
deleted file mode 100644
index dad7f27..0000000
--- a/src/Chapter5/trends/ExtractDatasetTrend.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class ExtractDatasetTrend
-{
- static final String DEF_INFILENAME = "ows.json";
- // Date pattern used to count the volume of tweets
- final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm");
-
- public JSONArray GenerateDataTrend(String inFilename)
- {
- BufferedReader br = null;
- JSONArray result = new JSONArray();
- HashMap<String,Integer> datecount = new HashMap<String,Integer>();
- try{
- br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- try {
- JSONObject jobj = new JSONObject(temp);
- long timestamp = jobj.getLong("timestamp");
- Date d = new Date(timestamp);
- String strdate = SDM.format(d);
- if(datecount.containsKey(strdate))
- {
- datecount.put(strdate, datecount.get(strdate)+1);
- }
- else
- {
- datecount.put(strdate, 1);
- }
- } catch (JSONException ex) {
- Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- /** DateInfo consists of a date string and the corresponding count.
- * It also implements a Comparator for sorting by date
- */
- ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>();
- Set<String> keys = datecount.keySet();
- for(String key:keys)
- {
- DateInfo dinfo = new DateInfo();
- try {
- dinfo.d = SDM.parse(key);
- } catch (ParseException ex) {
- ex.printStackTrace();
- continue;
- }
- dinfo.count = datecount.get(key);
- dinfos.add(dinfo);
- }
- Collections.sort(dinfos);
- // Format and return the date string and the corresponding count
- for(DateInfo dinfo:dinfos)
- {
- try{
- JSONObject jobj = new JSONObject();
- jobj.put("date", SDM.format(dinfo.d));
- jobj.put("count", dinfo.count);
- result.put(jobj);
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }finally{
- try {
-                if(br!=null) br.close();
- } catch (IOException ex) {
- Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- return result;
- }
-
- public static void main(String[] args)
- {
- ExtractDatasetTrend edt = new ExtractDatasetTrend();
-
- String infilename = DEF_INFILENAME;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- }
- System.out.println(edt.GenerateDataTrend(infilename));
- }
-
-}
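
ExtractDatasetTrend above buckets tweets by formatting each timestamp with the "dd MMM yyyy HH:mm" pattern and counting how many tweets fall into each key. A minimal sketch of that bucketing step, using invented timestamps:

    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.HashMap;

    //Standalone illustration of the minute-level bucketing used above: each timestamp is
    //reduced to a "dd MMM yyyy HH:mm" key and the count for that key is incremented.
    public class MinuteBucketDemo
    {
        public static void main(String[] args)
        {
            SimpleDateFormat sdm = new SimpleDateFormat("dd MMM yyyy HH:mm");
            long[] timestamps = {1321372800000L, 1321372815000L, 1321372875000L};  //invented epoch millis
            HashMap<String, Integer> datecount = new HashMap<String, Integer>();
            for (long ts : timestamps)
            {
                String key = sdm.format(new Date(ts));
                Integer c = datecount.get(key);
                datecount.put(key, c == null ? 1 : c + 1);
            }
            System.out.println(datecount);  //the first two timestamps share a minute, the third starts a new one
        }
    }
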
diff --git a/src/Chapter5/trends/SparkLineExample.java b/src/Chapter5/trends/SparkLineExample.java
deleted file mode 100644
index 4a0164b..0000000
--- a/src/Chapter5/trends/SparkLineExample.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class SparkLineExample
-{
- static final String DEF_INFILENAME = "ows.json";
- static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH");
-
- public JSONObject GenerateDataTrend(String inFilename, ArrayList<String> keywords)
- {
- BufferedReader br = null;
- JSONObject result = new JSONObject();
- HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>();
- try{
- br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- try {
- JSONObject jobj = new JSONObject(temp);
- String text = jobj.getString("text").toLowerCase();
- long timestamp = jobj.getLong("timestamp");
- Date d = new Date(timestamp);
- String strdate = SDM.format(d);
- for(String word:keywords)
- {
- if(text.contains(word))
- {
- HashMap<String,Integer> wordcount = new HashMap<String,Integer>();
- if(datecount.containsKey(strdate))
- {
- wordcount = datecount.get(strdate);
- }
- if(wordcount.containsKey(word))
- {
- wordcount.put(word, wordcount.get(word)+1);
- }
- else
- {
- wordcount.put(word, 1);
- }
- //update the wordcount for the specific date
- datecount.put(strdate, wordcount);
- }
- }
- } catch (JSONException ex) {
- Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- //sort the dates
- ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>();
- Set<String> keys = datecount.keySet();
- for(String key:keys)
- {
- TCDateInfo dinfo = new TCDateInfo();
- try {
- dinfo.d = SDM.parse(key);
- } catch (ParseException ex) {
- ex.printStackTrace();
- continue;
- }
- dinfo.wordcount = datecount.get(key);
- dinfos.add(dinfo);
- }
- Collections.sort(dinfos);
- JSONArray[] tseriesvals = new JSONArray[keywords.size()];
- for(int i=0;i<tseriesvals.length;i++)
- {
- tseriesvals[i] = new JSONArray();
- }
- //prepare the output
- for(TCDateInfo date:dinfos)
- {
- HashMap<String,Integer> wordcount = date.wordcount;
- int counter=0;
- for(String word:keywords)
- {
- if(wordcount.containsKey(word))
- {
- tseriesvals[counter].put(wordcount.get(word));
- }
- else
- {
- tseriesvals[counter].put(0);
- }
- counter++;
- }
- }
- int counter=0;
- for(String word:keywords)
- {
- try {
- result.put(word, tseriesvals[counter]);
- } catch (JSONException ex) {
- Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- counter++;
- }
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }finally{
- try {
-                if(br!=null) br.close();
- } catch (IOException ex) {
- Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- return result;
- }
-
- public static void main(String[] args)
- {
- SparkLineExample sle = new SparkLineExample();
- ArrayList<String> words = new ArrayList<String>();
- String infilename = DEF_INFILENAME;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- for(int i=1;i<args.length;i++)
- {
- if(args[i]!=null&&!args[i].isEmpty())
- {
- words.add(args[i]);
- }
- }
- }
- if(words.isEmpty())
- {
- words.add("#nypd");
- words.add("#ows");
- }
- System.out.println(sle.GenerateDataTrend(infilename,words));
- }
-
-}
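
GenerateDataTrend above returns one aligned series per keyword, with a zero wherever a keyword did not occur in a time bucket. The sketch below shows that output shape for invented keywords and counts.

    import org.json.JSONArray;
    import org.json.JSONException;
    import org.json.JSONObject;

    //Illustrates the sparkline output shape: one JSONArray of counts per keyword,
    //aligned across the same sequence of time buckets. All values are invented.
    public class SparkLineShapeDemo
    {
        public static void main(String[] args) throws JSONException
        {
            String[] keywords = {"#ows", "#nypd"};
            int[][] hourlyCounts = {{5, 9, 3}, {1, 0, 4}};  //counts per hour, per keyword
            JSONObject result = new JSONObject();
            for (int k = 0; k < keywords.length; k++)
            {
                JSONArray series = new JSONArray();
                for (int h = 0; h < hourlyCounts[k].length; h++)
                {
                    series.put(hourlyCounts[k][h]);
                }
                result.put(keywords[k], series);
            }
            System.out.println(result.toString());  //e.g. {"#ows":[5,9,3],"#nypd":[1,0,4]}
        }
    }
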
diff --git a/src/Chapter5/trends/TCDateInfo.java b/src/Chapter5/trends/TCDateInfo.java
deleted file mode 100644
index 88450e9..0000000
--- a/src/Chapter5/trends/TCDateInfo.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.util.Date;
-import java.util.HashMap;
-
-public class TCDateInfo implements Comparable
-{
- public Date d;
- public HashMap<String,Integer> wordcount = new HashMap<String,Integer>();
-
- public int compareTo(Object o) {
- TCDateInfo temp = (TCDateInfo) o;
- if(temp.d.after(this.d))
- {
- return -1;
- }
- else
- if(temp.d.before(this.d))
- {
- return 1;
- }
- else
- {
- return 0;
- }
- }
-
-}
diff --git a/src/Chapter5/trends/TrendComparisonExample.java b/src/Chapter5/trends/TrendComparisonExample.java
deleted file mode 100644
index 20991cd..0000000
--- a/src/Chapter5/trends/TrendComparisonExample.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class TrendComparisonExample
-{
- static final String DEF_INFILENAME = "ows.json";
- static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm");
-
- public JSONArray GenerateDataTrend(String inFilename, ArrayList<String> keywords)
- {
- BufferedReader br = null;
- JSONArray result = new JSONArray();
- HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>();
- try{
- br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- try {
- JSONObject jobj = new JSONObject(temp);
- String text = jobj.getString("text").toLowerCase();
- long timestamp = jobj.getLong("timestamp");
- Date d = new Date(timestamp);
- String strdate = SDM.format(d);
- for(String word:keywords)
- {
- if(text.contains(word))
- {
- HashMap<String,Integer> wordcount = new HashMap<String,Integer>();
- if(datecount.containsKey(strdate))
- {
- wordcount = datecount.get(strdate);
- }
- if(wordcount.containsKey(word))
- {
- wordcount.put(word, wordcount.get(word)+1);
- }
- else
- {
- wordcount.put(word, 1);
- }
- //update the wordcount for the specific date
- datecount.put(strdate, wordcount);
- }
- }
- } catch (JSONException ex) {
- Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- //sort the dates
- ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>();
- Set<String> keys = datecount.keySet();
- for(String key:keys)
- {
- TCDateInfo dinfo = new TCDateInfo();
- try {
- dinfo.d = SDM.parse(key);
- } catch (ParseException ex) {
- ex.printStackTrace();
- continue;
- }
- dinfo.wordcount = datecount.get(key);
- dinfos.add(dinfo);
- }
- Collections.sort(dinfos);
- //prepare the output
- for(TCDateInfo date:dinfos)
- {
- JSONObject item = new JSONObject();
- String strdate = SDM.format(date.d);
- try{
- item.put("date",strdate);
- HashMap<String,Integer> wordcount = date.wordcount;
- for(String word:keywords)
- {
- if(wordcount.containsKey(word))
- {
- item.put(word, wordcount.get(word));
- }
- else
- {
- item.put(word, 0);
- }
- }
- result.put(item);
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }finally{
- try {
-                if(br!=null) br.close();
- } catch (IOException ex) {
- Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- return result;
- }
-
- public static void main(String[] args)
- {
- TrendComparisonExample tce = new TrendComparisonExample();
- ArrayList<String> words = new ArrayList<String>();
- String infilename = DEF_INFILENAME;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- for(int i=1;i<args.length;i++)
- {
- if(args[i]!=null&&!args[i].isEmpty())
- {
- words.add(args[i]);
- }
- }
- }
- if(words.isEmpty())
- {
- words.add("#nypd");
- words.add("#ows");
- }
- System.out.println(tce.GenerateDataTrend(infilename,words));
- }
-
-}
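
TrendComparisonExample's GenerateDataTrend returns one object per time bucket, carrying the bucket label and a count for every keyword. A short sketch of that row-oriented shape, with invented dates and counts:

    import org.json.JSONArray;
    import org.json.JSONException;
    import org.json.JSONObject;

    //Illustrates the row-oriented output of the trend comparison: each row holds the
    //"date" label plus a count for every keyword. All values are invented.
    public class TrendComparisonShapeDemo
    {
        public static void main(String[] args) throws JSONException
        {
            String[] dates = {"15 Nov 2011 16:00", "15 Nov 2011 16:01"};
            String[] keywords = {"#ows", "#nypd"};
            int[][] counts = {{5, 1}, {9, 0}};  //counts[date][keyword]
            JSONArray result = new JSONArray();
            for (int d = 0; d < dates.length; d++)
            {
                JSONObject row = new JSONObject();
                row.put("date", dates[d]);
                for (int k = 0; k < keywords.length; k++)
                {
                    row.put(keywords[k], counts[d][k]);
                }
                result.put(row);
            }
            System.out.println(result.toString());
        }
    }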