diff options
Diffstat (limited to 'src/Chapter5')
-rw-r--r-- | src/Chapter5/network/CreateD3Network.java | 716 | ||||
-rw-r--r-- | src/Chapter5/network/ExtractUserTagNetwork.java | 173 | ||||
-rw-r--r-- | src/Chapter5/support/DateInfo.java | 30 | ||||
-rw-r--r-- | src/Chapter5/support/HashTagDS.java | 18 | ||||
-rw-r--r-- | src/Chapter5/support/NetworkNode.java | 49 | ||||
-rw-r--r-- | src/Chapter5/support/NodeIDComparator.java | 32 | ||||
-rw-r--r-- | src/Chapter5/support/NodeSizeComparator.java | 29 | ||||
-rw-r--r-- | src/Chapter5/support/ToNodeInfo.java | 23 | ||||
-rw-r--r-- | src/Chapter5/support/Tweet.java | 21 | ||||
-rw-r--r-- | src/Chapter5/text/EventSummaryExtractor.java | 269 | ||||
-rw-r--r-- | src/Chapter5/text/ExtractTopKeywords.java | 151 | ||||
-rw-r--r-- | src/Chapter5/trends/ControlChartExample.java | 144 | ||||
-rw-r--r-- | src/Chapter5/trends/DateInfo.java | 29 | ||||
-rw-r--r-- | src/Chapter5/trends/ExtractDatasetTrend.java | 120 | ||||
-rw-r--r-- | src/Chapter5/trends/SparkLineExample.java | 163 | ||||
-rw-r--r-- | src/Chapter5/trends/TCDateInfo.java | 31 | ||||
-rw-r--r-- | src/Chapter5/trends/TrendComparisonExample.java | 155 |
17 files changed, 0 insertions, 2153 deletions
diff --git a/src/Chapter5/network/CreateD3Network.java b/src/Chapter5/network/CreateD3Network.java deleted file mode 100644 index d4c25af..0000000 --- a/src/Chapter5/network/CreateD3Network.java +++ /dev/null @@ -1,716 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ - -package Chapter5.network; - - -import Chapter5.support.HashTagDS; -import Chapter5.support.NetworkNode; -import Chapter5.support.NodeIDComparator; -import Chapter5.support.NodeSizeComparator; -import Chapter5.support.ToNodeInfo; -import Chapter5.support.Tweet; -import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import utils.TextUtils; - -/** - * - * @author shamanth - */ -public class CreateD3Network -{ - static final String DEF_INFILENAME = "ows.json"; - private String RTPATTERN = "rt @[_a-zA-Z0-9]+"; - private final int DEFAULT_NODE_SIZE = 0; -// private final int NODE_COUNT_LIMIT = 1; - //private final String[] node_color_scheme = new String[]{"#FFFFD9","#EDF8B1","#C7E9B4","#7FCDBB","#41B6C4","#1D91C0","#225EA8","#253494","#081D58"}; - //private final String[] node_color_scheme = new String[]{"#A6BDDB","#74A9CF","#3690C0","#0570B0","#045A8D","#023858"}; - - /** - * Extracts the users who have been retweeted using the RTPATTERN - * @param text - * @return - */ - public ArrayList<String> GetRTUsers(String text) - { - Pattern p = Pattern.compile(RTPATTERN, Pattern.CASE_INSENSITIVE); - Matcher m = p.matcher(text); - ArrayList<String> rtusers = new ArrayList<String>(); - while(m.find()) - { - String nuser = text.substring(m.start(),m.end()); - nuser = nuser.replaceAll("rt @|RT @", ""); -// nuser = nuser.replaceAll("RT @", ""); - rtusers.add(nuser.toLowerCase()); - } - return rtusers; - } - - /** - * Identifies the category to which the tweet belongs. Each category is defined by a group of words/hashtags - * @param tweet - * @param usercategories - * @return - */ - public int GetCategory(String tweet, HashTagDS[] usercategories) - { - HashMap<Integer,Integer> categoryvotes = new HashMap<Integer,Integer>(); - tweet = tweet.toLowerCase(); - int i=0; - for(HashTagDS cat:usercategories) - { - - for(String s :cat.tags) - { - if(tweet.indexOf(s)!=-1) - { - if(categoryvotes.containsKey(i)) - { - categoryvotes.put(i, categoryvotes.get(i)+1); - } - else - { - categoryvotes.put(i, 1); - } - } - } - i++; - } - Set<Integer> keyset = categoryvotes.keySet(); - int maxvote = 0; - //by default the tweet will be in the first category - int maxcategoryindex = 0; - for(int key:keyset) - { - if(categoryvotes.get(key)>maxvote) - { - maxvote = categoryvotes.get(key); - maxcategoryindex = key; - } - } - return maxcategoryindex; - } - - /** - * Converts the input jsonobject containing category descriptions to an array for processing. - * @param hashtagcoll JSONObject containing the list of hashtags, color, and the topic information - * @return An array of hashtags - */ - public HashTagDS[] ConvertJSONArrayToArray(JSONObject hashtagcoll) - { - HashTagDS[] hashtags = new HashTagDS[hashtagcoll.length()]; - int j=0; - try{ - if(hashtagcoll!=null) - { - Iterator keyit = hashtagcoll.keys(); - while(keyit.hasNext()) - { - HashTagDS ht = new HashTagDS(); - JSONObject tags = (JSONObject) hashtagcoll.get((String)keyit.next()); - ht.groupname = keyit.toString(); - ht.color = tags.getString("color"); - JSONArray tagjson = tags.getJSONArray("hts"); - ht.tags = new String[tagjson.length()]; - for(int i=0;i<tagjson.length();i++) - { - ht.tags[i] = tagjson.getString(i); - } - hashtags[j++] = ht; - } - } - }catch(JSONException ex) - { - ex.printStackTrace(); - } - return hashtags; - } - - /** - * Identifies the category of a node based on the content of his tweets(each tweet can be assigned a category based on it's text). A simple majority is sufficient to make this decision. - * @param tnfs - * @param hashtagarray - * @return - */ - public int GetMajorityTopicColor(NetworkNode tnfs,HashTagDS[] hashtagarray) - { - HashMap<Integer,Integer> catcount = new HashMap<Integer,Integer>(); - //if the node has no tolinks then look at the node that it retweeted to decide the color of the node - for(String tweet:tnfs.data) - { - int id = this.GetCategory(tweet, hashtagarray); - if(catcount.containsKey(id)) - { - catcount.put(id, catcount.get(id)+1); - } - else - catcount.put(id, 1); - } - Set<Integer> keys = catcount.keySet(); - int maxcatID = -1; - int maxcount = 0; - for(int k:keys) - { - if(maxcatID==-1) - { - maxcatID = k; - maxcount = catcount.get(k); - } - else - { - if(maxcount<catcount.get(k)) - { - maxcount = catcount.get(k); - maxcatID = k; - } - } - } - return maxcatID; - } - - /** - * Takes as input a JSON file and reads through the file sequentially to process and create a retweet network from the tweets. - * @param inFilename - * @param numNodeClasses - * @param hashtags category info containing hashtags - * @param num_nodes number of seed nodes to be included in the network - * @return a JSONObject consisting of nodes and links of the network - */ - public JSONObject ConvertTweetsToDiffusionPath(String inFilename,int numNodeClasses, - JSONObject hashtags, int num_nodes) - { - HashMap<String,NetworkNode> userconnections = new HashMap<String,NetworkNode>(); -// HashMap<String,Integer> tweet_class_codes = new HashMap<String,Integer>(); -// int tweet_class_counter = 1; - HashTagDS[] hashtagarray = ConvertJSONArrayToArray(hashtags); - BufferedReader br = null; - try{ - br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - JSONObject tweetobj; - try { - tweetobj = new JSONObject(temp); - } catch (JSONException ex) { - ex.printStackTrace(); - continue; - } - //Extract the tweet first - Tweet t = new Tweet(); - String text=""; - try { - text = TextUtils.GetCleanText(tweetobj.getString("text")).toLowerCase(); - } catch (JSONException ex) { - ex.printStackTrace(); - continue; - } - //Check that the tweet matches at least one of the topics - boolean groupmatch = false; - for(HashTagDS ht:hashtagarray) - { - String[] tags = ht.tags; - for(String tg:tags) - { - if(text.contains(tg)) - { - groupmatch = true; - break; - } - } - if(groupmatch) - { - break; - } - } - if(!groupmatch) - { - continue; - } - // - ArrayList<String> fromusers = new ArrayList<String>(); - if(!tweetobj.isNull("retweeted_status")) - { - JSONObject rtstatus; - try { - rtstatus = tweetobj.getJSONObject("retweeted_status"); - if(rtstatus.isNull("user")) - { - JSONObject rtuserobj = rtstatus.getJSONObject("user"); - try{ - fromusers.add(rtuserobj.get("screen_name").toString()); - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - } catch (JSONException ex) { - Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex); - } - } - else - { - //use the tweet text to retrieve the pattern "RT @username:" - fromusers = GetRTUsers(text); - } - if(fromusers.isEmpty()) - { - continue; - } - - //identify the class values to be applied to all the nodes and - //edges. -// String prunedtext = TextUtils.RemoveTwitterElements(text); -// Integer class_code = tweet_class_codes.get(prunedtext); -// if(class_code==null) -// { -// class_code = tweet_class_counter; -// tweet_class_codes.put(prunedtext, class_code); //set the unique id for this tweet -// tweet_class_counter++; -// } - t.text = TextUtils.RemoveRTElements(text); - if(!tweetobj.isNull("user")) - { - JSONObject userobj; - try { - userobj = tweetobj.getJSONObject("user"); - t.user = userobj.getString("screen_name").toLowerCase(); - } catch (JSONException ex) { - Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex); - } - } -// try { -// t.pubdate = String.valueOf(tweetobj.get("timestamp")); -// } catch (JSONException ex) { -// Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex); -// } - t.catColor = hashtagarray[t.catID].color; - //update the size of the from fromuser - int cur_level = 0; - for(int i=fromusers.size()-1;i>=0;i--) - { - String touser = ""; - if(i==0) - {//if this is the last user in the retweet sequence then use the user of the tweet as the next link - touser = t.user; - } - else - { //if there are still fromuser in the retweet chain then use them as the next link - touser = fromusers.get(i-1); - } - //don't add any selflinks - if(fromusers.get(i).equals(touser)) - { - continue; - } - NetworkNode fromuser = null; - if(userconnections.containsKey(fromusers.get(i))) - { - //from node already exists simply add this new connection to it - fromuser = userconnections.get(fromusers.get(i)); - } - else - { - //the from user was not found. add the node - fromuser = new NetworkNode(); - // fromuser.id = nodeid++; - fromuser.username = fromusers.get(i); - fromuser.tonodes = new ArrayList<ToNodeInfo>(); - fromuser.class_codes = new ArrayList<Integer>(); - fromuser.size = DEFAULT_NODE_SIZE; - fromuser.level = cur_level; - fromuser.data = new ArrayList<String>(); - fromuser.data.add(t.text); - //fromuser.category = ; - } -// if(!fromuser.class_codes.contains(class_code)) -// { -// //add the marker to from node if it does not have it already -// fromuser.class_codes.add(class_code); -// } - //if to node is not in the list then create it - NetworkNode tonode = null; - if(!userconnections.containsKey(touser)) - { - tonode = new NetworkNode(); - // System.out.println(touser+" "+nodeid); - // tonode.id= nodeid++; - tonode.username = touser; - tonode.tonodes= new ArrayList<ToNodeInfo>(); - tonode.class_codes = new ArrayList<Integer>(); - tonode.catID = t.catID; - tonode.catColor = t.catColor; - tonode.size = DEFAULT_NODE_SIZE; - tonode.data= new ArrayList<String>(); - tonode.data.add(t.text); - tonode.level = cur_level+1; - //add the classcode to the node if it doesn't already exist -// if(!tonode.class_codes.contains(class_code)) -// { -// tonode.class_codes.add(class_code); -// } - //add the touser info - userconnections.put(touser, tonode); - } - else - { - tonode = userconnections.get(touser); - tonode.data.add(t.text); - if(tonode.level<cur_level+1) - { - tonode.level = cur_level; - } - //add the classcode to the node if it doesn't already exist -// if(!tonode.class_codes.contains(class_code)) -// { -// tonode.class_codes.add(class_code); -// } - } - ToNodeInfo inf = new ToNodeInfo(); - inf.tonodeid = tonode.id; - inf.text = t.text; -// inf.date = t.pubdate; -// inf.class_code = class_code; - inf.tousername = touser; - inf.catID = t.catID; - inf.catColor = t.catColor; - fromuser.tonodes.add(inf); - //update from node size - fromuser.size++; - //add back updated fromuser - userconnections.put(fromusers.get(i), fromuser); - //update the level for next iteration - cur_level++; - } - } - }catch(IOException ex) - { - ex.printStackTrace(); - } - Set<String> keys = userconnections.keySet(); - ArrayList<NetworkNode> returnnodes = new ArrayList<NetworkNode>(); - //its +1 because nodes with size 0 are not going to be used to calculate the class - int min = DEFAULT_NODE_SIZE+1; - int max = DEFAULT_NODE_SIZE+1; - for(String k:keys) - { - NetworkNode n = userconnections.get(k); - int maxcat = GetMajorityTopicColor(n,hashtagarray); - n.catID = maxcat; - n.catColor = hashtagarray[maxcat].color; - userconnections.put(k, n); - // -// if(n.size==0) -// {//mark the node as a zero node -// n.class_codes.add(-1); -// } -// else -// { - if(n.size>max) - { - max = n.size; - } - if(n.size<min) - { - min = n.size; - } -// } - returnnodes.add(n); - } - //create node groups to assign unique colors to nodes in different Categories based upon the number of connections - ArrayList<NetworkNode> nodes = ComputeGroupsSqrt(returnnodes, max, min, numNodeClasses); - Collections.sort(nodes,Collections.reverseOrder(new NodeSizeComparator())); - //select how many nodes to show. - int nodes_to_visit = 0; - if(nodes.size()>=num_nodes) - { - nodes_to_visit = num_nodes; - } - else - { - nodes_to_visit = nodes.size(); - } - - HashMap<String,NetworkNode> prunednodes = new HashMap<String,NetworkNode>(); - HashMap<String,Integer> nodeidlist = new HashMap<String,Integer>(); - int nodeid = 0; //node nodeid counter - for(int k=0;k<nodes_to_visit;k++) - { - NetworkNode nd = nodes.get(k); -// System.out.println("visiting node "+nd.username); - nd.level = 0; - HashMap<String,NetworkNode> rtnodes = GetNextHopConnections(userconnections,nd,new HashMap<String,NetworkNode>()); - Set<String> names = rtnodes.keySet(); - for(String n:names) - { - if(!prunednodes.containsKey(n)) - { - NetworkNode newnode = rtnodes.get(n); - if(newnode.size>0) - { - prunednodes.put(n, newnode); - nodeidlist.put(n, nodeid++); - } - } - } - } - - /** We now have all the nodes of the network. compute their ids sequentially - * and assign them to the respective nodes. Simultaneously compact the nodes - * of the network to remove all nodes which have not been retweeted and are - * of size 0 - */ - - Set<String> allnodes = prunednodes.keySet(); -// System.out.println(prunednodes.size()); - ArrayList<NetworkNode> finalnodes = new ArrayList<NetworkNode>(); -// HashMap<Integer,ArrayList<Integer>> conninfo = new HashMap<Integer,ArrayList<Integer>>(); - for(String n:allnodes) - { - NetworkNode nd = prunednodes.get(n); - nd.id = nodeidlist.get(nd.username); - ArrayList<Integer> connids = new ArrayList<Integer>(); -// ArrayList<ToNodeInfo> compact_To_nodes = new ArrayList<ToNodeInfo>(); - int counter = 0; - for(ToNodeInfo tnf: nd.tonodes) - { - //user has never been retweeted. the chain terminates here, so remove it - if(nodeidlist.containsKey(tnf.tousername)) - { - tnf.tonodeid = nodeidlist.get(tnf.tousername); - connids.add(tnf.tonodeid); - nd.tonodes.set(counter, tnf); - counter++; - } - } - finalnodes.add(nd); - //store the connections to compute the clusterids later -// if(!conninfo.containsKey(nd.id)) -// { -// conninfo.put(nd.id, connids); -// } - } - //generate the clusterids -// ArrayList<Integer>[] clusterids = (ArrayList<Integer>[])new ArrayList[allnodes.size()]; -// Set<Integer> idkeys = conninfo.keySet(); -// for(int id:idkeys) -// { -// for(int x:conninfo.get(id)) -// { -// if(clusterids[x]==null) -// { -// ArrayList<Integer> toclusterid = new ArrayList<Integer>(); -// toclusterid.add(id); -// clusterids[x] = toclusterid; -// } -// else -// { -// ArrayList<Integer> toclusterid = clusterids[x]; -// if(!toclusterid.contains(id)) -// { -// toclusterid.add(id); -// clusterids[x] = toclusterid; -// } -// } -// } -// } - //now create the final node list with the clusterids -// for(String n:allnodes) -// { -// NetworkNode nd = prunednodes.get(n); -// ArrayList<Integer> cids = clusterids[nd.id]; -// if(cids!=null) -// { -// int size = cids.size(); -// nd.clusterID = new int[size+1]; -// int counter=0; -// nd.clusterID[counter++] = nd.id; -// for(int c:cids) -// { -// nd.clusterID[counter++] = c; -// } -// } - //System.out.println(nd.class_codes.toString()); -// finalnodes.add(nd); -// } - Collections.sort(finalnodes,new NodeIDComparator()); - System.out.println(finalnodes.size()); - for(NetworkNode node:finalnodes) - { - System.out.println(node.id+" "+node.username+" "+node.level+" "+node.size+" "+node.catColor+node.data.get(0)); - } - return GetD3Structure(finalnodes); - } - - /** - * Creates a D3 representation of the nodes, consisting of two JSONArray a set of nodes and a set of links between the nodes - * @param finalnodes - * @return - */ - public JSONObject GetD3Structure(ArrayList<NetworkNode> finalnodes) - { - JSONObject alltweets = new JSONObject(); - try { - JSONArray nodes = new JSONArray(); - JSONArray links = new JSONArray(); - for (NetworkNode node : finalnodes) - { - try { - //create adjacencies - JSONArray nodedata = new JSONArray(); - for (ToNodeInfo tnf : node.tonodes) { - JSONObject jsadj = new JSONObject(); - jsadj.put("source", node.id); - jsadj.put("target", tnf.tonodeid); - //weight of the edge - jsadj.put("value", 1); - //class code is a unique id corresponding to the text - jsadj.put("data", tnf.class_code); - links.put(jsadj); - //create a data object for the node - JSONObject jsdata = new JSONObject(); - jsdata.put("tonodeid", tnf.tonodeid); - jsdata.put("nodefrom", node.username); - jsdata.put("nodeto", tnf.tousername); - jsdata.put("tweet", tnf.text); -// jsdata.put("pubtime", tnf.date); - //class code for tweet to be used to filter -// jsdata.put("classcode", tnf.class_code); - nodedata.put(jsdata); - } - //add node - JSONObject nd = new JSONObject(); - nd.put("name", node.username); - nd.put("group", node.group); - nd.put("id", node.id); - nd.put("size", node.size); - nd.put("catColor", node.catColor); - nd.put("catID", node.catID); - nd.put("data", nodedata); - nd.put("level", node.level); - //clusterids for the node -// JSONArray cids = new JSONArray(); -// if (node.clusterID != null) { -// for (int code : node.clusterID) { -// cids.put(code); -// } -// } else { -// cids.put(node.id); -// } -// nd.put("clusterids", cids); - //classcodes for the node -// JSONArray codes = new JSONArray(); -// for (int c : node.class_codes) { -// codes.put(c); -// } -// nd.put("classcodes", codes); - nodes.put(nd); - } catch (JSONException ex) { - ex.printStackTrace(); - } - } - alltweets.put("nodes", nodes); - alltweets.put("links", links); - } catch (JSONException ex) { - Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex); - } - return alltweets; - } - - /** - * Recursively traverses the list of nodes to identify all nodes reachable from a starting node. - * @param userconnections A map containing the usernames as keys and the node information as value - * @param cur_node Node currently being processed. - * @param newnodes A list of nodes which can be reached from the current node - * @return A map of the usernames and the node information for all nodes reachable - */ - public HashMap<String,NetworkNode> GetNextHopConnections(HashMap<String,NetworkNode> userconnections,NetworkNode cur_node,HashMap<String,NetworkNode> newnodes) - { - cur_node.level = cur_node.level+1; - newnodes.put(cur_node.username,cur_node); - for(int i=0;i<cur_node.tonodes.size();i++) - { - ToNodeInfo tnf = cur_node.tonodes.get(i); - if(newnodes.containsKey(tnf.tousername)) - { - continue; - } - - HashMap<String,NetworkNode> rtnodes = GetNextHopConnections(userconnections, userconnections.get(tnf.tousername),newnodes); - newnodes = rtnodes; - } - return newnodes; - } - - /** - * Divides a list of nodes into groups using the square root binning - * technique. If a node has size x and there are y groups in total. Then the - * group of the node is computed as ceil((sqrt(x)/sqrt(max))*y), where max is - * the size of the largest node. - * @param nodes A list of nodes - * @param max The maximum size of a node - * @param min The minimum size of a node - * @param noofclasses Number of classes into which the nodes must be classified - * @return A list of nodes along with their class - */ - public ArrayList<NetworkNode> ComputeGroupsSqrt(ArrayList<NetworkNode> nodes, int max, int min, int noofclasses) - { - ArrayList<NetworkNode> finalnodes = new ArrayList<NetworkNode>(); - for(int i=0;i<nodes.size();i++) - { - NetworkNode node = nodes.get(i); - int color_index = 0; - if(node.size>0) - { - color_index = (int) Math.ceil(((double)Math.sqrt(node.size)/Math.sqrt(max))*noofclasses)-1; -// node.size = color_index*6; - } - node.group = color_index; - finalnodes.add(node); - } - return finalnodes; - } - - - //DEBUG use only - public static void main(String[] args) - { - try { - CreateD3Network cdn = new CreateD3Network(); - JSONObject jobj = new JSONObject(); - JSONObject obj = new JSONObject(); - obj.put("color", "#800000"); - JSONArray ja = new JSONArray(); - ja.put("zuccotti"); - obj.put("hts", ja); - jobj.put("Group 1", obj); - obj = new JSONObject(); - obj.put("color", "#0FFF00"); - ja = new JSONArray(); - ja.put("#nypd"); - obj.put("hts", ja); - jobj.put("Group 2", obj); - String filename = "D:\\Twitter Data Analytics\\Data\\testows.json"; - JSONObject nodes = cdn.ConvertTweetsToDiffusionPath(filename,7, jobj,5); - } catch (JSONException ex) { - ex.printStackTrace(); - } - } -} diff --git a/src/Chapter5/network/ExtractUserTagNetwork.java b/src/Chapter5/network/ExtractUserTagNetwork.java deleted file mode 100644 index 43ae680..0000000 --- a/src/Chapter5/network/ExtractUserTagNetwork.java +++ /dev/null @@ -1,173 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.network; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class ExtractUserTagNetwork -{ - - static final String DEF_INFILENAME = "ows.json"; - - /** - * Extracts a map of all the hashtags a user has used in his tweets resulting in a bipartite network. The frequency of each tag is also returned in the form of a map. - * @param inFilename File containing a list of tweets as JSON objects - * @return A map containing the users as keys and a map containing the hashtags they use along with their frequency. - */ - public HashMap<String,HashMap<String,Integer>> ExtractUserHashtagNetwork(String inFilename) - { - HashMap<String,HashMap<String,Integer>> usertagmap = new HashMap<String,HashMap<String,Integer>>(); - BufferedReader br = null; - try{ - br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try{ - JSONObject tweetobj = new JSONObject(temp); - String text; - String username; - HashMap<String,Integer> tags = new HashMap<String,Integer>(); - if(!tweetobj.isNull("entities")) - { - JSONObject entities = tweetobj.getJSONObject("entities"); - JSONArray hashtags; - try { - hashtags = entities.getJSONArray("hashtags"); - for(int i=0;i<hashtags.length();i++) - { - JSONObject tag = hashtags.getJSONObject(i); - String tg = tag.getString("text").toLowerCase(); - if(!tags.containsKey(tg)) - { - tags.put(tg,1); - } - else - { - tags.put(tg, tags.get(tg)+1); - } - } - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - else - if(!tweetobj.isNull("text")) - { - text = tweetobj.getString("text"); - tags = ExtractHashTags(text); - } - if(!tweetobj.isNull("user")) - { - JSONObject userobj = tweetobj.getJSONObject("user"); - username = "@"+userobj.getString("screen_name").toLowerCase(); - if(usertagmap.containsKey(username)) - { - HashMap<String,Integer> usertags = usertagmap.get(username); - Set<String> keys = tags.keySet(); - for(String k:keys) - { - if(usertags.containsKey(k)) - { - usertags.put(k, usertags.get(k)+tags.get(k)); - } - else - { - usertags.put(k, tags.get(k)); - } - } - usertagmap.put(username, usertags); - } - else - { - usertagmap.put(username, tags); - } - } - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(ExtractUserTagNetwork.class.getName()).log(Level.SEVERE, null, ex); - } - } - return usertagmap; - } - - /** - * Extracts all the hashtags mentioned in a tweet and creates a map with the frequency of their occurrence. - * @param text - * @return A map containing the hashtags as keys and their frequency as value - */ - public HashMap<String,Integer> ExtractHashTags(String text) - { - Pattern p = Pattern.compile("#[a-zA-Z0-9]+"); - Matcher m = p.matcher(text); - HashMap<String,Integer> tags = new HashMap<String,Integer>(); - while(m.find()) - { - String tag = text.substring(m.start(),m.end()).toLowerCase(); - if(!tags.containsKey(tag)) - { - tags.put(tag,1); - } - else - { - tags.put(tag, tags.get(tag)+1); - } - } - return tags; - } - - public static void main(String[] args) - { - ExtractUserTagNetwork eutn = new ExtractUserTagNetwork(); - - String infilename = DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - } - HashMap<String, HashMap<String,Integer>> usertagmap = eutn.ExtractUserHashtagNetwork(infilename); - Set<String> keys = usertagmap.keySet(); - for(String key:keys) - { - System.out.println(key); - HashMap<String,Integer> tags = usertagmap.get(key); - Set<String> tagkeys = tags.keySet(); - for(String tag:tagkeys) - { - System.out.println(tag+","+tags.get(tag)); - } - } - } -} diff --git a/src/Chapter5/support/DateInfo.java b/src/Chapter5/support/DateInfo.java deleted file mode 100644 index 9a32d4c..0000000 --- a/src/Chapter5/support/DateInfo.java +++ /dev/null @@ -1,30 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.support; - -import java.util.Date; -import java.util.HashMap; - -public class DateInfo implements Comparable -{ - public Date d; - public HashMap<String,Integer> catcounts = new HashMap<String,Integer>(); - - public int compareTo(Object o) { - DateInfo temp = (DateInfo) o; - if(temp.d.after(this.d)) - { - return 1; - } - else - if(temp.d.before(this.d)) - { - return -1; - } - else - { - return 0; - } - } -} diff --git a/src/Chapter5/support/HashTagDS.java b/src/Chapter5/support/HashTagDS.java deleted file mode 100644 index b338b6d..0000000 --- a/src/Chapter5/support/HashTagDS.java +++ /dev/null @@ -1,18 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ - -package Chapter5.support; - -/** - * - * @author shamanth - */ -public class HashTagDS -{ - public String groupname; - public String[] tags; - public String color; - -} diff --git a/src/Chapter5/support/NetworkNode.java b/src/Chapter5/support/NetworkNode.java deleted file mode 100644 index 4f662e8..0000000 --- a/src/Chapter5/support/NetworkNode.java +++ /dev/null @@ -1,49 +0,0 @@ -package Chapter5.support; - - -import java.util.ArrayList; - -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ - -/** - * - * @author shamanth - */ -public class NetworkNode -{ - public int id; - public String username; - public int size; - public String catColor; - public int group; -// public int[] clusterID; - public int catID; -// public double lat; -// public double lng; - public ArrayList<String> data; - public int level; - public ArrayList<Integer> class_codes; - public ArrayList<ToNodeInfo> tonodes; - - public NetworkNode Copy() - { - NetworkNode tempnode = new NetworkNode(); - tempnode.catColor = this.catColor; - tempnode.id = this.id; - tempnode.username= this.username; - tempnode.size = this.size; - tempnode.group = this.group; -// tempnode.clusterID = this.clusterID; - tempnode.catID = this.catID; -// tempnode.lat = this.lat; -// tempnode.lng = this.lng; - tempnode.data = this.data; -// tempnode.level = this.level; - tempnode.class_codes = this.class_codes; - tempnode.tonodes = this.tonodes; - return tempnode; - } -} diff --git a/src/Chapter5/support/NodeIDComparator.java b/src/Chapter5/support/NodeIDComparator.java deleted file mode 100644 index 0b41ae7..0000000 --- a/src/Chapter5/support/NodeIDComparator.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ - -package Chapter5.support; - -import java.util.Comparator; - -/** - * - * @author shamanth - */ -public class NodeIDComparator implements Comparator -{ - - public int compare(Object o1, Object o2) { - int id1 = ((NetworkNode) o1).id; - int id2 = ((NetworkNode) o2).id; - if(id1>id2) - { - return 1; - } - else - if(id1<id2) - return -1; - else - return 0; - } - - -} diff --git a/src/Chapter5/support/NodeSizeComparator.java b/src/Chapter5/support/NodeSizeComparator.java deleted file mode 100644 index 23ecb4e..0000000 --- a/src/Chapter5/support/NodeSizeComparator.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ - -package Chapter5.support; -import java.util.Comparator; - -/** - * - * @author shamanth - */ -public class NodeSizeComparator implements Comparator -{ - public int compare(Object o1, Object o2) - { - int size1 = ((NetworkNode) o1).size; - int size2 = ((NetworkNode) o2).size; - if(size1>size2) - { - return 1; - } - if(size1<size2) - return -1; - else - return 0; - } - -} diff --git a/src/Chapter5/support/ToNodeInfo.java b/src/Chapter5/support/ToNodeInfo.java deleted file mode 100644 index 725a10a..0000000 --- a/src/Chapter5/support/ToNodeInfo.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ - -package Chapter5.support; - -/** - * - * @author shamanth - */ -public class ToNodeInfo -{ - public int tonodeid; - public String text; - public String tousername; - public String date; - public int class_code; - public int catID; - public String catColor; - //this is the default direction invert option. If the library adds nodes to the adjacency then that should be set to true in the client side -// public boolean direction = false; -} diff --git a/src/Chapter5/support/Tweet.java b/src/Chapter5/support/Tweet.java deleted file mode 100644 index be53166..0000000 --- a/src/Chapter5/support/Tweet.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ - -package Chapter5.support; - -/** - * - * @author shamanth - */ -public class Tweet { - public String text; - public long id; - public double lat; - public double lng; - public String pubdate; - public String user; - public int catID; - public String catColor; -} diff --git a/src/Chapter5/text/EventSummaryExtractor.java b/src/Chapter5/text/EventSummaryExtractor.java deleted file mode 100644 index e76f42e..0000000 --- a/src/Chapter5/text/EventSummaryExtractor.java +++ /dev/null @@ -1,269 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.text; - -import Chapter5.support.DateInfo; -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class EventSummaryExtractor -{ - - final String DEF_INFILENAME = "ows.json"; - HashMap<String,ArrayList<String>> CATEGORIES = new HashMap<String,ArrayList<String>>(); - SimpleDateFormat twittersdm = new SimpleDateFormat("EEE MMM dd HH:mm:ss Z yyyy"); - SimpleDateFormat dayhoursdm = new SimpleDateFormat("yyyy-MM-dd:HH"); -// SimpleDateFormat daysdm = new SimpleDateFormat("MM/dd/yyyy"); - SimpleDateFormat hoursdm = new SimpleDateFormat("HH"); - - /** - * - */ - public void InitializeCategories() - { - ArrayList<String> people = new ArrayList<String>(); - people.add("protesters"); - people.add("people"); - CATEGORIES.put("People",people); - ArrayList<String> police = new ArrayList<String>(); - police.add("police"); - police.add("cops"); - police.add("nypd"); - police.add("raid"); - CATEGORIES.put("Police",police); - ArrayList<String> media = new ArrayList<String>(); - media.add("press"); - media.add("news"); - media.add("media"); - CATEGORIES.put("Media",media); - ArrayList<String> city = new ArrayList<String>(); - city.add("nyc"); - city.add("zucotti"); - city.add("park"); - CATEGORIES.put("Location",city); - ArrayList<String> judiciary = new ArrayList<String>(); - judiciary.add("judge"); - judiciary.add("eviction"); - judiciary.add("order"); - judiciary.add("court"); - CATEGORIES.put("Judiciary", judiciary); - } - - /** - * - * @param filename - * @return - */ - public JSONObject ExtractCategoryTrends(String filename) - { - JSONObject result = new JSONObject(); - try { - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8")); - String temp = ""; - Set<String> catkeys = CATEGORIES.keySet(); - HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>(); - while((temp = br.readLine())!=null) - { - Date d = new Date(); - try { - JSONObject jobj = new JSONObject(temp); - //Published time - if(!jobj.isNull("created_at")) - { - String time = ""; - try { - time = jobj.getString("created_at"); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - if(time.isEmpty()) - { - continue; - } - else - { - try { - d = twittersdm.parse(time); - } catch (ParseException ex) { - continue; - } - } - } - else - if(!jobj.isNull("timestamp")) - { - long time = new Date().getTime(); - try{ - time = jobj.getLong("timestamp"); - }catch(JSONException ex) - { - ex.printStackTrace(); - } - d = new Date(); - d.setTime(time); - } - String datestr = dayhoursdm.format(d); - String text = jobj.getString("text").toLowerCase(); -// System.out.println(text); - for(String key:catkeys) - { - ArrayList<String> words = CATEGORIES.get(key); - for(String word:words) - { - if(text.contains(word)) - { - HashMap<String,Integer> categorycount = new HashMap<String,Integer>(); - if(datecount.containsKey(datestr)) - { - categorycount = datecount.get(datestr); - } - if(categorycount.containsKey(key)) - { - categorycount.put(key, categorycount.get(key)+1); - } - else - { - categorycount.put(key, 1); - } - //update the categorycount for the specific date - datecount.put(datestr, categorycount); - break; - } - } - } - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - } - //sort the dates - Set<String> datekeys = datecount.keySet(); - ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>(); - for(String date:datekeys) - { - Date d = null; - try { - d = dayhoursdm.parse(date); - } catch (ParseException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - if(d!=null) - { - DateInfo info = new DateInfo(); - info.d = d; - info.catcounts = datecount.get(date); - dinfos.add(info); - } - } - Collections.sort(dinfos, Collections.reverseOrder()); - try { - result.put("axisxstep", dinfos.size()-1); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - try { - result.put("axisystep", CATEGORIES.size()-1); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - JSONArray xcoordinates = new JSONArray(); - JSONArray ycoordinates = new JSONArray(); - //now add the data and the axis labels - JSONArray axisxlabels = new JSONArray(); - JSONArray axisylabels = new JSONArray(); - JSONArray data = new JSONArray(); - for(String key:catkeys) - { - axisylabels.put(key); - } - //counters to mark the indices of the values added to data field. i is the x coordinate and j is the y coordinate - int i=0,j=0; - - for(DateInfo date:dinfos) - { - String strdate = hoursdm.format(date.d); - axisxlabels.put(strdate); - HashMap<String,Integer> catcounts = date.catcounts; - for(String key:catkeys) - { - xcoordinates.put(j); - ycoordinates.put(i++); - if(catcounts.containsKey(key)) - { - data.put(catcounts.get(key)); - } - else - { - data.put(0); - } - } - //reset the x coordinate as we move to the next y item - i=0; - j++; - } - try { - result.put("xcoordinates", xcoordinates); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - try { - result.put("ycoordinates", ycoordinates); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - try { - result.put("axisxlabels", axisxlabels); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - try { - result.put("axisylabels", axisylabels); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - try { - result.put("data", data); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - br.close(); - } catch (IOException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - return result; - } - - public static void main(String[] args) - { - EventSummaryExtractor ese = new EventSummaryExtractor(); - String infilename = ese.DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - } - ese.InitializeCategories(); - System.out.println(ese.ExtractCategoryTrends(infilename).toString()); - } -} diff --git a/src/Chapter5/text/ExtractTopKeywords.java b/src/Chapter5/text/ExtractTopKeywords.java deleted file mode 100644 index 8ab412a..0000000 --- a/src/Chapter5/text/ExtractTopKeywords.java +++ /dev/null @@ -1,151 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.text; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import utils.Tags; -import utils.TextUtils; - -public class ExtractTopKeywords -{ - - static final String DEF_INFILENAME = "ows.json"; - static final int DEF_K = 60; - - /** - * Extracts the most frequently occurring keywords from the tweets by processing them sequentially. Stopwords are ignored. - * @param inFilename File containing a list of tweets as JSON objects - * @param K Count of the top keywords to return - * @param ignoreHashtags If true, hashtags are not considered while counting the most frequent keywords - * @param ignoreUsernames If true, usernames are not considered while counting the most frequent keywords - * @param tu TextUtils object which handles the stopwords - * @return a JSONArray containing an array of JSONObjects. Each object contains two elements "text" and "size" referring to the word and it's frequency - */ - public JSONArray GetTopKeywords(String inFilename, int K, boolean ignoreHashtags, boolean ignoreUsernames, TextUtils tu) - { - HashMap<String, Integer> words = new HashMap<String,Integer>(); - BufferedReader br = null; - try{ - br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try{ - JSONObject tweetobj = new JSONObject(temp); - if(!tweetobj.isNull("text")) - { - String text = tweetobj.getString("text"); - //System.out.println(text); - text = text.toLowerCase().replaceAll("\\s+", " "); - /** Step 1: Tokenize tweets into individual words. and count their frequency in the corpus - * Remove stop words and special characters. Ignore user names and hashtags if the user chooses to. - */ - HashMap<String,Integer> tokens = tu.TokenizeText(text,ignoreHashtags,ignoreUsernames); - Set<String> keys = tokens.keySet(); - for(String key:keys) - { - if(words.containsKey(key)) - { - words.put(key, words.get(key)+tokens.get(key)); - } - else - { - words.put(key, tokens.get(key)); - } - } - } - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(ExtractTopKeywords.class.getName()).log(Level.SEVERE, null, ex); - } - } - Set<String> keys = words.keySet(); - ArrayList<Tags> tags = new ArrayList<Tags>(); - for(String key:keys) - { - Tags tag = new Tags(); - tag.setKey(key); - tag.setValue(words.get(key)); - tags.add(tag); - } - // Step 2: Sort the words in descending order of frequency - Collections.sort(tags, Collections.reverseOrder()); - JSONArray cloudwords = new JSONArray(); - int numwords = K; - if(tags.size()<numwords) - { - numwords = tags.size(); - } - for(int i=0;i<numwords;i++) - { - JSONObject wordfreq = new JSONObject(); - Tags tag = tags.get(i); - try{ - wordfreq.put("text", tag.getKey()); - wordfreq.put("size",tag.getValue()); - cloudwords.put(wordfreq); - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - return cloudwords; - } - - public static void main(String[] args) - { - ExtractTopKeywords etk = new ExtractTopKeywords(); - - //Initialize the TextUtils class which handles all the processing of text. - TextUtils tu = new TextUtils(); - tu.LoadStopWords("C:/tweettracker/stopwords.txt"); - String infilename = DEF_INFILENAME; - int K = DEF_K; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - if(args.length>=2&&!args[1].isEmpty()) - { - try{ - K = Integer.parseInt(args[1]); - }catch(NumberFormatException ex) - { - ex.printStackTrace(); - } - } - } - System.out.println(etk.GetTopKeywords(infilename, K, false,true,tu)); - } - -} diff --git a/src/Chapter5/trends/ControlChartExample.java b/src/Chapter5/trends/ControlChartExample.java deleted file mode 100644 index 2df814f..0000000 --- a/src/Chapter5/trends/ControlChartExample.java +++ /dev/null @@ -1,144 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class ControlChartExample -{ - static final String DEF_INFILENAME = "ows.json"; - static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm"); - - public JSONArray GenerateDataTrend(String inFilename) - { - BufferedReader br = null; - JSONArray result = new JSONArray(); - HashMap<String,Integer> datecount = new HashMap<String,Integer>(); - try{ - br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try { - JSONObject jobj = new JSONObject(temp); - long timestamp = jobj.getLong("timestamp"); - Date d = new Date(timestamp); - String strdate = SDM.format(d); - if(datecount.containsKey(strdate)) - { - datecount.put(strdate, datecount.get(strdate)+1); - } - else - { - datecount.put(strdate, 1); - } - } catch (JSONException ex) { - Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>(); - Set<String> keys = datecount.keySet(); - for(String key:keys) - { - DateInfo dinfo = new DateInfo(); - try { - dinfo.d = SDM.parse(key); - } catch (ParseException ex) { - ex.printStackTrace(); - continue; - } - dinfo.count = datecount.get(key); - dinfos.add(dinfo); - } - double mean = this.GetMean(dinfos); - double stddev = this.GetStandardDev(dinfos, mean); - Collections.sort(dinfos); - //Normalize the trend by subtracting the mean and dividing by standard deviation to get a distribution with 0 mean and a standard deviation of 1 - for(DateInfo dinfo:dinfos) - { - try{ - JSONObject jobj = new JSONObject(); - jobj.put("date", SDM.format(dinfo.d)); - jobj.put("count", (dinfo.count-mean)/stddev); - jobj.put("mean", 0); - jobj.put("stdev+3", 3); - jobj.put("stdev-3", -3); - result.put(jobj); - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - return result; - } - - public double GetStandardDev(ArrayList<DateInfo> dateinfos,double mean) - { - double intsum = 0; - int numperiods = dateinfos.size(); - for(DateInfo dinfo:dateinfos) - { - intsum+=Math.pow((dinfo.count - mean),2); - } -// System.out.println(Math.sqrt((double)intsum/timePeriodCounts.size())); - return Math.sqrt((double)intsum/numperiods); - } - - public double GetMean(ArrayList<DateInfo> dateinfos) - { - int numperiods = dateinfos.size(); - int sum = 0; - for(DateInfo dinfo:dateinfos) - { - sum +=dinfo.count; - } -// System.out.println((double)sum/numPeriods); - return ((double)sum/numperiods); - } - - public static void main(String[] args) - { - ControlChartExample cce = new ControlChartExample(); - String infilename = DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - } - System.out.println(cce.GenerateDataTrend(infilename)); - } - -} diff --git a/src/Chapter5/trends/DateInfo.java b/src/Chapter5/trends/DateInfo.java deleted file mode 100644 index 209f4a3..0000000 --- a/src/Chapter5/trends/DateInfo.java +++ /dev/null @@ -1,29 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.util.Date; - -public class DateInfo implements Comparable -{ - public Date d; - public int count; - - public int compareTo(Object o) { - DateInfo temp = (DateInfo) o; - if(temp.d.after(this.d)) - { - return -1; - } - else - if(temp.d.before(this.d)) - { - return 1; - } - else - { - return 0; - } - } -} diff --git a/src/Chapter5/trends/ExtractDatasetTrend.java b/src/Chapter5/trends/ExtractDatasetTrend.java deleted file mode 100644 index dad7f27..0000000 --- a/src/Chapter5/trends/ExtractDatasetTrend.java +++ /dev/null @@ -1,120 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class ExtractDatasetTrend -{ - static final String DEF_INFILENAME = "ows.json"; - // Date pattern used to count the volume of tweets - final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm"); - - public JSONArray GenerateDataTrend(String inFilename) - { - BufferedReader br = null; - JSONArray result = new JSONArray(); - HashMap<String,Integer> datecount = new HashMap<String,Integer>(); - try{ - br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try { - JSONObject jobj = new JSONObject(temp); - long timestamp = jobj.getLong("timestamp"); - Date d = new Date(timestamp); - String strdate = SDM.format(d); - if(datecount.containsKey(strdate)) - { - datecount.put(strdate, datecount.get(strdate)+1); - } - else - { - datecount.put(strdate, 1); - } - } catch (JSONException ex) { - Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex); - } - } - /** DateInfo consists of a date string and the corresponding count. - * It also implements a Comparator for sorting by date - */ - ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>(); - Set<String> keys = datecount.keySet(); - for(String key:keys) - { - DateInfo dinfo = new DateInfo(); - try { - dinfo.d = SDM.parse(key); - } catch (ParseException ex) { - ex.printStackTrace(); - continue; - } - dinfo.count = datecount.get(key); - dinfos.add(dinfo); - } - Collections.sort(dinfos); - // Format and return the date string and the corresponding count - for(DateInfo dinfo:dinfos) - { - try{ - JSONObject jobj = new JSONObject(); - jobj.put("date", SDM.format(dinfo.d)); - jobj.put("count", dinfo.count); - result.put(jobj); - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex); - } - } - return result; - } - - public static void main(String[] args) - { - ExtractDatasetTrend edt = new ExtractDatasetTrend(); - - String infilename = DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - } - System.out.println(edt.GenerateDataTrend(infilename)); - } - -} diff --git a/src/Chapter5/trends/SparkLineExample.java b/src/Chapter5/trends/SparkLineExample.java deleted file mode 100644 index 4a0164b..0000000 --- a/src/Chapter5/trends/SparkLineExample.java +++ /dev/null @@ -1,163 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class SparkLineExample -{ - static final String DEF_INFILENAME = "ows.json"; - static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH"); - - public JSONObject GenerateDataTrend(String inFilename, ArrayList<String> keywords) - { - BufferedReader br = null; - JSONObject result = new JSONObject(); - HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>(); - try{ - br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try { - JSONObject jobj = new JSONObject(temp); - String text = jobj.getString("text").toLowerCase(); - long timestamp = jobj.getLong("timestamp"); - Date d = new Date(timestamp); - String strdate = SDM.format(d); - for(String word:keywords) - { - if(text.contains(word)) - { - HashMap<String,Integer> wordcount = new HashMap<String,Integer>(); - if(datecount.containsKey(strdate)) - { - wordcount = datecount.get(strdate); - } - if(wordcount.containsKey(word)) - { - wordcount.put(word, wordcount.get(word)+1); - } - else - { - wordcount.put(word, 1); - } - //update the wordcount for the specific date - datecount.put(strdate, wordcount); - } - } - } catch (JSONException ex) { - Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - //sort the dates - ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>(); - Set<String> keys = datecount.keySet(); - for(String key:keys) - { - TCDateInfo dinfo = new TCDateInfo(); - try { - dinfo.d = SDM.parse(key); - } catch (ParseException ex) { - ex.printStackTrace(); - continue; - } - dinfo.wordcount = datecount.get(key); - dinfos.add(dinfo); - } - Collections.sort(dinfos); - JSONArray[] tseriesvals = new JSONArray[keywords.size()]; - for(int i=0;i<tseriesvals.length;i++) - { - tseriesvals[i] = new JSONArray(); - } - //prepare the output - for(TCDateInfo date:dinfos) - { - HashMap<String,Integer> wordcount = date.wordcount; - int counter=0; - for(String word:keywords) - { - if(wordcount.containsKey(word)) - { - tseriesvals[counter].put(wordcount.get(word)); - } - else - { - tseriesvals[counter].put(0); - } - counter++; - } - } - int counter=0; - for(String word:keywords) - { - try { - result.put(word, tseriesvals[counter]); - } catch (JSONException ex) { - Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex); - } - counter++; - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - return result; - } - - public static void main(String[] args) - { - SparkLineExample sle = new SparkLineExample(); - ArrayList<String> words = new ArrayList<String>(); - String infilename = DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - for(int i=1;i<args.length;i++) - { - if(args[i]!=null&&!args[i].isEmpty()) - { - words.add(args[i]); - } - } - } - if(words.isEmpty()) - { - words.add("#nypd"); - words.add("#ows"); - } - System.out.println(sle.GenerateDataTrend(infilename,words)); - } - -} diff --git a/src/Chapter5/trends/TCDateInfo.java b/src/Chapter5/trends/TCDateInfo.java deleted file mode 100644 index 88450e9..0000000 --- a/src/Chapter5/trends/TCDateInfo.java +++ /dev/null @@ -1,31 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.util.Date; -import java.util.HashMap; - -public class TCDateInfo implements Comparable -{ - public Date d; - public HashMap<String,Integer> wordcount = new HashMap<String,Integer>(); - - public int compareTo(Object o) { - TCDateInfo temp = (TCDateInfo) o; - if(temp.d.after(this.d)) - { - return -1; - } - else - if(temp.d.before(this.d)) - { - return 1; - } - else - { - return 0; - } - } - -} diff --git a/src/Chapter5/trends/TrendComparisonExample.java b/src/Chapter5/trends/TrendComparisonExample.java deleted file mode 100644 index 20991cd..0000000 --- a/src/Chapter5/trends/TrendComparisonExample.java +++ /dev/null @@ -1,155 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class TrendComparisonExample -{ - static final String DEF_INFILENAME = "ows.json"; - static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm"); - - public JSONArray GenerateDataTrend(String inFilename, ArrayList<String> keywords) - { - BufferedReader br = null; - JSONArray result = new JSONArray(); - HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>(); - try{ - br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try { - JSONObject jobj = new JSONObject(temp); - String text = jobj.getString("text").toLowerCase(); - long timestamp = jobj.getLong("timestamp"); - Date d = new Date(timestamp); - String strdate = SDM.format(d); - for(String word:keywords) - { - if(text.contains(word)) - { - HashMap<String,Integer> wordcount = new HashMap<String,Integer>(); - if(datecount.containsKey(strdate)) - { - wordcount = datecount.get(strdate); - } - if(wordcount.containsKey(word)) - { - wordcount.put(word, wordcount.get(word)+1); - } - else - { - wordcount.put(word, 1); - } - //update the wordcount for the specific date - datecount.put(strdate, wordcount); - } - } - } catch (JSONException ex) { - Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - //sort the dates - ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>(); - Set<String> keys = datecount.keySet(); - for(String key:keys) - { - TCDateInfo dinfo = new TCDateInfo(); - try { - dinfo.d = SDM.parse(key); - } catch (ParseException ex) { - ex.printStackTrace(); - continue; - } - dinfo.wordcount = datecount.get(key); - dinfos.add(dinfo); - } - Collections.sort(dinfos); - //prepare the output - for(TCDateInfo date:dinfos) - { - JSONObject item = new JSONObject(); - String strdate = SDM.format(date.d); - try{ - item.put("date",strdate); - HashMap<String,Integer> wordcount = date.wordcount; - for(String word:keywords) - { - if(wordcount.containsKey(word)) - { - item.put(word, wordcount.get(word)); - } - else - { - item.put(word, 0); - } - } - result.put(item); - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - return result; - } - - public static void main(String[] args) - { - TrendComparisonExample tce = new TrendComparisonExample(); - ArrayList<String> words = new ArrayList<String>(); - String infilename = DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - for(int i=1;i<args.length;i++) - { - if(args[i]!=null&&!args[i].isEmpty()) - { - words.add(args[i]); - } - } - } - if(words.isEmpty()) - { - words.add("#nypd"); - words.add("#ows"); - } - System.out.println(tce.GenerateDataTrend(infilename,words)); - } - -} |