From 4f32eedd2bd49837cc297acce399c108e8b558a7 Mon Sep 17 00:00:00 2001 From: Maurice Laveaux Date: Thu, 22 May 2014 16:33:24 +0200 Subject: Removed unused source files * Removed the unused examples. --- .../Location/LocationTranslationExample.java | 124 ---- src/Chapter2/openauthentication/OAuthExample.java | 79 --- src/Chapter2/restapi/RESTApiExample.java | 676 ------------------- src/Chapter2/restapi/RESTSearchExample.java | 311 --------- src/Chapter2/streamingapi/StreamingApiExample.java | 372 ----------- src/Chapter2/support/APIType.java | 12 - src/Chapter2/support/InfoType.java | 12 - src/Chapter2/support/Location.java | 28 - src/Chapter2/support/OAuthTokenSecret.java | 38 -- src/Chapter4/GraphElements/RetweetEdge.java | 53 -- src/Chapter4/GraphElements/UserNode.java | 34 - .../examples/BetweennessCentralityExample.java | 31 - .../examples/EigenvectorCentralityExample.java | 36 -- .../examples/InDegreeCentralityExample.java | 30 - .../examples/PageRankCentralityExample.java | 39 -- .../classification/bayes/Classification.java | 22 - src/Chapter4/classification/bayes/NBCxv.java | 60 -- .../bayes/NaiveBayesSentimentClassifier.java | 264 -------- .../classification/bayes/StopwordsList.java | 10 - src/Chapter4/classification/bayes/TestNBC.java | 49 -- .../classification/bayes/WordCountPair.java | 34 - .../graph/visualization/SimpleGraphViewer.java | 86 --- src/Chapter4/tweetlda/LDA.java | 89 --- src/Chapter4/tweetlda/PorterStemmer.java | 33 - src/Chapter4/tweetlda/Stemmer.java | 428 ------------ src/Chapter4/util/BetweennessScorer.java | 25 - src/Chapter4/util/EigenVectorScorer.java | 64 -- src/Chapter4/util/InDegreeScorer.java | 30 - src/Chapter4/util/TweetFileProcessor.java | 76 --- src/Chapter4/util/TweetFileToGraph.java | 77 --- src/Chapter5/network/CreateD3Network.java | 716 --------------------- src/Chapter5/network/ExtractUserTagNetwork.java | 173 ----- src/Chapter5/support/DateInfo.java | 30 - src/Chapter5/support/HashTagDS.java | 18 - src/Chapter5/support/NetworkNode.java | 49 -- src/Chapter5/support/NodeIDComparator.java | 32 - src/Chapter5/support/NodeSizeComparator.java | 29 - src/Chapter5/support/ToNodeInfo.java | 23 - src/Chapter5/support/Tweet.java | 21 - src/Chapter5/text/EventSummaryExtractor.java | 269 -------- src/Chapter5/text/ExtractTopKeywords.java | 151 ----- src/Chapter5/trends/ControlChartExample.java | 144 ----- src/Chapter5/trends/DateInfo.java | 29 - src/Chapter5/trends/ExtractDatasetTrend.java | 120 ---- src/Chapter5/trends/SparkLineExample.java | 163 ----- src/Chapter5/trends/TCDateInfo.java | 31 - src/Chapter5/trends/TrendComparisonExample.java | 155 ----- 47 files changed, 5375 deletions(-) delete mode 100644 src/Chapter2/Location/LocationTranslationExample.java delete mode 100644 src/Chapter2/openauthentication/OAuthExample.java delete mode 100644 src/Chapter2/restapi/RESTApiExample.java delete mode 100644 src/Chapter2/restapi/RESTSearchExample.java delete mode 100644 src/Chapter2/streamingapi/StreamingApiExample.java delete mode 100644 src/Chapter2/support/APIType.java delete mode 100644 src/Chapter2/support/InfoType.java delete mode 100644 src/Chapter2/support/Location.java delete mode 100644 src/Chapter2/support/OAuthTokenSecret.java delete mode 100644 src/Chapter4/GraphElements/RetweetEdge.java delete mode 100644 src/Chapter4/GraphElements/UserNode.java delete mode 100644 src/Chapter4/centrality/examples/BetweennessCentralityExample.java delete mode 100644 src/Chapter4/centrality/examples/EigenvectorCentralityExample.java delete mode 100644 
src/Chapter4/centrality/examples/InDegreeCentralityExample.java delete mode 100644 src/Chapter4/centrality/examples/PageRankCentralityExample.java delete mode 100644 src/Chapter4/classification/bayes/Classification.java delete mode 100644 src/Chapter4/classification/bayes/NBCxv.java delete mode 100644 src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java delete mode 100644 src/Chapter4/classification/bayes/StopwordsList.java delete mode 100644 src/Chapter4/classification/bayes/TestNBC.java delete mode 100644 src/Chapter4/classification/bayes/WordCountPair.java delete mode 100644 src/Chapter4/graph/visualization/SimpleGraphViewer.java delete mode 100644 src/Chapter4/tweetlda/LDA.java delete mode 100644 src/Chapter4/tweetlda/PorterStemmer.java delete mode 100644 src/Chapter4/tweetlda/Stemmer.java delete mode 100644 src/Chapter4/util/BetweennessScorer.java delete mode 100644 src/Chapter4/util/EigenVectorScorer.java delete mode 100644 src/Chapter4/util/InDegreeScorer.java delete mode 100644 src/Chapter4/util/TweetFileProcessor.java delete mode 100644 src/Chapter4/util/TweetFileToGraph.java delete mode 100644 src/Chapter5/network/CreateD3Network.java delete mode 100644 src/Chapter5/network/ExtractUserTagNetwork.java delete mode 100644 src/Chapter5/support/DateInfo.java delete mode 100644 src/Chapter5/support/HashTagDS.java delete mode 100644 src/Chapter5/support/NetworkNode.java delete mode 100644 src/Chapter5/support/NodeIDComparator.java delete mode 100644 src/Chapter5/support/NodeSizeComparator.java delete mode 100644 src/Chapter5/support/ToNodeInfo.java delete mode 100644 src/Chapter5/support/Tweet.java delete mode 100644 src/Chapter5/text/EventSummaryExtractor.java delete mode 100644 src/Chapter5/text/ExtractTopKeywords.java delete mode 100644 src/Chapter5/trends/ControlChartExample.java delete mode 100644 src/Chapter5/trends/DateInfo.java delete mode 100644 src/Chapter5/trends/ExtractDatasetTrend.java delete mode 100644 src/Chapter5/trends/SparkLineExample.java delete mode 100644 src/Chapter5/trends/TCDateInfo.java delete mode 100644 src/Chapter5/trends/TrendComparisonExample.java diff --git a/src/Chapter2/Location/LocationTranslationExample.java b/src/Chapter2/Location/LocationTranslationExample.java deleted file mode 100644 index 69178dc..0000000 --- a/src/Chapter2/Location/LocationTranslationExample.java +++ /dev/null @@ -1,124 +0,0 @@ -/* TweetTracker. 
Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter2.Location; - -import Chapter2.support.Location; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.UnsupportedEncodingException; -import java.net.HttpURLConnection; -import java.net.MalformedURLException; -import java.net.URL; -import java.net.URLConnection; -import java.net.URLEncoder; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; - -public class LocationTranslationExample -{ - - /** - * Translates a location string to coordinates using the database or Nominatim Service - * @param loc - * @return - */ - public Location TranslateLoc(String loc) - { - if(loc!=null&&!loc.isEmpty()) - { - String encodedLoc=""; - try { - //Step 1: Encode the location name - encodedLoc = URLEncoder.encode(loc, "UTF-8"); - } catch (UnsupportedEncodingException ex) { - Logger.getLogger(LocationTranslationExample.class.getName()).log(Level.SEVERE, null, ex); - } - //Step 2: Create a get request to MapQuest API with the name of the location - String url= "http://open.mapquestapi.com/nominatim/v1/search?q="+encodedLoc+"&format=json"; - String page = ReadHTML(url); - if(page!=null) - { - try{ - JSONArray results = new JSONArray(page); - if(results.length()>0) - { - //Step 3: Read and extract the coordinates of the location as a JSONObject - Location loca = new Location(results.getJSONObject(0).getDouble("lat"),results.getJSONObject(0).getDouble("lon")); - return loca; - } - }catch(JSONException ex) - { - Logger.getLogger(LocationTranslationExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - } - return null; - } - - /** - * Extracts the html content of a URL - * @param url - * @return html page - */ - public String ReadHTML(String url) - { - URLConnection conn = null; - URL theURL = null; - try - { - theURL = new URL(url); - } - catch ( MalformedURLException e) - { - System.out.println("Bad URL: " + theURL); - return null; - } - String page = ""; - try - { - conn = theURL.openConnection(); - HttpURLConnection huc = (HttpURLConnection) conn; - conn.setConnectTimeout(2000); - huc.setRequestProperty("User-Agent", "Mozilla/4.5"); - //Set your email address in the request so MapQuest knows how to reach you in the event of problems - huc.setRequestProperty("Email", "twitterdataanalytics@gmail.com"); - if(huc.getResponseCode()>=400&&huc.getResponseCode()<=404) - { - return null; - } - conn.connect(); - BufferedReader bRead = new BufferedReader(new InputStreamReader((InputStream) conn.getContent())); - String temp=null; - while( (temp= bRead.readLine())!=null) - { - page = page+"\n"+temp; - } - bRead.close(); - } - catch (IOException e) { - //System.out.print("ReadHTML IO Error:" + e.getMessage()+" \n"); - return null; - } - return page; - } - - public static void main(String[] args) - { - LocationTranslationExample lte = new LocationTranslationExample(); - if(args!=null) - { - if(args.length>0) - { - for(int i=0;i Usernames = new ArrayList(); - OAuthConsumer Consumer; - - /** - * Creates a OAuthConsumer with the current consumer & user access tokens and secrets - * @return consumer - */ - public OAuthConsumer GetConsumer() - { - OAuthConsumer consumer = new DefaultOAuthConsumer(utils.Configuration.CONSUMER_KEY,utils.Configuration.CONSUMER_SECRET); - 
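// Once both key pairs are set, the consumer returned here can sign any
// java.net request directly, as the methods below do with Consumer.sign(huc).
// A minimal usage sketch (hypothetical call site; the endpoint is the one
// GetRateLimitStatus uses further down):
//   URL url = new URL("https://api.twitter.com/1.1/application/rate_limit_status.json");
//   HttpURLConnection huc = (HttpURLConnection) url.openConnection();
//   Consumer.sign(huc);  // signpost adds the signed OAuth Authorization header
//   huc.connect();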
consumer.setTokenWithSecret(OAuthTokens.getAccessToken(),OAuthTokens.getAccessSecret()); - return consumer; - } - - /** - * Reads the file and loads the users in the file to be crawled - * @param filename - */ - public void ReadUsers(String filename) - { - BufferedReader br = null; - try { - br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - if(!temp.isEmpty()) - { - Usernames.add(temp); - } - } - } catch (IOException ex) { - ex.printStackTrace(); - } - finally{ - try { - br.close(); - } catch (IOException ex) { - ex.printStackTrace(); - } - } - } - - /** - * Load the User Access Token, and the User Access Secret - */ - public void LoadTwitterToken() - { - //Un-comment before release -// OAuthExample oae = new OAuthExample(); -// OAuthTokens = oae.GetUserAccessKeySecret(); - //Remove before release - OAuthTokens = OAuthExample.DEBUGUserAccessSecret(); - } - - public static void main(String[] args) - { - RESTApiExample rae = new RESTApiExample(); - rae.LoadTwitterToken(); - rae.Consumer = rae.GetConsumer(); -// System.out.println(rae.GetStatuses("twtanalyticsbk")); - System.out.println(rae.GetRateLimitStatus()); -// int apicode = InfoType.PROFILE_INFO; -// String infilename = rae.DEF_FILENAME; -// String outfilename = rae.DEF_OUTFILENAME; -// if(args!=null) -// { -// if(args.length>2) -// { -// apicode = Integer.parseInt(args[2]); -// outfilename = args[1]; -// infilename = args[0]; -// } -// if(args.length>1) -// { -// outfilename = args[1]; -// infilename = args[0]; -// } -// else -// if(args.length>0) -// { -// infilename = args[0]; -// } -// } -// rae.InitializeWriters(outfilename); -// rae.ReadUsers(infilename); -// if(apicode!=InfoType.PROFILE_INFO&&apicode!=InfoType.FOLLOWER_INFO&&apicode!=InfoType.FRIEND_INFO&&apicode!=InfoType.STATUSES_INFO) -// { -// System.out.println("Invalid API type: Use 0 for Profile, 1 for Followers, 2 for Friends, and 3 for Statuses"); -// System.exit(0); -// } -// if(rae.Usernames.size()>0) -// { -// //TO-DO: Print the possible API types and get user selection to crawl the users. 
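// A sketch of one way to resolve the TO-DO above (assumes console input via
// java.util.Scanner; the prompt text is illustrative, not from the original):
//   System.out.println("0=Profile, 1=Followers, 2=Friends, 3=Statuses");
//   apicode = new java.util.Scanner(System.in).nextInt();
// The InfoType checks that follow then dispatch to GetProfile, GetFollowers,
// GetFriends, or GetStatuses accordingly.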
-// rae.LoadTwitterToken(); -// for(String user:rae.Usernames) -// { -// if(apicode==InfoType.PROFILE_INFO) -// { -// JSONObject jobj = rae.GetProfile(user); -// if(jobj!=null&&jobj.length()==0) -// { -// rae.WriteToFile(user, jobj.toString()); -// } -// } -// else -// if(apicode==InfoType.FRIEND_INFO) -// { -// JSONArray statusarr = rae.GetFriends(user); -// if(statusarr.length()>0) -// { -// rae.WriteToFile(user, statusarr.toString()); -// } -// } -// else -// if(apicode == InfoType.FOLLOWER_INFO) -// { -// JSONArray statusarr = rae.GetFollowers(user); -// if(statusarr.length()>0) -// { -// rae.WriteToFile(user, statusarr.toString()); -// } -// } -// else -// if(apicode == InfoType.STATUSES_INFO) -// { -// JSONArray statusarr = rae.GetStatuses(user); -// if(statusarr.length()>0) -// { -// rae.GetStatuses(user); -// } -// } -// } -// } -//// now you can close the files as all the threads have finished -// rae.CleanupAfterFinish(); - } - - /** - * Retrieves the rate limit status of the application - * @return - */ - public JSONObject GetRateLimitStatus() - { - try{ - URL url = new URL("https://api.twitter.com/1.1/application/rate_limit_status.json"); - HttpURLConnection huc = (HttpURLConnection) url.openConnection(); - huc.setReadTimeout(5000); - Consumer.sign(huc); - huc.connect(); - BufferedReader bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getContent())); - StringBuffer page = new StringBuffer(); - String temp= ""; - while((temp = bRead.readLine())!=null) - { - page.append(temp); - } - bRead.close(); - return (new JSONObject(page.toString())); - } catch (JSONException ex) { - Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); - } catch (OAuthCommunicationException ex) { - Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); - } catch (OAuthMessageSignerException ex) { - Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); - } catch (OAuthExpectationFailedException ex) { - Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); - }catch(IOException ex) - { - Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); - } - return null; - } - - /** - * Initialize the file writer - * @param path of the file - * @param outFilename name of the file - */ - public void InitializeWriters(String outFilename) { - try { - File fl = new File(outFilename); - if(!fl.exists()) - { - fl.createNewFile(); - } - /** - * Use UTF-8 encoding when saving files to avoid - * losing Unicode characters in the data - */ - OutFileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilename,true),"UTF-8")); - } catch (IOException ex) { - ex.printStackTrace(); - } - } - - /** - * Close the opened filewriter to save the data - */ - public void CleanupAfterFinish() - { - try { - OutFileWriter.close(); - } catch (IOException ex) { - Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - - /** - * Writes the retrieved data to the output file - * @param data containing the retrived information in JSON - * @param user name of the user currently being written - */ - public void WriteToFile(String user, String data) - { - try - { - OutFileWriter.write(data); - OutFileWriter.newLine(); - } catch (IOException ex) { - ex.printStackTrace(); - } - } - - /** - * Retrives the profile information of the user - * @param username of the user whose profile needs to be retrieved - * @return the profile information as a JSONObject - */ - public 
JSONObject GetProfile(String username) - { - BufferedReader bRead = null; - JSONObject profile = null; - try { - System.out.println("Processing profile of "+username); - boolean flag = true; - URL url = new URL("https://api.twitter.com/1.1/users/show.json?screen_name="+username); - HttpURLConnection huc = (HttpURLConnection) url.openConnection(); - huc.setReadTimeout(5000); - // Step 2: Sign the request using the OAuth Secret - Consumer.sign(huc); - huc.connect(); - if(huc.getResponseCode()==404||huc.getResponseCode()==401) - { - System.out.println(huc.getResponseMessage()); - } - else - if(huc.getResponseCode()==500||huc.getResponseCode()==502||huc.getResponseCode()==503) - { - try { - huc.disconnect(); - System.out.println(huc.getResponseMessage()); - Thread.sleep(3000); - } catch (InterruptedException ex) { - ex.printStackTrace(); - } - } - else - // Step 3: If the requests have been exhausted, then wait until the quota is renewed - if(huc.getResponseCode()==429) - { - try { - huc.disconnect(); - Thread.sleep(this.GetWaitTime("/users/show/:id")); - flag = false; - } catch (InterruptedException ex) { - ex.printStackTrace(); - } - } - if(!flag) - { - //recreate the connection because something went wrong the first time. - huc.connect(); - } - StringBuilder content=new StringBuilder(); - if(flag) - { - bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getContent())); - String temp= ""; - while((temp = bRead.readLine())!=null) - { - content.append(temp); - } - } - huc.disconnect(); - try { - profile = new JSONObject(content.toString()); - } catch (JSONException ex) { - ex.printStackTrace(); - } - } catch (OAuthCommunicationException ex) { - ex.printStackTrace(); - } catch (OAuthMessageSignerException ex) { - ex.printStackTrace(); - } catch (OAuthExpectationFailedException ex) { - ex.printStackTrace(); - } catch (IOException ex) { - ex.printStackTrace(); - } - return profile; - } - - /** - * Retrieves the followers of a user - * @param username the name of the user whose followers need to be retrieved - * @return a list of user objects corresponding to the followers of the user - */ - public JSONArray GetFollowers(String username) - { - BufferedReader bRead = null; - JSONArray followers = new JSONArray(); - try { - System.out.println(" followers user = "+username); - long cursor = -1; - while(true) - { - if(cursor==0) - { - break; - } - // Step 1: Create the APi request using the supplied username - URL url = new URL("https://api.twitter.com/1.1/followers/list.json?screen_name="+username+"&cursor=" + cursor); - HttpURLConnection huc = (HttpURLConnection) url.openConnection(); - huc.setReadTimeout(5000); - // Step 2: Sign the request using the OAuth Secret - Consumer.sign(huc); - huc.connect(); - if(huc.getResponseCode()==400||huc.getResponseCode()==404) - { - System.out.println(huc.getResponseMessage()); - break; - } - else - if(huc.getResponseCode()==500||huc.getResponseCode()==502||huc.getResponseCode()==503||huc.getResponseCode()==504) - { - try{ - System.out.println(huc.getResponseMessage()); - huc.disconnect(); - Thread.sleep(3000); - continue; - } catch (InterruptedException ex) { - Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - else - // Step 3: If the requests have been exhausted, then wait until the quota is renewed - if(huc.getResponseCode()==429) - { - try { - huc.disconnect(); - Thread.sleep(this.GetWaitTime("/followers/list")); - continue; - } catch (InterruptedException ex) { - 
Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - // Step 4: Retrieve the followers list from Twitter - bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getContent())); - StringBuilder content = new StringBuilder(); - String temp = ""; - while((temp = bRead.readLine())!=null) - { - content.append(temp); - } - try { - JSONObject jobj = new JSONObject(content.toString()); - // Step 5: Retrieve the token for the next request - cursor = jobj.getLong("next_cursor"); - JSONArray idlist = jobj.getJSONArray("users"); - if(idlist.length()==0) - { - break; - } - for(int i=0;i queryTerms) - { - String OR_Operator = " OR "; - StringBuffer querystr = new StringBuffer(); - int count = 1; - for(String term:queryTerms) - { - if(count==1) - { - querystr.append(term); - } - else - { - querystr.append(OR_Operator).append(term); - } - } - return querystr.toString(); - } - - public static void main(String[] args) - { - RESTSearchExample rse = new RESTSearchExample(); - ArrayList queryterms = new ArrayList(); - String outfilename = rse.DEF_FILENAME; - if(args!=null) - { - if(args.length>0) - { - for(int i=0;i Keywords; - HashSet Geoboxes; - HashSet Userids; - final String CONFIG_FILE_PATH = "streaming/streaming.config"; - final String DEF_OUTPATH = "streaming/"; - - /** - * Loads the Twitter access token and secret for a user - */ - public void LoadTwitterToken() - { -// OAuthExample oae = new OAuthExample(); -// OAuthToken = oae.GetUserAccessKeySecret(); - OAuthToken = OAuthExample.DEBUGUserAccessSecret(); - } - - /** - * Creates a connection to the Streaming Filter API - * @param baseUrl the URL for Twitter Filter API - * @param outFilePath Location to place the exported file - */ - public void CreateStreamingConnection(String baseUrl, String outFilePath) - { - HttpClient httpClient = new DefaultHttpClient(); - httpClient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, new Integer(90000)); - //Step 1: Initialize OAuth Consumer - OAuthConsumer consumer = new CommonsHttpOAuthConsumer(Configuration.CONSUMER_KEY,Configuration.CONSUMER_SECRET); - consumer.setTokenWithSecret(OAuthToken.getAccessToken(),OAuthToken.getAccessSecret()); - //Step 2: Create a new HTTP POST request and set parameters - HttpPost httppost = new HttpPost(baseUrl); - try { - httppost.setEntity(new UrlEncodedFormEntity(CreateRequestBody(), "UTF-8")); - } catch (UnsupportedEncodingException ex) { - ex.printStackTrace(); - } - try { - //Step 3: Sign the request - consumer.sign(httppost); - } catch (OAuthMessageSignerException ex) { - ex.printStackTrace(); - } catch (OAuthExpectationFailedException ex) { - ex.printStackTrace(); - } catch (OAuthCommunicationException ex) { - ex.printStackTrace(); - } - HttpResponse response; - InputStream is = null; - try { - //Step 4: Connect to the API - response = httpClient.execute(httppost); - if (response.getStatusLine().getStatusCode()!= HttpStatus.SC_OK) - { - throw new IOException("Got status " +response.getStatusLine().getStatusCode()); - } - else - { - System.out.println(OAuthToken.getAccessToken()+ ": Processing from " + baseUrl); - HttpEntity entity = response.getEntity(); - try { - is = entity.getContent(); - } catch (IOException ex) { - ex.printStackTrace(); - } catch (IllegalStateException ex) { - ex.printStackTrace(); - } - //Step 5: Process the incoming Tweet Stream - this.ProcessTwitterStream(is, outFilePath); - } - } catch (IOException ex) { - ex.printStackTrace(); - }finally { - // Abort the method, otherwise 
releaseConnection() will - // attempt to finish reading the never-ending response. - // These methods do not throw exceptions. - if(is!=null) - { - try { - is.close(); - } catch (IOException ex) { - ex.printStackTrace(); - } - } - } - } - - /** - * Processes a stream of tweets and writes them to a file one tweet per line. Each tweet here is represented by a JSON document. - * @param is input stream already connected to the streaming API - * @param outFilePath file to put the collected tweets in - * @throws InterruptedException - * @throws IOException - */ - public void ProcessTwitterStream(InputStream is, String outFilePath) - { - BufferedWriter bwrite = null; - try { - JSONTokener jsonTokener = new JSONTokener(new InputStreamReader(is, "UTF-8")); - ArrayList rawtweets = new ArrayList(); - int nooftweetsuploaded = 0; - while (true) { - try { - JSONObject temp = new JSONObject(jsonTokener); - rawtweets.add(temp); -// System.out.println(temp); - if (rawtweets.size() >= RECORDS_TO_PROCESS) - { - Calendar cal = Calendar.getInstance(); - String filename = outFilePath + "tweets_" + cal.getTimeInMillis() + ".json"; - bwrite = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), "UTF-8")); - nooftweetsuploaded += RECORDS_TO_PROCESS; - //Write the collected tweets to a file - for (JSONObject jobj : rawtweets) { - bwrite.write(jobj.toString()); - bwrite.newLine(); - } - System.out.println("Written "+nooftweetsuploaded+" records so far"); - bwrite.close(); - rawtweets.clear(); - } - } catch (JSONException ex) { - ex.printStackTrace(); - } - } - } catch (IOException ex) { - ex.printStackTrace(); - } - } - - public static void main(String[] args) - { - StreamingApiExample sae = new StreamingApiExample(); - sae.LoadTwitterToken(); - //load parameters from a TSV file - String filename = sae.CONFIG_FILE_PATH; - String outfilepath = sae.DEF_OUTPATH; - if(args!=null) - { - if(args.length>0) - { - filename = args[0]; - } - if(args.length>1) - { - File fl = new File(args[1]); - if(fl.exists()&&fl.isDirectory()) - { - outfilepath = args[1]; - } - } - } - sae.ReadParameters(filename); - sae.CreateStreamingConnection("https://stream.twitter.com/1.1/statuses/filter.json", outfilepath); - } - - /** - * Reads the file and loads the parameters to be crawled. 
Expects that the parameters are tab separated values and the - * @param filename - */ - public void ReadParameters(String filename) - { - BufferedReader br = null; - try { - br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8")); - String temp = ""; - int count = 1; - if(Userids==null) - { - Userids = new HashSet(); - } - if(Geoboxes==null) - { - Geoboxes = new HashSet(); - } - if(Keywords==null) - { - Keywords = new HashSet(); - } - while((temp = br.readLine())!=null) - { - if(!temp.isEmpty()) - { - if(count==1) - { - String[] keywords = temp.split("\t"); - HashSet temptags = new HashSet(); - for(String word:keywords) - { - if(!temptags.contains(word)) - { - temptags.add(word); - } - } - FilterKeywords(temptags); - } - else - if(count==2) - { - String[] geoboxes = temp.split("\t"); - HashSet tempboxes = new HashSet(); - for(String box:geoboxes) - { - if(!tempboxes.contains(box)) - { - tempboxes.add(box); - } - } - FilterGeoboxes(tempboxes); - } - else - if(count==3) - { - String[] userids = temp.split("\t"); - HashSet tempids = new HashSet(); - for(String id:userids) - { - if(!tempids.contains(id)) - { - tempids.add(id); - } - } - FilterUserids(tempids); - } - count++; - } - } - } catch (IOException ex) { - ex.printStackTrace(); - } - finally{ - try { - br.close(); - } catch (IOException ex) { - ex.printStackTrace(); - } - } - } - - private void FilterUserids(HashSet userids) - { - if(userids!=null) - { - int maxsize = MAX_USERS; - if(userids.size() geoboxes) - { - if(geoboxes!=null) - { - int maxsize = MAX_GEOBOXES; - if(geoboxes.size() hashtags) - { - if(hashtags!=null) - { - int maxsize = MAX_KEYWORDS; - if(hashtags.size() CreateRequestBody() - { - List params = new ArrayList(); - if(Userids != null&&Userids.size()>0) - { - params.add(CreateNameValuePair("follow", Userids)); - System.out.println("userids = "+Userids); - } - if (Geoboxes != null&&Geoboxes.size()>0) { - params.add(CreateNameValuePair("locations", Geoboxes)); - System.out.println("locations = "+Geoboxes); - - } - if (Keywords != null&&Keywords.size()>0) { - params.add(CreateNameValuePair("track", Keywords)); - System.out.println("keywords = "+Keywords); - } - return params; - } - - private NameValuePair CreateNameValuePair(String name, Collection items) - { - StringBuilder sb = new StringBuilder(); - boolean needComma = false; - for (String item : items) { - if (needComma) { - sb.append(','); - } - needComma = true; - sb.append(item); - } - return new BasicNameValuePair(name, sb.toString()); - } -} diff --git a/src/Chapter2/support/APIType.java b/src/Chapter2/support/APIType.java deleted file mode 100644 index 94449f8..0000000 --- a/src/Chapter2/support/APIType.java +++ /dev/null @@ -1,12 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter2.support; - -public class APIType -{ - public static String USER_TIMELINE = "/statuses/user_timeline"; - public static String FOLLOWERS = "/followers/list"; - public static String FRIENDS = "/friends/list"; - public static String USER_PROFILE = "/users/show"; -} diff --git a/src/Chapter2/support/InfoType.java b/src/Chapter2/support/InfoType.java deleted file mode 100644 index 42b0334..0000000 --- a/src/Chapter2/support/InfoType.java +++ /dev/null @@ -1,12 +0,0 @@ -/* TweetTracker. 
Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter2.support; - -public class InfoType -{ - public static final int PROFILE_INFO = 0; - public static final int FOLLOWER_INFO = 1; - public static final int FRIEND_INFO = 2; - public static final int STATUSES_INFO = 3; -} diff --git a/src/Chapter2/support/Location.java b/src/Chapter2/support/Location.java deleted file mode 100644 index 7f6234f..0000000 --- a/src/Chapter2/support/Location.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ - -package Chapter2.support; - -/** - * - * @author shamanth - */ -public class Location -{ - public Double latitude; - public Double longitude; - - public Location(Double lat,Double lng) - { - latitude = lat; - longitude = lng; - } - - @Override - public String toString() - { - return "Latitude: "+latitude+" & Longitude: "+longitude; - } -} diff --git a/src/Chapter2/support/OAuthTokenSecret.java b/src/Chapter2/support/OAuthTokenSecret.java deleted file mode 100644 index 8fee4a8..0000000 --- a/src/Chapter2/support/OAuthTokenSecret.java +++ /dev/null @@ -1,38 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter2.support; - -public class OAuthTokenSecret -{ - String UserAccessToken; - String UserAccessSecret; - - public String getAccessSecret() { - return UserAccessSecret; - } - - public void setAccessSecret(String AccessSecret) { - this.UserAccessSecret = AccessSecret; - } - - public String getAccessToken() { - return UserAccessToken; - } - - public void setAccessToken(String AccessToken) { - this.UserAccessToken = AccessToken; - } - - public OAuthTokenSecret(String token,String secret) - { - this.setAccessToken(token); - this.setAccessSecret(secret); - } - - @Override - public String toString() - { - return "Access Token: "+getAccessToken()+" Access Secret: "+getAccessSecret(); - } -} diff --git a/src/Chapter4/GraphElements/RetweetEdge.java b/src/Chapter4/GraphElements/RetweetEdge.java deleted file mode 100644 index 83836a0..0000000 --- a/src/Chapter4/GraphElements/RetweetEdge.java +++ /dev/null @@ -1,53 +0,0 @@ -package GraphElements; - - -public class RetweetEdge { - private UserNode to, from; - private int retweetCount; - - public RetweetEdge(UserNode to, UserNode from){ - this.to = to; - this.from = from; - retweetCount = 1; - } - - public void incrementRTCount(){ - retweetCount++; - } - - public UserNode getTo() { - return to; - } - public void setTo(UserNode to) { - this.to = to; - } - public UserNode getFrom() { - return from; - } - public void setFrom(UserNode from) { - this.from = from; - } - public int getRetweetCount() { - return retweetCount; - } - public void setRetweetCount(int retweetCount) { - this.retweetCount = retweetCount; - } - - public boolean equals(Object maybeEdge){ - if(maybeEdge instanceof RetweetEdge){ - RetweetEdge edge = (RetweetEdge) maybeEdge; - return edge.to.equals(to) && edge.from.equals(from); - } - return false; - - } - - public String toString(){ - return from + " -> " + to; - } - - public int hashCode(){ - return toString().hashCode(); - } -} diff --git a/src/Chapter4/GraphElements/UserNode.java b/src/Chapter4/GraphElements/UserNode.java deleted file mode 100644 index fba4419..0000000 --- a/src/Chapter4/GraphElements/UserNode.java +++ /dev/null @@ -1,34 +0,0 @@ -package GraphElements; - - - -public class UserNode { - private 
String username; - - public UserNode(String username){ - this.username = username; - } - - public String getUsername() { - return username; - } - - public void setUsername(String username) { - this.username = username; - } - - public boolean equals(Object un){ - if(un instanceof UserNode){ - return username.equals(((UserNode)un).username); - } - return false; - } - - public String toString(){ - return username; - } - - public int hashCode(){ - return username.hashCode(); - } -} diff --git a/src/Chapter4/centrality/examples/BetweennessCentralityExample.java b/src/Chapter4/centrality/examples/BetweennessCentralityExample.java deleted file mode 100644 index ab9f7e6..0000000 --- a/src/Chapter4/centrality/examples/BetweennessCentralityExample.java +++ /dev/null @@ -1,31 +0,0 @@ -package centrality.examples; - -import Chapter4.util.TweetFileToGraph; -import java.io.File; -import GraphElements.RetweetEdge; -import GraphElements.UserNode; -import edu.uci.ics.jung.algorithms.importance.BetweennessCentrality; -import edu.uci.ics.jung.graph.DirectedGraph; - -public class BetweennessCentralityExample { - public static void main(String[] args){ - - File tweetFile; - - if(args.length > 0){ - tweetFile = new File(args[0]); - } - else{ - tweetFile = new File("synthetic_retweet_network.json"); - } - - DirectedGraph retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile); - - //calculate the betweenness centrality - BetweennessCentrality betweenness = new BetweennessCentrality(retweetGraph); - - betweenness.evaluate(); - betweenness.printRankings(true, true); - - } -} diff --git a/src/Chapter4/centrality/examples/EigenvectorCentralityExample.java b/src/Chapter4/centrality/examples/EigenvectorCentralityExample.java deleted file mode 100644 index 172dd16..0000000 --- a/src/Chapter4/centrality/examples/EigenvectorCentralityExample.java +++ /dev/null @@ -1,36 +0,0 @@ -package centrality.examples; - -import Chapter4.util.TweetFileToGraph; -import java.io.File; -import GraphElements.RetweetEdge; -import GraphElements.UserNode; -import edu.uci.ics.jung.algorithms.scoring.EigenvectorCentrality; -import edu.uci.ics.jung.graph.DirectedGraph; - -public class EigenvectorCentralityExample { - public static void main(String[] args){ - - File tweetFile; - - if(args.length > 0){ - tweetFile = new File(args[0]); - } - else{ - tweetFile = new File("synthetic_retweet_network.json"); - } - - DirectedGraph retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile); - -// EigenVectorScorer scorer = new EigenVectorScorer(retweetGraph); -// for(UserNode node : retweetGraph.getVertices()){ -// System.out.println(node + " - " + scorer.getVertexScore(node)); -// } - - EigenvectorCentrality eig = new EigenvectorCentrality(retweetGraph); - eig.evaluate(); - - for(UserNode node : retweetGraph.getVertices()){ - System.out.println(node + " - " + eig.getVertexScore(node)); - } - } -} diff --git a/src/Chapter4/centrality/examples/InDegreeCentralityExample.java b/src/Chapter4/centrality/examples/InDegreeCentralityExample.java deleted file mode 100644 index 6a027ac..0000000 --- a/src/Chapter4/centrality/examples/InDegreeCentralityExample.java +++ /dev/null @@ -1,30 +0,0 @@ -package Chapter4.centrality.examples; - -import Chapter4.util.TweetFileToGraph; -import java.io.File; -import GraphElements.RetweetEdge; -import GraphElements.UserNode; -import edu.uci.ics.jung.graph.DirectedGraph; - -public class InDegreeCentralityExample { - - public static void main(String[] args){ - - File tweetFile; - - if(args.length > 0){ - tweetFile = new 
File(args[0]); - } - else{ - tweetFile = new File("synthetic_retweet_network.json"); - } - - DirectedGraph retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile); - - //calculate the betweenness centrality - for(UserNode node : retweetGraph.getVertices()){ - System.out.println(node + " - " + retweetGraph.getInEdges(node).size()); - } - - } -} diff --git a/src/Chapter4/centrality/examples/PageRankCentralityExample.java b/src/Chapter4/centrality/examples/PageRankCentralityExample.java deleted file mode 100644 index dd44efd..0000000 --- a/src/Chapter4/centrality/examples/PageRankCentralityExample.java +++ /dev/null @@ -1,39 +0,0 @@ -package Chapter4.centrality.examples; - -import Chapter4.util.TweetFileToGraph; -import java.io.File; -import GraphElements.RetweetEdge; -import GraphElements.UserNode; -import edu.uci.ics.jung.algorithms.scoring.PageRank; -import edu.uci.ics.jung.graph.DirectedGraph; - -public class PageRankCentralityExample { - public static void main(String[] args){ - - File tweetFile; - - if(args.length > 0){ - tweetFile = new File(args[0]); - } - else{ - tweetFile = new File("synthetic_retweet_network.json"); - } - - DirectedGraph retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile); - - - PageRank pageRank = new PageRank(retweetGraph, .5); - pageRank.evaluate(); - - for(UserNode node : retweetGraph.getVertices()){ - System.out.println(node + " - " + pageRank.getVertexScore(node)); - } - -// EigenvectorCentrality eig = new EigenvectorCentrality(retweetGraph); -// eig.evaluate(); -// -// for(UserNode node : retweetGraph.getVertices()){ -// System.out.println(node + " - " + eig.getVertexScore(node)); -// } - } -} diff --git a/src/Chapter4/classification/bayes/Classification.java b/src/Chapter4/classification/bayes/Classification.java deleted file mode 100644 index ea9aba7..0000000 --- a/src/Chapter4/classification/bayes/Classification.java +++ /dev/null @@ -1,22 +0,0 @@ -package Chapter4.classification.bayes; - -public class Classification { - private String label; - private double confidence; - - public Classification(String label, double confidence){ - this.label = label; - this.confidence = confidence; - } - - public String getLabel() { - return label; - } - public double getConfidence() { - return confidence; - } - - public String toString(){ - return "(" + label + ", " + confidence + ")"; - } -} diff --git a/src/Chapter4/classification/bayes/NBCxv.java b/src/Chapter4/classification/bayes/NBCxv.java deleted file mode 100644 index 5c48e28..0000000 --- a/src/Chapter4/classification/bayes/NBCxv.java +++ /dev/null @@ -1,60 +0,0 @@ -package Chapter4.classification.bayes; - -import java.io.FileReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; - -import com.google.gson.JsonObject; -import com.google.gson.JsonStreamParser; - -public class NBCxv { - public static void main(String[] args){ - - String filename = args.length >= 1 ? 
args[0] : "owsemoticons.json"; - - ArrayList allTexts = new ArrayList(); - - try { - //read the file, and train each document - JsonStreamParser parser = new JsonStreamParser(new FileReader(filename)); - JsonObject elem; - while (parser.hasNext()) { - elem = parser.next().getAsJsonObject(); - allTexts.add(elem.get("text").getAsString()); - } - } catch (IOException e) { - e.printStackTrace(); - } - - //do 5-fold cross validation 3 times - Map> buckets; - int bucketIdx; - NaiveBayesSentimentClassifier nbsc; - for(int i = 0; i < 3; i++){ - - //randomly split the texts into 5 buckets - buckets = new HashMap>(); - //initialize the 5 buckets - for(int j = 0; j < 5; j++) buckets.put(j, new ArrayList()); - for(String text : allTexts){ - bucketIdx = (int) (Math.random()*5); - buckets.get(bucketIdx).add(text); - } - - for(int j = 0; j < 5; j++){ - //use all but j as the training, use j as the test. - nbsc = new NaiveBayesSentimentClassifier(); - for(int k = 0; k < 5; k++){ - if(k != j){ - nbsc.trainInstances(buckets.get(k)); - } - } - //test with bucket j - - } - } - - } -} diff --git a/src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java b/src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java deleted file mode 100644 index 923416c..0000000 --- a/src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java +++ /dev/null @@ -1,264 +0,0 @@ -package Chapter4.classification.bayes; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.StringTokenizer; - -/** - * This class performs both the training and classification steps of a Naive Bayes Classifier. - * - */ -public class NaiveBayesSentimentClassifier { - //the possible sentiment labels - private static final String[] SENTIMENT_LABELS = {"happy", "sad"}; - //the tokens to look for in labeling the sentiment. - private static final String[] HAPPY_SMILEYS = {":)", ";)", ":D", ":-)", ":o)", ":-D"}; - private static final String[] SAD_SMILEYS = {":(", ":-(", ":'(", ":'-(", "D:"}; - //store these as a set for faster retrieval - private static final Set HAPPY_SMILEY_SET = new HashSet(Arrays.asList(HAPPY_SMILEYS)); - private static final Set SAD_SMILEY_SET = new HashSet(Arrays.asList(SAD_SMILEYS)); - - //counter for the number of times each word has been associated with each sentiment. - private Map sentOccurs; - //counter for the number of times we've seen each sentiment. - private Integer[] sentCount; - - public NaiveBayesSentimentClassifier(){ - //initialize the counters - sentOccurs = new HashMap(); - sentCount = new Integer[SENTIMENT_LABELS.length]; - for(int i = 0; i < SENTIMENT_LABELS.length; i++){ - sentCount[i] = 0; - } - } - - /** - * Tokenize a string. Turns string into list of words based on whitespace, then - * removes stopwords, punctuation, and reduces the word to its stem. - * @param text - * The piece of text - * @return - * Each individual word. 
- */ - private List getTokens(String text){ - StringTokenizer tokens = new StringTokenizer(text); - ArrayList words = new ArrayList(); - - String tmp; - StringBuilder sb; - while(tokens.hasMoreTokens()){ - sb = new StringBuilder(); - tmp = tokens.nextToken(); - tmp = tmp.toLowerCase(); - - for(char ch : tmp.toCharArray()){ - if(Character.isLetter(ch)){ - sb.append(ch); - } - } - tmp = sb.toString(); - if(tmp.length() > 0 && !StopwordsList.stopwordsSet.contains(tmp)){ - words.add(sb.toString()); - } - } - - return words; - } - - /** - * Checks if tweet has a "label" (emoticon). If so, stores the words in - * the prior. - * @param tweetText - * The text of the document to check. - */ - public void trainInstance(String tweetText){ - //see if the tweet is labeled (i.e. has a smiley) - int tweetLabel = extractLabel(tweetText); - List tokens = getTokens(tweetText); - if(tweetLabel != -1){ - //add these words to the classifier - updateClassifier(tokens, tweetLabel); - } - } - - public String printWordOccurs(int sentIndex, int topN){ - StringBuilder sb = new StringBuilder(); - - WordCountPair wpcset[] = new WordCountPair[sentOccurs.keySet().size()]; - - String s; - int t = 0; - Iterator sIter = sentOccurs.keySet().iterator(); -// int totalCount = 0; -// while(sIter.hasNext()){ -// s = sIter.next(); -// totalCount += sentOccurs.get(s)[sentIndex]; -// } - - sIter = sentOccurs.keySet().iterator(); - while(sIter.hasNext()){ - s = sIter.next(); -// wpcset[t++] = new WordCountPair(s, sentOccurs.get(s)[sentIndex] * 1.0 / totalCount); - wpcset[t++] = new WordCountPair(s, Math.sqrt(sentOccurs.get(s)[sentIndex] * 1.0 )); - } - - Arrays.sort(wpcset); - - double frac; - for(int i = 0; (i < topN || topN <= 0) && i < wpcset.length; i++){ - s = wpcset[i].getWord(); - frac = wpcset[i].getCount(); - - sb.append(s); - sb.append(":"); - sb.append(frac); - sb.append("\n"); - } - - return sb.toString(); - } - - public void trainInstances(List tweetTexts){ - for(String text : tweetTexts){ - trainInstance(text); - } - } - - /** - * Classify a tweet as happy or sad. This ignores the emoticon for demonstration purposes. - * @param tweetText - * The text of the tweet - * @return - * A Classification object that returns the sentiment of the tweet. - */ - public Classification classify(String tweetText){ - //stores the probability of each sentiment being the tweets true sentiment. - double[] labelProbs = new double[SENTIMENT_LABELS.length]; - //tokenize the string - List tokens = getTokens(tweetText); - int maxLabelIdx = 0; - for(int i = 0; i < labelProbs.length; i++){ - //calculate the probability that the tweet has that sentiment. - labelProbs[i] = calcLabelProb(tokens, i); - System.out.println(i + " -> " + labelProbs[i] ); - //keep track of the label probability - maxLabelIdx = labelProbs[i] > labelProbs[maxLabelIdx] ? i : maxLabelIdx; - } - //calc the confidence - double conf = labelProbs[maxLabelIdx]; - labelProbs[maxLabelIdx] = 0; - conf -= sumVector(labelProbs); - - return new Classification(SENTIMENT_LABELS[maxLabelIdx], conf); - } - - private int extractLabel(String tweetText){ - StringTokenizer tokens = new StringTokenizer(tweetText); - while(tokens.hasMoreTokens()){ - String token = tokens.nextToken(); - if(HAPPY_SMILEY_SET.contains(token)){ - return 0; - } - else if(SAD_SMILEY_SET.contains(token)){ - return 1; - } - } - return -1; - } - - /** - * This updates the classifier's probabilites for each word - * with the new piece of text. - * @param tokens - * The tokens in the tweet. 
- * @param sentIndex - * The sentiment label. - */ - private void updateClassifier(List tokens, int sentIndex){ - for(String token : tokens){ - if(sentOccurs.containsKey(token)){ - sentOccurs.get(token)[sentIndex] ++ ; - } - else{ - //make a new array and put it - Integer[] newArray = {0, 0}; - newArray[sentIndex] ++; - sentOccurs.put(token, newArray); - } - } - //update the overall document count - sentCount[sentIndex]++; - } - - /** - * The probability of the tweet having a given label. - * @param tokens - * The tokens in the tweet. - * @param sentIndex - * The probability we are testing. - * @return - * The probability the tweet has the class label indicated by "sentIndex". - */ - private double calcLabelProb(List tokens, int sentIndex){ - - //calculate the class probabilities - double[] pClass = new double[SENTIMENT_LABELS.length]; - int cSum = sumVector(sentCount); - int totalWordCount = 0; - - for(int i = 0; i < sentCount.length; i++){ - pClass[i] = sentCount[i] * 1.0 / cSum; - } - - for(String word : sentOccurs.keySet()){ - Integer[] wordCt = sentOccurs.get(word); - totalWordCount = sumVector(wordCt); - } - - - double p = 1.0; - boolean foundOne = false; - for(String token : tokens){ - if(sentOccurs.containsKey(token)){ - foundOne = true; - Integer[] probs = sentOccurs.get(token); - double pWordGivenClass = probs[sentIndex] / (double)(sumVector(probs)); - double pWord = sumVector(probs) / totalWordCount; - p *= pWordGivenClass * pClass[sentIndex] / pWord; - } - } - return foundOne ? p : 0.0; - } - - /** - * Helper function to sum the values in a 1D array. - * @param vector - * The 1D array to sum. - * @return - * The sum. - */ - private double sumVector(double[] vector){ - double sum = 0.0; - for(double d : vector) sum += d; - return sum; - } - - /** - * Helper function to sum the values in a 1D array. - * @param vector - * The 1D array to sum. - * @return - * The sum. 
- */ - private int sumVector(Integer[] vector){ - int sum = 0; - for(int d : vector) sum += d; - return sum; - } -} diff --git a/src/Chapter4/classification/bayes/StopwordsList.java b/src/Chapter4/classification/bayes/StopwordsList.java deleted file mode 100644 index 06edd5a..0000000 --- a/src/Chapter4/classification/bayes/StopwordsList.java +++ /dev/null @@ -1,10 +0,0 @@ -package Chapter4.classification.bayes; - -import java.util.Arrays; -import java.util.HashSet; -import java.util.Set; - -public class StopwordsList { - private static final String[] stopwords = {"a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "can", "did", "do", "does", "doing", "don", "down", "during", "each", "few", "for", "from", "further", "get", "had", "has", "have", "having", "he", "her", "here", "hers", "herself", "him", "himself", "his", "how", "i", "if", "im", "i'm", "in", "into", "is", "it", "its", "itself", "just", "me", "more", "most", "my", "myself", "no", "nor", "not", "now", "of", "off", "on", "once", "only", "or", "other", "our", "ours", "ourselves", "out", "over", "own", "rt", "s", "same", "she", "should", "so", "some", "such", "t", "than", "that", "the", "their", "theirs", "them", "themselves", "then", "there", "these", "they", "this", "those", "through", "to", "too", "under", "until", "up", "us", "very", "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom", "why", "will", "with", "you", "your", "yours", "yourself", "yourselves"}; - public static final Set stopwordsSet = new HashSet(Arrays.asList(stopwords)); -} diff --git a/src/Chapter4/classification/bayes/TestNBC.java b/src/Chapter4/classification/bayes/TestNBC.java deleted file mode 100644 index 7e0e743..0000000 --- a/src/Chapter4/classification/bayes/TestNBC.java +++ /dev/null @@ -1,49 +0,0 @@ -package Chapter4.classification.bayes; - -import java.io.FileReader; -import java.io.IOException; - -import com.google.gson.JsonObject; -import com.google.gson.JsonStreamParser; - -public class TestNBC { - public static void main(String[] args){ - - String filename = args.length >= 1 ? 
args[0] : "owsemoticons.json"; - - //initialize the sentiment classifier - NaiveBayesSentimentClassifier nbsc = new NaiveBayesSentimentClassifier(); - - try { - //read the file, and train each document - JsonStreamParser parser = new JsonStreamParser(new FileReader(filename)); - JsonObject elem; - String text; - while (parser.hasNext()) { - elem = parser.next().getAsJsonObject(); - text = elem.get("text").getAsString(); - nbsc.trainInstance(text); - } - - //print out the positive and negative dictionary - System.out.println("=== Positive Dictionary ==="); - System.out.println(nbsc.printWordOccurs(0, 25)); - System.out.println("=== Negative Dictionary ==="); - System.out.println(nbsc.printWordOccurs(1, 25)); - - //now go through and classify each line as positive or negative -// parser = new JsonStreamParser(new FileReader(filename)); -// while (parser.hasNext()) { -// elem = parser.next().getAsJsonObject(); -// text = elem.get("text").getAsString(); -// Classification c = nbsc.classify(text); -// System.out.println(c + " -> " + text); -// } - System.out.println(nbsc.classify("I love new york")); - - } catch (IOException e) { - e.printStackTrace(); - } - - } -} diff --git a/src/Chapter4/classification/bayes/WordCountPair.java b/src/Chapter4/classification/bayes/WordCountPair.java deleted file mode 100644 index b96be92..0000000 --- a/src/Chapter4/classification/bayes/WordCountPair.java +++ /dev/null @@ -1,34 +0,0 @@ -package Chapter4.classification.bayes; - -public class WordCountPair implements Comparable{ - - - private String word; - private double count; - - public WordCountPair(String word, double count){ - this.word = word; - this.count = count; - } - - public int compareTo(WordCountPair arg0) { - return arg0.count - count < 0 ? -1 : 1; - } - - public String getWord() { - return word; - } - - public void setWord(String word) { - this.word = word; - } - - public double getCount() { - return count; - } - - public void setCount(int count) { - this.count = count; - } - -} diff --git a/src/Chapter4/graph/visualization/SimpleGraphViewer.java b/src/Chapter4/graph/visualization/SimpleGraphViewer.java deleted file mode 100644 index 7cb46e4..0000000 --- a/src/Chapter4/graph/visualization/SimpleGraphViewer.java +++ /dev/null @@ -1,86 +0,0 @@ -package chapter4.graph.visualization; - -import Chapter4.util.TweetFileToGraph; -import java.awt.Dimension; -import java.awt.Shape; -import java.awt.geom.Ellipse2D; -import java.io.File; - -import javax.swing.JFrame; - -import org.apache.commons.collections15.Transformer; -import GraphElements.RetweetEdge; -import GraphElements.UserNode; -import edu.uci.ics.jung.algorithms.layout.KKLayout; -import edu.uci.ics.jung.algorithms.layout.Layout; -import edu.uci.ics.jung.algorithms.scoring.EigenvectorCentrality; -import edu.uci.ics.jung.graph.DirectedGraph; -import edu.uci.ics.jung.visualization.BasicVisualizationServer; - -public class SimpleGraphViewer { - public static void main(String[] args){ - - File tweetFile; - - if(args.length > 0){ - tweetFile = new File(args[0]); - } - else{ - tweetFile = new File("synthetic_retweet_network.json"); - } - - DirectedGraph retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile); - - /* - * Converts a node to its string representation - */ - Transformer stringer = new Transformer(){ - public String transform(UserNode n){ - return n.toString(); - } - }; - - /* - * Calculate the centrality - */ - //calculate the betweenness centrality -// final InDegreeScorer centralityScore = new InDegreeScorer(retweetGraph); -// final 
BetweennessCentrality centralityScore = new BetweennessCentrality(retweetGraph); -// final PageRank centralityScore = new PageRank(retweetGraph, 0.85); - final EigenvectorCentrality centralityScore = new EigenvectorCentrality(retweetGraph); - centralityScore.evaluate(); - - double centralityMax = 0.0f; - for(UserNode node : retweetGraph.getVertices()){ - centralityMax = Math.max(centralityMax, centralityScore.getVertexScore(node)); - } - final double centralityMaxFinal = centralityMax; - - /* - * Sizes a node by some centrality measure - */ - Transformer shaper = new Transformer(){ - public Shape transform(UserNode n){ - System.out.println("User: " + n.getUsername() + " Cent: " + centralityScore.getVertexScore(n) + " Max: " + centralityMaxFinal); - double radius = 50 * (centralityScore.getVertexScore(n)) / centralityMaxFinal; - radius = Math.max(radius, 5.0f); - float fRadius = (float) radius; - return new Ellipse2D.Float(-fRadius/2, -fRadius/2, fRadius, fRadius); - } - }; - - Layout layout = new KKLayout(retweetGraph); - layout.setSize(new Dimension(500, 500)); - - BasicVisualizationServer vv = new BasicVisualizationServer(layout); - vv.setPreferredSize(new Dimension(550, 550)); - vv.getRenderContext().setVertexLabelTransformer(stringer); - vv.getRenderContext().setVertexShapeTransformer(shaper); - - JFrame jframe = new JFrame("Simple Graph View"); - jframe.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); - jframe.getContentPane().add(vv); - jframe.pack(); - jframe.setVisible(true); - } -} diff --git a/src/Chapter4/tweetlda/LDA.java b/src/Chapter4/tweetlda/LDA.java deleted file mode 100644 index ca7f9a3..0000000 --- a/src/Chapter4/tweetlda/LDA.java +++ /dev/null @@ -1,89 +0,0 @@ -package tweetlda; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.TreeSet; -import java.util.regex.Pattern; - -import org.json.JSONObject; - -import cc.mallet.pipe.CharSequence2TokenSequence; -import cc.mallet.pipe.CharSequenceLowercase; -import cc.mallet.pipe.Pipe; -import cc.mallet.pipe.SerialPipes; -import cc.mallet.pipe.TokenSequence2FeatureSequence; -import cc.mallet.pipe.TokenSequenceRemoveStopwords; -import cc.mallet.pipe.iterator.StringArrayIterator; -import cc.mallet.topics.ParallelTopicModel; -import cc.mallet.types.Alphabet; -import cc.mallet.types.IDSorter; -import cc.mallet.types.InstanceList; - -public class LDA { - - private static final String STOP_WORDS = "stopwords.txt"; - private static final int ITERATIONS = 100; - private static final int THREADS = 4; - private static final int NUM_TOPICS = 25; - private static final int NOM_WORDS_TO_ANALYZE = 25; - - public static void main(String[] args) throws Exception { - ArrayList pipeList = new ArrayList(); - File stopwords = new File(STOP_WORDS); - - String inputFileName = args.length >= 1 ? 
args[0] : "testows.json"; - - File inputFile = new File(inputFileName); - - // Lowercase, tokenize, remove stopwords, stem, and convert to features - pipeList.add((Pipe) new CharSequenceLowercase()); - pipeList.add((Pipe) new CharSequence2TokenSequence(Pattern.compile("\\p{L}[\\p{L}\\p{P}]+\\p{L}"))); - pipeList.add((Pipe) new TokenSequenceRemoveStopwords(stopwords, "UTF-8", false, false, false)); - pipeList.add((Pipe) new PorterStemmer()); - pipeList.add((Pipe) new TokenSequence2FeatureSequence()); - - InstanceList instances = new InstanceList(new SerialPipes(pipeList)); - - BufferedReader fileReader = new BufferedReader(new FileReader(inputFile)); - LinkedList textList = new LinkedList(); - String line; - while((line = fileReader.readLine()) != null){ - JSONObject elem = new JSONObject(line); - if(elem.has("text")){ - textList.add(elem.getString("text")); - } - } - - instances.addThruPipe(new StringArrayIterator(textList.toArray(new String[textList.size()]))); - - ParallelTopicModel model = new ParallelTopicModel(NUM_TOPICS); - model.addInstances(instances); - model.setNumThreads(THREADS); - model.setNumIterations(ITERATIONS); - model.estimate(); - - // The data alphabet maps word IDs to strings - Alphabet dataAlphabet = instances.getDataAlphabet(); - - int topicIdx=0; - StringBuilder sb; - for (TreeSet set : model.getSortedWords()) { - sb = new StringBuilder().append(topicIdx); - sb.append(" - "); - int j = 0; - double sum = 0.0; - for (IDSorter s : set) { - sum += s.getWeight(); - } - for (IDSorter s : set) { - sb.append(dataAlphabet.lookupObject(s.getID())).append(":").append(s.getWeight() / sum).append(", "); - if (++j >= NOM_WORDS_TO_ANALYZE) break; - } - System.out.println(sb.append("\n").toString()); - topicIdx++; - } - } -} diff --git a/src/Chapter4/tweetlda/PorterStemmer.java b/src/Chapter4/tweetlda/PorterStemmer.java deleted file mode 100644 index 1a7149e..0000000 --- a/src/Chapter4/tweetlda/PorterStemmer.java +++ /dev/null @@ -1,33 +0,0 @@ -package tweetlda; - -import cc.mallet.pipe.Pipe; -import cc.mallet.types.Instance; -import cc.mallet.types.TokenSequence; - -public class PorterStemmer extends Pipe { - - private static final long serialVersionUID = 154100332101873830L; - - public Instance pipe(Instance carrier){ - TokenSequence ts = (TokenSequence) carrier.getData(); - String word; - Stemmer s; - - for(int i = 0; i < ts.size(); i++){ - word = ts.get(i).getText(); - //stem the word - s = new Stemmer(); - for(char ch : word.toCharArray()){ - if(Character.isLetter(ch)){ - s.add(ch); - } - } - s.stem(); - ts.get(i).setText(s.toString()); - } - carrier.setData(ts); - - return carrier; - } - -} diff --git a/src/Chapter4/tweetlda/Stemmer.java b/src/Chapter4/tweetlda/Stemmer.java deleted file mode 100644 index f06dfc6..0000000 --- a/src/Chapter4/tweetlda/Stemmer.java +++ /dev/null @@ -1,428 +0,0 @@ -package tweetlda; - - -/* - - Porter stemmer in Java. The original paper is in - - Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, - no. 3, pp 130-137, - - See also http://www.tartarus.org/~martin/PorterStemmer - - History: - - Release 1 - - Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below. - The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1] - is then out outside the bounds of b. - - Release 2 - - Similarly, - - Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below. - 'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and - b[j] is then outside the bounds of b. 
- - Release 3 - - Considerably revised 4/9/00 in the light of many helpful suggestions - from Brian Goetz of Quiotix Corporation (brian@quiotix.com). - - Release 4 - -*/ - -import java.io.*; - -/** - * Stemmer, implementing the Porter Stemming Algorithm - * - * The Stemmer class transforms a word into its root form. The input - * word can be provided a character at time (by calling add()), or at once - * by calling one of the various stem(something) methods. - */ - -class Stemmer -{ private char[] b; - private int i, /* offset into b */ - i_end, /* offset to end of stemmed word */ - j, k; - private static final int INC = 50; - /* unit of size whereby b is increased */ - public Stemmer() - { b = new char[INC]; - i = 0; - i_end = 0; - } - - /** - * Add a character to the word being stemmed. When you are finished - * adding characters, you can call stem(void) to stem the word. - */ - - public void add(char ch) - { if (i == b.length) - { char[] new_b = new char[i+INC]; - for (int c = 0; c < i; c++) new_b[c] = b[c]; - b = new_b; - } - b[i++] = ch; - } - - - /** Adds wLen characters to the word being stemmed contained in a portion - * of a char[] array. This is like repeated calls of add(char ch), but - * faster. - */ - - public void add(char[] w, int wLen) - { if (i+wLen >= b.length) - { char[] new_b = new char[i+wLen+INC]; - for (int c = 0; c < i; c++) new_b[c] = b[c]; - b = new_b; - } - for (int c = 0; c < wLen; c++) b[i++] = w[c]; - } - - /** - * After a word has been stemmed, it can be retrieved by toString(), - * or a reference to the internal buffer can be retrieved by getResultBuffer - * and getResultLength (which is generally more efficient.) - */ - public String toString() { return new String(b,0,i_end); } - - /** - * Returns the length of the word resulting from the stemming process. - */ - public int getResultLength() { return i_end; } - - /** - * Returns a reference to a character buffer containing the results of - * the stemming process. You also need to consult getResultLength() - * to determine the length of the result. - */ - public char[] getResultBuffer() { return b; } - - /* cons(i) is true <=> b[i] is a consonant. */ - - private final boolean cons(int i) - { switch (b[i]) - { case 'a': case 'e': case 'i': case 'o': case 'u': return false; - case 'y': return (i==0) ? true : !cons(i-1); - default: return true; - } - } - - /* m() measures the number of consonant sequences between 0 and j. if c is - a consonant sequence and v a vowel sequence, and <..> indicates arbitrary - presence, - - gives 0 - vc gives 1 - vcvc gives 2 - vcvcvc gives 3 - .... - */ - - private final int m() - { int n = 0; - int i = 0; - while(true) - { if (i > j) return n; - if (! cons(i)) break; i++; - } - i++; - while(true) - { while(true) - { if (i > j) return n; - if (cons(i)) break; - i++; - } - i++; - n++; - while(true) - { if (i > j) return n; - if (! cons(i)) break; - i++; - } - i++; - } - } - - /* vowelinstem() is true <=> 0,...j contains a vowel */ - - private final boolean vowelinstem() - { int i; for (i = 0; i <= j; i++) if (! cons(i)) return true; - return false; - } - - /* doublec(j) is true <=> j,(j-1) contain a double consonant. */ - - private final boolean doublec(int j) - { if (j < 1) return false; - if (b[j] != b[j-1]) return false; - return cons(j); - } - - /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant - and also if the second c is not w,x or y. this is used when trying to - restore an e at the end of a short word. e.g. 
- - cav(e), lov(e), hop(e), crim(e), but - snow, box, tray. - - */ - - private final boolean cvc(int i) - { if (i < 2 || !cons(i) || cons(i-1) || !cons(i-2)) return false; - { int ch = b[i]; - if (ch == 'w' || ch == 'x' || ch == 'y') return false; - } - return true; - } - - private final boolean ends(String s) - { int l = s.length(); - int o = k-l+1; - if (o < 0) return false; - for (int i = 0; i < l; i++) if (b[o+i] != s.charAt(i)) return false; - j = k-l; - return true; - } - - /* setto(s) sets (j+1),...k to the characters in the string s, readjusting - k. */ - - private final void setto(String s) - { int l = s.length(); - int o = j+1; - for (int i = 0; i < l; i++) b[o+i] = s.charAt(i); - k = j+l; - } - - /* r(s) is used further down. */ - - private final void r(String s) { if (m() > 0) setto(s); } - - /* step1() gets rid of plurals and -ed or -ing. e.g. - - caresses -> caress - ponies -> poni - ties -> ti - caress -> caress - cats -> cat - - feed -> feed - agreed -> agree - disabled -> disable - - matting -> mat - mating -> mate - meeting -> meet - milling -> mill - messing -> mess - - meetings -> meet - - */ - - private final void step1() - { if (b[k] == 's') - { if (ends("sses")) k -= 2; else - if (ends("ies")) setto("i"); else - if (b[k-1] != 's') k--; - } - if (ends("eed")) { if (m() > 0) k--; } else - if ((ends("ed") || ends("ing")) && vowelinstem()) - { k = j; - if (ends("at")) setto("ate"); else - if (ends("bl")) setto("ble"); else - if (ends("iz")) setto("ize"); else - if (doublec(k)) - { k--; - { int ch = b[k]; - if (ch == 'l' || ch == 's' || ch == 'z') k++; - } - } - else if (m() == 1 && cvc(k)) setto("e"); - } - } - - /* step2() turns terminal y to i when there is another vowel in the stem. */ - - private final void step2() { if (ends("y") && vowelinstem()) b[k] = 'i'; } - - /* step3() maps double suffices to single ones. so -ization ( = -ize plus - -ation) maps to -ize etc. note that the string before the suffix must give - m() > 0. */ - - private final void step3() { if (k == 0) return; /* For Bug 1 */ switch (b[k-1]) - { - case 'a': if (ends("ational")) { r("ate"); break; } - if (ends("tional")) { r("tion"); break; } - break; - case 'c': if (ends("enci")) { r("ence"); break; } - if (ends("anci")) { r("ance"); break; } - break; - case 'e': if (ends("izer")) { r("ize"); break; } - break; - case 'l': if (ends("bli")) { r("ble"); break; } - if (ends("alli")) { r("al"); break; } - if (ends("entli")) { r("ent"); break; } - if (ends("eli")) { r("e"); break; } - if (ends("ousli")) { r("ous"); break; } - break; - case 'o': if (ends("ization")) { r("ize"); break; } - if (ends("ation")) { r("ate"); break; } - if (ends("ator")) { r("ate"); break; } - break; - case 's': if (ends("alism")) { r("al"); break; } - if (ends("iveness")) { r("ive"); break; } - if (ends("fulness")) { r("ful"); break; } - if (ends("ousness")) { r("ous"); break; } - break; - case 't': if (ends("aliti")) { r("al"); break; } - if (ends("iviti")) { r("ive"); break; } - if (ends("biliti")) { r("ble"); break; } - break; - case 'g': if (ends("logi")) { r("log"); break; } - } } - - /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. 
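-
-          For example (words taken from Porter's original paper, each of
-          which follows from the suffix map in the code below):
-
-          triplicate -> triplic     formative -> form     formalize -> formal
-          electriciti -> electric   hopeful -> hope       goodness -> good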
*/ - - private final void step4() { switch (b[k]) - { - case 'e': if (ends("icate")) { r("ic"); break; } - if (ends("ative")) { r(""); break; } - if (ends("alize")) { r("al"); break; } - break; - case 'i': if (ends("iciti")) { r("ic"); break; } - break; - case 'l': if (ends("ical")) { r("ic"); break; } - if (ends("ful")) { r(""); break; } - break; - case 's': if (ends("ness")) { r(""); break; } - break; - } } - - /* step5() takes off -ant, -ence etc., in context vcvc. */ - - private final void step5() - { if (k == 0) return; /* for Bug 1 */ switch (b[k-1]) - { case 'a': if (ends("al")) break; return; - case 'c': if (ends("ance")) break; - if (ends("ence")) break; return; - case 'e': if (ends("er")) break; return; - case 'i': if (ends("ic")) break; return; - case 'l': if (ends("able")) break; - if (ends("ible")) break; return; - case 'n': if (ends("ant")) break; - if (ends("ement")) break; - if (ends("ment")) break; - /* element etc. not stripped before the m */ - if (ends("ent")) break; return; - case 'o': if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) break; - /* j >= 0 fixes Bug 2 */ - if (ends("ou")) break; return; - /* takes care of -ous */ - case 's': if (ends("ism")) break; return; - case 't': if (ends("ate")) break; - if (ends("iti")) break; return; - case 'u': if (ends("ous")) break; return; - case 'v': if (ends("ive")) break; return; - case 'z': if (ends("ize")) break; return; - default: return; - } - if (m() > 1) k = j; - } - - /* step6() removes a final -e if m() > 1. */ - - private final void step6() - { j = k; - if (b[k] == 'e') - { int a = m(); - if (a > 1 || a == 1 && !cvc(k-1)) k--; - } - if (b[k] == 'l' && doublec(k) && m() > 1) k--; - } - - /** Stem the word placed into the Stemmer buffer through calls to add(). - * Returns true if the stemming process resulted in a word different - * from the input. You can retrieve the result with - * getResultLength()/getResultBuffer() or toString(). - */ - public void stem() - { k = i - 1; - if (k > 1) { step1(); step2(); step3(); step4(); step5(); step6(); } - i_end = k+1; i = 0; - } - - /** Test program for demonstrating the Stemmer. It reads text from a - * a list of files, stems each word, and writes the result to standard - * output. Note that the word stemmed is expected to be in lower case: - * forcing lower case must be done outside the Stemmer class. - * Usage: Stemmer file-name file-name ... 
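- * e.g. (an illustrative, made-up file name; the input must already be lowercased):
- *   java tweetlda.Stemmer lowercased_words.txt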
- */ - public static void main(String[] args) - { - char[] w = new char[501]; - Stemmer s = new Stemmer(); - for (int i = 0; i < args.length; i++) - try - { - FileInputStream in = new FileInputStream(args[i]); - - try - { while(true) - - { int ch = in.read(); - if (Character.isLetter((char) ch)) - { - int j = 0; - while(true) - { ch = Character.toLowerCase((char) ch); - w[j] = (char) ch; - if (j < 500) j++; - ch = in.read(); - if (!Character.isLetter((char) ch)) - { - /* to test add(char ch) */ - for (int c = 0; c < j; c++) s.add(w[c]); - - /* or, to test add(char[] w, int j) */ - /* s.add(w, j); */ - - s.stem(); - { String u; - - /* and now, to test toString() : */ - u = s.toString(); - - /* to test getResultBuffer(), getResultLength() : */ - /* u = new String(s.getResultBuffer(), 0, s.getResultLength()); */ - - System.out.print(u); - } - break; - } - } - } - if (ch < 0) break; - System.out.print((char)ch); - } - } - catch (IOException e) - { System.out.println("error reading " + args[i]); - break; - } - catch (FileNotFoundException e) - { System.out.println("file " + args[i] + " not found"); - break; - } - } -} diff --git a/src/Chapter4/util/BetweennessScorer.java b/src/Chapter4/util/BetweennessScorer.java deleted file mode 100644 index 0926d34..0000000 --- a/src/Chapter4/util/BetweennessScorer.java +++ /dev/null @@ -1,25 +0,0 @@ -package util; - -import GraphElements.RetweetEdge; -import GraphElements.UserNode; -import edu.uci.ics.jung.algorithms.scoring.VertexScorer; -import edu.uci.ics.jung.algorithms.shortestpath.DijkstraShortestPath; -import edu.uci.ics.jung.graph.Graph; -import edu.uci.ics.jung.graph.Hypergraph; - -public class BetweennessScorer implements VertexScorer<UserNode, Double>{ - - public BetweennessScorer(Hypergraph<UserNode, RetweetEdge> graph){ - /* - * Step 1: Calculate the shortest path between each pair of nodes. - */ - DijkstraShortestPath<UserNode, RetweetEdge> paths = new DijkstraShortestPath<UserNode, RetweetEdge>((Graph<UserNode, RetweetEdge>) graph); -// paths.getDistance(source, target); - } - - public Double getVertexScore(UserNode arg0) { - // TODO Auto-generated method stub - return null; - } - -} diff --git a/src/Chapter4/util/EigenVectorScorer.java b/src/Chapter4/util/EigenVectorScorer.java deleted file mode 100644 index da0c1a8..0000000 --- a/src/Chapter4/util/EigenVectorScorer.java +++ /dev/null @@ -1,64 +0,0 @@ -package Chapter4.util; - -import GraphElements.RetweetEdge; -import GraphElements.UserNode; -import cern.colt.matrix.DoubleMatrix2D; -import cern.colt.matrix.impl.SparseDoubleMatrix2D; -import cern.colt.matrix.linalg.EigenvalueDecomposition; -import edu.uci.ics.jung.algorithms.scoring.VertexScorer; -import edu.uci.ics.jung.graph.Hypergraph; - -/** - * This is a Jung Node Scorer that computes the Eigenvector Centrality for each node. - */ -public class EigenVectorScorer implements VertexScorer<UserNode, Double> { - - private UserNode[] users; - private DoubleMatrix2D eigenVectors; - private int dominantEigenvectorIdx; - - public EigenVectorScorer(Hypergraph<UserNode, RetweetEdge> graph){ - users = new UserNode[graph.getVertexCount()]; - graph.getVertices().toArray(users); - - /* Step 1: Create the adjacency matrix. - * - * An adjacency matrix is a matrix with N rows and N columns, - * where N is the number of nodes in the network. - * An entry in the matrix is 1 when node i connects to node j, - * and 0 otherwise. - */ - SparseDoubleMatrix2D matrix = new SparseDoubleMatrix2D(users.length, users.length); - for(int i = 0; i < users.length; i++){ - for(int j = 0; j < users.length; j++){ - matrix.setQuick(i, j, graph.containsEdge(new RetweetEdge(users[i], users[j])) ?
1 : 0); - } - } - - /* Step 2: Find the principal eigenvector. - * For more information on eigen-decomposition please see - * http://mathworld.wolfram.com/EigenDecomposition.html - */ - EigenvalueDecomposition eig = new EigenvalueDecomposition(matrix); - DoubleMatrix2D eigenVals = eig.getD(); - eigenVectors = eig.getV(); - - dominantEigenvectorIdx = 0; - for(int i = 1; i < eigenVals.columns(); i++){ - if(eigenVals.getQuick(dominantEigenvectorIdx, dominantEigenvectorIdx) < - eigenVals.getQuick(i, i)){ - dominantEigenvectorIdx = i; - } - } - } - - public Double getVertexScore(UserNode arg0) { - for(int i = 0; i < users.length; i++){ - if(users[i].equals(arg0)){ - return Math.abs(eigenVectors.getQuick(i, dominantEigenvectorIdx)); - } - } - return null; - } - -} diff --git a/src/Chapter4/util/InDegreeScorer.java b/src/Chapter4/util/InDegreeScorer.java deleted file mode 100644 index 014adc6..0000000 --- a/src/Chapter4/util/InDegreeScorer.java +++ /dev/null @@ -1,30 +0,0 @@ -package Chapter4.util; - -import edu.uci.ics.jung.algorithms.scoring.VertexScorer; -import edu.uci.ics.jung.graph.Hypergraph; - -/** - * This is a Jung Node Scorer that computes the - * In-Degree Centrality of nodes. - */ -public class InDegreeScorer<T> implements VertexScorer<T, Double>{ - - //The graph representation in JUNG. - private Hypergraph<T, ?> graph; - - /** - * Initialize the graph scorer. - * @param graph - * The graph we wish to score. - */ - public InDegreeScorer(Hypergraph<T, ?> graph){ - this.graph = graph; - } - - /** - * @return The In-Degree Centrality of the vertex. - */ - public Double getVertexScore(T node) { - return (double) graph.getInEdges(node).size(); - } -} \ No newline at end of file diff --git a/src/Chapter4/util/TweetFileProcessor.java b/src/Chapter4/util/TweetFileProcessor.java deleted file mode 100644 index 9b6b99c..0000000 --- a/src/Chapter4/util/TweetFileProcessor.java +++ /dev/null @@ -1,76 +0,0 @@ -package Chapter4.util; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.UnsupportedEncodingException; -import java.util.Iterator; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONException; -import org.json.JSONObject; - -public class TweetFileProcessor implements Iterator<JSONObject>{ - - protected BufferedReader fileBuffer; - protected boolean endOfFile; - protected String nextLine; - - public TweetFileProcessor(File f){ - - endOfFile = false; - - InputStreamReader isr; - BufferedReader br = null; - try { - isr = new InputStreamReader(new FileInputStream(f), "UTF-8"); - br = new BufferedReader(isr); - nextLine = br.readLine(); - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); - endOfFile = true; - } catch (FileNotFoundException e) { - e.printStackTrace(); - endOfFile = true; - } catch (IOException e) { - e.printStackTrace(); - endOfFile = true; - } - finally{ - fileBuffer = br; - } - } - - @Override - public boolean hasNext() { - return !endOfFile; - } - - @Override - public JSONObject next() { - JSONObject obj = null; - try { - obj = new JSONObject(nextLine); - } catch (JSONException ex) { - Logger.getLogger(TweetFileProcessor.class.getName()).log(Level.SEVERE, null, ex); - } - try { - nextLine = fileBuffer.readLine(); - if(nextLine == null){ - endOfFile = true; - } - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - return obj; - } - - @Override - public void remove()
throws UnsupportedOperationException{ - throw new UnsupportedOperationException(); - } -} diff --git a/src/Chapter4/util/TweetFileToGraph.java b/src/Chapter4/util/TweetFileToGraph.java deleted file mode 100644 index 6cf2e3a..0000000 --- a/src/Chapter4/util/TweetFileToGraph.java +++ /dev/null @@ -1,77 +0,0 @@ -package Chapter4.util; - -import java.io.File; - -import GraphElements.RetweetEdge; -import GraphElements.UserNode; - -import edu.uci.ics.jung.graph.DirectedGraph; -import edu.uci.ics.jung.graph.DirectedSparseGraph; -import edu.uci.ics.jung.graph.util.EdgeType; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONException; -import org.json.JSONObject; - -/** - * Some basic functionality to convert files collected - * in Chapter 2 to JUNG graphs. - */ -public class TweetFileToGraph { - - public static DirectedGraph getRetweetNetwork(File tweetFile){ - - JSONObject tmp; - - TweetFileProcessor tfp = new TweetFileProcessor(tweetFile); - DirectedSparseGraph dsg = new DirectedSparseGraph(); - - while (tfp.hasNext()){ - tmp = tfp.next(); - if(tmp==null) - { - continue; - } - //get the author - String user=null; - try { - user = tmp.getJSONObject("user").getString("screen_name"); - } catch (JSONException ex) { - Logger.getLogger(TweetFileToGraph.class.getName()).log(Level.SEVERE, null, ex); - } - if(user==null) - { - continue; - } - //get the retweeted user - try{ - JSONObject retweet = tmp.getJSONObject("retweeted_status"); - String retweeted_user = retweet.getJSONObject("user").getString("screen_name"); - - //make an edge or increment the weight if it exists. - UserNode toUser = new UserNode(retweeted_user); - UserNode fromUser = new UserNode(user); - - dsg.addVertex(toUser); - dsg.addVertex(fromUser); - - RetweetEdge edge = new RetweetEdge(toUser, fromUser); - - if(dsg.containsEdge(edge)){ - dsg.findEdge(fromUser, toUser).incrementRTCount(); - } - else{ - dsg.addEdge(edge, fromUser, toUser); - } - dsg.addEdge(edge, fromUser, toUser, EdgeType.DIRECTED); - } - catch(JSONException ex){ - //the tweet is not a retweet. this is not a problem. - } - - - } - - return dsg; - } -} diff --git a/src/Chapter5/network/CreateD3Network.java b/src/Chapter5/network/CreateD3Network.java deleted file mode 100644 index d4c25af..0000000 --- a/src/Chapter5/network/CreateD3Network.java +++ /dev/null @@ -1,716 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. 
- */ - -package Chapter5.network; - - -import Chapter5.support.HashTagDS; -import Chapter5.support.NetworkNode; -import Chapter5.support.NodeIDComparator; -import Chapter5.support.NodeSizeComparator; -import Chapter5.support.ToNodeInfo; -import Chapter5.support.Tweet; -import java.io.BufferedReader; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import utils.TextUtils; - -/** - * - * @author shamanth - */ -public class CreateD3Network -{ - static final String DEF_INFILENAME = "ows.json"; - private String RTPATTERN = "rt @[_a-zA-Z0-9]+"; - private final int DEFAULT_NODE_SIZE = 0; -// private final int NODE_COUNT_LIMIT = 1; - //private final String[] node_color_scheme = new String[]{"#FFFFD9","#EDF8B1","#C7E9B4","#7FCDBB","#41B6C4","#1D91C0","#225EA8","#253494","#081D58"}; - //private final String[] node_color_scheme = new String[]{"#A6BDDB","#74A9CF","#3690C0","#0570B0","#045A8D","#023858"}; - - /** - * Extracts the users who have been retweeted using the RTPATTERN - * @param text - * @return - */ - public ArrayList GetRTUsers(String text) - { - Pattern p = Pattern.compile(RTPATTERN, Pattern.CASE_INSENSITIVE); - Matcher m = p.matcher(text); - ArrayList rtusers = new ArrayList(); - while(m.find()) - { - String nuser = text.substring(m.start(),m.end()); - nuser = nuser.replaceAll("rt @|RT @", ""); -// nuser = nuser.replaceAll("RT @", ""); - rtusers.add(nuser.toLowerCase()); - } - return rtusers; - } - - /** - * Identifies the category to which the tweet belongs. Each category is defined by a group of words/hashtags - * @param tweet - * @param usercategories - * @return - */ - public int GetCategory(String tweet, HashTagDS[] usercategories) - { - HashMap categoryvotes = new HashMap(); - tweet = tweet.toLowerCase(); - int i=0; - for(HashTagDS cat:usercategories) - { - - for(String s :cat.tags) - { - if(tweet.indexOf(s)!=-1) - { - if(categoryvotes.containsKey(i)) - { - categoryvotes.put(i, categoryvotes.get(i)+1); - } - else - { - categoryvotes.put(i, 1); - } - } - } - i++; - } - Set keyset = categoryvotes.keySet(); - int maxvote = 0; - //by default the tweet will be in the first category - int maxcategoryindex = 0; - for(int key:keyset) - { - if(categoryvotes.get(key)>maxvote) - { - maxvote = categoryvotes.get(key); - maxcategoryindex = key; - } - } - return maxcategoryindex; - } - - /** - * Converts the input jsonobject containing category descriptions to an array for processing. 
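- * The expected shape mirrors the object built in the DEBUG main() at the
- * bottom of this class, e.g. {"Group 1": {"color": "#800000", "hts": ["zuccotti"]}}.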
- * @param hashtagcoll JSONObject containing the list of hashtags, color, and the topic information - * @return An array of hashtags - */ - public HashTagDS[] ConvertJSONArrayToArray(JSONObject hashtagcoll) - { - HashTagDS[] hashtags = new HashTagDS[hashtagcoll.length()]; - int j=0; - try{ - if(hashtagcoll!=null) - { - Iterator keyit = hashtagcoll.keys(); - while(keyit.hasNext()) - { - HashTagDS ht = new HashTagDS(); - JSONObject tags = (JSONObject) hashtagcoll.get((String)keyit.next()); - ht.groupname = keyit.toString(); - ht.color = tags.getString("color"); - JSONArray tagjson = tags.getJSONArray("hts"); - ht.tags = new String[tagjson.length()]; - for(int i=0;i catcount = new HashMap(); - //if the node has no tolinks then look at the node that it retweeted to decide the color of the node - for(String tweet:tnfs.data) - { - int id = this.GetCategory(tweet, hashtagarray); - if(catcount.containsKey(id)) - { - catcount.put(id, catcount.get(id)+1); - } - else - catcount.put(id, 1); - } - Set keys = catcount.keySet(); - int maxcatID = -1; - int maxcount = 0; - for(int k:keys) - { - if(maxcatID==-1) - { - maxcatID = k; - maxcount = catcount.get(k); - } - else - { - if(maxcount userconnections = new HashMap(); -// HashMap tweet_class_codes = new HashMap(); -// int tweet_class_counter = 1; - HashTagDS[] hashtagarray = ConvertJSONArrayToArray(hashtags); - BufferedReader br = null; - try{ - br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - JSONObject tweetobj; - try { - tweetobj = new JSONObject(temp); - } catch (JSONException ex) { - ex.printStackTrace(); - continue; - } - //Extract the tweet first - Tweet t = new Tweet(); - String text=""; - try { - text = TextUtils.GetCleanText(tweetobj.getString("text")).toLowerCase(); - } catch (JSONException ex) { - ex.printStackTrace(); - continue; - } - //Check that the tweet matches at least one of the topics - boolean groupmatch = false; - for(HashTagDS ht:hashtagarray) - { - String[] tags = ht.tags; - for(String tg:tags) - { - if(text.contains(tg)) - { - groupmatch = true; - break; - } - } - if(groupmatch) - { - break; - } - } - if(!groupmatch) - { - continue; - } - // - ArrayList fromusers = new ArrayList(); - if(!tweetobj.isNull("retweeted_status")) - { - JSONObject rtstatus; - try { - rtstatus = tweetobj.getJSONObject("retweeted_status"); - if(rtstatus.isNull("user")) - { - JSONObject rtuserobj = rtstatus.getJSONObject("user"); - try{ - fromusers.add(rtuserobj.get("screen_name").toString()); - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - } catch (JSONException ex) { - Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex); - } - } - else - { - //use the tweet text to retrieve the pattern "RT @username:" - fromusers = GetRTUsers(text); - } - if(fromusers.isEmpty()) - { - continue; - } - - //identify the class values to be applied to all the nodes and - //edges. 
-// String prunedtext = TextUtils.RemoveTwitterElements(text); -// Integer class_code = tweet_class_codes.get(prunedtext); -// if(class_code==null) -// { -// class_code = tweet_class_counter; -// tweet_class_codes.put(prunedtext, class_code); //set the unique id for this tweet -// tweet_class_counter++; -// } - t.text = TextUtils.RemoveRTElements(text); - if(!tweetobj.isNull("user")) - { - JSONObject userobj; - try { - userobj = tweetobj.getJSONObject("user"); - t.user = userobj.getString("screen_name").toLowerCase(); - } catch (JSONException ex) { - Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex); - } - } -// try { -// t.pubdate = String.valueOf(tweetobj.get("timestamp")); -// } catch (JSONException ex) { -// Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex); -// } - t.catColor = hashtagarray[t.catID].color; - //update the size of the from fromuser - int cur_level = 0; - for(int i=fromusers.size()-1;i>=0;i--) - { - String touser = ""; - if(i==0) - {//if this is the last user in the retweet sequence then use the user of the tweet as the next link - touser = t.user; - } - else - { //if there are still fromuser in the retweet chain then use them as the next link - touser = fromusers.get(i-1); - } - //don't add any selflinks - if(fromusers.get(i).equals(touser)) - { - continue; - } - NetworkNode fromuser = null; - if(userconnections.containsKey(fromusers.get(i))) - { - //from node already exists simply add this new connection to it - fromuser = userconnections.get(fromusers.get(i)); - } - else - { - //the from user was not found. add the node - fromuser = new NetworkNode(); - // fromuser.id = nodeid++; - fromuser.username = fromusers.get(i); - fromuser.tonodes = new ArrayList(); - fromuser.class_codes = new ArrayList(); - fromuser.size = DEFAULT_NODE_SIZE; - fromuser.level = cur_level; - fromuser.data = new ArrayList(); - fromuser.data.add(t.text); - //fromuser.category = ; - } -// if(!fromuser.class_codes.contains(class_code)) -// { -// //add the marker to from node if it does not have it already -// fromuser.class_codes.add(class_code); -// } - //if to node is not in the list then create it - NetworkNode tonode = null; - if(!userconnections.containsKey(touser)) - { - tonode = new NetworkNode(); - // System.out.println(touser+" "+nodeid); - // tonode.id= nodeid++; - tonode.username = touser; - tonode.tonodes= new ArrayList(); - tonode.class_codes = new ArrayList(); - tonode.catID = t.catID; - tonode.catColor = t.catColor; - tonode.size = DEFAULT_NODE_SIZE; - tonode.data= new ArrayList(); - tonode.data.add(t.text); - tonode.level = cur_level+1; - //add the classcode to the node if it doesn't already exist -// if(!tonode.class_codes.contains(class_code)) -// { -// tonode.class_codes.add(class_code); -// } - //add the touser info - userconnections.put(touser, tonode); - } - else - { - tonode = userconnections.get(touser); - tonode.data.add(t.text); - if(tonode.level keys = userconnections.keySet(); - ArrayList returnnodes = new ArrayList(); - //its +1 because nodes with size 0 are not going to be used to calculate the class - int min = DEFAULT_NODE_SIZE+1; - int max = DEFAULT_NODE_SIZE+1; - for(String k:keys) - { - NetworkNode n = userconnections.get(k); - int maxcat = GetMajorityTopicColor(n,hashtagarray); - n.catID = maxcat; - n.catColor = hashtagarray[maxcat].color; - userconnections.put(k, n); - // -// if(n.size==0) -// {//mark the node as a zero node -// n.class_codes.add(-1); -// } -// else -// { - if(n.size>max) - { - max = 
n.size; - } - if(n.size nodes = ComputeGroupsSqrt(returnnodes, max, min, numNodeClasses); - Collections.sort(nodes,Collections.reverseOrder(new NodeSizeComparator())); - //select how many nodes to show. - int nodes_to_visit = 0; - if(nodes.size()>=num_nodes) - { - nodes_to_visit = num_nodes; - } - else - { - nodes_to_visit = nodes.size(); - } - - HashMap prunednodes = new HashMap(); - HashMap nodeidlist = new HashMap(); - int nodeid = 0; //node nodeid counter - for(int k=0;k rtnodes = GetNextHopConnections(userconnections,nd,new HashMap()); - Set names = rtnodes.keySet(); - for(String n:names) - { - if(!prunednodes.containsKey(n)) - { - NetworkNode newnode = rtnodes.get(n); - if(newnode.size>0) - { - prunednodes.put(n, newnode); - nodeidlist.put(n, nodeid++); - } - } - } - } - - /** We now have all the nodes of the network. compute their ids sequentially - * and assign them to the respective nodes. Simultaneously compact the nodes - * of the network to remove all nodes which have not been retweeted and are - * of size 0 - */ - - Set allnodes = prunednodes.keySet(); -// System.out.println(prunednodes.size()); - ArrayList finalnodes = new ArrayList(); -// HashMap> conninfo = new HashMap>(); - for(String n:allnodes) - { - NetworkNode nd = prunednodes.get(n); - nd.id = nodeidlist.get(nd.username); - ArrayList connids = new ArrayList(); -// ArrayList compact_To_nodes = new ArrayList(); - int counter = 0; - for(ToNodeInfo tnf: nd.tonodes) - { - //user has never been retweeted. the chain terminates here, so remove it - if(nodeidlist.containsKey(tnf.tousername)) - { - tnf.tonodeid = nodeidlist.get(tnf.tousername); - connids.add(tnf.tonodeid); - nd.tonodes.set(counter, tnf); - counter++; - } - } - finalnodes.add(nd); - //store the connections to compute the clusterids later -// if(!conninfo.containsKey(nd.id)) -// { -// conninfo.put(nd.id, connids); -// } - } - //generate the clusterids -// ArrayList[] clusterids = (ArrayList[])new ArrayList[allnodes.size()]; -// Set idkeys = conninfo.keySet(); -// for(int id:idkeys) -// { -// for(int x:conninfo.get(id)) -// { -// if(clusterids[x]==null) -// { -// ArrayList toclusterid = new ArrayList(); -// toclusterid.add(id); -// clusterids[x] = toclusterid; -// } -// else -// { -// ArrayList toclusterid = clusterids[x]; -// if(!toclusterid.contains(id)) -// { -// toclusterid.add(id); -// clusterids[x] = toclusterid; -// } -// } -// } -// } - //now create the final node list with the clusterids -// for(String n:allnodes) -// { -// NetworkNode nd = prunednodes.get(n); -// ArrayList cids = clusterids[nd.id]; -// if(cids!=null) -// { -// int size = cids.size(); -// nd.clusterID = new int[size+1]; -// int counter=0; -// nd.clusterID[counter++] = nd.id; -// for(int c:cids) -// { -// nd.clusterID[counter++] = c; -// } -// } - //System.out.println(nd.class_codes.toString()); -// finalnodes.add(nd); -// } - Collections.sort(finalnodes,new NodeIDComparator()); - System.out.println(finalnodes.size()); - for(NetworkNode node:finalnodes) - { - System.out.println(node.id+" "+node.username+" "+node.level+" "+node.size+" "+node.catColor+node.data.get(0)); - } - return GetD3Structure(finalnodes); - } - - /** - * Creates a D3 representation of the nodes, consisting of two JSONArray a set of nodes and a set of links between the nodes - * @param finalnodes - * @return - */ - public JSONObject GetD3Structure(ArrayList finalnodes) - { - JSONObject alltweets = new JSONObject(); - try { - JSONArray nodes = new JSONArray(); - JSONArray links = new JSONArray(); - for (NetworkNode node : 
finalnodes) - { - try { - //create adjacencies - JSONArray nodedata = new JSONArray(); - for (ToNodeInfo tnf : node.tonodes) { - JSONObject jsadj = new JSONObject(); - jsadj.put("source", node.id); - jsadj.put("target", tnf.tonodeid); - //weight of the edge - jsadj.put("value", 1); - //class code is a unique id corresponding to the text - jsadj.put("data", tnf.class_code); - links.put(jsadj); - //create a data object for the node - JSONObject jsdata = new JSONObject(); - jsdata.put("tonodeid", tnf.tonodeid); - jsdata.put("nodefrom", node.username); - jsdata.put("nodeto", tnf.tousername); - jsdata.put("tweet", tnf.text); -// jsdata.put("pubtime", tnf.date); - //class code for tweet to be used to filter -// jsdata.put("classcode", tnf.class_code); - nodedata.put(jsdata); - } - //add node - JSONObject nd = new JSONObject(); - nd.put("name", node.username); - nd.put("group", node.group); - nd.put("id", node.id); - nd.put("size", node.size); - nd.put("catColor", node.catColor); - nd.put("catID", node.catID); - nd.put("data", nodedata); - nd.put("level", node.level); - //clusterids for the node -// JSONArray cids = new JSONArray(); -// if (node.clusterID != null) { -// for (int code : node.clusterID) { -// cids.put(code); -// } -// } else { -// cids.put(node.id); -// } -// nd.put("clusterids", cids); - //classcodes for the node -// JSONArray codes = new JSONArray(); -// for (int c : node.class_codes) { -// codes.put(c); -// } -// nd.put("classcodes", codes); - nodes.put(nd); - } catch (JSONException ex) { - ex.printStackTrace(); - } - } - alltweets.put("nodes", nodes); - alltweets.put("links", links); - } catch (JSONException ex) { - Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex); - } - return alltweets; - } - - /** - * Recursively traverses the list of nodes to identify all nodes reachable from a starting node. - * @param userconnections A map containing the usernames as keys and the node information as value - * @param cur_node Node currently being processed. - * @param newnodes A list of nodes which can be reached from the current node - * @return A map of the usernames and the node information for all nodes reachable - */ - public HashMap GetNextHopConnections(HashMap userconnections,NetworkNode cur_node,HashMap newnodes) - { - cur_node.level = cur_node.level+1; - newnodes.put(cur_node.username,cur_node); - for(int i=0;i rtnodes = GetNextHopConnections(userconnections, userconnections.get(tnf.tousername),newnodes); - newnodes = rtnodes; - } - return newnodes; - } - - /** - * Divides a list of nodes into groups using the square root binning - * technique. If a node has size x and there are y groups in total. Then the - * group of the node is computed as ceil((sqrt(x)/sqrt(max))*y), where max is - * the size of the largest node. 
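- * For example, with max = 100 and y = 5, a node of size x = 25 falls in group
- * ceil((sqrt(25)/sqrt(100))*5) = ceil(2.5) = 3 (which the code below stores
- * zero-based, as group index 2).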
- * @param nodes A list of nodes - * @param max The maximum size of a node - * @param min The minimum size of a node - * @param noofclasses Number of classes into which the nodes must be classified - * @return A list of nodes along with their class - */ - public ArrayList ComputeGroupsSqrt(ArrayList nodes, int max, int min, int noofclasses) - { - ArrayList finalnodes = new ArrayList(); - for(int i=0;i0) - { - color_index = (int) Math.ceil(((double)Math.sqrt(node.size)/Math.sqrt(max))*noofclasses)-1; -// node.size = color_index*6; - } - node.group = color_index; - finalnodes.add(node); - } - return finalnodes; - } - - - //DEBUG use only - public static void main(String[] args) - { - try { - CreateD3Network cdn = new CreateD3Network(); - JSONObject jobj = new JSONObject(); - JSONObject obj = new JSONObject(); - obj.put("color", "#800000"); - JSONArray ja = new JSONArray(); - ja.put("zuccotti"); - obj.put("hts", ja); - jobj.put("Group 1", obj); - obj = new JSONObject(); - obj.put("color", "#0FFF00"); - ja = new JSONArray(); - ja.put("#nypd"); - obj.put("hts", ja); - jobj.put("Group 2", obj); - String filename = "D:\\Twitter Data Analytics\\Data\\testows.json"; - JSONObject nodes = cdn.ConvertTweetsToDiffusionPath(filename,7, jobj,5); - } catch (JSONException ex) { - ex.printStackTrace(); - } - } -} diff --git a/src/Chapter5/network/ExtractUserTagNetwork.java b/src/Chapter5/network/ExtractUserTagNetwork.java deleted file mode 100644 index 43ae680..0000000 --- a/src/Chapter5/network/ExtractUserTagNetwork.java +++ /dev/null @@ -1,173 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.network; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class ExtractUserTagNetwork -{ - - static final String DEF_INFILENAME = "ows.json"; - - /** - * Extracts a map of all the hashtags a user has used in his tweets resulting in a bipartite network. The frequency of each tag is also returned in the form of a map. - * @param inFilename File containing a list of tweets as JSON objects - * @return A map containing the users as keys and a map containing the hashtags they use along with their frequency. 
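- *         e.g. {"someuser": {"#ows": 3, "#nypd": 1}} (the user name and
- *         counts here are illustrative, not taken from any real data)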
- */ - public HashMap> ExtractUserHashtagNetwork(String inFilename) - { - HashMap> usertagmap = new HashMap>(); - BufferedReader br = null; - try{ - br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try{ - JSONObject tweetobj = new JSONObject(temp); - String text; - String username; - HashMap tags = new HashMap(); - if(!tweetobj.isNull("entities")) - { - JSONObject entities = tweetobj.getJSONObject("entities"); - JSONArray hashtags; - try { - hashtags = entities.getJSONArray("hashtags"); - for(int i=0;i usertags = usertagmap.get(username); - Set keys = tags.keySet(); - for(String k:keys) - { - if(usertags.containsKey(k)) - { - usertags.put(k, usertags.get(k)+tags.get(k)); - } - else - { - usertags.put(k, tags.get(k)); - } - } - usertagmap.put(username, usertags); - } - else - { - usertagmap.put(username, tags); - } - } - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(ExtractUserTagNetwork.class.getName()).log(Level.SEVERE, null, ex); - } - } - return usertagmap; - } - - /** - * Extracts all the hashtags mentioned in a tweet and creates a map with the frequency of their occurrence. - * @param text - * @return A map containing the hashtags as keys and their frequency as value - */ - public HashMap ExtractHashTags(String text) - { - Pattern p = Pattern.compile("#[a-zA-Z0-9]+"); - Matcher m = p.matcher(text); - HashMap tags = new HashMap(); - while(m.find()) - { - String tag = text.substring(m.start(),m.end()).toLowerCase(); - if(!tags.containsKey(tag)) - { - tags.put(tag,1); - } - else - { - tags.put(tag, tags.get(tag)+1); - } - } - return tags; - } - - public static void main(String[] args) - { - ExtractUserTagNetwork eutn = new ExtractUserTagNetwork(); - - String infilename = DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - } - HashMap> usertagmap = eutn.ExtractUserHashtagNetwork(infilename); - Set keys = usertagmap.keySet(); - for(String key:keys) - { - System.out.println(key); - HashMap tags = usertagmap.get(key); - Set tagkeys = tags.keySet(); - for(String tag:tagkeys) - { - System.out.println(tag+","+tags.get(tag)); - } - } - } -} diff --git a/src/Chapter5/support/DateInfo.java b/src/Chapter5/support/DateInfo.java deleted file mode 100644 index 9a32d4c..0000000 --- a/src/Chapter5/support/DateInfo.java +++ /dev/null @@ -1,30 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.support; - -import java.util.Date; -import java.util.HashMap; - -public class DateInfo implements Comparable -{ - public Date d; - public HashMap catcounts = new HashMap(); - - public int compareTo(Object o) { - DateInfo temp = (DateInfo) o; - if(temp.d.after(this.d)) - { - return 1; - } - else - if(temp.d.before(this.d)) - { - return -1; - } - else - { - return 0; - } - } -} diff --git a/src/Chapter5/support/HashTagDS.java b/src/Chapter5/support/HashTagDS.java deleted file mode 100644 index b338b6d..0000000 --- a/src/Chapter5/support/HashTagDS.java +++ /dev/null @@ -1,18 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. 
- */ - -package Chapter5.support; - -/** - * - * @author shamanth - */ -public class HashTagDS -{ - public String groupname; - public String[] tags; - public String color; - -} diff --git a/src/Chapter5/support/NetworkNode.java b/src/Chapter5/support/NetworkNode.java deleted file mode 100644 index 4f662e8..0000000 --- a/src/Chapter5/support/NetworkNode.java +++ /dev/null @@ -1,49 +0,0 @@ -package Chapter5.support; - - -import java.util.ArrayList; - -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ - -/** - * - * @author shamanth - */ -public class NetworkNode -{ - public int id; - public String username; - public int size; - public String catColor; - public int group; -// public int[] clusterID; - public int catID; -// public double lat; -// public double lng; - public ArrayList<String> data; - public int level; - public ArrayList<Integer> class_codes; - public ArrayList<ToNodeInfo> tonodes; - - public NetworkNode Copy() - { - NetworkNode tempnode = new NetworkNode(); - tempnode.catColor = this.catColor; - tempnode.id = this.id; - tempnode.username = this.username; - tempnode.size = this.size; - tempnode.group = this.group; -// tempnode.clusterID = this.clusterID; - tempnode.catID = this.catID; -// tempnode.lat = this.lat; -// tempnode.lng = this.lng; - tempnode.data = this.data; -// tempnode.level = this.level; - tempnode.class_codes = this.class_codes; - tempnode.tonodes = this.tonodes; - return tempnode; - } -} diff --git a/src/Chapter5/support/NodeIDComparator.java b/src/Chapter5/support/NodeIDComparator.java deleted file mode 100644 index 0b41ae7..0000000 --- a/src/Chapter5/support/NodeIDComparator.java +++ /dev/null @@ -1,32 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor.
- */ - -package Chapter5.support; - -import java.util.Comparator; - -/** - * - * @author shamanth - */ -public class NodeIDComparator implements Comparator -{ - - public int compare(Object o1, Object o2) { - int id1 = ((NetworkNode) o1).id; - int id2 = ((NetworkNode) o2).id; - if(id1>id2) - { - return 1; - } - else - if(id1size2) - { - return 1; - } - if(size1> CATEGORIES = new HashMap>(); - SimpleDateFormat twittersdm = new SimpleDateFormat("EEE MMM dd HH:mm:ss Z yyyy"); - SimpleDateFormat dayhoursdm = new SimpleDateFormat("yyyy-MM-dd:HH"); -// SimpleDateFormat daysdm = new SimpleDateFormat("MM/dd/yyyy"); - SimpleDateFormat hoursdm = new SimpleDateFormat("HH"); - - /** - * - */ - public void InitializeCategories() - { - ArrayList people = new ArrayList(); - people.add("protesters"); - people.add("people"); - CATEGORIES.put("People",people); - ArrayList police = new ArrayList(); - police.add("police"); - police.add("cops"); - police.add("nypd"); - police.add("raid"); - CATEGORIES.put("Police",police); - ArrayList media = new ArrayList(); - media.add("press"); - media.add("news"); - media.add("media"); - CATEGORIES.put("Media",media); - ArrayList city = new ArrayList(); - city.add("nyc"); - city.add("zucotti"); - city.add("park"); - CATEGORIES.put("Location",city); - ArrayList judiciary = new ArrayList(); - judiciary.add("judge"); - judiciary.add("eviction"); - judiciary.add("order"); - judiciary.add("court"); - CATEGORIES.put("Judiciary", judiciary); - } - - /** - * - * @param filename - * @return - */ - public JSONObject ExtractCategoryTrends(String filename) - { - JSONObject result = new JSONObject(); - try { - BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8")); - String temp = ""; - Set catkeys = CATEGORIES.keySet(); - HashMap> datecount = new HashMap>(); - while((temp = br.readLine())!=null) - { - Date d = new Date(); - try { - JSONObject jobj = new JSONObject(temp); - //Published time - if(!jobj.isNull("created_at")) - { - String time = ""; - try { - time = jobj.getString("created_at"); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - if(time.isEmpty()) - { - continue; - } - else - { - try { - d = twittersdm.parse(time); - } catch (ParseException ex) { - continue; - } - } - } - else - if(!jobj.isNull("timestamp")) - { - long time = new Date().getTime(); - try{ - time = jobj.getLong("timestamp"); - }catch(JSONException ex) - { - ex.printStackTrace(); - } - d = new Date(); - d.setTime(time); - } - String datestr = dayhoursdm.format(d); - String text = jobj.getString("text").toLowerCase(); -// System.out.println(text); - for(String key:catkeys) - { - ArrayList words = CATEGORIES.get(key); - for(String word:words) - { - if(text.contains(word)) - { - HashMap categorycount = new HashMap(); - if(datecount.containsKey(datestr)) - { - categorycount = datecount.get(datestr); - } - if(categorycount.containsKey(key)) - { - categorycount.put(key, categorycount.get(key)+1); - } - else - { - categorycount.put(key, 1); - } - //update the categorycount for the specific date - datecount.put(datestr, categorycount); - break; - } - } - } - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - } - //sort the dates - Set datekeys = datecount.keySet(); - ArrayList dinfos = new ArrayList(); - for(String date:datekeys) - { - Date d = null; - try { - d = dayhoursdm.parse(date); - } catch (ParseException ex) { - 
Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - if(d!=null) - { - DateInfo info = new DateInfo(); - info.d = d; - info.catcounts = datecount.get(date); - dinfos.add(info); - } - } - Collections.sort(dinfos, Collections.reverseOrder()); - try { - result.put("axisxstep", dinfos.size()-1); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - try { - result.put("axisystep", CATEGORIES.size()-1); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - JSONArray xcoordinates = new JSONArray(); - JSONArray ycoordinates = new JSONArray(); - //now add the data and the axis labels - JSONArray axisxlabels = new JSONArray(); - JSONArray axisylabels = new JSONArray(); - JSONArray data = new JSONArray(); - for(String key:catkeys) - { - axisylabels.put(key); - } - //counters to mark the indices of the values added to data field. i is the x coordinate and j is the y coordinate - int i=0,j=0; - - for(DateInfo date:dinfos) - { - String strdate = hoursdm.format(date.d); - axisxlabels.put(strdate); - HashMap catcounts = date.catcounts; - for(String key:catkeys) - { - xcoordinates.put(j); - ycoordinates.put(i++); - if(catcounts.containsKey(key)) - { - data.put(catcounts.get(key)); - } - else - { - data.put(0); - } - } - //reset the x coordinate as we move to the next y item - i=0; - j++; - } - try { - result.put("xcoordinates", xcoordinates); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - try { - result.put("ycoordinates", ycoordinates); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - try { - result.put("axisxlabels", axisxlabels); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - try { - result.put("axisylabels", axisylabels); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - try { - result.put("data", data); - } catch (JSONException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - br.close(); - } catch (IOException ex) { - Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); - } - return result; - } - - public static void main(String[] args) - { - EventSummaryExtractor ese = new EventSummaryExtractor(); - String infilename = ese.DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - } - ese.InitializeCategories(); - System.out.println(ese.ExtractCategoryTrends(infilename).toString()); - } -} diff --git a/src/Chapter5/text/ExtractTopKeywords.java b/src/Chapter5/text/ExtractTopKeywords.java deleted file mode 100644 index 8ab412a..0000000 --- a/src/Chapter5/text/ExtractTopKeywords.java +++ /dev/null @@ -1,151 +0,0 @@ -/* TweetTracker. 
Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.text; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import utils.Tags; -import utils.TextUtils; - -public class ExtractTopKeywords -{ - - static final String DEF_INFILENAME = "ows.json"; - static final int DEF_K = 60; - - /** - * Extracts the most frequently occurring keywords from the tweets by processing them sequentially. Stopwords are ignored. - * @param inFilename File containing a list of tweets as JSON objects - * @param K Count of the top keywords to return - * @param ignoreHashtags If true, hashtags are not considered while counting the most frequent keywords - * @param ignoreUsernames If true, usernames are not considered while counting the most frequent keywords - * @param tu TextUtils object which handles the stopwords - * @return a JSONArray containing an array of JSONObjects. Each object contains two elements "text" and "size" referring to the word and it's frequency - */ - public JSONArray GetTopKeywords(String inFilename, int K, boolean ignoreHashtags, boolean ignoreUsernames, TextUtils tu) - { - HashMap words = new HashMap(); - BufferedReader br = null; - try{ - br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try{ - JSONObject tweetobj = new JSONObject(temp); - if(!tweetobj.isNull("text")) - { - String text = tweetobj.getString("text"); - //System.out.println(text); - text = text.toLowerCase().replaceAll("\\s+", " "); - /** Step 1: Tokenize tweets into individual words. and count their frequency in the corpus - * Remove stop words and special characters. Ignore user names and hashtags if the user chooses to. 
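- * e.g. with both ignore flags true, "RT @some_user: Protesters march on #ows"
- * might reduce to {protesters=1, march=1} (illustrative only; the exact tokens
- * depend on the tokenizer and stopword list in TextUtils, defined elsewhere).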
- */ - HashMap tokens = tu.TokenizeText(text,ignoreHashtags,ignoreUsernames); - Set keys = tokens.keySet(); - for(String key:keys) - { - if(words.containsKey(key)) - { - words.put(key, words.get(key)+tokens.get(key)); - } - else - { - words.put(key, tokens.get(key)); - } - } - } - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(ExtractTopKeywords.class.getName()).log(Level.SEVERE, null, ex); - } - } - Set keys = words.keySet(); - ArrayList tags = new ArrayList(); - for(String key:keys) - { - Tags tag = new Tags(); - tag.setKey(key); - tag.setValue(words.get(key)); - tags.add(tag); - } - // Step 2: Sort the words in descending order of frequency - Collections.sort(tags, Collections.reverseOrder()); - JSONArray cloudwords = new JSONArray(); - int numwords = K; - if(tags.size()=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - if(args.length>=2&&!args[1].isEmpty()) - { - try{ - K = Integer.parseInt(args[1]); - }catch(NumberFormatException ex) - { - ex.printStackTrace(); - } - } - } - System.out.println(etk.GetTopKeywords(infilename, K, false,true,tu)); - } - -} diff --git a/src/Chapter5/trends/ControlChartExample.java b/src/Chapter5/trends/ControlChartExample.java deleted file mode 100644 index 2df814f..0000000 --- a/src/Chapter5/trends/ControlChartExample.java +++ /dev/null @@ -1,144 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class ControlChartExample -{ - static final String DEF_INFILENAME = "ows.json"; - static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm"); - - public JSONArray GenerateDataTrend(String inFilename) - { - BufferedReader br = null; - JSONArray result = new JSONArray(); - HashMap datecount = new HashMap(); - try{ - br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try { - JSONObject jobj = new JSONObject(temp); - long timestamp = jobj.getLong("timestamp"); - Date d = new Date(timestamp); - String strdate = SDM.format(d); - if(datecount.containsKey(strdate)) - { - datecount.put(strdate, datecount.get(strdate)+1); - } - else - { - datecount.put(strdate, 1); - } - } catch (JSONException ex) { - Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - ArrayList dinfos = new ArrayList(); - Set keys = datecount.keySet(); - for(String key:keys) - { - DateInfo dinfo = new DateInfo(); - try { - dinfo.d = SDM.parse(key); - } catch (ParseException ex) { - ex.printStackTrace(); - continue; - } - dinfo.count = datecount.get(key); - dinfos.add(dinfo); - } - double mean = this.GetMean(dinfos); - double stddev = this.GetStandardDev(dinfos, mean); - Collections.sort(dinfos); - //Normalize the trend by 
subtracting the mean and dividing by standard deviation to get a distribution with 0 mean and a standard deviation of 1 - for(DateInfo dinfo:dinfos) - { - try{ - JSONObject jobj = new JSONObject(); - jobj.put("date", SDM.format(dinfo.d)); - jobj.put("count", (dinfo.count-mean)/stddev); - jobj.put("mean", 0); - jobj.put("stdev+3", 3); - jobj.put("stdev-3", -3); - result.put(jobj); - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - return result; - } - - public double GetStandardDev(ArrayList dateinfos,double mean) - { - double intsum = 0; - int numperiods = dateinfos.size(); - for(DateInfo dinfo:dateinfos) - { - intsum+=Math.pow((dinfo.count - mean),2); - } -// System.out.println(Math.sqrt((double)intsum/timePeriodCounts.size())); - return Math.sqrt((double)intsum/numperiods); - } - - public double GetMean(ArrayList dateinfos) - { - int numperiods = dateinfos.size(); - int sum = 0; - for(DateInfo dinfo:dateinfos) - { - sum +=dinfo.count; - } -// System.out.println((double)sum/numPeriods); - return ((double)sum/numperiods); - } - - public static void main(String[] args) - { - ControlChartExample cce = new ControlChartExample(); - String infilename = DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - } - System.out.println(cce.GenerateDataTrend(infilename)); - } - -} diff --git a/src/Chapter5/trends/DateInfo.java b/src/Chapter5/trends/DateInfo.java deleted file mode 100644 index 209f4a3..0000000 --- a/src/Chapter5/trends/DateInfo.java +++ /dev/null @@ -1,29 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.util.Date; - -public class DateInfo implements Comparable -{ - public Date d; - public int count; - - public int compareTo(Object o) { - DateInfo temp = (DateInfo) o; - if(temp.d.after(this.d)) - { - return -1; - } - else - if(temp.d.before(this.d)) - { - return 1; - } - else - { - return 0; - } - } -} diff --git a/src/Chapter5/trends/ExtractDatasetTrend.java b/src/Chapter5/trends/ExtractDatasetTrend.java deleted file mode 100644 index dad7f27..0000000 --- a/src/Chapter5/trends/ExtractDatasetTrend.java +++ /dev/null @@ -1,120 +0,0 @@ -/* TweetTracker. 
diff --git a/src/Chapter5/trends/ExtractDatasetTrend.java b/src/Chapter5/trends/ExtractDatasetTrend.java
deleted file mode 100644
index dad7f27..0000000
--- a/src/Chapter5/trends/ExtractDatasetTrend.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class ExtractDatasetTrend
-{
-    static final String DEF_INFILENAME = "ows.json";
-    // Date pattern used to bucket tweet volume, here by minute
-    final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm");
-
-    public JSONArray GenerateDataTrend(String inFilename)
-    {
-        BufferedReader br = null;
-        JSONArray result = new JSONArray();
-        HashMap<String,Integer> datecount = new HashMap<String,Integer>();
-        try{
-            br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
-            String temp = "";
-            while((temp = br.readLine())!=null)
-            {
-                try {
-                    JSONObject jobj = new JSONObject(temp);
-                    long timestamp = jobj.getLong("timestamp");
-                    Date d = new Date(timestamp);
-                    String strdate = SDM.format(d);
-                    if(datecount.containsKey(strdate))
-                    {
-                        datecount.put(strdate, datecount.get(strdate)+1);
-                    }
-                    else
-                    {
-                        datecount.put(strdate, 1);
-                    }
-                } catch (JSONException ex) {
-                    Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex);
-                }
-            }
-            /** DateInfo consists of a date and the corresponding count.
-             * It also implements Comparable for sorting by date.
-             */
-            ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>();
-            Set<String> keys = datecount.keySet();
-            for(String key:keys)
-            {
-                DateInfo dinfo = new DateInfo();
-                try {
-                    dinfo.d = SDM.parse(key);
-                } catch (ParseException ex) {
-                    ex.printStackTrace();
-                    continue;
-                }
-                dinfo.count = datecount.get(key);
-                dinfos.add(dinfo);
-            }
-            Collections.sort(dinfos);
-            // Format and return the date string and the corresponding count
-            for(DateInfo dinfo:dinfos)
-            {
-                try{
-                    JSONObject jobj = new JSONObject();
-                    jobj.put("date", SDM.format(dinfo.d));
-                    jobj.put("count", dinfo.count);
-                    result.put(jobj);
-                }catch(JSONException ex)
-                {
-                    ex.printStackTrace();
-                }
-            }
-        }catch(IOException ex)
-        {
-            ex.printStackTrace();
-        }finally{
-            try {
-                br.close();
-            } catch (IOException ex) {
-                Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex);
-            }
-        }
-        return result;
-    }
-
-    public static void main(String[] args)
-    {
-        ExtractDatasetTrend edt = new ExtractDatasetTrend();
-
-        String infilename = DEF_INFILENAME;
-        if(args!=null)
-        {
-            if(args.length>=1&&!args[0].isEmpty())
-            {
-                File fl = new File(args[0]);
-                if(fl.exists())
-                {
-                    infilename = args[0];
-                }
-            }
-        }
-        System.out.println(edt.GenerateDataTrend(infilename));
-    }
-
-}
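ExtractDatasetTrend above buckets tweets per minute by formatting each timestamp with SimpleDateFormat and then re-parsing the strings to sort them. For reference, a sketch of the same bucketing with java.time and a TreeMap, which keeps buckets in chronological order without the round-trip through strings; the names here are illustrative, not from the removed code.

import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.*;

class TrendBucketSketch {
    // Count events per minute; TreeMap iteration order is already chronological.
    static SortedMap<Instant,Integer> bucketByMinute(List<Long> timestampsMillis) {
        SortedMap<Instant,Integer> counts = new TreeMap<>();
        for (long ts : timestampsMillis) {
            Instant minute = Instant.ofEpochMilli(ts).truncatedTo(ChronoUnit.MINUTES);
            counts.merge(minute, 1, Integer::sum);  // increment this minute's bucket
        }
        return counts;
    }
}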
diff --git a/src/Chapter5/trends/SparkLineExample.java b/src/Chapter5/trends/SparkLineExample.java
deleted file mode 100644
index 4a0164b..0000000
--- a/src/Chapter5/trends/SparkLineExample.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class SparkLineExample
-{
-    static final String DEF_INFILENAME = "ows.json";
-    static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH");
-
-    public JSONObject GenerateDataTrend(String inFilename, ArrayList<String> keywords)
-    {
-        BufferedReader br = null;
-        JSONObject result = new JSONObject();
-        HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>();
-        try{
-            br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
-            String temp = "";
-            while((temp = br.readLine())!=null)
-            {
-                try {
-                    JSONObject jobj = new JSONObject(temp);
-                    String text = jobj.getString("text").toLowerCase();
-                    long timestamp = jobj.getLong("timestamp");
-                    Date d = new Date(timestamp);
-                    String strdate = SDM.format(d);
-                    for(String word:keywords)
-                    {
-                        if(text.contains(word))
-                        {
-                            HashMap<String,Integer> wordcount = new HashMap<String,Integer>();
-                            if(datecount.containsKey(strdate))
-                            {
-                                wordcount = datecount.get(strdate);
-                            }
-                            if(wordcount.containsKey(word))
-                            {
-                                wordcount.put(word, wordcount.get(word)+1);
-                            }
-                            else
-                            {
-                                wordcount.put(word, 1);
-                            }
-                            //update the wordcount for the specific date
-                            datecount.put(strdate, wordcount);
-                        }
-                    }
-                } catch (JSONException ex) {
-                    Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex);
-                }
-            }
-            //sort the dates
-            ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>();
-            Set<String> keys = datecount.keySet();
-            for(String key:keys)
-            {
-                TCDateInfo dinfo = new TCDateInfo();
-                try {
-                    dinfo.d = SDM.parse(key);
-                } catch (ParseException ex) {
-                    ex.printStackTrace();
-                    continue;
-                }
-                dinfo.wordcount = datecount.get(key);
-                dinfos.add(dinfo);
-            }
-            Collections.sort(dinfos);
-            JSONArray[] tseriesvals = new JSONArray[keywords.size()];
-            for(int i=0;i<keywords.size();i++)
-            {
-                tseriesvals[i] = new JSONArray();
-            }
-            for(TCDateInfo date:dinfos)
-            {
-                HashMap<String,Integer> wordcount = date.wordcount;
-                int counter=0;
-                for(String word:keywords)
-                {
-                    if(wordcount.containsKey(word))
-                    {
-                        tseriesvals[counter].put(wordcount.get(word));
-                    }
-                    else
-                    {
-                        tseriesvals[counter].put(0);
-                    }
-                    counter++;
-                }
-            }
-            int counter=0;
-            for(String word:keywords)
-            {
-                try {
-                    result.put(word, tseriesvals[counter]);
-                } catch (JSONException ex) {
-                    Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex);
-                }
-                counter++;
-            }
-        }catch(IOException ex)
-        {
-            ex.printStackTrace();
-        }finally{
-            try {
-                br.close();
-            } catch (IOException ex) {
-                Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex);
-            }
-        }
-        return result;
-    }
-
-    public static void main(String[] args)
-    {
-        SparkLineExample sle = new SparkLineExample();
-        ArrayList<String> words = new ArrayList<String>();
-        String infilename = DEF_INFILENAME;
-        if(args!=null)
-        {
-            if(args.length>=1&&!args[0].isEmpty())
-            {
-                File fl = new File(args[0]);
-                if(fl.exists())
-                {
-                    infilename = args[0];
-                }
-            }
-            for(int i=1;i<args.length;i++)
-            {
-                words.add(args[i]);
-            }
-        }
-        System.out.println(sle.GenerateDataTrend(infilename, words));
-    }
-
-}
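SparkLineExample above turns the date-to-word-to-count map into one aligned series per keyword, filling in 0 where a keyword never occurred in a bucket. A compact sketch of that alignment step, under the assumption that the map's date keys already iterate in chronological order; all names here are hypothetical stand-ins for the removed code.

import java.util.*;

class SparkSeriesSketch {
    // Every keyword gets one value per date bucket, 0 when absent in that bucket.
    static Map<String,List<Integer>> align(SortedMap<String,Map<String,Integer>> dateToWordCount,
                                           List<String> keywords) {
        Map<String,List<Integer>> series = new LinkedHashMap<>();
        for (String w : keywords) series.put(w, new ArrayList<>());
        for (Map<String,Integer> wc : dateToWordCount.values()) {  // dates in sorted order
            for (String w : keywords) {
                series.get(w).add(wc.getOrDefault(w, 0));          // 0 fills the gaps
            }
        }
        return series;
    }
}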
diff --git a/src/Chapter5/trends/TCDateInfo.java b/src/Chapter5/trends/TCDateInfo.java
deleted file mode 100644
--- a/src/Chapter5/trends/TCDateInfo.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.util.Date;
-import java.util.HashMap;
-
-public class TCDateInfo implements Comparable
-{
-    public Date d;
-    public HashMap<String,Integer> wordcount = new HashMap<String,Integer>();
-
-    public int compareTo(Object o) {
-        TCDateInfo temp = (TCDateInfo) o;
-        if(temp.d.after(this.d))
-        {
-            return -1;
-        }
-        else if(temp.d.before(this.d))
-        {
-            return 1;
-        }
-        else
-        {
-            return 0;
-        }
-    }
-
-}
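TCDateInfo's compareTo, like DateInfo's, simply orders records by their Date field, ascending. For reference, a Comparator.comparing one-liner achieves the same ordering; the Item class below is a hypothetical stand-in for the removed classes, not their actual API.

import java.util.*;

class DateSortSketch {
    static class Item { Date d; Item(Date d) { this.d = d; } }

    public static void main(String[] args) {
        List<Item> items = new ArrayList<>(List.of(
                new Item(new Date(2000)), new Item(new Date(1000))));
        items.sort(Comparator.comparing(it -> it.d));   // ascending by date, same as compareTo
        items.forEach(it -> System.out.println(it.d.getTime()));
    }
}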
diff --git a/src/Chapter5/trends/TrendComparisonExample.java b/src/Chapter5/trends/TrendComparisonExample.java
deleted file mode 100644
index 20991cd..0000000
--- a/src/Chapter5/trends/TrendComparisonExample.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class TrendComparisonExample
-{
-    static final String DEF_INFILENAME = "ows.json";
-    static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm");
-
-    public JSONArray GenerateDataTrend(String inFilename, ArrayList<String> keywords)
-    {
-        BufferedReader br = null;
-        JSONArray result = new JSONArray();
-        HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>();
-        try{
-            br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
-            String temp = "";
-            while((temp = br.readLine())!=null)
-            {
-                try {
-                    JSONObject jobj = new JSONObject(temp);
-                    String text = jobj.getString("text").toLowerCase();
-                    long timestamp = jobj.getLong("timestamp");
-                    Date d = new Date(timestamp);
-                    String strdate = SDM.format(d);
-                    for(String word:keywords)
-                    {
-                        if(text.contains(word))
-                        {
-                            HashMap<String,Integer> wordcount = new HashMap<String,Integer>();
-                            if(datecount.containsKey(strdate))
-                            {
-                                wordcount = datecount.get(strdate);
-                            }
-                            if(wordcount.containsKey(word))
-                            {
-                                wordcount.put(word, wordcount.get(word)+1);
-                            }
-                            else
-                            {
-                                wordcount.put(word, 1);
-                            }
-                            //update the wordcount for the specific date
-                            datecount.put(strdate, wordcount);
-                        }
-                    }
-                } catch (JSONException ex) {
-                    Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex);
-                }
-            }
-            //sort the dates
-            ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>();
-            Set<String> keys = datecount.keySet();
-            for(String key:keys)
-            {
-                TCDateInfo dinfo = new TCDateInfo();
-                try {
-                    dinfo.d = SDM.parse(key);
-                } catch (ParseException ex) {
-                    ex.printStackTrace();
-                    continue;
-                }
-                dinfo.wordcount = datecount.get(key);
-                dinfos.add(dinfo);
-            }
-            Collections.sort(dinfos);
-            //prepare the output
-            for(TCDateInfo date:dinfos)
-            {
-                JSONObject item = new JSONObject();
-                String strdate = SDM.format(date.d);
-                try{
-                    item.put("date",strdate);
-                    HashMap<String,Integer> wordcount = date.wordcount;
-                    for(String word:keywords)
-                    {
-                        if(wordcount.containsKey(word))
-                        {
-                            item.put(word, wordcount.get(word));
-                        }
-                        else
-                        {
-                            item.put(word, 0);
-                        }
-                    }
-                    result.put(item);
-                }catch(JSONException ex)
-                {
-                    ex.printStackTrace();
-                }
-            }
-        }catch(IOException ex)
-        {
-            ex.printStackTrace();
-        }finally{
-            try {
-                br.close();
-            } catch (IOException ex) {
-                Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex);
-            }
-        }
-        return result;
-    }
-
-    public static void main(String[] args)
-    {
-        TrendComparisonExample tce = new TrendComparisonExample();
-        ArrayList<String> words = new ArrayList<String>();
-        String infilename = DEF_INFILENAME;
-        if(args!=null)
-        {
-            if(args.length>=1&&!args[0].isEmpty())
-            {
-                File fl = new File(args[0]);
-                if(fl.exists())
-                {
-                    infilename = args[0];
-                }
-            }
-            for(int i=1;i