summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMaurice Laveaux <m.laveaux@student.tue.nl>2014-05-22 16:33:24 +0200
committerMaurice Laveaux <m.laveaux@student.tue.nl>2014-05-22 16:33:24 +0200
commit4f32eedd2bd49837cc297acce399c108e8b558a7 (patch)
treee80ceb3fdf38db9552b52bd7f0c6b209c6c4bc28
parent1da00321db0aa8c412c3ff1dff5734962ee49240 (diff)
downloadTwitterDataAnalytics-4f32eedd2bd49837cc297acce399c108e8b558a7.tar.gz
Removed unused source files
* Removed the shitty examples.
-rw-r--r--src/Chapter2/Location/LocationTranslationExample.java124
-rw-r--r--src/Chapter2/openauthentication/OAuthExample.java79
-rw-r--r--src/Chapter2/restapi/RESTApiExample.java676
-rw-r--r--src/Chapter2/restapi/RESTSearchExample.java311
-rw-r--r--src/Chapter2/streamingapi/StreamingApiExample.java372
-rw-r--r--src/Chapter2/support/APIType.java12
-rw-r--r--src/Chapter2/support/InfoType.java12
-rw-r--r--src/Chapter2/support/Location.java28
-rw-r--r--src/Chapter2/support/OAuthTokenSecret.java38
-rw-r--r--src/Chapter4/GraphElements/RetweetEdge.java53
-rw-r--r--src/Chapter4/GraphElements/UserNode.java34
-rw-r--r--src/Chapter4/centrality/examples/BetweennessCentralityExample.java31
-rw-r--r--src/Chapter4/centrality/examples/EigenvectorCentralityExample.java36
-rw-r--r--src/Chapter4/centrality/examples/InDegreeCentralityExample.java30
-rw-r--r--src/Chapter4/centrality/examples/PageRankCentralityExample.java39
-rw-r--r--src/Chapter4/classification/bayes/Classification.java22
-rw-r--r--src/Chapter4/classification/bayes/NBCxv.java60
-rw-r--r--src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java264
-rw-r--r--src/Chapter4/classification/bayes/StopwordsList.java10
-rw-r--r--src/Chapter4/classification/bayes/TestNBC.java49
-rw-r--r--src/Chapter4/classification/bayes/WordCountPair.java34
-rw-r--r--src/Chapter4/graph/visualization/SimpleGraphViewer.java86
-rw-r--r--src/Chapter4/tweetlda/LDA.java89
-rw-r--r--src/Chapter4/tweetlda/PorterStemmer.java33
-rw-r--r--src/Chapter4/tweetlda/Stemmer.java428
-rw-r--r--src/Chapter4/util/BetweennessScorer.java25
-rw-r--r--src/Chapter4/util/EigenVectorScorer.java64
-rw-r--r--src/Chapter4/util/InDegreeScorer.java30
-rw-r--r--src/Chapter4/util/TweetFileProcessor.java76
-rw-r--r--src/Chapter4/util/TweetFileToGraph.java77
-rw-r--r--src/Chapter5/network/CreateD3Network.java716
-rw-r--r--src/Chapter5/network/ExtractUserTagNetwork.java173
-rw-r--r--src/Chapter5/support/DateInfo.java30
-rw-r--r--src/Chapter5/support/HashTagDS.java18
-rw-r--r--src/Chapter5/support/NetworkNode.java49
-rw-r--r--src/Chapter5/support/NodeIDComparator.java32
-rw-r--r--src/Chapter5/support/NodeSizeComparator.java29
-rw-r--r--src/Chapter5/support/ToNodeInfo.java23
-rw-r--r--src/Chapter5/support/Tweet.java21
-rw-r--r--src/Chapter5/text/EventSummaryExtractor.java269
-rw-r--r--src/Chapter5/text/ExtractTopKeywords.java151
-rw-r--r--src/Chapter5/trends/ControlChartExample.java144
-rw-r--r--src/Chapter5/trends/DateInfo.java29
-rw-r--r--src/Chapter5/trends/ExtractDatasetTrend.java120
-rw-r--r--src/Chapter5/trends/SparkLineExample.java163
-rw-r--r--src/Chapter5/trends/TCDateInfo.java31
-rw-r--r--src/Chapter5/trends/TrendComparisonExample.java155
47 files changed, 0 insertions, 5375 deletions
diff --git a/src/Chapter2/Location/LocationTranslationExample.java b/src/Chapter2/Location/LocationTranslationExample.java
deleted file mode 100644
index 69178dc..0000000
--- a/src/Chapter2/Location/LocationTranslationExample.java
+++ /dev/null
@@ -1,124 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter2.Location;
-
-import Chapter2.support.Location;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.UnsupportedEncodingException;
-import java.net.HttpURLConnection;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.net.URLConnection;
-import java.net.URLEncoder;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-
-public class LocationTranslationExample
-{
-
- /**
- * Translates a location string to coordinates using the database or Nominatim Service
- * @param loc
- * @return
- */
- public Location TranslateLoc(String loc)
- {
- if(loc!=null&&!loc.isEmpty())
- {
- String encodedLoc="";
- try {
- //Step 1: Encode the location name
- encodedLoc = URLEncoder.encode(loc, "UTF-8");
- } catch (UnsupportedEncodingException ex) {
- Logger.getLogger(LocationTranslationExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- //Step 2: Create a get request to MapQuest API with the name of the location
- String url= "http://open.mapquestapi.com/nominatim/v1/search?q="+encodedLoc+"&format=json";
- String page = ReadHTML(url);
- if(page!=null)
- {
- try{
- JSONArray results = new JSONArray(page);
- if(results.length()>0)
- {
- //Step 3: Read and extract the coordinates of the location as a JSONObject
- Location loca = new Location(results.getJSONObject(0).getDouble("lat"),results.getJSONObject(0).getDouble("lon"));
- return loca;
- }
- }catch(JSONException ex)
- {
- Logger.getLogger(LocationTranslationExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- }
- return null;
- }
-
- /**
- * Extracts the html content of a URL
- * @param url
- * @return html page
- */
- public String ReadHTML(String url)
- {
- URLConnection conn = null;
- URL theURL = null;
- try
- {
- theURL = new URL(url);
- }
- catch ( MalformedURLException e)
- {
- System.out.println("Bad URL: " + theURL);
- return null;
- }
- String page = "";
- try
- {
- conn = theURL.openConnection();
- HttpURLConnection huc = (HttpURLConnection) conn;
- conn.setConnectTimeout(2000);
- huc.setRequestProperty("User-Agent", "Mozilla/4.5");
- //Set your email address in the request so MapQuest knows how to reach you in the event of problems
- huc.setRequestProperty("Email", "twitterdataanalytics@gmail.com");
- if(huc.getResponseCode()>=400&&huc.getResponseCode()<=404)
- {
- return null;
- }
- conn.connect();
- BufferedReader bRead = new BufferedReader(new InputStreamReader((InputStream) conn.getContent()));
- String temp=null;
- while( (temp= bRead.readLine())!=null)
- {
- page = page+"\n"+temp;
- }
- bRead.close();
- }
- catch (IOException e) {
- //System.out.print("ReadHTML IO Error:" + e.getMessage()+" \n");
- return null;
- }
- return page;
- }
-
- public static void main(String[] args)
- {
- LocationTranslationExample lte = new LocationTranslationExample();
- if(args!=null)
- {
- if(args.length>0)
- {
- for(int i=0;i<args.length;i++)
- {
- System.out.println(lte.TranslateLoc(args[i]).toString());
- }
- }
- }
- }
-}
diff --git a/src/Chapter2/openauthentication/OAuthExample.java b/src/Chapter2/openauthentication/OAuthExample.java
deleted file mode 100644
index 34ca3a1..0000000
--- a/src/Chapter2/openauthentication/OAuthExample.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter2.openauthentication;
-
-import Chapter2.support.OAuthTokenSecret;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import oauth.signpost.OAuth;
-import oauth.signpost.OAuthConsumer;
-import oauth.signpost.OAuthProvider;
-import oauth.signpost.basic.DefaultOAuthProvider;
-import oauth.signpost.commonshttp.CommonsHttpOAuthConsumer;
-import oauth.signpost.exception.OAuthCommunicationException;
-import oauth.signpost.exception.OAuthExpectationFailedException;
-import oauth.signpost.exception.OAuthMessageSignerException;
-import oauth.signpost.exception.OAuthNotAuthorizedException;
-import utils.Configuration;
-
-public class OAuthExample
-{
- public OAuthTokenSecret GetUserAccessKeySecret()
- {
- try {
- //consumer key for Twitter Data Analytics application
- if(Configuration.CONSUMER_KEY.isEmpty())
- {
- System.out.println("Register an application and copy the consumer key into the configuration file.");
- return null;
- }
- if(Configuration.CONSUMER_SECRET.isEmpty())
- {
- System.out.println("Register an application and copy the consumer secret into the configuration file.");
- return null;
- }
- OAuthConsumer consumer = new CommonsHttpOAuthConsumer(Configuration.CONSUMER_KEY,Configuration.CONSUMER_SECRET);
- OAuthProvider provider = new DefaultOAuthProvider(Configuration.REQUEST_TOKEN_URL, Configuration.ACCESS_TOKEN_URL, Configuration.AUTHORIZE_URL);
- String authUrl = provider.retrieveRequestToken(consumer, OAuth.OUT_OF_BAND);
- System.out.println("Now visit:\n" + authUrl + "\n and grant this app authorization");
- System.out.println("Enter the PIN code and hit ENTER when you're done:");
- BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
- String pin = br.readLine();
- System.out.println("Fetching access token from Twitter");
- provider.retrieveAccessToken(consumer,pin);
- String accesstoken = consumer.getToken();
- String accesssecret = consumer.getTokenSecret();
- OAuthTokenSecret tokensecret = new OAuthTokenSecret(accesstoken,accesssecret);
- return tokensecret;
- } catch (OAuthNotAuthorizedException ex) {
- ex.printStackTrace();
- } catch (OAuthMessageSignerException ex) {
- ex.printStackTrace();
- } catch (OAuthExpectationFailedException ex) {
- ex.printStackTrace();
- } catch (OAuthCommunicationException ex) {
- ex.printStackTrace();
- } catch(IOException ex)
- {
- ex.printStackTrace();
- }
- return null;
- }
-
- public static OAuthTokenSecret DEBUGUserAccessSecret()
- {
- String accesstoken = "1262619914-tcCPB1SyXy3BMuui9OAhprcPmqg3z2csSjDSCNY";
- String accesssecret = "cXXO0qFLBjLXGtE97pnf5Vv1RZGxZ2FZ97wCYiaVU";
- OAuthTokenSecret tokensecret = new OAuthTokenSecret(accesstoken,accesssecret);
- return tokensecret;
- }
-
- public static void main(String[] args)
- {
- OAuthExample aue = new OAuthExample();
- OAuthTokenSecret tokensecret = aue.GetUserAccessKeySecret();
- System.out.println(tokensecret.toString());
- }
-}
diff --git a/src/Chapter2/restapi/RESTApiExample.java b/src/Chapter2/restapi/RESTApiExample.java
deleted file mode 100644
index 3d6074b..0000000
--- a/src/Chapter2/restapi/RESTApiExample.java
+++ /dev/null
@@ -1,676 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter2.restapi;
-
-import Chapter2.support.APIType;
-import Chapter2.support.OAuthTokenSecret;
-import Chapter2.openauthentication.OAuthExample;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import oauth.signpost.OAuthConsumer;
-import oauth.signpost.basic.DefaultOAuthConsumer;
-import oauth.signpost.exception.OAuthCommunicationException;
-import oauth.signpost.exception.OAuthExpectationFailedException;
-import oauth.signpost.exception.OAuthMessageSignerException;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class RESTApiExample
-{
- //file handlers to store the collected user information
- BufferedWriter OutFileWriter;
- OAuthTokenSecret OAuthTokens;
- /**
- * name of the file containing a list of users
- */
- final String DEF_FILENAME = "users.txt";
- final String DEF_OUTFILENAME = "restapiresults.json";
- ArrayList<String> Usernames = new ArrayList<String>();
- OAuthConsumer Consumer;
-
- /**
- * Creates a OAuthConsumer with the current consumer & user access tokens and secrets
- * @return consumer
- */
- public OAuthConsumer GetConsumer()
- {
- OAuthConsumer consumer = new DefaultOAuthConsumer(utils.Configuration.CONSUMER_KEY,utils.Configuration.CONSUMER_SECRET);
- consumer.setTokenWithSecret(OAuthTokens.getAccessToken(),OAuthTokens.getAccessSecret());
- return consumer;
- }
-
- /**
- * Reads the file and loads the users in the file to be crawled
- * @param filename
- */
- public void ReadUsers(String filename)
- {
- BufferedReader br = null;
- try {
- br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- if(!temp.isEmpty())
- {
- Usernames.add(temp);
- }
- }
- } catch (IOException ex) {
- ex.printStackTrace();
- }
- finally{
- try {
- br.close();
- } catch (IOException ex) {
- ex.printStackTrace();
- }
- }
- }
-
- /**
- * Load the User Access Token, and the User Access Secret
- */
- public void LoadTwitterToken()
- {
- //Un-comment before release
-// OAuthExample oae = new OAuthExample();
-// OAuthTokens = oae.GetUserAccessKeySecret();
- //Remove before release
- OAuthTokens = OAuthExample.DEBUGUserAccessSecret();
- }
-
- public static void main(String[] args)
- {
- RESTApiExample rae = new RESTApiExample();
- rae.LoadTwitterToken();
- rae.Consumer = rae.GetConsumer();
-// System.out.println(rae.GetStatuses("twtanalyticsbk"));
- System.out.println(rae.GetRateLimitStatus());
-// int apicode = InfoType.PROFILE_INFO;
-// String infilename = rae.DEF_FILENAME;
-// String outfilename = rae.DEF_OUTFILENAME;
-// if(args!=null)
-// {
-// if(args.length>2)
-// {
-// apicode = Integer.parseInt(args[2]);
-// outfilename = args[1];
-// infilename = args[0];
-// }
-// if(args.length>1)
-// {
-// outfilename = args[1];
-// infilename = args[0];
-// }
-// else
-// if(args.length>0)
-// {
-// infilename = args[0];
-// }
-// }
-// rae.InitializeWriters(outfilename);
-// rae.ReadUsers(infilename);
-// if(apicode!=InfoType.PROFILE_INFO&&apicode!=InfoType.FOLLOWER_INFO&&apicode!=InfoType.FRIEND_INFO&&apicode!=InfoType.STATUSES_INFO)
-// {
-// System.out.println("Invalid API type: Use 0 for Profile, 1 for Followers, 2 for Friends, and 3 for Statuses");
-// System.exit(0);
-// }
-// if(rae.Usernames.size()>0)
-// {
-// //TO-DO: Print the possible API types and get user selection to crawl the users.
-// rae.LoadTwitterToken();
-// for(String user:rae.Usernames)
-// {
-// if(apicode==InfoType.PROFILE_INFO)
-// {
-// JSONObject jobj = rae.GetProfile(user);
-// if(jobj!=null&&jobj.length()==0)
-// {
-// rae.WriteToFile(user, jobj.toString());
-// }
-// }
-// else
-// if(apicode==InfoType.FRIEND_INFO)
-// {
-// JSONArray statusarr = rae.GetFriends(user);
-// if(statusarr.length()>0)
-// {
-// rae.WriteToFile(user, statusarr.toString());
-// }
-// }
-// else
-// if(apicode == InfoType.FOLLOWER_INFO)
-// {
-// JSONArray statusarr = rae.GetFollowers(user);
-// if(statusarr.length()>0)
-// {
-// rae.WriteToFile(user, statusarr.toString());
-// }
-// }
-// else
-// if(apicode == InfoType.STATUSES_INFO)
-// {
-// JSONArray statusarr = rae.GetStatuses(user);
-// if(statusarr.length()>0)
-// {
-// rae.GetStatuses(user);
-// }
-// }
-// }
-// }
-//// now you can close the files as all the threads have finished
-// rae.CleanupAfterFinish();
- }
-
- /**
- * Retrieves the rate limit status of the application
- * @return
- */
- public JSONObject GetRateLimitStatus()
- {
- try{
- URL url = new URL("https://api.twitter.com/1.1/application/rate_limit_status.json");
- HttpURLConnection huc = (HttpURLConnection) url.openConnection();
- huc.setReadTimeout(5000);
- Consumer.sign(huc);
- huc.connect();
- BufferedReader bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getContent()));
- StringBuffer page = new StringBuffer();
- String temp= "";
- while((temp = bRead.readLine())!=null)
- {
- page.append(temp);
- }
- bRead.close();
- return (new JSONObject(page.toString()));
- } catch (JSONException ex) {
- Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
- } catch (OAuthCommunicationException ex) {
- Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
- } catch (OAuthMessageSignerException ex) {
- Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
- } catch (OAuthExpectationFailedException ex) {
- Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
- }catch(IOException ex)
- {
- Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- return null;
- }
-
- /**
- * Initialize the file writer
- * @param path of the file
- * @param outFilename name of the file
- */
- public void InitializeWriters(String outFilename) {
- try {
- File fl = new File(outFilename);
- if(!fl.exists())
- {
- fl.createNewFile();
- }
- /**
- * Use UTF-8 encoding when saving files to avoid
- * losing Unicode characters in the data
- */
- OutFileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilename,true),"UTF-8"));
- } catch (IOException ex) {
- ex.printStackTrace();
- }
- }
-
- /**
- * Close the opened filewriter to save the data
- */
- public void CleanupAfterFinish()
- {
- try {
- OutFileWriter.close();
- } catch (IOException ex) {
- Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
-
- /**
- * Writes the retrieved data to the output file
- * @param data containing the retrived information in JSON
- * @param user name of the user currently being written
- */
- public void WriteToFile(String user, String data)
- {
- try
- {
- OutFileWriter.write(data);
- OutFileWriter.newLine();
- } catch (IOException ex) {
- ex.printStackTrace();
- }
- }
-
- /**
- * Retrives the profile information of the user
- * @param username of the user whose profile needs to be retrieved
- * @return the profile information as a JSONObject
- */
- public JSONObject GetProfile(String username)
- {
- BufferedReader bRead = null;
- JSONObject profile = null;
- try {
- System.out.println("Processing profile of "+username);
- boolean flag = true;
- URL url = new URL("https://api.twitter.com/1.1/users/show.json?screen_name="+username);
- HttpURLConnection huc = (HttpURLConnection) url.openConnection();
- huc.setReadTimeout(5000);
- // Step 2: Sign the request using the OAuth Secret
- Consumer.sign(huc);
- huc.connect();
- if(huc.getResponseCode()==404||huc.getResponseCode()==401)
- {
- System.out.println(huc.getResponseMessage());
- }
- else
- if(huc.getResponseCode()==500||huc.getResponseCode()==502||huc.getResponseCode()==503)
- {
- try {
- huc.disconnect();
- System.out.println(huc.getResponseMessage());
- Thread.sleep(3000);
- } catch (InterruptedException ex) {
- ex.printStackTrace();
- }
- }
- else
- // Step 3: If the requests have been exhausted, then wait until the quota is renewed
- if(huc.getResponseCode()==429)
- {
- try {
- huc.disconnect();
- Thread.sleep(this.GetWaitTime("/users/show/:id"));
- flag = false;
- } catch (InterruptedException ex) {
- ex.printStackTrace();
- }
- }
- if(!flag)
- {
- //recreate the connection because something went wrong the first time.
- huc.connect();
- }
- StringBuilder content=new StringBuilder();
- if(flag)
- {
- bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getContent()));
- String temp= "";
- while((temp = bRead.readLine())!=null)
- {
- content.append(temp);
- }
- }
- huc.disconnect();
- try {
- profile = new JSONObject(content.toString());
- } catch (JSONException ex) {
- ex.printStackTrace();
- }
- } catch (OAuthCommunicationException ex) {
- ex.printStackTrace();
- } catch (OAuthMessageSignerException ex) {
- ex.printStackTrace();
- } catch (OAuthExpectationFailedException ex) {
- ex.printStackTrace();
- } catch (IOException ex) {
- ex.printStackTrace();
- }
- return profile;
- }
-
- /**
- * Retrieves the followers of a user
- * @param username the name of the user whose followers need to be retrieved
- * @return a list of user objects corresponding to the followers of the user
- */
- public JSONArray GetFollowers(String username)
- {
- BufferedReader bRead = null;
- JSONArray followers = new JSONArray();
- try {
- System.out.println(" followers user = "+username);
- long cursor = -1;
- while(true)
- {
- if(cursor==0)
- {
- break;
- }
- // Step 1: Create the APi request using the supplied username
- URL url = new URL("https://api.twitter.com/1.1/followers/list.json?screen_name="+username+"&cursor=" + cursor);
- HttpURLConnection huc = (HttpURLConnection) url.openConnection();
- huc.setReadTimeout(5000);
- // Step 2: Sign the request using the OAuth Secret
- Consumer.sign(huc);
- huc.connect();
- if(huc.getResponseCode()==400||huc.getResponseCode()==404)
- {
- System.out.println(huc.getResponseMessage());
- break;
- }
- else
- if(huc.getResponseCode()==500||huc.getResponseCode()==502||huc.getResponseCode()==503||huc.getResponseCode()==504)
- {
- try{
- System.out.println(huc.getResponseMessage());
- huc.disconnect();
- Thread.sleep(3000);
- continue;
- } catch (InterruptedException ex) {
- Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- else
- // Step 3: If the requests have been exhausted, then wait until the quota is renewed
- if(huc.getResponseCode()==429)
- {
- try {
- huc.disconnect();
- Thread.sleep(this.GetWaitTime("/followers/list"));
- continue;
- } catch (InterruptedException ex) {
- Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- // Step 4: Retrieve the followers list from Twitter
- bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getContent()));
- StringBuilder content = new StringBuilder();
- String temp = "";
- while((temp = bRead.readLine())!=null)
- {
- content.append(temp);
- }
- try {
- JSONObject jobj = new JSONObject(content.toString());
- // Step 5: Retrieve the token for the next request
- cursor = jobj.getLong("next_cursor");
- JSONArray idlist = jobj.getJSONArray("users");
- if(idlist.length()==0)
- {
- break;
- }
- for(int i=0;i<idlist.length();i++)
- {
- followers.put(idlist.getJSONObject(i));
- }
- } catch (JSONException ex) {
- Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- } catch (OAuthCommunicationException ex) {
- ex.printStackTrace();
- } catch (OAuthMessageSignerException ex) {
- ex.printStackTrace();
- } catch (OAuthExpectationFailedException ex) {
- ex.printStackTrace();
- } catch (IOException ex) {
- ex.printStackTrace();
- }
- return followers;
- }
-
- /**
- * Retrieved the status messages of a user
- * @param username the name of the user whose status messages need to be retrieved
- * @return a list of status messages
- */
- public JSONArray GetStatuses(String username)
- {
- BufferedReader bRead = null;
- //Get the maximum number of tweets possible in a single page 200
- int tweetcount = 200;
- //Include include_rts because it is counted towards the limit anyway.
- boolean include_rts = true;
- JSONArray statuses = new JSONArray();
- try {
- System.out.println("Processing status messages of "+username);
- long maxid = 0;
- while(true)
- {
- URL url = null;
- if(maxid==0)
- {
- url = new URL("https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=" + username+"&include_rts="+include_rts+"&count="+tweetcount);
- }
- else
- {
- //use max_id to get the tweets in the next page. Use max_id-1 to avoid getting redundant tweets.
- url = new URL("https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=" + username+"&include_rts="+include_rts+"&count="+tweetcount+"&max_id="+(maxid-1));
- }
- HttpURLConnection huc = (HttpURLConnection) url.openConnection();
- huc.setReadTimeout(5000);
- Consumer.sign(huc);
- huc.connect();
- if(huc.getResponseCode()==400||huc.getResponseCode()==404)
- {
- System.out.println(huc.getResponseCode());
- break;
- }
- else
- if(huc.getResponseCode()==500||huc.getResponseCode()==502||huc.getResponseCode()==503)
- {
- try {System.out.println(huc.getResponseCode());
- Thread.sleep(3000);
- } catch (InterruptedException ex) {
- Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- else
- // Step 3: If the requests have been exhausted, then wait until the quota is renewed
- if(huc.getResponseCode()==429)
- {
- try {
- huc.disconnect();
- Thread.sleep(this.GetWaitTime("/statuses/user_timeline"));
- continue;
- } catch (InterruptedException ex) {
- ex.printStackTrace();
- }
- }
- bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getInputStream()));
- StringBuilder content = new StringBuilder();
- String temp = "";
- while((temp = bRead.readLine())!=null)
- {
- content.append(temp);
- }
- try {
- JSONArray statusarr = new JSONArray(content.toString());
- if(statusarr.length()==0)
- {
- break;
- }
- for(int i=0;i<statusarr.length();i++)
- {
- JSONObject jobj = statusarr.getJSONObject(i);
- statuses.put(jobj);
- //Get the max_id to get the next batch of tweets
- if(!jobj.isNull("id"))
- {
- maxid = jobj.getLong("id");
- }
- }
- } catch (JSONException ex) {
- ex.printStackTrace();
- }
- }
- System.out.println(statuses.length());
- } catch (OAuthCommunicationException ex) {
- ex.printStackTrace();
- } catch (OAuthMessageSignerException ex) {
- ex.printStackTrace();
- } catch (OAuthExpectationFailedException ex) {
- ex.printStackTrace();
- } catch (IOException ex) {
- ex.printStackTrace();
- }
- return statuses;
- }
-
- /**
- * Retrieves the friends of a user
- * @param username the name of the user whose friends need to be fetched
- * @return a list of user objects who are friends of the user
- */
- public JSONArray GetFriends(String username)
- {
- BufferedReader bRead = null;
- JSONArray friends = new JSONArray();
- try {
- System.out.println("Processing friends of "+username);
- long cursor = -1;
- while(true)
- {
- if(cursor==0)
- {
- break;
- }
- // Step 1: Create the APi request using the supplied username
- URL url = new URL("https://api.twitter.com/1.1/friends/list.json?screen_name="+username+"&cursor="+cursor);
- HttpURLConnection huc = (HttpURLConnection) url.openConnection();
- huc.setReadTimeout(5000);
- //Step 2: Sign the request using the OAuth Secret
- Consumer.sign(huc);
- huc.connect();
- if(huc.getResponseCode()==400||huc.getResponseCode()==401)
- {
- System.out.println(huc.getResponseMessage());
- break;
- }
- else
- if(huc.getResponseCode()==500||huc.getResponseCode()==502||huc.getResponseCode()==503)
- {
- try {
- System.out.println(huc.getResponseMessage());
- Thread.sleep(3000);
- continue;
- } catch (InterruptedException ex) {
- ex.printStackTrace();
- }
- }
- else
- // Step 3: If the requests have been exhausted, then wait until the quota is renewed
- if(huc.getResponseCode()==429)
- {
- try {
- huc.disconnect();
- Thread.sleep(this.GetWaitTime("/friends/list"));
- continue;
- } catch (InterruptedException ex) {
- ex.printStackTrace();
- }
- }
- // Step 4: Retrieve the friends list from Twitter
- bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getContent()));
- StringBuilder content = new StringBuilder();
- String temp = "";
- while((temp = bRead.readLine())!=null)
- {
- content.append(temp);
- }
- try {
- JSONObject jobj = new JSONObject(content.toString());
- // Step 5: Retrieve the token for the next request
- cursor = jobj.getLong("next_cursor");
- JSONArray userlist = jobj.getJSONArray("users");
- if(userlist.length()==0)
- {
- break;
- }
- for(int i=0;i<userlist.length();i++)
- {
- friends.put(userlist.get(i));
- }
- } catch (JSONException ex) {
- ex.printStackTrace();
- }
- huc.disconnect();
- }
- } catch (OAuthCommunicationException ex) {
- ex.printStackTrace();
- } catch (OAuthMessageSignerException ex) {
- ex.printStackTrace();
- } catch (OAuthExpectationFailedException ex) {
- ex.printStackTrace();
- } catch (IOException ex) {
- ex.printStackTrace();
- }
- return friends;
- }
-
- /**
- * Retrieves the wait time if the API Rate Limit has been hit
- * @param api the name of the API currently being used
- * @return the number of milliseconds to wait before initiating a new request
- */
- public long GetWaitTime(String api)
- {
- JSONObject jobj = this.GetRateLimitStatus();
- if(jobj!=null)
- {
- try {
- if(!jobj.isNull("resources"))
- {
- JSONObject resourcesobj = jobj.getJSONObject("resources");
- JSONObject apilimit = null;
- if(api.equals(APIType.USER_TIMELINE))
- {
- JSONObject statusobj = resourcesobj.getJSONObject("statuses");
- apilimit = statusobj.getJSONObject(api);
- }
- else
- if(api.equals(APIType.FOLLOWERS))
- {
- JSONObject followersobj = resourcesobj.getJSONObject("followers");
- apilimit = followersobj.getJSONObject(api);
- }
- else
- if(api.equals(APIType.FRIENDS))
- {
- JSONObject friendsobj = resourcesobj.getJSONObject("friends");
- apilimit = friendsobj.getJSONObject(api);
- }
- else
- if(api.equals(APIType.USER_PROFILE))
- {
- JSONObject userobj = resourcesobj.getJSONObject("users");
- apilimit = userobj.getJSONObject(api);
- }
- int numremhits = apilimit.getInt("remaining");
- if(numremhits<=1)
- {
- long resettime = apilimit.getInt("reset");
- resettime = resettime*1000; //convert to milliseconds
- return resettime;
- }
- }
- } catch (JSONException ex) {
- ex.printStackTrace();
- }
- }
- return 0;
- }
-}
diff --git a/src/Chapter2/restapi/RESTSearchExample.java b/src/Chapter2/restapi/RESTSearchExample.java
deleted file mode 100644
index e9a5dd7..0000000
--- a/src/Chapter2/restapi/RESTSearchExample.java
+++ /dev/null
@@ -1,311 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter2.restapi;
-
-import Chapter2.support.OAuthTokenSecret;
-import Chapter2.openauthentication.OAuthExample;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.net.HttpURLConnection;
-import java.net.URL;
-import java.net.URLEncoder;
-import java.util.ArrayList;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import oauth.signpost.OAuthConsumer;
-import oauth.signpost.basic.DefaultOAuthConsumer;
-import oauth.signpost.exception.OAuthCommunicationException;
-import oauth.signpost.exception.OAuthExpectationFailedException;
-import oauth.signpost.exception.OAuthMessageSignerException;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-import org.json.JSONTokener;
-
-public class RESTSearchExample
-{
- BufferedWriter OutFileWriter;
- OAuthTokenSecret OAuthTokens;
- OAuthConsumer Consumer;
- String query = "#protest";
- String DEF_FILENAME = "searchresults.json";
-
- /**
-  * Builds an OAuthConsumer from the application's consumer key and secret
-  * and attaches the user access token and secret held in OAuthTokens.
-  * @return the configured consumer
-  */
- public OAuthConsumer GetConsumer()
- {
-     OAuthConsumer signedConsumer = new DefaultOAuthConsumer(utils.Configuration.CONSUMER_KEY,utils.Configuration.CONSUMER_SECRET);
-     String accessToken = OAuthTokens.getAccessToken();
-     String accessSecret = OAuthTokens.getAccessSecret();
-     signedConsumer.setTokenWithSecret(accessToken, accessSecret);
-     return signedConsumer;
- }
-
- /**
-  * Load the User Access Token, and the User Access Secret.
-  * For now the tokens come from the debug helper of OAuthExample; before a
-  * release this is meant to be replaced by the regular flow, i.e.
-  * new OAuthExample().GetUserAccessKeySecret().
-  */
- public void LoadTwitterToken()
- {
-     OAuthTokenSecret debugTokens = OAuthExample.DEBUGUserAccessSecret();
-     OAuthTokens = debugTokens;
- }
-
- /**
- * Fetches tweets matching a query
- * @param query for which tweets need to be fetched
- * @return an array of status objects
- */
- public JSONArray GetSearchResults(String query)
- {
- try{
- //construct the request url
- String URL_PARAM_SEPERATOR = "&";
- StringBuilder url = new StringBuilder();
- url.append("https://api.twitter.com/1.1/search/tweets.json?q=");
- //query needs to be encoded
- url.append(URLEncoder.encode(query, "UTF-8"));
- url.append(URL_PARAM_SEPERATOR);
- url.append("count=100");
- URL navurl = new URL(url.toString());
- HttpURLConnection huc = (HttpURLConnection) navurl.openConnection();
- huc.setReadTimeout(5000);
- Consumer.sign(huc);
- huc.connect();
- if(huc.getResponseCode()==400||huc.getResponseCode()==404||huc.getResponseCode()==429)
- {
- System.out.println(huc.getResponseMessage());
- try {
- huc.disconnect();
- Thread.sleep(this.GetWaitTime("/friends/list"));
- } catch (InterruptedException ex) {
- ex.printStackTrace();
- }
- }
- if(huc.getResponseCode()==500||huc.getResponseCode()==502||huc.getResponseCode()==503)
- {
- System.out.println(huc.getResponseMessage());
- try {
- Thread.sleep(2000);
- } catch (InterruptedException ex) {
- Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- BufferedReader bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getInputStream()));
- String temp;
- StringBuilder page = new StringBuilder();
- while( (temp = bRead.readLine())!=null)
- {
- page.append(temp);
- }
- JSONTokener jsonTokener = new JSONTokener(page.toString());
- try {
- JSONObject json = new JSONObject(jsonTokener);
- JSONArray results = json.getJSONArray("statuses");
- return results;
- } catch (JSONException ex) {
- Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- } catch (OAuthCommunicationException ex) {
- Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
- } catch (OAuthMessageSignerException ex) {
- Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
- } catch (OAuthExpectationFailedException ex) {
- Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }
- return null;
- }
-
- /**
-  * Retrieves the rate limit status of the application.
-  * @return the parsed response of the rate_limit_status endpoint, or null if
-  *         the request could not be signed, sent, read or parsed
-  */
- public JSONObject GetRateLimitStatus()
- {
-     //log under this class, not under RESTApiExample
-     Logger log = Logger.getLogger(RESTSearchExample.class.getName());
-     HttpURLConnection huc = null;
-     BufferedReader bRead = null;
-     try{
-         URL url = new URL("https://api.twitter.com/1.1/application/rate_limit_status.json");
-         huc = (HttpURLConnection) url.openConnection();
-         huc.setReadTimeout(5000);
-         OAuthConsumer consumer = new DefaultOAuthConsumer(utils.Configuration.CONSUMER_KEY,utils.Configuration.CONSUMER_SECRET);
-         consumer.setTokenWithSecret(OAuthTokens.getAccessToken(), OAuthTokens.getAccessSecret());
-         consumer.sign(huc);
-         huc.connect();
-         //getInputStream avoids the unchecked cast that getContent() required
-         bRead = new BufferedReader(new InputStreamReader(huc.getInputStream(), "UTF-8"));
-         StringBuilder page = new StringBuilder();
-         String temp;
-         while((temp = bRead.readLine())!=null)
-         {
-             page.append(temp);
-         }
-         return new JSONObject(page.toString());
-     } catch (JSONException ex) {
-         log.log(Level.SEVERE, null, ex);
-     } catch (OAuthCommunicationException ex) {
-         log.log(Level.SEVERE, null, ex);
-     } catch (OAuthMessageSignerException ex) {
-         log.log(Level.SEVERE, null, ex);
-     } catch (OAuthExpectationFailedException ex) {
-         log.log(Level.SEVERE, null, ex);
-     } catch (IOException ex) {
-         log.log(Level.SEVERE, null, ex);
-     } finally {
-         //release the reader on every path, including failures while reading
-         if(bRead!=null)
-         {
-             try {
-                 bRead.close();
-             } catch (IOException ex) {
-                 log.log(Level.SEVERE, null, ex);
-             }
-         }
-         if(huc!=null)
-         {
-             huc.disconnect();
-         }
-     }
-     return null;
- }
-
- /**
- * Initialize the file writer
- * @param path of the file
- * @param outFilename name of the file
- */
- public void InitializeWriters(String outFilename) {
- try {
- File fl = new File(outFilename);
- if(!fl.exists())
- {
- fl.createNewFile();
- }
- /**
- * Use UTF-8 encoding when saving files to avoid
- * losing Unicode characters in the data
- */
- OutFileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilename,true),"UTF-8"));
- } catch (IOException ex) {
- ex.printStackTrace();
- }
- }
-
- /**
-  * Close the opened filewriter to save the data.
-  * Does nothing when no writer is open, e.g. because InitializeWriters
-  * was never called or failed.
-  */
- public void CleanupAfterFinish()
- {
-     if(OutFileWriter==null)
-     {
-         return;
-     }
-     try {
-         OutFileWriter.close();
-     } catch (IOException ex) {
-         Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
-     }
- }
-
- /**
-  * Writes the retrieved data to the output file, one JSON object per line.
-  * An element that cannot be read as a JSON object is logged and skipped;
-  * an I/O failure stops the writing.
-  * @param searchResults the retrieved statuses in JSON
-  */
- public void WriteToFile(JSONArray searchResults)
- {
-     int index = 0;
-     try
-     {
-         while(index<searchResults.length())
-         {
-             try {
-                 String line = searchResults.getJSONObject(index).toString();
-                 OutFileWriter.write(line);
-                 OutFileWriter.newLine();
-             } catch (JSONException ex) {
-                 Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
-             }
-             index++;
-         }
-     } catch (IOException ex) {
-         ex.printStackTrace();
-     }
- }
-
- /**
-  * Retrieves the wait time if the API Rate Limit has been hit.
-  * The limit entry is looked up under the resource family named by the first
-  * path segment of the endpoint (e.g. "search" for "/search/tweets"); names
-  * without such a segment are looked up under "statuses".
-  * @param api the name of the API currently being used
-  * @return the number of milliseconds to wait before initiating a new request;
-  *         0 if no wait is needed or the status could not be read
-  */
- public long GetWaitTime(String api)
- {
-     if(api==null)
-     {
-         return 0;
-     }
-     JSONObject jobj = this.GetRateLimitStatus();
-     if(jobj==null||jobj.isNull("resources"))
-     {
-         return 0;
-     }
-     String family = "statuses";
-     int secondslash = api.indexOf('/', 1);
-     if(api.startsWith("/")&&secondslash>1)
-     {
-         family = api.substring(1, secondslash);
-     }
-     try {
-         JSONObject apilimit = jobj.getJSONObject("resources").getJSONObject(family).getJSONObject(api);
-         if(apilimit.getInt("remaining")<=1)
-         {
-             //"reset" is an absolute point in time (epoch seconds), so convert it to
-             //milliseconds and subtract the current time to obtain a duration
-             long resetmillis = apilimit.getLong("reset")*1000L;
-             long wait = resetmillis-System.currentTimeMillis();
-             return wait>0?wait:0;
-         }
-     } catch (JSONException ex) {
-         ex.printStackTrace();
-     }
-     return 0;
- }
-
- /**
- * Creates an OR search query from the supplied terms
- * @param queryTerms
- * @return a String formatted as term1 OR term2
- */
- public String CreateORQuery(ArrayList<String> queryTerms)
- {
- String OR_Operator = " OR ";
- StringBuffer querystr = new StringBuffer();
- int count = 1;
- for(String term:queryTerms)
- {
- if(count==1)
- {
- querystr.append(term);
- }
- else
- {
- querystr.append(OR_Operator).append(term);
- }
- }
- return querystr.toString();
- }
-
- /**
-  * Searches for the terms given on the command line (or the default query
-  * when none are given) and appends the results to the default output file.
-  * @param args the search terms, combined with OR
-  */
- public static void main(String[] args)
- {
-     RESTSearchExample searcher = new RESTSearchExample();
-     String outfilename = searcher.DEF_FILENAME;
-     ArrayList<String> terms = new ArrayList<String>();
-     if(args!=null)
-     {
-         if(args.length==0)
-         {
-             terms.add(searcher.query);
-         }
-         else
-         {
-             for(String term:args)
-             {
-                 terms.add(term);
-             }
-         }
-     }
-     searcher.LoadTwitterToken();
-     searcher.Consumer = searcher.GetConsumer();
-     System.out.println(searcher.GetRateLimitStatus());
-     searcher.InitializeWriters(outfilename);
-     String orquery = searcher.CreateORQuery(terms);
-     JSONArray found = searcher.GetSearchResults(orquery);
-     if(found!=null)
-     {
-         searcher.WriteToFile(found);
-     }
-     searcher.CleanupAfterFinish();
- }
-}
diff --git a/src/Chapter2/streamingapi/StreamingApiExample.java b/src/Chapter2/streamingapi/StreamingApiExample.java
deleted file mode 100644
index 61a9cc0..0000000
--- a/src/Chapter2/streamingapi/StreamingApiExample.java
+++ /dev/null
@@ -1,372 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter2.streamingapi;
-
-import Chapter2.support.OAuthTokenSecret;
-import Chapter2.openauthentication.OAuthExample;
-import java.io.BufferedReader;
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.io.UnsupportedEncodingException;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import oauth.signpost.OAuthConsumer;
-import oauth.signpost.commonshttp.CommonsHttpOAuthConsumer;
-import oauth.signpost.exception.OAuthCommunicationException;
-import oauth.signpost.exception.OAuthExpectationFailedException;
-import oauth.signpost.exception.OAuthMessageSignerException;
-import org.apache.commons.httpclient.HttpStatus;
-import org.apache.http.HttpResponse;
-import org.apache.http.client.HttpClient;
-import org.apache.http.HttpEntity;
-import org.apache.http.NameValuePair;
-import org.apache.http.client.entity.UrlEncodedFormEntity;
-import org.apache.http.client.methods.HttpPost;
-import org.apache.http.impl.client.DefaultHttpClient;
-import org.apache.http.message.BasicNameValuePair;
-import org.apache.http.params.CoreConnectionPNames;
-import org.json.JSONException;
-import org.json.JSONObject;
-import org.json.JSONTokener;
-import utils.Configuration;
-
-public class StreamingApiExample
-{
- OAuthTokenSecret OAuthToken;
- final int RECORDS_TO_PROCESS = 1000;
- final int MAX_GEOBOXES = 25;
- final int MAX_KEYWORDS = 400;
- final int MAX_USERS = 5000;
- HashSet<String> Keywords;
- HashSet<String> Geoboxes;
- HashSet<String> Userids;
- final String CONFIG_FILE_PATH = "streaming/streaming.config";
- final String DEF_OUTPATH = "streaming/";
-
- /**
-  * Loads the Twitter access token and secret for a user.
-  * The tokens currently come from the debug helper of OAuthExample; the
-  * regular flow would be new OAuthExample().GetUserAccessKeySecret().
-  */
- public void LoadTwitterToken()
- {
-     OAuthTokenSecret debugToken = OAuthExample.DEBUGUserAccessSecret();
-     OAuthToken = debugToken;
- }
-
- /**
- * Creates a connection to the Streaming Filter API
- * @param baseUrl the URL for Twitter Filter API
- * @param outFilePath Location to place the exported file
- */
- public void CreateStreamingConnection(String baseUrl, String outFilePath)
- {
- HttpClient httpClient = new DefaultHttpClient();
- httpClient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, new Integer(90000));
- //Step 1: Initialize OAuth Consumer
- OAuthConsumer consumer = new CommonsHttpOAuthConsumer(Configuration.CONSUMER_KEY,Configuration.CONSUMER_SECRET);
- consumer.setTokenWithSecret(OAuthToken.getAccessToken(),OAuthToken.getAccessSecret());
- //Step 2: Create a new HTTP POST request and set parameters
- HttpPost httppost = new HttpPost(baseUrl);
- try {
- httppost.setEntity(new UrlEncodedFormEntity(CreateRequestBody(), "UTF-8"));
- } catch (UnsupportedEncodingException ex) {
- ex.printStackTrace();
- }
- try {
- //Step 3: Sign the request
- consumer.sign(httppost);
- } catch (OAuthMessageSignerException ex) {
- ex.printStackTrace();
- } catch (OAuthExpectationFailedException ex) {
- ex.printStackTrace();
- } catch (OAuthCommunicationException ex) {
- ex.printStackTrace();
- }
- HttpResponse response;
- InputStream is = null;
- try {
- //Step 4: Connect to the API
- response = httpClient.execute(httppost);
- if (response.getStatusLine().getStatusCode()!= HttpStatus.SC_OK)
- {
- throw new IOException("Got status " +response.getStatusLine().getStatusCode());
- }
- else
- {
- System.out.println(OAuthToken.getAccessToken()+ ": Processing from " + baseUrl);
- HttpEntity entity = response.getEntity();
- try {
- is = entity.getContent();
- } catch (IOException ex) {
- ex.printStackTrace();
- } catch (IllegalStateException ex) {
- ex.printStackTrace();
- }
- //Step 5: Process the incoming Tweet Stream
- this.ProcessTwitterStream(is, outFilePath);
- }
- } catch (IOException ex) {
- ex.printStackTrace();
- }finally {
- // Abort the method, otherwise releaseConnection() will
- // attempt to finish reading the never-ending response.
- // These methods do not throw exceptions.
- if(is!=null)
- {
- try {
- is.close();
- } catch (IOException ex) {
- ex.printStackTrace();
- }
- }
- }
- }
-
- /**
-  * Processes a stream of tweets and writes them to files, one tweet per line.
-  * Each tweet here is represented by a JSON document. Tweets are collected in
-  * batches of RECORDS_TO_PROCESS and every batch goes to its own file named
-  * tweets_&lt;current time in millis&gt;.json below outFilePath. Documents that
-  * cannot be parsed are reported and skipped. Processing stops when the stream
-  * ends or can no longer be read; tweets collected up to then are still written.
-  * @param is input stream already connected to the streaming API
-  * @param outFilePath prefix (directory) for the files holding the collected tweets
-  */
- public void ProcessTwitterStream(InputStream is, String outFilePath)
- {
-     if(is==null)
-     {
-         return;
-     }
-     ArrayList<JSONObject> rawtweets = new ArrayList<JSONObject>();
-     int nooftweetsuploaded = 0;
-     try {
-         JSONTokener jsonTokener = new JSONTokener(new InputStreamReader(is, "UTF-8"));
-         boolean streamopen = true;
-         while (streamopen) {
-             try {
-                 if(!jsonTokener.more())
-                 {
-                     //end of the stream: leave instead of failing to parse forever
-                     streamopen = false;
-                 }
-                 else
-                 {
-                     rawtweets.add(new JSONObject(jsonTokener));
-                 }
-             } catch (JSONException ex) {
-                 ex.printStackTrace();
-                 //a read failure will repeat on every attempt, a malformed document will not
-                 if(ex.getCause() instanceof IOException)
-                 {
-                     streamopen = false;
-                 }
-             }
-             if (rawtweets.size() >= RECORDS_TO_PROCESS)
-             {
-                 WriteTweetBatch(rawtweets, outFilePath);
-                 nooftweetsuploaded += rawtweets.size();
-                 System.out.println("Written "+nooftweetsuploaded+" records so far");
-                 rawtweets.clear();
-             }
-         }
-         if(!rawtweets.isEmpty())
-         {
-             //keep the last, incomplete batch
-             WriteTweetBatch(rawtweets, outFilePath);
-             nooftweetsuploaded += rawtweets.size();
-             System.out.println("Written "+nooftweetsuploaded+" records so far");
-             rawtweets.clear();
-         }
-     } catch (IOException ex) {
-         ex.printStackTrace();
-     }
- }
-
- /**
-  * Writes one batch of tweets to a new UTF-8 file, one JSON document per line.
-  * The writer is closed even when writing fails.
-  */
- private void WriteTweetBatch(List<JSONObject> tweets, String outFilePath) throws IOException
- {
-     String filename = outFilePath + "tweets_" + Calendar.getInstance().getTimeInMillis() + ".json";
-     BufferedWriter bwrite = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), "UTF-8"));
-     try {
-         for (JSONObject jobj : tweets) {
-             bwrite.write(jobj.toString());
-             bwrite.newLine();
-         }
-     } finally {
-         bwrite.close();
-     }
- }
-
- public static void main(String[] args)
- {
- StreamingApiExample sae = new StreamingApiExample();
- sae.LoadTwitterToken();
- //load parameters from a TSV file
- String filename = sae.CONFIG_FILE_PATH;
- String outfilepath = sae.DEF_OUTPATH;
- if(args!=null)
- {
- if(args.length>0)
- {
- filename = args[0];
- }
- if(args.length>1)
- {
- File fl = new File(args[1]);
- if(fl.exists()&&fl.isDirectory())
- {
- outfilepath = args[1];
- }
- }
- }
- sae.ReadParameters(filename);
- sae.CreateStreamingConnection("https://stream.twitter.com/1.1/statuses/filter.json", outfilepath);
- }
-
- /**
-  * Reads the file and loads the parameters to be crawled. The first three
-  * non-empty lines are taken, in this order, as keywords, geoboxes and user
-  * ids; each of them is a list of tab separated values. Any further lines
-  * are ignored. A file that cannot be read is reported and leaves the
-  * parameters as they were.
-  * @param filename path of the parameter file
-  */
- public void ReadParameters(String filename)
- {
-     if(Userids==null)
-     {
-         Userids = new HashSet<String>();
-     }
-     if(Geoboxes==null)
-     {
-         Geoboxes = new HashSet<String>();
-     }
-     if(Keywords==null)
-     {
-         Keywords = new HashSet<String>();
-     }
-     BufferedReader br = null;
-     try {
-         br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8"));
-         String temp;
-         int count = 1;
-         while((temp = br.readLine())!=null)
-         {
-             if(temp.isEmpty())
-             {
-                 continue;
-             }
-             if(count==1)
-             {
-                 FilterKeywords(SplitTabSeparated(temp));
-             }
-             else if(count==2)
-             {
-                 FilterGeoboxes(SplitTabSeparated(temp));
-             }
-             else if(count==3)
-             {
-                 FilterUserids(SplitTabSeparated(temp));
-             }
-             count++;
-         }
-     } catch (IOException ex) {
-         ex.printStackTrace();
-     }
-     finally{
-         //the reader is still null when the file could not be opened
-         if(br!=null)
-         {
-             try {
-                 br.close();
-             } catch (IOException ex) {
-                 ex.printStackTrace();
-             }
-         }
-     }
- }
-
- /**
-  * Splits a line at tab characters and returns the distinct values.
-  */
- private HashSet<String> SplitTabSeparated(String line)
- {
-     HashSet<String> values = new HashSet<String>();
-     for(String value:line.split("\t"))
-     {
-         values.add(value);
-     }
-     return values;
- }
-
- /**
-  * Adds the given user ids to Userids until MAX_USERS ids are held in total;
-  * the remaining ids are ignored.
-  * @param userids the candidate ids, may be null
-  */
- private void FilterUserids(HashSet<String> userids)
- {
-     if(userids==null)
-     {
-         return;
-     }
-     for(String id:userids)
-     {
-         if(Userids.size()>=MAX_USERS)
-         {
-             break;
-         }
-         Userids.add(id);
-     }
- }
-
- /**
-  * Adds the given geoboxes to Geoboxes until MAX_GEOBOXES boxes are held in
-  * total; the remaining boxes are ignored.
-  * @param geoboxes the candidate boxes, may be null
-  */
- private void FilterGeoboxes(HashSet<String> geoboxes)
- {
-     if(geoboxes==null)
-     {
-         return;
-     }
-     for(String box:geoboxes)
-     {
-         if(Geoboxes.size()>=MAX_GEOBOXES)
-         {
-             break;
-         }
-         Geoboxes.add(box);
-     }
- }
- /**
-  * Keep only the maximum permitted number of parameters for a connection. Ignoring the rest.
-  * Keywords are added until MAX_KEYWORDS are held in total.
-  * This can be extended to create multiple sets to be crawled by different threads.
-  * @param hashtags the candidate keywords, may be null
-  */
- private void FilterKeywords(HashSet<String> hashtags)
- {
-     if(hashtags==null)
-     {
-         return;
-     }
-     for(String tag:hashtags)
-     {
-         if(Keywords.size()>=MAX_KEYWORDS)
-         {
-             break;
-         }
-         Keywords.add(tag);
-     }
- }
-
- /**
-  * Builds the form parameters of the filter request from the loaded user
-  * ids ("follow"), geoboxes ("locations") and keywords ("track"). A parameter
-  * is left out when its set is missing or empty; each one used is printed.
-  * @return the parameters, possibly empty
-  */
- private List<NameValuePair> CreateRequestBody()
- {
-     List<NameValuePair> body = new ArrayList<NameValuePair>();
-     boolean hasUsers = Userids != null&&Userids.size()>0;
-     if(hasUsers)
-     {
-         body.add(CreateNameValuePair("follow", Userids));
-         System.out.println("userids = "+Userids);
-     }
-     boolean hasBoxes = Geoboxes != null&&Geoboxes.size()>0;
-     if(hasBoxes)
-     {
-         body.add(CreateNameValuePair("locations", Geoboxes));
-         System.out.println("locations = "+Geoboxes);
-     }
-     boolean hasKeywords = Keywords != null&&Keywords.size()>0;
-     if(hasKeywords)
-     {
-         body.add(CreateNameValuePair("track", Keywords));
-         System.out.println("keywords = "+Keywords);
-     }
-     return body;
- }
-
- /**
-  * Joins the items with commas and pairs the result with the given name.
-  * @param name the parameter name
-  * @param items the values to join, in iteration order
-  * @return the name together with the comma separated values
-  */
- private NameValuePair CreateNameValuePair(String name, Collection<String> items)
- {
-     StringBuilder joined = new StringBuilder();
-     //empty before the first item, a comma before every later one
-     String separator = "";
-     for (String entry : items) {
-         joined.append(separator).append(entry);
-         separator = ",";
-     }
-     String value = joined.toString();
-     return new BasicNameValuePair(name, value);
- }
-}
diff --git a/src/Chapter2/support/APIType.java b/src/Chapter2/support/APIType.java
deleted file mode 100644
index 94449f8..0000000
--- a/src/Chapter2/support/APIType.java
+++ /dev/null
@@ -1,12 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter2.support;
-
- /**
-  * Names of the REST endpoints used by the examples. They are declared
-  * final so that they cannot be reassigned at runtime.
-  */
- public class APIType
- {
-     /** Endpoint name of the user timeline. */
-     public static final String USER_TIMELINE = "/statuses/user_timeline";
-     /** Endpoint name of the followers list. */
-     public static final String FOLLOWERS = "/followers/list";
-     /** Endpoint name of the friends list. */
-     public static final String FRIENDS = "/friends/list";
-     /** Endpoint name of the user profile. */
-     public static final String USER_PROFILE = "/users/show";
- }
diff --git a/src/Chapter2/support/InfoType.java b/src/Chapter2/support/InfoType.java
deleted file mode 100644
index 42b0334..0000000
--- a/src/Chapter2/support/InfoType.java
+++ /dev/null
@@ -1,12 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter2.support;
-
- /**
-  * Integer codes for the kinds of information handled by the examples.
-  */
- public class InfoType
- {
-     /** Code for profile information. */
-     public static final int PROFILE_INFO = 0;
-
-     /** Code for follower information. */
-     public static final int FOLLOWER_INFO = 1;
-
-     /** Code for friend information. */
-     public static final int FRIEND_INFO = 2;
-
-     /** Code for status information. */
-     public static final int STATUSES_INFO = 3;
- }
diff --git a/src/Chapter2/support/Location.java b/src/Chapter2/support/Location.java
deleted file mode 100644
index 7f6234f..0000000
--- a/src/Chapter2/support/Location.java
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter2.support;
-
-/**
- *
- * @author shamanth
- */
-public class Location
-{
- public Double latitude;
- public Double longitude;
-
- public Location(Double lat,Double lng)
- {
- latitude = lat;
- longitude = lng;
- }
-
- @Override
- public String toString()
- {
- return "Latitude: "+latitude+" & Longitude: "+longitude;
- }
-}
diff --git a/src/Chapter2/support/OAuthTokenSecret.java b/src/Chapter2/support/OAuthTokenSecret.java
deleted file mode 100644
index 8fee4a8..0000000
--- a/src/Chapter2/support/OAuthTokenSecret.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter2.support;
-
- /**
-  * Holds a user's access token together with the matching access secret.
-  */
- public class OAuthTokenSecret
- {
-     String UserAccessToken;
-     String UserAccessSecret;
-
-     /**
-      * @param token the user access token
-      * @param secret the user access secret
-      */
-     public OAuthTokenSecret(String token,String secret)
-     {
-         //goes through the setters, token first
-         setAccessToken(token);
-         setAccessSecret(secret);
-     }
-
-     /** @return the user access token */
-     public String getAccessToken() {
-         return this.UserAccessToken;
-     }
-
-     /** @param AccessToken the new user access token */
-     public void setAccessToken(String AccessToken) {
-         UserAccessToken = AccessToken;
-     }
-
-     /** @return the user access secret */
-     public String getAccessSecret() {
-         return this.UserAccessSecret;
-     }
-
-     /** @param AccessSecret the new user access secret */
-     public void setAccessSecret(String AccessSecret) {
-         UserAccessSecret = AccessSecret;
-     }
-
-     /**
-      * @return token and secret in readable form; note that this includes the secret
-      */
-     @Override
-     public String toString()
-     {
-         StringBuilder text = new StringBuilder("Access Token: ");
-         text.append(getAccessToken());
-         text.append(" Access Secret: ");
-         text.append(getAccessSecret());
-         return text.toString();
-     }
- }
diff --git a/src/Chapter4/GraphElements/RetweetEdge.java b/src/Chapter4/GraphElements/RetweetEdge.java
deleted file mode 100644
index 83836a0..0000000
--- a/src/Chapter4/GraphElements/RetweetEdge.java
+++ /dev/null
@@ -1,53 +0,0 @@
-package GraphElements;
-
-
- /**
-  * A directed edge between two users of a retweet graph together with a
-  * retweet count, which starts at 1. Two edges are equal when their end
-  * points are equal; the count is not part of the comparison.
-  */
- public class RetweetEdge {
-     private UserNode to, from;
-     private int retweetCount;
-
-     /**
-      * @param to the node the edge points to
-      * @param from the node the edge starts at
-      */
-     public RetweetEdge(UserNode to, UserNode from){
-         this.to = to;
-         this.from = from;
-         retweetCount = 1;
-     }
-
-     /** Increases the retweet count by one. */
-     public void incrementRTCount(){
-         retweetCount++;
-     }
-
-     public UserNode getTo() {
-         return to;
-     }
-     public void setTo(UserNode to) {
-         this.to = to;
-     }
-     public UserNode getFrom() {
-         return from;
-     }
-     public void setFrom(UserNode from) {
-         this.from = from;
-     }
-     public int getRetweetCount() {
-         return retweetCount;
-     }
-     public void setRetweetCount(int retweetCount) {
-         this.retweetCount = retweetCount;
-     }
-
-     /**
-      * @return true if the other object is a RetweetEdge with equal end
-      *         points; end points that are null only match null
-      */
-     @Override
-     public boolean equals(Object maybeEdge){
-         if(maybeEdge == this){
-             return true;
-         }
-         if(maybeEdge instanceof RetweetEdge){
-             RetweetEdge edge = (RetweetEdge) maybeEdge;
-             return sameNode(edge.to, to) && sameNode(edge.from, from);
-         }
-         return false;
-     }
-
-     /** Null-safe comparison of two nodes. */
-     private static boolean sameNode(UserNode a, UserNode b){
-         return a == null ? b == null : a.equals(b);
-     }
-
-     /** @return the edge as "from -> to" */
-     @Override
-     public String toString(){
-         return from + " -> " + to;
-     }
-
-     /** @return the hash of the string form, which is built from both end points */
-     @Override
-     public int hashCode(){
-         return toString().hashCode();
-     }
- }
diff --git a/src/Chapter4/GraphElements/UserNode.java b/src/Chapter4/GraphElements/UserNode.java
deleted file mode 100644
index fba4419..0000000
--- a/src/Chapter4/GraphElements/UserNode.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package GraphElements;
-
-
-
-public class UserNode {
- private String username;
-
- public UserNode(String username){
- this.username = username;
- }
-
- public String getUsername() {
- return username;
- }
-
- public void setUsername(String username) {
- this.username = username;
- }
-
- public boolean equals(Object un){
- if(un instanceof UserNode){
- return username.equals(((UserNode)un).username);
- }
- return false;
- }
-
- public String toString(){
- return username;
- }
-
- public int hashCode(){
- return username.hashCode();
- }
-}
diff --git a/src/Chapter4/centrality/examples/BetweennessCentralityExample.java b/src/Chapter4/centrality/examples/BetweennessCentralityExample.java
deleted file mode 100644
index ab9f7e6..0000000
--- a/src/Chapter4/centrality/examples/BetweennessCentralityExample.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package centrality.examples;
-
-import Chapter4.util.TweetFileToGraph;
-import java.io.File;
-import GraphElements.RetweetEdge;
-import GraphElements.UserNode;
-import edu.uci.ics.jung.algorithms.importance.BetweennessCentrality;
-import edu.uci.ics.jung.graph.DirectedGraph;
-
- /**
-  * Prints the betweenness centrality rankings of a retweet network.
-  */
- public class BetweennessCentralityExample {
-     /**
-      * @param args optional; args[0] names the tweet file, otherwise
-      *             synthetic_retweet_network.json is used
-      */
-     public static void main(String[] args){
-         String path = args.length > 0 ? args[0] : "synthetic_retweet_network.json";
-         File tweetFile = new File(path);
-
-         DirectedGraph<UserNode, RetweetEdge> network = TweetFileToGraph.getRetweetNetwork(tweetFile);
-
-         //calculate the betweenness centrality
-         BetweennessCentrality<UserNode, RetweetEdge> ranker = new BetweennessCentrality<UserNode, RetweetEdge>(network);
-         ranker.evaluate();
-         ranker.printRankings(true, true);
-     }
- }
diff --git a/src/Chapter4/centrality/examples/EigenvectorCentralityExample.java b/src/Chapter4/centrality/examples/EigenvectorCentralityExample.java
deleted file mode 100644
index 172dd16..0000000
--- a/src/Chapter4/centrality/examples/EigenvectorCentralityExample.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package centrality.examples;
-
-import Chapter4.util.TweetFileToGraph;
-import java.io.File;
-import GraphElements.RetweetEdge;
-import GraphElements.UserNode;
-import edu.uci.ics.jung.algorithms.scoring.EigenvectorCentrality;
-import edu.uci.ics.jung.graph.DirectedGraph;
-
- /**
-  * Prints the eigenvector centrality score of every user in a retweet network.
-  */
- public class EigenvectorCentralityExample {
-     /**
-      * @param args optional; args[0] names the tweet file, otherwise
-      *             synthetic_retweet_network.json is used
-      */
-     public static void main(String[] args){
-         String path = args.length > 0 ? args[0] : "synthetic_retweet_network.json";
-         File tweetFile = new File(path);
-
-         DirectedGraph<UserNode, RetweetEdge> network = TweetFileToGraph.getRetweetNetwork(tweetFile);
-
-         EigenvectorCentrality<UserNode, RetweetEdge> scorer = new EigenvectorCentrality<UserNode, RetweetEdge>(network);
-         scorer.evaluate();
-
-         //one line per user: "<user> - <score>"
-         for(UserNode user : network.getVertices()){
-             Object score = scorer.getVertexScore(user);
-             System.out.println(user + " - " + score);
-         }
-     }
- }
diff --git a/src/Chapter4/centrality/examples/InDegreeCentralityExample.java b/src/Chapter4/centrality/examples/InDegreeCentralityExample.java
deleted file mode 100644
index 6a027ac..0000000
--- a/src/Chapter4/centrality/examples/InDegreeCentralityExample.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package Chapter4.centrality.examples;
-
-import Chapter4.util.TweetFileToGraph;
-import java.io.File;
-import GraphElements.RetweetEdge;
-import GraphElements.UserNode;
-import edu.uci.ics.jung.graph.DirectedGraph;
-
-public class InDegreeCentralityExample {
-
- public static void main(String[] args){
-
- File tweetFile;
-
- if(args.length > 0){
- tweetFile = new File(args[0]);
- }
- else{
- tweetFile = new File("synthetic_retweet_network.json");
- }
-
- DirectedGraph<UserNode, RetweetEdge> retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile);
-
- //calculate the betweenness centrality
- for(UserNode node : retweetGraph.getVertices()){
- System.out.println(node + " - " + retweetGraph.getInEdges(node).size());
- }
-
- }
-}
diff --git a/src/Chapter4/centrality/examples/PageRankCentralityExample.java b/src/Chapter4/centrality/examples/PageRankCentralityExample.java
deleted file mode 100644
index dd44efd..0000000
--- a/src/Chapter4/centrality/examples/PageRankCentralityExample.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package Chapter4.centrality.examples;
-
-import Chapter4.util.TweetFileToGraph;
-import java.io.File;
-import GraphElements.RetweetEdge;
-import GraphElements.UserNode;
-import edu.uci.ics.jung.algorithms.scoring.PageRank;
-import edu.uci.ics.jung.graph.DirectedGraph;
-
- /**
-  * Prints the PageRank score of every user in a retweet network.
-  */
- public class PageRankCentralityExample {
-     /**
-      * @param args optional; args[0] names the tweet file, otherwise
-      *             synthetic_retweet_network.json is used
-      */
-     public static void main(String[] args){
-         String path = args.length > 0 ? args[0] : "synthetic_retweet_network.json";
-         File tweetFile = new File(path);
-
-         DirectedGraph<UserNode, RetweetEdge> network = TweetFileToGraph.getRetweetNetwork(tweetFile);
-
-         //the second constructor argument is passed as .5
-         PageRank<UserNode, RetweetEdge> ranker = new PageRank<UserNode, RetweetEdge>(network, .5);
-         ranker.evaluate();
-
-         //one line per user: "<user> - <score>"
-         for(UserNode user : network.getVertices()){
-             Object score = ranker.getVertexScore(user);
-             System.out.println(user + " - " + score);
-         }
-     }
- }
diff --git a/src/Chapter4/classification/bayes/Classification.java b/src/Chapter4/classification/bayes/Classification.java
deleted file mode 100644
index ea9aba7..0000000
--- a/src/Chapter4/classification/bayes/Classification.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package Chapter4.classification.bayes;
-
-public class Classification {
- private String label;
- private double confidence;
-
- public Classification(String label, double confidence){
- this.label = label;
- this.confidence = confidence;
- }
-
- public String getLabel() {
- return label;
- }
- public double getConfidence() {
- return confidence;
- }
-
- public String toString(){
- return "(" + label + ", " + confidence + ")";
- }
-}
diff --git a/src/Chapter4/classification/bayes/NBCxv.java b/src/Chapter4/classification/bayes/NBCxv.java
deleted file mode 100644
index 5c48e28..0000000
--- a/src/Chapter4/classification/bayes/NBCxv.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package Chapter4.classification.bayes;
-
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Map;
-
-import com.google.gson.JsonObject;
-import com.google.gson.JsonStreamParser;
-
- /**
-  * Cross validation driver for the naive Bayes sentiment classifier: reads
-  * the "text" field of every JSON document in a file and trains classifiers
-  * on random 5-way splits of the texts.
-  */
- public class NBCxv {
-     /**
-      * @param args optional; args[0] names the input file, otherwise
-      *             owsemoticons.json is used
-      */
-     public static void main(String[] args){
-
-         String filename = args.length >= 1 ? args[0] : "owsemoticons.json";
-
-         ArrayList<String> allTexts = new ArrayList<String>();
-
-         FileReader reader = null;
-         try {
-             //read the file and collect the text of each document
-             reader = new FileReader(filename);
-             JsonStreamParser parser = new JsonStreamParser(reader);
-             JsonObject elem;
-             while (parser.hasNext()) {
-                 elem = parser.next().getAsJsonObject();
-                 allTexts.add(elem.get("text").getAsString());
-             }
-         } catch (IOException e) {
-             e.printStackTrace();
-         } finally {
-             //release the file whether or not reading succeeded
-             if (reader != null) {
-                 try {
-                     reader.close();
-                 } catch (IOException e) {
-                     e.printStackTrace();
-                 }
-             }
-         }
-
-         //do 5-fold cross validation 3 times
-         Map<Integer, ArrayList<String>> buckets;
-         int bucketIdx;
-         NaiveBayesSentimentClassifier nbsc;
-         for(int i = 0; i < 3; i++){
-
-             //randomly split the texts into 5 buckets
-             buckets = new HashMap<Integer, ArrayList<String>>();
-             //initialize the 5 buckets
-             for(int j = 0; j < 5; j++) buckets.put(j, new ArrayList<String>());
-             for(String text : allTexts){
-                 bucketIdx = (int) (Math.random()*5);
-                 buckets.get(bucketIdx).add(text);
-             }
-
-             for(int j = 0; j < 5; j++){
-                 //use all but j as the training, use j as the test.
-                 nbsc = new NaiveBayesSentimentClassifier();
-                 for(int k = 0; k < 5; k++){
-                     if(k != j){
-                         nbsc.trainInstances(buckets.get(k));
-                     }
-                 }
-                 //NOTE(review): testing with bucket j is not implemented; only training happens here
-
-             }
-         }
-
-     }
- }
diff --git a/src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java b/src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java
deleted file mode 100644
index 923416c..0000000
--- a/src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java
+++ /dev/null
@@ -1,264 +0,0 @@
-package Chapter4.classification.bayes;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.StringTokenizer;
-
-/**
- * This class performs both the training and classification steps of a Naive Bayes Classifier.
- *
- */
-public class NaiveBayesSentimentClassifier {
- //the possible sentiment labels
- private static final String[] SENTIMENT_LABELS = {"happy", "sad"};
- //the tokens to look for in labeling the sentiment.
- private static final String[] HAPPY_SMILEYS = {":)", ";)", ":D", ":-)", ":o)", ":-D"};
- private static final String[] SAD_SMILEYS = {":(", ":-(", ":'(", ":'-(", "D:"};
- //store these as a set for faster retrieval
- private static final Set<String> HAPPY_SMILEY_SET = new HashSet<String>(Arrays.asList(HAPPY_SMILEYS));
- private static final Set<String> SAD_SMILEY_SET = new HashSet<String>(Arrays.asList(SAD_SMILEYS));
-
- //counter for the number of times each word has been associated with each sentiment.
- private Map<String, Integer[]> sentOccurs;
- //counter for the number of times we've seen each sentiment.
- private Integer[] sentCount;
-
- public NaiveBayesSentimentClassifier(){
- //initialize the counters
- sentOccurs = new HashMap<String, Integer[]>();
- sentCount = new Integer[SENTIMENT_LABELS.length];
- for(int i = 0; i < SENTIMENT_LABELS.length; i++){
- sentCount[i] = 0;
- }
- }
-
- /**
- * Tokenize a string. Turns string into list of words based on whitespace, then
- * removes stopwords, punctuation, and reduces the word to its stem.
- * @param text
- * The piece of text
- * @return
- * Each individual word.
- */
- private List<String> getTokens(String text){
- StringTokenizer tokens = new StringTokenizer(text);
- ArrayList<String> words = new ArrayList<String>();
-
- String tmp;
- StringBuilder sb;
- while(tokens.hasMoreTokens()){
- sb = new StringBuilder();
- tmp = tokens.nextToken();
- tmp = tmp.toLowerCase();
-
- for(char ch : tmp.toCharArray()){
- if(Character.isLetter(ch)){
- sb.append(ch);
- }
- }
- tmp = sb.toString();
- if(tmp.length() > 0 && !StopwordsList.stopwordsSet.contains(tmp)){
- words.add(sb.toString());
- }
- }
-
- return words;
- }
-
- /**
- * Checks if tweet has a "label" (emoticon). If so, stores the words in
- * the prior.
- * @param tweetText
- * The text of the document to check.
- */
- public void trainInstance(String tweetText){
- //see if the tweet is labeled (i.e. has a smiley)
- int tweetLabel = extractLabel(tweetText);
- List<String> tokens = getTokens(tweetText);
- if(tweetLabel != -1){
- //add these words to the classifier
- updateClassifier(tokens, tweetLabel);
- }
- }
-
- public String printWordOccurs(int sentIndex, int topN){
- StringBuilder sb = new StringBuilder();
-
- WordCountPair wpcset[] = new WordCountPair[sentOccurs.keySet().size()];
-
- String s;
- int t = 0;
- Iterator<String> sIter = sentOccurs.keySet().iterator();
-// int totalCount = 0;
-// while(sIter.hasNext()){
-// s = sIter.next();
-// totalCount += sentOccurs.get(s)[sentIndex];
-// }
-
- sIter = sentOccurs.keySet().iterator();
- while(sIter.hasNext()){
- s = sIter.next();
-// wpcset[t++] = new WordCountPair(s, sentOccurs.get(s)[sentIndex] * 1.0 / totalCount);
- wpcset[t++] = new WordCountPair(s, Math.sqrt(sentOccurs.get(s)[sentIndex] * 1.0 ));
- }
-
- Arrays.sort(wpcset);
-
- double frac;
- for(int i = 0; (i < topN || topN <= 0) && i < wpcset.length; i++){
- s = wpcset[i].getWord();
- frac = wpcset[i].getCount();
-
- sb.append(s);
- sb.append(":");
- sb.append(frac);
- sb.append("\n");
- }
-
- return sb.toString();
- }
-
- public void trainInstances(List<String> tweetTexts){
- for(String text : tweetTexts){
- trainInstance(text);
- }
- }
-
- /**
- * Classify a tweet as happy or sad. This ignores the emoticon for demonstration purposes.
- * @param tweetText
- * The text of the tweet
- * @return
- * A Classification object that returns the sentiment of the tweet.
- */
- public Classification classify(String tweetText){
- //stores the probability of each sentiment being the tweets true sentiment.
- double[] labelProbs = new double[SENTIMENT_LABELS.length];
- //tokenize the string
- List<String> tokens = getTokens(tweetText);
- int maxLabelIdx = 0;
- for(int i = 0; i < labelProbs.length; i++){
- //calculate the probability that the tweet has that sentiment.
- labelProbs[i] = calcLabelProb(tokens, i);
- System.out.println(i + " -> " + labelProbs[i] );
- //keep track of the label probability
- maxLabelIdx = labelProbs[i] > labelProbs[maxLabelIdx] ? i : maxLabelIdx;
- }
- //calc the confidence
- double conf = labelProbs[maxLabelIdx];
- labelProbs[maxLabelIdx] = 0;
- conf -= sumVector(labelProbs);
-
- return new Classification(SENTIMENT_LABELS[maxLabelIdx], conf);
- }
-
- private int extractLabel(String tweetText){
- StringTokenizer tokens = new StringTokenizer(tweetText);
- while(tokens.hasMoreTokens()){
- String token = tokens.nextToken();
- if(HAPPY_SMILEY_SET.contains(token)){
- return 0;
- }
- else if(SAD_SMILEY_SET.contains(token)){
- return 1;
- }
- }
- return -1;
- }
-
- /**
- * This updates the classifier's probabilites for each word
- * with the new piece of text.
- * @param tokens
- * The tokens in the tweet.
- * @param sentIndex
- * The sentiment label.
- */
- private void updateClassifier(List<String> tokens, int sentIndex){
- for(String token : tokens){
- if(sentOccurs.containsKey(token)){
- sentOccurs.get(token)[sentIndex] ++ ;
- }
- else{
- //make a new array and put it
- Integer[] newArray = {0, 0};
- newArray[sentIndex] ++;
- sentOccurs.put(token, newArray);
- }
- }
- //update the overall document count
- sentCount[sentIndex]++;
- }
-
- /**
- * The probability of the tweet having a given label.
- * @param tokens
- * The tokens in the tweet.
- * @param sentIndex
- * The probability we are testing.
- * @return
- * The probability the tweet has the class label indicated by "sentIndex".
- */
- private double calcLabelProb(List<String> tokens, int sentIndex){
-
- //calculate the class probabilities
- double[] pClass = new double[SENTIMENT_LABELS.length];
- int cSum = sumVector(sentCount);
- int totalWordCount = 0;
-
- for(int i = 0; i < sentCount.length; i++){
- pClass[i] = sentCount[i] * 1.0 / cSum;
- }
-
- for(String word : sentOccurs.keySet()){
- Integer[] wordCt = sentOccurs.get(word);
- totalWordCount = sumVector(wordCt);
- }
-
-
- double p = 1.0;
- boolean foundOne = false;
- for(String token : tokens){
- if(sentOccurs.containsKey(token)){
- foundOne = true;
- Integer[] probs = sentOccurs.get(token);
- double pWordGivenClass = probs[sentIndex] / (double)(sumVector(probs));
- double pWord = sumVector(probs) / totalWordCount;
- p *= pWordGivenClass * pClass[sentIndex] / pWord;
- }
- }
- return foundOne ? p : 0.0;
- }
-
- /**
- * Helper function to sum the values in a 1D array.
- * @param vector
- * The 1D array to sum.
- * @return
- * The sum.
- */
- private double sumVector(double[] vector){
- double sum = 0.0;
- for(double d : vector) sum += d;
- return sum;
- }
-
- /**
- * Helper function to sum the values in a 1D array.
- * @param vector
- * The 1D array to sum.
- * @return
- * The sum.
- */
- private int sumVector(Integer[] vector){
- int sum = 0;
- for(int d : vector) sum += d;
- return sum;
- }
-}
diff --git a/src/Chapter4/classification/bayes/StopwordsList.java b/src/Chapter4/classification/bayes/StopwordsList.java
deleted file mode 100644
index 06edd5a..0000000
--- a/src/Chapter4/classification/bayes/StopwordsList.java
+++ /dev/null
@@ -1,10 +0,0 @@
-package Chapter4.classification.bayes;
-
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-
-public class StopwordsList {
- private static final String[] stopwords = {"a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "can", "did", "do", "does", "doing", "don", "down", "during", "each", "few", "for", "from", "further", "get", "had", "has", "have", "having", "he", "her", "here", "hers", "herself", "him", "himself", "his", "how", "i", "if", "im", "i'm", "in", "into", "is", "it", "its", "itself", "just", "me", "more", "most", "my", "myself", "no", "nor", "not", "now", "of", "off", "on", "once", "only", "or", "other", "our", "ours", "ourselves", "out", "over", "own", "rt", "s", "same", "she", "should", "so", "some", "such", "t", "than", "that", "the", "their", "theirs", "them", "themselves", "then", "there", "these", "they", "this", "those", "through", "to", "too", "under", "until", "up", "us", "very", "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom", "why", "will", "with", "you", "your", "yours", "yourself", "yourselves"};
- public static final Set<String> stopwordsSet = new HashSet<String>(Arrays.asList(stopwords));
-}
diff --git a/src/Chapter4/classification/bayes/TestNBC.java b/src/Chapter4/classification/bayes/TestNBC.java
deleted file mode 100644
index 7e0e743..0000000
--- a/src/Chapter4/classification/bayes/TestNBC.java
+++ /dev/null
@@ -1,49 +0,0 @@
-package Chapter4.classification.bayes;
-
-import java.io.FileReader;
-import java.io.IOException;
-
-import com.google.gson.JsonObject;
-import com.google.gson.JsonStreamParser;
-
-public class TestNBC {
- public static void main(String[] args){
-
- String filename = args.length >= 1 ? args[0] : "owsemoticons.json";
-
- //initialize the sentiment classifier
- NaiveBayesSentimentClassifier nbsc = new NaiveBayesSentimentClassifier();
-
- try {
- //read the file, and train each document
- JsonStreamParser parser = new JsonStreamParser(new FileReader(filename));
- JsonObject elem;
- String text;
- while (parser.hasNext()) {
- elem = parser.next().getAsJsonObject();
- text = elem.get("text").getAsString();
- nbsc.trainInstance(text);
- }
-
- //print out the positive and negative dictionary
- System.out.println("=== Positive Dictionary ===");
- System.out.println(nbsc.printWordOccurs(0, 25));
- System.out.println("=== Negative Dictionary ===");
- System.out.println(nbsc.printWordOccurs(1, 25));
-
- //now go through and classify each line as positive or negative
-// parser = new JsonStreamParser(new FileReader(filename));
-// while (parser.hasNext()) {
-// elem = parser.next().getAsJsonObject();
-// text = elem.get("text").getAsString();
-// Classification c = nbsc.classify(text);
-// System.out.println(c + " -> " + text);
-// }
- System.out.println(nbsc.classify("I love new york"));
-
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- }
-}
diff --git a/src/Chapter4/classification/bayes/WordCountPair.java b/src/Chapter4/classification/bayes/WordCountPair.java
deleted file mode 100644
index b96be92..0000000
--- a/src/Chapter4/classification/bayes/WordCountPair.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package Chapter4.classification.bayes;
-
-public class WordCountPair implements Comparable<WordCountPair>{
-
-
- private String word;
- private double count;
-
- public WordCountPair(String word, double count){
- this.word = word;
- this.count = count;
- }
-
- public int compareTo(WordCountPair arg0) {
- return arg0.count - count < 0 ? -1 : 1;
- }
-
- public String getWord() {
- return word;
- }
-
- public void setWord(String word) {
- this.word = word;
- }
-
- public double getCount() {
- return count;
- }
-
- public void setCount(int count) {
- this.count = count;
- }
-
-}
diff --git a/src/Chapter4/graph/visualization/SimpleGraphViewer.java b/src/Chapter4/graph/visualization/SimpleGraphViewer.java
deleted file mode 100644
index 7cb46e4..0000000
--- a/src/Chapter4/graph/visualization/SimpleGraphViewer.java
+++ /dev/null
@@ -1,86 +0,0 @@
-package chapter4.graph.visualization;
-
-import Chapter4.util.TweetFileToGraph;
-import java.awt.Dimension;
-import java.awt.Shape;
-import java.awt.geom.Ellipse2D;
-import java.io.File;
-
-import javax.swing.JFrame;
-
-import org.apache.commons.collections15.Transformer;
-import GraphElements.RetweetEdge;
-import GraphElements.UserNode;
-import edu.uci.ics.jung.algorithms.layout.KKLayout;
-import edu.uci.ics.jung.algorithms.layout.Layout;
-import edu.uci.ics.jung.algorithms.scoring.EigenvectorCentrality;
-import edu.uci.ics.jung.graph.DirectedGraph;
-import edu.uci.ics.jung.visualization.BasicVisualizationServer;
-
-public class SimpleGraphViewer {
- public static void main(String[] args){
-
- File tweetFile;
-
- if(args.length > 0){
- tweetFile = new File(args[0]);
- }
- else{
- tweetFile = new File("synthetic_retweet_network.json");
- }
-
- DirectedGraph<UserNode, RetweetEdge> retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile);
-
- /*
- * Converts a node to its string representation
- */
- Transformer<UserNode, String> stringer = new Transformer<UserNode, String>(){
- public String transform(UserNode n){
- return n.toString();
- }
- };
-
- /*
- * Calculate the centrality
- */
- //calculate the betweenness centrality
-// final InDegreeScorer<UserNode> centralityScore = new InDegreeScorer<UserNode>(retweetGraph);
-// final BetweennessCentrality<UserNode, RetweetEdge> centralityScore = new BetweennessCentrality<UserNode, RetweetEdge>(retweetGraph);
-// final PageRank<UserNode, RetweetEdge> centralityScore = new PageRank<UserNode, RetweetEdge>(retweetGraph, 0.85);
- final EigenvectorCentrality<UserNode, RetweetEdge> centralityScore = new EigenvectorCentrality<UserNode, RetweetEdge>(retweetGraph);
- centralityScore.evaluate();
-
- double centralityMax = 0.0f;
- for(UserNode node : retweetGraph.getVertices()){
- centralityMax = Math.max(centralityMax, centralityScore.getVertexScore(node));
- }
- final double centralityMaxFinal = centralityMax;
-
- /*
- * Sizes a node by some centrality measure
- */
- Transformer<UserNode, Shape> shaper = new Transformer<UserNode, Shape>(){
- public Shape transform(UserNode n){
- System.out.println("User: " + n.getUsername() + " Cent: " + centralityScore.getVertexScore(n) + " Max: " + centralityMaxFinal);
- double radius = 50 * (centralityScore.getVertexScore(n)) / centralityMaxFinal;
- radius = Math.max(radius, 5.0f);
- float fRadius = (float) radius;
- return new Ellipse2D.Float(-fRadius/2, -fRadius/2, fRadius, fRadius);
- }
- };
-
- Layout<UserNode, RetweetEdge> layout = new KKLayout<UserNode, RetweetEdge>(retweetGraph);
- layout.setSize(new Dimension(500, 500));
-
- BasicVisualizationServer<UserNode, RetweetEdge> vv = new BasicVisualizationServer<UserNode, RetweetEdge>(layout);
- vv.setPreferredSize(new Dimension(550, 550));
- vv.getRenderContext().setVertexLabelTransformer(stringer);
- vv.getRenderContext().setVertexShapeTransformer(shaper);
-
- JFrame jframe = new JFrame("Simple Graph View");
- jframe.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
- jframe.getContentPane().add(vv);
- jframe.pack();
- jframe.setVisible(true);
- }
-}
diff --git a/src/Chapter4/tweetlda/LDA.java b/src/Chapter4/tweetlda/LDA.java
deleted file mode 100644
index ca7f9a3..0000000
--- a/src/Chapter4/tweetlda/LDA.java
+++ /dev/null
@@ -1,89 +0,0 @@
-package tweetlda;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.TreeSet;
-import java.util.regex.Pattern;
-
-import org.json.JSONObject;
-
-import cc.mallet.pipe.CharSequence2TokenSequence;
-import cc.mallet.pipe.CharSequenceLowercase;
-import cc.mallet.pipe.Pipe;
-import cc.mallet.pipe.SerialPipes;
-import cc.mallet.pipe.TokenSequence2FeatureSequence;
-import cc.mallet.pipe.TokenSequenceRemoveStopwords;
-import cc.mallet.pipe.iterator.StringArrayIterator;
-import cc.mallet.topics.ParallelTopicModel;
-import cc.mallet.types.Alphabet;
-import cc.mallet.types.IDSorter;
-import cc.mallet.types.InstanceList;
-
-public class LDA {
-
- private static final String STOP_WORDS = "stopwords.txt";
- private static final int ITERATIONS = 100;
- private static final int THREADS = 4;
- private static final int NUM_TOPICS = 25;
- private static final int NOM_WORDS_TO_ANALYZE = 25;
-
- public static void main(String[] args) throws Exception {
- ArrayList<Pipe> pipeList = new ArrayList<Pipe>();
- File stopwords = new File(STOP_WORDS);
-
- String inputFileName = args.length >= 1 ? args[0] : "testows.json";
-
- File inputFile = new File(inputFileName);
-
- // Lowercase, tokenize, remove stopwords, stem, and convert to features
- pipeList.add((Pipe) new CharSequenceLowercase());
- pipeList.add((Pipe) new CharSequence2TokenSequence(Pattern.compile("\\p{L}[\\p{L}\\p{P}]+\\p{L}")));
- pipeList.add((Pipe) new TokenSequenceRemoveStopwords(stopwords, "UTF-8", false, false, false));
- pipeList.add((Pipe) new PorterStemmer());
- pipeList.add((Pipe) new TokenSequence2FeatureSequence());
-
- InstanceList instances = new InstanceList(new SerialPipes(pipeList));
-
- BufferedReader fileReader = new BufferedReader(new FileReader(inputFile));
- LinkedList<String> textList = new LinkedList<String>();
- String line;
- while((line = fileReader.readLine()) != null){
- JSONObject elem = new JSONObject(line);
- if(elem.has("text")){
- textList.add(elem.getString("text"));
- }
- }
-
- instances.addThruPipe(new StringArrayIterator(textList.toArray(new String[textList.size()])));
-
- ParallelTopicModel model = new ParallelTopicModel(NUM_TOPICS);
- model.addInstances(instances);
- model.setNumThreads(THREADS);
- model.setNumIterations(ITERATIONS);
- model.estimate();
-
- // The data alphabet maps word IDs to strings
- Alphabet dataAlphabet = instances.getDataAlphabet();
-
- int topicIdx=0;
- StringBuilder sb;
- for (TreeSet<IDSorter> set : model.getSortedWords()) {
- sb = new StringBuilder().append(topicIdx);
- sb.append(" - ");
- int j = 0;
- double sum = 0.0;
- for (IDSorter s : set) {
- sum += s.getWeight();
- }
- for (IDSorter s : set) {
- sb.append(dataAlphabet.lookupObject(s.getID())).append(":").append(s.getWeight() / sum).append(", ");
- if (++j >= NOM_WORDS_TO_ANALYZE) break;
- }
- System.out.println(sb.append("\n").toString());
- topicIdx++;
- }
- }
-}
diff --git a/src/Chapter4/tweetlda/PorterStemmer.java b/src/Chapter4/tweetlda/PorterStemmer.java
deleted file mode 100644
index 1a7149e..0000000
--- a/src/Chapter4/tweetlda/PorterStemmer.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package tweetlda;
-
-import cc.mallet.pipe.Pipe;
-import cc.mallet.types.Instance;
-import cc.mallet.types.TokenSequence;
-
-public class PorterStemmer extends Pipe {
-
- private static final long serialVersionUID = 154100332101873830L;
-
- public Instance pipe(Instance carrier){
- TokenSequence ts = (TokenSequence) carrier.getData();
- String word;
- Stemmer s;
-
- for(int i = 0; i < ts.size(); i++){
- word = ts.get(i).getText();
- //stem the word
- s = new Stemmer();
- for(char ch : word.toCharArray()){
- if(Character.isLetter(ch)){
- s.add(ch);
- }
- }
- s.stem();
- ts.get(i).setText(s.toString());
- }
- carrier.setData(ts);
-
- return carrier;
- }
-
-}
diff --git a/src/Chapter4/tweetlda/Stemmer.java b/src/Chapter4/tweetlda/Stemmer.java
deleted file mode 100644
index f06dfc6..0000000
--- a/src/Chapter4/tweetlda/Stemmer.java
+++ /dev/null
@@ -1,428 +0,0 @@
-package tweetlda;
-
-
-/*
-
- Porter stemmer in Java. The original paper is in
-
- Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
- no. 3, pp 130-137,
-
- See also http://www.tartarus.org/~martin/PorterStemmer
-
- History:
-
- Release 1
-
- Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
- The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
- is then out outside the bounds of b.
-
- Release 2
-
- Similarly,
-
- Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
- 'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
- b[j] is then outside the bounds of b.
-
- Release 3
-
- Considerably revised 4/9/00 in the light of many helpful suggestions
- from Brian Goetz of Quiotix Corporation (brian@quiotix.com).
-
- Release 4
-
-*/
-
-import java.io.*;
-
-/**
- * Stemmer, implementing the Porter Stemming Algorithm
- *
- * The Stemmer class transforms a word into its root form. The input
- * word can be provided a character at time (by calling add()), or at once
- * by calling one of the various stem(something) methods.
- */
-
-class Stemmer
-{ private char[] b;
- private int i, /* offset into b */
- i_end, /* offset to end of stemmed word */
- j, k;
- private static final int INC = 50;
- /* unit of size whereby b is increased */
- public Stemmer()
- { b = new char[INC];
- i = 0;
- i_end = 0;
- }
-
- /**
- * Add a character to the word being stemmed. When you are finished
- * adding characters, you can call stem(void) to stem the word.
- */
-
- public void add(char ch)
- { if (i == b.length)
- { char[] new_b = new char[i+INC];
- for (int c = 0; c < i; c++) new_b[c] = b[c];
- b = new_b;
- }
- b[i++] = ch;
- }
-
-
- /** Adds wLen characters to the word being stemmed contained in a portion
- * of a char[] array. This is like repeated calls of add(char ch), but
- * faster.
- */
-
- public void add(char[] w, int wLen)
- { if (i+wLen >= b.length)
- { char[] new_b = new char[i+wLen+INC];
- for (int c = 0; c < i; c++) new_b[c] = b[c];
- b = new_b;
- }
- for (int c = 0; c < wLen; c++) b[i++] = w[c];
- }
-
- /**
- * After a word has been stemmed, it can be retrieved by toString(),
- * or a reference to the internal buffer can be retrieved by getResultBuffer
- * and getResultLength (which is generally more efficient.)
- */
- public String toString() { return new String(b,0,i_end); }
-
- /**
- * Returns the length of the word resulting from the stemming process.
- */
- public int getResultLength() { return i_end; }
-
- /**
- * Returns a reference to a character buffer containing the results of
- * the stemming process. You also need to consult getResultLength()
- * to determine the length of the result.
- */
- public char[] getResultBuffer() { return b; }
-
- /* cons(i) is true <=> b[i] is a consonant. */
-
- private final boolean cons(int i)
- { switch (b[i])
- { case 'a': case 'e': case 'i': case 'o': case 'u': return false;
- case 'y': return (i==0) ? true : !cons(i-1);
- default: return true;
- }
- }
-
- /* m() measures the number of consonant sequences between 0 and j. if c is
- a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
- presence,
-
- <c><v> gives 0
- <c>vc<v> gives 1
- <c>vcvc<v> gives 2
- <c>vcvcvc<v> gives 3
- ....
- */
-
- private final int m()
- { int n = 0;
- int i = 0;
- while(true)
- { if (i > j) return n;
- if (! cons(i)) break; i++;
- }
- i++;
- while(true)
- { while(true)
- { if (i > j) return n;
- if (cons(i)) break;
- i++;
- }
- i++;
- n++;
- while(true)
- { if (i > j) return n;
- if (! cons(i)) break;
- i++;
- }
- i++;
- }
- }
-
- /* vowelinstem() is true <=> 0,...j contains a vowel */
-
- private final boolean vowelinstem()
- { int i; for (i = 0; i <= j; i++) if (! cons(i)) return true;
- return false;
- }
-
- /* doublec(j) is true <=> j,(j-1) contain a double consonant. */
-
- private final boolean doublec(int j)
- { if (j < 1) return false;
- if (b[j] != b[j-1]) return false;
- return cons(j);
- }
-
- /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
- and also if the second c is not w,x or y. this is used when trying to
- restore an e at the end of a short word. e.g.
-
- cav(e), lov(e), hop(e), crim(e), but
- snow, box, tray.
-
- */
-
- private final boolean cvc(int i)
- { if (i < 2 || !cons(i) || cons(i-1) || !cons(i-2)) return false;
- { int ch = b[i];
- if (ch == 'w' || ch == 'x' || ch == 'y') return false;
- }
- return true;
- }
-
- private final boolean ends(String s)
- { int l = s.length();
- int o = k-l+1;
- if (o < 0) return false;
- for (int i = 0; i < l; i++) if (b[o+i] != s.charAt(i)) return false;
- j = k-l;
- return true;
- }
-
- /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
- k. */
-
- private final void setto(String s)
- { int l = s.length();
- int o = j+1;
- for (int i = 0; i < l; i++) b[o+i] = s.charAt(i);
- k = j+l;
- }
-
- /* r(s) is used further down. */
-
- private final void r(String s) { if (m() > 0) setto(s); }
-
- /* step1() gets rid of plurals and -ed or -ing. e.g.
-
- caresses -> caress
- ponies -> poni
- ties -> ti
- caress -> caress
- cats -> cat
-
- feed -> feed
- agreed -> agree
- disabled -> disable
-
- matting -> mat
- mating -> mate
- meeting -> meet
- milling -> mill
- messing -> mess
-
- meetings -> meet
-
- */
-
- private final void step1()
- { if (b[k] == 's')
- { if (ends("sses")) k -= 2; else
- if (ends("ies")) setto("i"); else
- if (b[k-1] != 's') k--;
- }
- if (ends("eed")) { if (m() > 0) k--; } else
- if ((ends("ed") || ends("ing")) && vowelinstem())
- { k = j;
- if (ends("at")) setto("ate"); else
- if (ends("bl")) setto("ble"); else
- if (ends("iz")) setto("ize"); else
- if (doublec(k))
- { k--;
- { int ch = b[k];
- if (ch == 'l' || ch == 's' || ch == 'z') k++;
- }
- }
- else if (m() == 1 && cvc(k)) setto("e");
- }
- }
-
- /* step2() turns terminal y to i when there is another vowel in the stem. */
-
- private final void step2() { if (ends("y") && vowelinstem()) b[k] = 'i'; }
-
- /* step3() maps double suffices to single ones. so -ization ( = -ize plus
- -ation) maps to -ize etc. note that the string before the suffix must give
- m() > 0. */
-
- private final void step3() { if (k == 0) return; /* For Bug 1 */ switch (b[k-1])
- {
- case 'a': if (ends("ational")) { r("ate"); break; }
- if (ends("tional")) { r("tion"); break; }
- break;
- case 'c': if (ends("enci")) { r("ence"); break; }
- if (ends("anci")) { r("ance"); break; }
- break;
- case 'e': if (ends("izer")) { r("ize"); break; }
- break;
- case 'l': if (ends("bli")) { r("ble"); break; }
- if (ends("alli")) { r("al"); break; }
- if (ends("entli")) { r("ent"); break; }
- if (ends("eli")) { r("e"); break; }
- if (ends("ousli")) { r("ous"); break; }
- break;
- case 'o': if (ends("ization")) { r("ize"); break; }
- if (ends("ation")) { r("ate"); break; }
- if (ends("ator")) { r("ate"); break; }
- break;
- case 's': if (ends("alism")) { r("al"); break; }
- if (ends("iveness")) { r("ive"); break; }
- if (ends("fulness")) { r("ful"); break; }
- if (ends("ousness")) { r("ous"); break; }
- break;
- case 't': if (ends("aliti")) { r("al"); break; }
- if (ends("iviti")) { r("ive"); break; }
- if (ends("biliti")) { r("ble"); break; }
- break;
- case 'g': if (ends("logi")) { r("log"); break; }
- } }
-
- /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
-
- private final void step4() { switch (b[k])
- {
- case 'e': if (ends("icate")) { r("ic"); break; }
- if (ends("ative")) { r(""); break; }
- if (ends("alize")) { r("al"); break; }
- break;
- case 'i': if (ends("iciti")) { r("ic"); break; }
- break;
- case 'l': if (ends("ical")) { r("ic"); break; }
- if (ends("ful")) { r(""); break; }
- break;
- case 's': if (ends("ness")) { r(""); break; }
- break;
- } }
-
- /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
-
- private final void step5()
- { if (k == 0) return; /* for Bug 1 */ switch (b[k-1])
- { case 'a': if (ends("al")) break; return;
- case 'c': if (ends("ance")) break;
- if (ends("ence")) break; return;
- case 'e': if (ends("er")) break; return;
- case 'i': if (ends("ic")) break; return;
- case 'l': if (ends("able")) break;
- if (ends("ible")) break; return;
- case 'n': if (ends("ant")) break;
- if (ends("ement")) break;
- if (ends("ment")) break;
- /* element etc. not stripped before the m */
- if (ends("ent")) break; return;
- case 'o': if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) break;
- /* j >= 0 fixes Bug 2 */
- if (ends("ou")) break; return;
- /* takes care of -ous */
- case 's': if (ends("ism")) break; return;
- case 't': if (ends("ate")) break;
- if (ends("iti")) break; return;
- case 'u': if (ends("ous")) break; return;
- case 'v': if (ends("ive")) break; return;
- case 'z': if (ends("ize")) break; return;
- default: return;
- }
- if (m() > 1) k = j;
- }
-
- /* step6() removes a final -e if m() > 1. */
-
- private final void step6()
- { j = k;
- if (b[k] == 'e')
- { int a = m();
- if (a > 1 || a == 1 && !cvc(k-1)) k--;
- }
- if (b[k] == 'l' && doublec(k) && m() > 1) k--;
- }
-
- /** Stem the word placed into the Stemmer buffer through calls to add().
- * Returns true if the stemming process resulted in a word different
- * from the input. You can retrieve the result with
- * getResultLength()/getResultBuffer() or toString().
- */
- public void stem()
- { k = i - 1;
- if (k > 1) { step1(); step2(); step3(); step4(); step5(); step6(); }
- i_end = k+1; i = 0;
- }
-
- /** Test program for demonstrating the Stemmer. It reads text from a
- * a list of files, stems each word, and writes the result to standard
- * output. Note that the word stemmed is expected to be in lower case:
- * forcing lower case must be done outside the Stemmer class.
- * Usage: Stemmer file-name file-name ...
- */
- public static void main(String[] args)
- {
- char[] w = new char[501];
- Stemmer s = new Stemmer();
- for (int i = 0; i < args.length; i++)
- try
- {
- FileInputStream in = new FileInputStream(args[i]);
-
- try
- { while(true)
-
- { int ch = in.read();
- if (Character.isLetter((char) ch))
- {
- int j = 0;
- while(true)
- { ch = Character.toLowerCase((char) ch);
- w[j] = (char) ch;
- if (j < 500) j++;
- ch = in.read();
- if (!Character.isLetter((char) ch))
- {
- /* to test add(char ch) */
- for (int c = 0; c < j; c++) s.add(w[c]);
-
- /* or, to test add(char[] w, int j) */
- /* s.add(w, j); */
-
- s.stem();
- { String u;
-
- /* and now, to test toString() : */
- u = s.toString();
-
- /* to test getResultBuffer(), getResultLength() : */
- /* u = new String(s.getResultBuffer(), 0, s.getResultLength()); */
-
- System.out.print(u);
- }
- break;
- }
- }
- }
- if (ch < 0) break;
- System.out.print((char)ch);
- }
- }
- catch (IOException e)
- { System.out.println("error reading " + args[i]);
- break;
- }
- }
- catch (FileNotFoundException e)
- { System.out.println("file " + args[i] + " not found");
- break;
- }
- }
-}
diff --git a/src/Chapter4/util/BetweennessScorer.java b/src/Chapter4/util/BetweennessScorer.java
deleted file mode 100644
index 0926d34..0000000
--- a/src/Chapter4/util/BetweennessScorer.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package util;
-
-import GraphElements.RetweetEdge;
-import GraphElements.UserNode;
-import edu.uci.ics.jung.algorithms.scoring.VertexScorer;
-import edu.uci.ics.jung.algorithms.shortestpath.DijkstraShortestPath;
-import edu.uci.ics.jung.graph.Graph;
-import edu.uci.ics.jung.graph.Hypergraph;
-
-public class BetweennessScorer implements VertexScorer<UserNode, Double>{
-
- public BetweennessScorer(Hypergraph<UserNode, RetweetEdge> graph){
- /*
- * Step 1: Calculate the shortest path between each pair of nodes.
- */
- DijkstraShortestPath<UserNode, RetweetEdge> paths = new DijkstraShortestPath<UserNode, RetweetEdge>((Graph<UserNode, RetweetEdge>) graph);
-// paths.getDistance(source, target);
- }
-
- public Double getVertexScore(UserNode arg0) {
- // TODO Auto-generated method stub
- return null;
- }
-
-}
diff --git a/src/Chapter4/util/EigenVectorScorer.java b/src/Chapter4/util/EigenVectorScorer.java
deleted file mode 100644
index da0c1a8..0000000
--- a/src/Chapter4/util/EigenVectorScorer.java
+++ /dev/null
@@ -1,64 +0,0 @@
-package Chapter4.util;
-
-import GraphElements.RetweetEdge;
-import GraphElements.UserNode;
-import cern.colt.matrix.DoubleMatrix2D;
-import cern.colt.matrix.impl.SparseDoubleMatrix2D;
-import cern.colt.matrix.linalg.EigenvalueDecomposition;
-import edu.uci.ics.jung.algorithms.scoring.VertexScorer;
-import edu.uci.ics.jung.graph.Hypergraph;
-
-/**
- * JUNG vertex scorer that reports the eigenvector centrality of each node.
- * The whole decomposition is done once, in the constructor; scoring a vertex
- * afterwards is a lookup in the stored eigenvector matrix.
- */
-public class EigenVectorScorer implements VertexScorer<UserNode, Double> {
-
-    // Vertices in the order used for the rows/columns of the adjacency matrix.
-    private UserNode[] users;
-    // Eigenvector matrix returned by the decomposition (getV()).
-    private DoubleMatrix2D eigenVectors;
-    // Column of eigenVectors that belongs to the largest eigenvalue.
-    private int dominantEigenvectorIdx;
-
-    /**
-     * Decomposes the adjacency matrix of the graph and remembers which
-     * eigenvector belongs to the largest eigenvalue.
-     *
-     * @param graph the retweet network to analyse
-     */
-    public EigenVectorScorer(Hypergraph<UserNode, RetweetEdge> graph){
-        users = new UserNode[graph.getVertexCount()];
-        graph.getVertices().toArray(users);
-        int n = users.length;
-
-        /* Step 1: build the adjacency matrix.
-         *
-         * The matrix has N rows and N columns, where N is the number of nodes.
-         * Entry (i, j) is 1 when the graph contains the edge
-         * RetweetEdge(users[i], users[j]) and 0 otherwise.
-         */
-        SparseDoubleMatrix2D adjacency = new SparseDoubleMatrix2D(n, n);
-        for(int row = 0; row < n; row++){
-            for(int col = 0; col < n; col++){
-                boolean linked = graph.containsEdge(new RetweetEdge(users[row], users[col]));
-                adjacency.setQuick(row, col, linked ? 1 : 0);
-            }
-        }
-
-        /* Step 2: find the principal eigenvector.
-         * For background on eigen-decomposition see
-         * http://mathworld.wolfram.com/EigenDecomposition.html
-         */
-        EigenvalueDecomposition decomposition = new EigenvalueDecomposition(adjacency);
-        DoubleMatrix2D eigenValues = decomposition.getD();
-        eigenVectors = decomposition.getV();
-
-        // Eigenvalues are read from the diagonal of D; keep the index of the largest.
-        dominantEigenvectorIdx = 0;
-        for(int k = 1; k < eigenValues.columns(); k++){
-            double best = eigenValues.getQuick(dominantEigenvectorIdx, dominantEigenvectorIdx);
-            if(eigenValues.getQuick(k, k) > best){
-                dominantEigenvectorIdx = k;
-            }
-        }
-    }
-
-    /**
-     * @param arg0 the vertex to score
-     * @return the absolute value of the vertex's component in the dominant
-     *         eigenvector, or {@code null} when the vertex was not in the graph
-     */
-    public Double getVertexScore(UserNode arg0) {
-        int position = 0;
-        while(position < users.length){
-            if(users[position].equals(arg0)){
-                return Math.abs(eigenVectors.getQuick(position, dominantEigenvectorIdx));
-            }
-            position++;
-        }
-        return null;
-    }
-
-}
diff --git a/src/Chapter4/util/InDegreeScorer.java b/src/Chapter4/util/InDegreeScorer.java
deleted file mode 100644
index 014adc6..0000000
--- a/src/Chapter4/util/InDegreeScorer.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package Chapter4.util;
-
-import edu.uci.ics.jung.algorithms.scoring.VertexScorer;
-import edu.uci.ics.jung.graph.Hypergraph;
-
-/**
- * JUNG vertex scorer whose score for a node is the number of edges
- * that point into it (in-degree centrality).
- */
-public class InDegreeScorer<T> implements VertexScorer<T, Double>{
-
-    // Graph whose vertices are scored.
-    private Hypergraph<T, ?> graph;
-
-    /**
-     * Creates a scorer bound to one graph.
-     * @param graph
-     *            the graph whose vertices will be scored
-     */
-    public InDegreeScorer(Hypergraph<T, ?> graph){
-        this.graph = graph;
-    }
-
-    /**
-     * @param node the vertex to score
-     * @return the number of incoming edges of the vertex, as a Double
-     */
-    public Double getVertexScore(T node) {
-        int inDegree = graph.getInEdges(node).size();
-        return Double.valueOf(inDegree);
-    }
-} \ No newline at end of file
diff --git a/src/Chapter4/util/TweetFileProcessor.java b/src/Chapter4/util/TweetFileProcessor.java
deleted file mode 100644
index 9b6b99c..0000000
--- a/src/Chapter4/util/TweetFileProcessor.java
+++ /dev/null
@@ -1,76 +0,0 @@
-package Chapter4.util;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.UnsupportedEncodingException;
-import java.util.Iterator;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-/**
- * Iterates over a UTF-8 text file that holds one JSON-encoded tweet per line.
- * One line is always read ahead so that {@link #hasNext()} can answer without I/O.
- * The underlying reader is closed as soon as the end of the file (or a read
- * error) is reached.
- */
-public class TweetFileProcessor implements Iterator<JSONObject>{
-
-    // Reader over the tweet file; null when the file could not be opened.
-    protected BufferedReader fileBuffer;
-    // True once there is no further line to hand out.
-    protected boolean endOfFile;
-    // Line that the next call to next() will parse.
-    protected String nextLine;
-
-    /**
-     * Opens the file and reads the first line ahead.
-     * Any failure is printed and leaves the iterator empty.
-     *
-     * @param f file with one JSON object per line
-     */
-    public TweetFileProcessor(File f){
-
-        endOfFile = false;
-
-        BufferedReader br = null;
-        try {
-            br = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
-            nextLine = br.readLine();
-            // An empty file yields null straight away: there is nothing to iterate.
-            if(nextLine == null){
-                endOfFile = true;
-            }
-        } catch (UnsupportedEncodingException e) {
-            e.printStackTrace();
-            endOfFile = true;
-        } catch (FileNotFoundException e) {
-            e.printStackTrace();
-            endOfFile = true;
-        } catch (IOException e) {
-            e.printStackTrace();
-            endOfFile = true;
-        }
-        fileBuffer = br;
-        if(endOfFile){
-            closeReader();
-        }
-    }
-
-    /**
-     * @return true while at least one more line is available
-     */
-    @Override
-    public boolean hasNext() {
-        return !endOfFile;
-    }
-
-    /**
-     * Parses the line read ahead and advances to the following one.
-     *
-     * @return the parsed tweet, or {@code null} when the line is not valid JSON
-     *         (the parse error is logged)
-     * @throws java.util.NoSuchElementException when the file is exhausted
-     */
-    @Override
-    public JSONObject next() {
-        if(endOfFile){
-            throw new java.util.NoSuchElementException("no more tweets in file");
-        }
-        JSONObject obj = null;
-        try {
-            obj = new JSONObject(nextLine);
-        } catch (JSONException ex) {
-            Logger.getLogger(TweetFileProcessor.class.getName()).log(Level.SEVERE, null, ex);
-        }
-        try {
-            nextLine = fileBuffer.readLine();
-        } catch (IOException e) {
-            e.printStackTrace();
-            // A failed read ends the iteration; otherwise the same line would be
-            // returned again and again.
-            nextLine = null;
-        }
-        if(nextLine == null){
-            endOfFile = true;
-            closeReader();
-        }
-        return obj;
-    }
-
-    /**
-     * Removal is not supported.
-     */
-    @Override
-    public void remove() throws UnsupportedOperationException{
-        throw new UnsupportedOperationException();
-    }
-
-    // Closes the reader if one was opened; a failure to close is only reported.
-    private void closeReader(){
-        if(fileBuffer != null){
-            try {
-                fileBuffer.close();
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-        }
-    }
-}
diff --git a/src/Chapter4/util/TweetFileToGraph.java b/src/Chapter4/util/TweetFileToGraph.java
deleted file mode 100644
index 6cf2e3a..0000000
--- a/src/Chapter4/util/TweetFileToGraph.java
+++ /dev/null
@@ -1,77 +0,0 @@
-package Chapter4.util;
-
-import java.io.File;
-
-import GraphElements.RetweetEdge;
-import GraphElements.UserNode;
-
-import edu.uci.ics.jung.graph.DirectedGraph;
-import edu.uci.ics.jung.graph.DirectedSparseGraph;
-import edu.uci.ics.jung.graph.util.EdgeType;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-/**
- * Some basic functionality to convert files collected
- * in Chapter 2 to JUNG graphs.
- */
-public class TweetFileToGraph {
-
-    /**
-     * Builds a directed retweet network from a file of tweets.
-     * Every retweet adds both users as vertices and an edge from the
-     * retweeting user to the retweeted user; a repeated retweet between the
-     * same users increments the count on the existing edge instead.
-     * Lines that cannot be parsed, tweets without an author and tweets that
-     * are not retweets are skipped.
-     *
-     * @param tweetFile file with one JSON tweet per line
-     * @return the retweet graph (empty when nothing usable was found)
-     */
-    public static DirectedGraph<UserNode, RetweetEdge> getRetweetNetwork(File tweetFile){
-
-        JSONObject tmp;
-
-        TweetFileProcessor tfp = new TweetFileProcessor(tweetFile);
-        DirectedSparseGraph<UserNode, RetweetEdge> dsg = new DirectedSparseGraph<UserNode, RetweetEdge>();
-
-        while (tfp.hasNext()){
-            tmp = tfp.next();
-            if(tmp==null)
-            {
-                continue;
-            }
-            //get the author
-            String user=null;
-            try {
-                user = tmp.getJSONObject("user").getString("screen_name");
-            } catch (JSONException ex) {
-                Logger.getLogger(TweetFileToGraph.class.getName()).log(Level.SEVERE, null, ex);
-            }
-            if(user==null)
-            {
-                continue;
-            }
-            //get the retweeted user
-            try{
-                JSONObject retweet = tmp.getJSONObject("retweeted_status");
-                String retweeted_user = retweet.getJSONObject("user").getString("screen_name");
-
-                //make an edge or increment the weight if it exists.
-                UserNode toUser = new UserNode(retweeted_user);
-                UserNode fromUser = new UserNode(user);
-
-                dsg.addVertex(toUser);
-                dsg.addVertex(fromUser);
-
-                RetweetEdge edge = new RetweetEdge(toUser, fromUser);
-
-                if(dsg.containsEdge(edge)){
-                    dsg.findEdge(fromUser, toUser).incrementRTCount();
-                }
-                else{
-                    // Insert the edge exactly once. The former second, unconditional
-                    // addEdge call re-submitted an edge the graph already held.
-                    dsg.addEdge(edge, fromUser, toUser, EdgeType.DIRECTED);
-                }
-            }
-            catch(JSONException ex){
-                //the tweet is not a retweet. this is not a problem.
-            }
-        }
-
-        return dsg;
-    }
-}
diff --git a/src/Chapter5/network/CreateD3Network.java b/src/Chapter5/network/CreateD3Network.java
deleted file mode 100644
index d4c25af..0000000
--- a/src/Chapter5/network/CreateD3Network.java
+++ /dev/null
@@ -1,716 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter5.network;
-
-
-import Chapter5.support.HashTagDS;
-import Chapter5.support.NetworkNode;
-import Chapter5.support.NodeIDComparator;
-import Chapter5.support.NodeSizeComparator;
-import Chapter5.support.ToNodeInfo;
-import Chapter5.support.Tweet;
-import java.io.BufferedReader;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-import utils.TextUtils;
-
-/**
- *
- * @author shamanth
- */
-public class CreateD3Network
-{
- static final String DEF_INFILENAME = "ows.json";
- private String RTPATTERN = "rt @[_a-zA-Z0-9]+";
- private final int DEFAULT_NODE_SIZE = 0;
-// private final int NODE_COUNT_LIMIT = 1;
- //private final String[] node_color_scheme = new String[]{"#FFFFD9","#EDF8B1","#C7E9B4","#7FCDBB","#41B6C4","#1D91C0","#225EA8","#253494","#081D58"};
- //private final String[] node_color_scheme = new String[]{"#A6BDDB","#74A9CF","#3690C0","#0570B0","#045A8D","#023858"};
-
- /**
-  * Extracts the users who have been retweeted, using RTPATTERN
-  * (matched case-insensitively) to find "rt @user" mentions.
-  * @param text the tweet text to scan
-  * @return the lower-cased user names, without the "rt @" prefix, in the
-  *         order in which they appear; empty when there is no match
-  */
- public ArrayList<String> GetRTUsers(String text)
- {
-     Pattern p = Pattern.compile(RTPATTERN, Pattern.CASE_INSENSITIVE);
-     Matcher m = p.matcher(text);
-     ArrayList<String> rtusers = new ArrayList<String>();
-     while(m.find())
-     {
-         String mention = m.group();
-         // Cut at the '@' instead of replacing the literal prefixes "rt @"/"RT @":
-         // the match is case-insensitive, so "Rt @" and "rT @" occur as well and
-         // used to leave the prefix inside the user name.
-         String nuser = mention.substring(mention.indexOf('@')+1);
-         rtusers.add(nuser.toLowerCase());
-     }
-     return rtusers;
- }
-
- /**
-  * Picks the category a tweet belongs to. Every tag of a category that occurs
-  * in the lower-cased tweet text is one vote for that category; the category
-  * with the most votes wins.
-  * @param tweet the tweet text
-  * @param usercategories the categories, each with its list of tags
-  * @return the index of the winning category in usercategories, or 0 when no
-  *         tag matched at all
-  */
- public int GetCategory(String tweet, HashTagDS[] usercategories)
- {
-     HashMap<Integer,Integer> votes = new HashMap<Integer,Integer>();
-     String lowered = tweet.toLowerCase();
-     for(int idx=0;idx<usercategories.length;idx++)
-     {
-         for(String tag:usercategories[idx].tags)
-         {
-             if(!lowered.contains(tag))
-             {
-                 continue;
-             }
-             Integer sofar = votes.get(idx);
-             votes.put(idx, sofar==null ? 1 : sofar+1);
-         }
-     }
-     //a tweet without any matching tag falls into the first category
-     int winner = 0;
-     int winnervotes = 0;
-     for(int candidate:votes.keySet())
-     {
-         int count = votes.get(candidate);
-         if(count>winnervotes)
-         {
-             winnervotes = count;
-             winner = candidate;
-         }
-     }
-     return winner;
- }
-
- /**
-  * Converts the input jsonobject containing category descriptions to an array for processing.
-  * Each key of the object is a group name; its value is an object with a
-  * "color" string and an "hts" array of tags.
-  * @param hashtagcoll JSONObject containing the list of hashtags, color, and the topic information
-  * @return An array with one entry per group that could be parsed. The array
-  *         is empty when hashtagcoll is null; parsing stops at the first
-  *         malformed group (the error is printed) and only the groups read so
-  *         far are returned.
-  */
- public HashTagDS[] ConvertJSONArrayToArray(JSONObject hashtagcoll)
- {
-     // Check for null before touching the object; the old check came after length().
-     if(hashtagcoll==null)
-     {
-         return new HashTagDS[0];
-     }
-     HashTagDS[] hashtags = new HashTagDS[hashtagcoll.length()];
-     int j=0;
-     try{
-         Iterator keyit = hashtagcoll.keys();
-         while(keyit.hasNext())
-         {
-             String groupname = (String) keyit.next();
-             HashTagDS ht = new HashTagDS();
-             JSONObject tags = (JSONObject) hashtagcoll.get(groupname);
-             // Store the key itself; keyit.toString() is the iterator's description.
-             ht.groupname = groupname;
-             ht.color = tags.getString("color");
-             JSONArray tagjson = tags.getJSONArray("hts");
-             ht.tags = new String[tagjson.length()];
-             for(int i=0;i<tagjson.length();i++)
-             {
-                 ht.tags[i] = tagjson.getString(i);
-             }
-             hashtags[j++] = ht;
-         }
-     }catch(JSONException ex)
-     {
-         ex.printStackTrace();
-     }
-     if(j<hashtags.length)
-     {
-         // Drop the unfilled slots so callers never meet a null entry.
-         HashTagDS[] parsed = new HashTagDS[j];
-         System.arraycopy(hashtags, 0, parsed, 0, j);
-         hashtags = parsed;
-     }
-     return hashtags;
- }
-
- /**
-  * Decides the category of a node from the text of its tweets: every tweet
-  * is classified with GetCategory and the category that occurs most often wins.
-  * @param tnfs the node whose tweets (tnfs.data) are classified
-  * @param hashtagarray the categories to choose from
-  * @return the index of the most frequent category, or -1 when the node has no tweets
-  */
- public int GetMajorityTopicColor(NetworkNode tnfs,HashTagDS[] hashtagarray)
- {
-     HashMap<Integer,Integer> tally = new HashMap<Integer,Integer>();
-     for(String tweet:tnfs.data)
-     {
-         int category = this.GetCategory(tweet, hashtagarray);
-         Integer seen = tally.get(category);
-         tally.put(category, seen==null ? 1 : seen+1);
-     }
-     int winner = -1;
-     int winnercount = 0;
-     for(int category:tally.keySet())
-     {
-         int count = tally.get(category);
-         //the first category seen is taken as is, later ones must beat it strictly
-         if(winner==-1 || winnercount<count)
-         {
-             winner = category;
-             winnercount = count;
-         }
-     }
-     return winner;
- }
-
- /**
-  * Takes as input a JSON file and reads through the file sequentially to process and create a retweet network from the tweets.
-  * Only tweets whose cleaned, lower-cased text contains at least one category
-  * tag are used. The retweeted users come from "retweeted_status" when the
-  * tweet has one, otherwise from "rt @user" mentions in the text. The network
-  * is then restricted to the nodes reachable from the num_nodes largest nodes,
-  * nodes of size 0 are dropped, and the remaining nodes get sequential ids.
-  * @param inFilename file with one JSON tweet per line
-  * @param numNodeClasses number of size groups passed on to ComputeGroupsSqrt
-  * @param hashtags category info containing hashtags
-  * @param num_nodes number of seed nodes to be included in the network
-  * @return a JSONObject consisting of nodes and links of the network
-  */
- public JSONObject ConvertTweetsToDiffusionPath(String inFilename,int numNodeClasses,
-         JSONObject hashtags, int num_nodes)
- {
-     HashMap<String,NetworkNode> userconnections = new HashMap<String,NetworkNode>();
-     HashTagDS[] hashtagarray = ConvertJSONArrayToArray(hashtags);
-     BufferedReader br = null;
-     try{
-         br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
-         String temp = "";
-         while((temp = br.readLine())!=null)
-         {
-             JSONObject tweetobj;
-             try {
-                 tweetobj = new JSONObject(temp);
-             } catch (JSONException ex) {
-                 ex.printStackTrace();
-                 continue;
-             }
-             //Extract the tweet first
-             Tweet t = new Tweet();
-             String text="";
-             try {
-                 text = TextUtils.GetCleanText(tweetobj.getString("text")).toLowerCase();
-             } catch (JSONException ex) {
-                 ex.printStackTrace();
-                 continue;
-             }
-             //Check that the tweet matches at least one of the topics
-             boolean groupmatch = false;
-             for(HashTagDS ht:hashtagarray)
-             {
-                 for(String tg:ht.tags)
-                 {
-                     if(text.contains(tg))
-                     {
-                         groupmatch = true;
-                         break;
-                     }
-                 }
-                 if(groupmatch)
-                 {
-                     break;
-                 }
-             }
-             if(!groupmatch)
-             {
-                 continue;
-             }
-             //Find the users this tweet was retweeted from
-             ArrayList<String> fromusers = new ArrayList<String>();
-             if(!tweetobj.isNull("retweeted_status"))
-             {
-                 JSONObject rtstatus;
-                 try {
-                     rtstatus = tweetobj.getJSONObject("retweeted_status");
-                     // Read the user only when it is present. The check used to be
-                     // inverted, so this branch always failed with a JSONException.
-                     if(!rtstatus.isNull("user"))
-                     {
-                         JSONObject rtuserobj = rtstatus.getJSONObject("user");
-                         try{
-                             // lower-cased like every other user name in the network
-                             fromusers.add(rtuserobj.get("screen_name").toString().toLowerCase());
-                         }catch(JSONException ex)
-                         {
-                             ex.printStackTrace();
-                         }
-                     }
-                 } catch (JSONException ex) {
-                     Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex);
-                 }
-             }
-             else
-             {
-                 //use the tweet text to retrieve the pattern "RT @username:"
-                 fromusers = GetRTUsers(text);
-             }
-             if(fromusers.isEmpty())
-             {
-                 continue;
-             }
-
-             t.text = TextUtils.RemoveRTElements(text);
-             if(!tweetobj.isNull("user"))
-             {
-                 JSONObject userobj;
-                 try {
-                     userobj = tweetobj.getJSONObject("user");
-                     t.user = userobj.getString("screen_name").toLowerCase();
-                 } catch (JSONException ex) {
-                     Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex);
-                 }
-             }
-             // Without an author the last link of the chain has no target.
-             // NOTE(review): assumes Tweet.user is null until assigned - confirm.
-             if(t.user==null)
-             {
-                 continue;
-             }
-             // NOTE(review): t.catID is never assigned in this method, so this reads
-             // the color of whatever index a new Tweet starts with - confirm intent.
-             t.catColor = hashtagarray[t.catID].color;
-             //walk the retweet chain from the original author towards the tweet's user
-             int cur_level = 0;
-             for(int i=fromusers.size()-1;i>=0;i--)
-             {
-                 String touser = "";
-                 if(i==0)
-                 {//if this is the last user in the retweet sequence then use the user of the tweet as the next link
-                     touser = t.user;
-                 }
-                 else
-                 { //if there are still fromuser in the retweet chain then use them as the next link
-                     touser = fromusers.get(i-1);
-                 }
-                 //don't add any selflinks
-                 if(fromusers.get(i).equals(touser))
-                 {
-                     continue;
-                 }
-                 NetworkNode fromuser = null;
-                 if(userconnections.containsKey(fromusers.get(i)))
-                 {
-                     //from node already exists simply add this new connection to it
-                     fromuser = userconnections.get(fromusers.get(i));
-                 }
-                 else
-                 {
-                     //the from user was not found. add the node
-                     fromuser = new NetworkNode();
-                     fromuser.username = fromusers.get(i);
-                     fromuser.tonodes = new ArrayList<ToNodeInfo>();
-                     fromuser.class_codes = new ArrayList<Integer>();
-                     fromuser.size = DEFAULT_NODE_SIZE;
-                     fromuser.level = cur_level;
-                     fromuser.data = new ArrayList<String>();
-                     fromuser.data.add(t.text);
-                 }
-                 //if to node is not in the list then create it
-                 NetworkNode tonode = null;
-                 if(!userconnections.containsKey(touser))
-                 {
-                     tonode = new NetworkNode();
-                     tonode.username = touser;
-                     tonode.tonodes= new ArrayList<ToNodeInfo>();
-                     tonode.class_codes = new ArrayList<Integer>();
-                     tonode.catID = t.catID;
-                     tonode.catColor = t.catColor;
-                     tonode.size = DEFAULT_NODE_SIZE;
-                     tonode.data= new ArrayList<String>();
-                     tonode.data.add(t.text);
-                     tonode.level = cur_level+1;
-                     //add the touser info
-                     userconnections.put(touser, tonode);
-                 }
-                 else
-                 {
-                     tonode = userconnections.get(touser);
-                     tonode.data.add(t.text);
-                     // NOTE(review): the test uses cur_level+1 but the assignment uses
-                     // cur_level; kept as found - confirm which one is intended.
-                     if(tonode.level<cur_level+1)
-                     {
-                         tonode.level = cur_level;
-                     }
-                 }
-                 ToNodeInfo inf = new ToNodeInfo();
-                 inf.tonodeid = tonode.id;
-                 inf.text = t.text;
-                 inf.tousername = touser;
-                 inf.catID = t.catID;
-                 inf.catColor = t.catColor;
-                 fromuser.tonodes.add(inf);
-                 //update from node size
-                 fromuser.size++;
-                 //add back updated fromuser
-                 userconnections.put(fromusers.get(i), fromuser);
-                 //update the level for next iteration
-                 cur_level++;
-             }
-         }
-     }catch(IOException ex)
-     {
-         ex.printStackTrace();
-     }finally{
-         // The reader used to be left open.
-         if(br!=null)
-         {
-             try {
-                 br.close();
-             } catch (IOException ex) {
-                 Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex);
-             }
-         }
-     }
-     ArrayList<NetworkNode> returnnodes = new ArrayList<NetworkNode>();
-     //its +1 because nodes with size 0 are not going to be used to calculate the class
-     int min = DEFAULT_NODE_SIZE+1;
-     int max = DEFAULT_NODE_SIZE+1;
-     for(NetworkNode n:userconnections.values())
-     {
-         //the category of a node is the majority category of its tweets
-         int maxcat = GetMajorityTopicColor(n,hashtagarray);
-         n.catID = maxcat;
-         n.catColor = hashtagarray[maxcat].color;
-         if(n.size>max)
-         {
-             max = n.size;
-         }
-         if(n.size<min)
-         {
-             min = n.size;
-         }
-         returnnodes.add(n);
-     }
-     //create node groups to assign unique colors to nodes in different Categories based upon the number of connections
-     ArrayList<NetworkNode> nodes = ComputeGroupsSqrt(returnnodes, max, min, numNodeClasses);
-     Collections.sort(nodes,Collections.reverseOrder(new NodeSizeComparator()));
-     //select how many nodes to show.
-     int nodes_to_visit = Math.min(nodes.size(), num_nodes);
-
-     HashMap<String,NetworkNode> prunednodes = new HashMap<String,NetworkNode>();
-     HashMap<String,Integer> nodeidlist = new HashMap<String,Integer>();
-     int nodeid = 0; //node nodeid counter
-     for(int k=0;k<nodes_to_visit;k++)
-     {
-         NetworkNode nd = nodes.get(k);
-         nd.level = 0;
-         HashMap<String,NetworkNode> rtnodes = GetNextHopConnections(userconnections,nd,new HashMap<String,NetworkNode>());
-         for(String n:rtnodes.keySet())
-         {
-             if(!prunednodes.containsKey(n))
-             {
-                 NetworkNode newnode = rtnodes.get(n);
-                 //nodes that were never retweeted (size 0) are left out
-                 if(newnode.size>0)
-                 {
-                     prunednodes.put(n, newnode);
-                     nodeidlist.put(n, nodeid++);
-                 }
-             }
-         }
-     }
-
-     /* We now have all the nodes of the network. Assign the sequential ids to
-      * the respective nodes and, at the same time, drop every link whose
-      * target did not make it into the pruned network.
-      */
-     ArrayList<NetworkNode> finalnodes = new ArrayList<NetworkNode>();
-     for(NetworkNode nd:prunednodes.values())
-     {
-         nd.id = nodeidlist.get(nd.username);
-         // Collect the surviving links in a fresh list. Overwriting the front of
-         // the old list in place left the dropped links (still carrying the
-         // placeholder target id) at its tail, and they were exported as links.
-         ArrayList<ToNodeInfo> keptlinks = new ArrayList<ToNodeInfo>();
-         for(ToNodeInfo tnf: nd.tonodes)
-         {
-             Integer targetid = nodeidlist.get(tnf.tousername);
-             if(targetid!=null)
-             {
-                 tnf.tonodeid = targetid;
-                 keptlinks.add(tnf);
-             }
-         }
-         nd.tonodes = keptlinks;
-         finalnodes.add(nd);
-     }
-     Collections.sort(finalnodes,new NodeIDComparator());
-     System.out.println(finalnodes.size());
-     for(NetworkNode node:finalnodes)
-     {
-         System.out.println(node.id+" "+node.username+" "+node.level+" "+node.size+" "+node.catColor+node.data.get(0));
-     }
-     return GetD3Structure(finalnodes);
- }
-
- /**
-  * Builds the D3 representation of the network: a JSONObject with a "nodes"
-  * array and a "links" array. Every outgoing connection of a node becomes
-  * one link and one entry in that node's "data" array.
-  * @param finalnodes the nodes of the network, ids already assigned
-  * @return the object holding "nodes" and "links"; when a node fails to
-  *         serialise, the error is printed and that node is left out
-  */
- public JSONObject GetD3Structure(ArrayList<NetworkNode> finalnodes)
- {
-     JSONObject network = new JSONObject();
-     JSONArray nodearray = new JSONArray();
-     JSONArray linkarray = new JSONArray();
-     try {
-         for (NetworkNode node : finalnodes)
-         {
-             try {
-                 //one link plus one data record per outgoing connection
-                 JSONArray connections = new JSONArray();
-                 for (ToNodeInfo target : node.tonodes) {
-                     JSONObject link = new JSONObject();
-                     link.put("source", node.id);
-                     link.put("target", target.tonodeid);
-                     //weight of the edge
-                     link.put("value", 1);
-                     //class code is a unique id corresponding to the text
-                     link.put("data", target.class_code);
-                     linkarray.put(link);
-
-                     JSONObject record = new JSONObject();
-                     record.put("tonodeid", target.tonodeid);
-                     record.put("nodefrom", node.username);
-                     record.put("nodeto", target.tousername);
-                     record.put("tweet", target.text);
-                     connections.put(record);
-                 }
-                 //the node itself
-                 JSONObject entry = new JSONObject();
-                 entry.put("name", node.username);
-                 entry.put("group", node.group);
-                 entry.put("id", node.id);
-                 entry.put("size", node.size);
-                 entry.put("catColor", node.catColor);
-                 entry.put("catID", node.catID);
-                 entry.put("data", connections);
-                 entry.put("level", node.level);
-                 nodearray.put(entry);
-             } catch (JSONException ex) {
-                 ex.printStackTrace();
-             }
-         }
-         network.put("nodes", nodearray);
-         network.put("links", linkarray);
-     } catch (JSONException ex) {
-         Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex);
-     }
-     return network;
- }
-
- /**
-  * Collects, depth first, every node that can be reached from a starting node
-  * by following its outgoing connections. The level of each node is raised
-  * by one when it is visited.
-  * @param userconnections A map containing the usernames as keys and the node information as value
-  * @param cur_node Node currently being processed.
-  * @param newnodes The nodes reached so far; it is filled in place
-  * @return newnodes, now also holding cur_node and everything reachable from it
-  */
- public HashMap<String,NetworkNode> GetNextHopConnections(HashMap<String,NetworkNode> userconnections,NetworkNode cur_node,HashMap<String,NetworkNode> newnodes)
- {
-     cur_node.level += 1;
-     newnodes.put(cur_node.username,cur_node);
-     for(ToNodeInfo hop:cur_node.tonodes)
-     {
-         //nodes already collected are not visited again (this also stops cycles)
-         if(!newnodes.containsKey(hop.tousername))
-         {
-             //the recursive call fills and returns the very same map
-             GetNextHopConnections(userconnections, userconnections.get(hop.tousername), newnodes);
-         }
-     }
-     return newnodes;
- }
-
- /**
-  * Assigns every node to a group with square root binning. A node of size x,
-  * with y groups in total, gets the group ceil((sqrt(x)/sqrt(max))*y)-1,
-  * where max is the size of the largest node; nodes of size 0 get group 0.
-  * @param nodes A list of nodes
-  * @param max The maximum size of a node
-  * @param min The minimum size of a node (not used by the computation)
-  * @param noofclasses Number of classes into which the nodes must be classified
-  * @return A new list with the same nodes, each with its group set
-  */
- public ArrayList<NetworkNode> ComputeGroupsSqrt(ArrayList<NetworkNode> nodes, int max, int min, int noofclasses)
- {
-     ArrayList<NetworkNode> grouped = new ArrayList<NetworkNode>();
-     for(NetworkNode node:nodes)
-     {
-         if(node.size>0)
-         {
-             double share = Math.sqrt(node.size)/Math.sqrt(max);
-             node.group = (int) Math.ceil(share*noofclasses)-1;
-         }
-         else
-         {
-             node.group = 0;
-         }
-         grouped.add(node);
-     }
-     return grouped;
- }
-
-
- /**
-  * DEBUG use only: builds two sample categories and runs the conversion on
-  * a fixed local file with 7 node classes and 5 seed nodes.
-  */
- public static void main(String[] args)
- {
-     try {
-         //first category
-         JSONArray firsttags = new JSONArray();
-         firsttags.put("zuccotti");
-         JSONObject firstgroup = new JSONObject();
-         firstgroup.put("color", "#800000");
-         firstgroup.put("hts", firsttags);
-
-         //second category
-         JSONArray secondtags = new JSONArray();
-         secondtags.put("#nypd");
-         JSONObject secondgroup = new JSONObject();
-         secondgroup.put("color", "#0FFF00");
-         secondgroup.put("hts", secondtags);
-
-         JSONObject categories = new JSONObject();
-         categories.put("Group 1", firstgroup);
-         categories.put("Group 2", secondgroup);
-
-         String filename = "D:\\Twitter Data Analytics\\Data\\testows.json";
-         CreateD3Network cdn = new CreateD3Network();
-         cdn.ConvertTweetsToDiffusionPath(filename,7, categories,5);
-     } catch (JSONException ex) {
-         ex.printStackTrace();
-     }
- }
-}
diff --git a/src/Chapter5/network/ExtractUserTagNetwork.java b/src/Chapter5/network/ExtractUserTagNetwork.java
deleted file mode 100644
index 43ae680..0000000
--- a/src/Chapter5/network/ExtractUserTagNetwork.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.network;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class ExtractUserTagNetwork
-{
-
- static final String DEF_INFILENAME = "ows.json";
-
- /**
-  * Extracts a map of all the hashtags a user has used in his tweets resulting in a bipartite network. The frequency of each tag is also returned in the form of a map.
-  * Hashtags are taken from the tweet's "entities" when present, otherwise
-  * they are extracted from the tweet text. Lines that are not valid JSON are
-  * reported and skipped; an unreadable file yields an empty map.
-  * @param inFilename File containing a list of tweets as JSON objects
-  * @return A map containing the users (as "@name", lower-cased) as keys and a map containing the hashtags they use along with their frequency.
-  */
- public HashMap<String,HashMap<String,Integer>> ExtractUserHashtagNetwork(String inFilename)
- {
-     HashMap<String,HashMap<String,Integer>> usertagmap = new HashMap<String,HashMap<String,Integer>>();
-     BufferedReader br = null;
-     try{
-         br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
-         String temp = "";
-         while((temp = br.readLine())!=null)
-         {
-             try{
-                 JSONObject tweetobj = new JSONObject(temp);
-                 HashMap<String,Integer> tags = new HashMap<String,Integer>();
-                 if(!tweetobj.isNull("entities"))
-                 {
-                     // NOTE(review): tags read here are stored as given by the "text"
-                     // field, while ExtractHashTags keeps the leading '#'; the two
-                     // sources may therefore produce different keys - confirm.
-                     JSONObject entities = tweetobj.getJSONObject("entities");
-                     try {
-                         JSONArray hashtags = entities.getJSONArray("hashtags");
-                         for(int i=0;i<hashtags.length();i++)
-                         {
-                             String tg = hashtags.getJSONObject(i).getString("text").toLowerCase();
-                             if(!tags.containsKey(tg))
-                             {
-                                 tags.put(tg,1);
-                             }
-                             else
-                             {
-                                 tags.put(tg, tags.get(tg)+1);
-                             }
-                         }
-                     }catch(JSONException ex)
-                     {
-                         ex.printStackTrace();
-                     }
-                 }
-                 else
-                 if(!tweetobj.isNull("text"))
-                 {
-                     tags = ExtractHashTags(tweetobj.getString("text"));
-                 }
-                 if(!tweetobj.isNull("user"))
-                 {
-                     JSONObject userobj = tweetobj.getJSONObject("user");
-                     String username = "@"+userobj.getString("screen_name").toLowerCase();
-                     HashMap<String,Integer> usertags = usertagmap.get(username);
-                     if(usertags==null)
-                     {
-                         //first tweet of this user
-                         usertagmap.put(username, tags);
-                     }
-                     else
-                     {
-                         //add the counts of this tweet to the user's totals
-                         for(String k:tags.keySet())
-                         {
-                             if(usertags.containsKey(k))
-                             {
-                                 usertags.put(k, usertags.get(k)+tags.get(k));
-                             }
-                             else
-                             {
-                                 usertags.put(k, tags.get(k));
-                             }
-                         }
-                     }
-                 }
-             }catch(JSONException ex)
-             {
-                 ex.printStackTrace();
-             }
-         }
-     }catch(IOException ex)
-     {
-         ex.printStackTrace();
-     }finally{
-         // br is still null when the file could not be opened; closing it
-         // unguarded turned a reported I/O error into a NullPointerException.
-         if(br!=null)
-         {
-             try {
-                 br.close();
-             } catch (IOException ex) {
-                 Logger.getLogger(ExtractUserTagNetwork.class.getName()).log(Level.SEVERE, null, ex);
-             }
-         }
-     }
-     return usertagmap;
- }
-
- /**
-  * Counts the hashtags that occur in a text. A hashtag is a '#' followed by
-  * one or more ASCII letters or digits; tags are lower-cased before counting.
-  * @param text the text to scan
-  * @return A map containing the hashtags (including the '#') as keys and their frequency as value
-  */
- public HashMap<String,Integer> ExtractHashTags(String text)
- {
-     HashMap<String,Integer> counts = new HashMap<String,Integer>();
-     Matcher hits = Pattern.compile("#[a-zA-Z0-9]+").matcher(text);
-     while(hits.find())
-     {
-         String tag = hits.group().toLowerCase();
-         Integer seen = counts.get(tag);
-         counts.put(tag, seen==null ? 1 : seen+1);
-     }
-     return counts;
- }
-
- /**
-  * Prints, for every user found in the input file, the user name followed by
-  * one "tag,count" line per hashtag. The first argument names the input file
-  * when it is non-empty and the file exists; otherwise DEF_INFILENAME is used.
-  */
- public static void main(String[] args)
- {
-     String infilename = DEF_INFILENAME;
-     boolean hasargument = args!=null && args.length>=1 && !args[0].isEmpty();
-     if(hasargument && new File(args[0]).exists())
-     {
-         infilename = args[0];
-     }
-
-     ExtractUserTagNetwork eutn = new ExtractUserTagNetwork();
-     HashMap<String, HashMap<String,Integer>> usertagmap = eutn.ExtractUserHashtagNetwork(infilename);
-     for(String user:usertagmap.keySet())
-     {
-         System.out.println(user);
-         HashMap<String,Integer> tagcounts = usertagmap.get(user);
-         for(String tag:tagcounts.keySet())
-         {
-             System.out.println(tag+","+tagcounts.get(tag));
-         }
-     }
- }
-}
diff --git a/src/Chapter5/support/DateInfo.java b/src/Chapter5/support/DateInfo.java
deleted file mode 100644
index 9a32d4c..0000000
--- a/src/Chapter5/support/DateInfo.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.support;
-
-import java.util.Date;
-import java.util.HashMap;
-
-public class DateInfo implements Comparable
-{
- public Date d;
- public HashMap<String,Integer> catcounts = new HashMap<String,Integer>();
-
- public int compareTo(Object o) {
- DateInfo temp = (DateInfo) o;
- if(temp.d.after(this.d))
- {
- return 1;
- }
- else
- if(temp.d.before(this.d))
- {
- return -1;
- }
- else
- {
- return 0;
- }
- }
-}
diff --git a/src/Chapter5/support/HashTagDS.java b/src/Chapter5/support/HashTagDS.java
deleted file mode 100644
index b338b6d..0000000
--- a/src/Chapter5/support/HashTagDS.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter5.support;
-
-/**
- *
- * @author shamanth
- */
-public class HashTagDS
-{
- public String groupname;
- public String[] tags;
- public String color;
-
-}
diff --git a/src/Chapter5/support/NetworkNode.java b/src/Chapter5/support/NetworkNode.java
deleted file mode 100644
index 4f662e8..0000000
--- a/src/Chapter5/support/NetworkNode.java
+++ /dev/null
@@ -1,49 +0,0 @@
-package Chapter5.support;
-
-
-import java.util.ArrayList;
-
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-/**
- *
- * @author shamanth
- */
-public class NetworkNode
-{
- public int id;
- public String username;
- public int size;
- public String catColor;
- public int group;
-// public int[] clusterID;
- public int catID;
-// public double lat;
-// public double lng;
- public ArrayList<String> data;
- public int level;
- public ArrayList<Integer> class_codes;
- public ArrayList<ToNodeInfo> tonodes;
-
- public NetworkNode Copy()
- {
- NetworkNode tempnode = new NetworkNode();
- tempnode.catColor = this.catColor;
- tempnode.id = this.id;
- tempnode.username= this.username;
- tempnode.size = this.size;
- tempnode.group = this.group;
-// tempnode.clusterID = this.clusterID;
- tempnode.catID = this.catID;
-// tempnode.lat = this.lat;
-// tempnode.lng = this.lng;
- tempnode.data = this.data;
-// tempnode.level = this.level;
- tempnode.class_codes = this.class_codes;
- tempnode.tonodes = this.tonodes;
- return tempnode;
- }
-}
diff --git a/src/Chapter5/support/NodeIDComparator.java b/src/Chapter5/support/NodeIDComparator.java
deleted file mode 100644
index 0b41ae7..0000000
--- a/src/Chapter5/support/NodeIDComparator.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter5.support;
-
-import java.util.Comparator;
-
-/**
- *
- * @author shamanth
- */
-public class NodeIDComparator implements Comparator
-{
-
- public int compare(Object o1, Object o2) {
- int id1 = ((NetworkNode) o1).id;
- int id2 = ((NetworkNode) o2).id;
- if(id1>id2)
- {
- return 1;
- }
- else
- if(id1<id2)
- return -1;
- else
- return 0;
- }
-
-
-}
diff --git a/src/Chapter5/support/NodeSizeComparator.java b/src/Chapter5/support/NodeSizeComparator.java
deleted file mode 100644
index 23ecb4e..0000000
--- a/src/Chapter5/support/NodeSizeComparator.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter5.support;
-import java.util.Comparator;
-
-/**
- *
- * @author shamanth
- */
-public class NodeSizeComparator implements Comparator
-{
- public int compare(Object o1, Object o2)
- {
- int size1 = ((NetworkNode) o1).size;
- int size2 = ((NetworkNode) o2).size;
- if(size1>size2)
- {
- return 1;
- }
- if(size1<size2)
- return -1;
- else
- return 0;
- }
-
-}
diff --git a/src/Chapter5/support/ToNodeInfo.java b/src/Chapter5/support/ToNodeInfo.java
deleted file mode 100644
index 725a10a..0000000
--- a/src/Chapter5/support/ToNodeInfo.java
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter5.support;
-
-/**
- *
- * @author shamanth
- */
-public class ToNodeInfo
-{
- public int tonodeid;
- public String text;
- public String tousername;
- public String date;
- public int class_code;
- public int catID;
- public String catColor;
- //this is the default direction invert option. If the library adds nodes to the adjacency then that should be set to true in the client side
-// public boolean direction = false;
-}
diff --git a/src/Chapter5/support/Tweet.java b/src/Chapter5/support/Tweet.java
deleted file mode 100644
index be53166..0000000
--- a/src/Chapter5/support/Tweet.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-
-package Chapter5.support;
-
-/**
- *
- * @author shamanth
- */
-public class Tweet {
- public String text;
- public long id;
- public double lat;
- public double lng;
- public String pubdate;
- public String user;
- public int catID;
- public String catColor;
-}
diff --git a/src/Chapter5/text/EventSummaryExtractor.java b/src/Chapter5/text/EventSummaryExtractor.java
deleted file mode 100644
index e76f42e..0000000
--- a/src/Chapter5/text/EventSummaryExtractor.java
+++ /dev/null
@@ -1,269 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.text;
-
-import Chapter5.support.DateInfo;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class EventSummaryExtractor
-{
-
- final String DEF_INFILENAME = "ows.json";
- HashMap<String,ArrayList<String>> CATEGORIES = new HashMap<String,ArrayList<String>>();
- SimpleDateFormat twittersdm = new SimpleDateFormat("EEE MMM dd HH:mm:ss Z yyyy");
- SimpleDateFormat dayhoursdm = new SimpleDateFormat("yyyy-MM-dd:HH");
-// SimpleDateFormat daysdm = new SimpleDateFormat("MM/dd/yyyy");
- SimpleDateFormat hoursdm = new SimpleDateFormat("HH");
-
- /**
- *
- */
- public void InitializeCategories()
- {
- ArrayList<String> people = new ArrayList<String>();
- people.add("protesters");
- people.add("people");
- CATEGORIES.put("People",people);
- ArrayList<String> police = new ArrayList<String>();
- police.add("police");
- police.add("cops");
- police.add("nypd");
- police.add("raid");
- CATEGORIES.put("Police",police);
- ArrayList<String> media = new ArrayList<String>();
- media.add("press");
- media.add("news");
- media.add("media");
- CATEGORIES.put("Media",media);
- ArrayList<String> city = new ArrayList<String>();
- city.add("nyc");
- city.add("zucotti");
- city.add("park");
- CATEGORIES.put("Location",city);
- ArrayList<String> judiciary = new ArrayList<String>();
- judiciary.add("judge");
- judiciary.add("eviction");
- judiciary.add("order");
- judiciary.add("court");
- CATEGORIES.put("Judiciary", judiciary);
- }
-
- /**
- *
- * @param filename
- * @return
- */
- public JSONObject ExtractCategoryTrends(String filename)
- {
- JSONObject result = new JSONObject();
- try {
- BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8"));
- String temp = "";
- Set<String> catkeys = CATEGORIES.keySet();
- HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>();
- while((temp = br.readLine())!=null)
- {
- Date d = new Date();
- try {
- JSONObject jobj = new JSONObject(temp);
- //Published time
- if(!jobj.isNull("created_at"))
- {
- String time = "";
- try {
- time = jobj.getString("created_at");
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- if(time.isEmpty())
- {
- continue;
- }
- else
- {
- try {
- d = twittersdm.parse(time);
- } catch (ParseException ex) {
- continue;
- }
- }
- }
- else
- if(!jobj.isNull("timestamp"))
- {
- long time = new Date().getTime();
- try{
- time = jobj.getLong("timestamp");
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- d = new Date();
- d.setTime(time);
- }
- String datestr = dayhoursdm.format(d);
- String text = jobj.getString("text").toLowerCase();
-// System.out.println(text);
- for(String key:catkeys)
- {
- ArrayList<String> words = CATEGORIES.get(key);
- for(String word:words)
- {
- if(text.contains(word))
- {
- HashMap<String,Integer> categorycount = new HashMap<String,Integer>();
- if(datecount.containsKey(datestr))
- {
- categorycount = datecount.get(datestr);
- }
- if(categorycount.containsKey(key))
- {
- categorycount.put(key, categorycount.get(key)+1);
- }
- else
- {
- categorycount.put(key, 1);
- }
- //update the categorycount for the specific date
- datecount.put(datestr, categorycount);
- break;
- }
- }
- }
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- //sort the dates
- Set<String> datekeys = datecount.keySet();
- ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>();
- for(String date:datekeys)
- {
- Date d = null;
- try {
- d = dayhoursdm.parse(date);
- } catch (ParseException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- if(d!=null)
- {
- DateInfo info = new DateInfo();
- info.d = d;
- info.catcounts = datecount.get(date);
- dinfos.add(info);
- }
- }
- Collections.sort(dinfos, Collections.reverseOrder());
- try {
- result.put("axisxstep", dinfos.size()-1);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- try {
- result.put("axisystep", CATEGORIES.size()-1);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- JSONArray xcoordinates = new JSONArray();
- JSONArray ycoordinates = new JSONArray();
- //now add the data and the axis labels
- JSONArray axisxlabels = new JSONArray();
- JSONArray axisylabels = new JSONArray();
- JSONArray data = new JSONArray();
- for(String key:catkeys)
- {
- axisylabels.put(key);
- }
- //counters to mark the indices of the values added to data field. i is the x coordinate and j is the y coordinate
- int i=0,j=0;
-
- for(DateInfo date:dinfos)
- {
- String strdate = hoursdm.format(date.d);
- axisxlabels.put(strdate);
- HashMap<String,Integer> catcounts = date.catcounts;
- for(String key:catkeys)
- {
- xcoordinates.put(j);
- ycoordinates.put(i++);
- if(catcounts.containsKey(key))
- {
- data.put(catcounts.get(key));
- }
- else
- {
- data.put(0);
- }
- }
- //reset the x coordinate as we move to the next y item
- i=0;
- j++;
- }
- try {
- result.put("xcoordinates", xcoordinates);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- try {
- result.put("ycoordinates", ycoordinates);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- try {
- result.put("axisxlabels", axisxlabels);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- try {
- result.put("axisylabels", axisylabels);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- try {
- result.put("data", data);
- } catch (JSONException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- br.close();
- } catch (IOException ex) {
- Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex);
- }
- return result;
- }
-
- public static void main(String[] args)
- {
- EventSummaryExtractor ese = new EventSummaryExtractor();
- String infilename = ese.DEF_INFILENAME;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- }
- ese.InitializeCategories();
- System.out.println(ese.ExtractCategoryTrends(infilename).toString());
- }
-}
diff --git a/src/Chapter5/text/ExtractTopKeywords.java b/src/Chapter5/text/ExtractTopKeywords.java
deleted file mode 100644
index 8ab412a..0000000
--- a/src/Chapter5/text/ExtractTopKeywords.java
+++ /dev/null
@@ -1,151 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.text;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-import utils.Tags;
-import utils.TextUtils;
-
-public class ExtractTopKeywords
-{
-
- static final String DEF_INFILENAME = "ows.json";
- static final int DEF_K = 60;
-
- /**
- * Extracts the most frequently occurring keywords from the tweets by processing them sequentially. Stopwords are ignored.
- * @param inFilename File containing a list of tweets as JSON objects
- * @param K Count of the top keywords to return
- * @param ignoreHashtags If true, hashtags are not considered while counting the most frequent keywords
- * @param ignoreUsernames If true, usernames are not considered while counting the most frequent keywords
- * @param tu TextUtils object which handles the stopwords
- * @return a JSONArray containing an array of JSONObjects. Each object contains two elements "text" and "size" referring to the word and it's frequency
- */
- public JSONArray GetTopKeywords(String inFilename, int K, boolean ignoreHashtags, boolean ignoreUsernames, TextUtils tu)
- {
- HashMap<String, Integer> words = new HashMap<String,Integer>();
- BufferedReader br = null;
- try{
- br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- try{
- JSONObject tweetobj = new JSONObject(temp);
- if(!tweetobj.isNull("text"))
- {
- String text = tweetobj.getString("text");
- //System.out.println(text);
- text = text.toLowerCase().replaceAll("\\s+", " ");
- /** Step 1: Tokenize tweets into individual words. and count their frequency in the corpus
- * Remove stop words and special characters. Ignore user names and hashtags if the user chooses to.
- */
- HashMap<String,Integer> tokens = tu.TokenizeText(text,ignoreHashtags,ignoreUsernames);
- Set<String> keys = tokens.keySet();
- for(String key:keys)
- {
- if(words.containsKey(key))
- {
- words.put(key, words.get(key)+tokens.get(key));
- }
- else
- {
- words.put(key, tokens.get(key));
- }
- }
- }
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }finally{
- try {
- br.close();
- } catch (IOException ex) {
- Logger.getLogger(ExtractTopKeywords.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- Set<String> keys = words.keySet();
- ArrayList<Tags> tags = new ArrayList<Tags>();
- for(String key:keys)
- {
- Tags tag = new Tags();
- tag.setKey(key);
- tag.setValue(words.get(key));
- tags.add(tag);
- }
- // Step 2: Sort the words in descending order of frequency
- Collections.sort(tags, Collections.reverseOrder());
- JSONArray cloudwords = new JSONArray();
- int numwords = K;
- if(tags.size()<numwords)
- {
- numwords = tags.size();
- }
- for(int i=0;i<numwords;i++)
- {
- JSONObject wordfreq = new JSONObject();
- Tags tag = tags.get(i);
- try{
- wordfreq.put("text", tag.getKey());
- wordfreq.put("size",tag.getValue());
- cloudwords.put(wordfreq);
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- return cloudwords;
- }
-
- public static void main(String[] args)
- {
- ExtractTopKeywords etk = new ExtractTopKeywords();
-
- //Initialize the TextUtils class which handles all the processing of text.
- TextUtils tu = new TextUtils();
- tu.LoadStopWords("C:/tweettracker/stopwords.txt");
- String infilename = DEF_INFILENAME;
- int K = DEF_K;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- if(args.length>=2&&!args[1].isEmpty())
- {
- try{
- K = Integer.parseInt(args[1]);
- }catch(NumberFormatException ex)
- {
- ex.printStackTrace();
- }
- }
- }
- System.out.println(etk.GetTopKeywords(infilename, K, false,true,tu));
- }
-
-}
diff --git a/src/Chapter5/trends/ControlChartExample.java b/src/Chapter5/trends/ControlChartExample.java
deleted file mode 100644
index 2df814f..0000000
--- a/src/Chapter5/trends/ControlChartExample.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class ControlChartExample
-{
- static final String DEF_INFILENAME = "ows.json";
- static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm");
-
- public JSONArray GenerateDataTrend(String inFilename)
- {
- BufferedReader br = null;
- JSONArray result = new JSONArray();
- HashMap<String,Integer> datecount = new HashMap<String,Integer>();
- try{
- br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- try {
- JSONObject jobj = new JSONObject(temp);
- long timestamp = jobj.getLong("timestamp");
- Date d = new Date(timestamp);
- String strdate = SDM.format(d);
- if(datecount.containsKey(strdate))
- {
- datecount.put(strdate, datecount.get(strdate)+1);
- }
- else
- {
- datecount.put(strdate, 1);
- }
- } catch (JSONException ex) {
- Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>();
- Set<String> keys = datecount.keySet();
- for(String key:keys)
- {
- DateInfo dinfo = new DateInfo();
- try {
- dinfo.d = SDM.parse(key);
- } catch (ParseException ex) {
- ex.printStackTrace();
- continue;
- }
- dinfo.count = datecount.get(key);
- dinfos.add(dinfo);
- }
- double mean = this.GetMean(dinfos);
- double stddev = this.GetStandardDev(dinfos, mean);
- Collections.sort(dinfos);
- //Normalize the trend by subtracting the mean and dividing by standard deviation to get a distribution with 0 mean and a standard deviation of 1
- for(DateInfo dinfo:dinfos)
- {
- try{
- JSONObject jobj = new JSONObject();
- jobj.put("date", SDM.format(dinfo.d));
- jobj.put("count", (dinfo.count-mean)/stddev);
- jobj.put("mean", 0);
- jobj.put("stdev+3", 3);
- jobj.put("stdev-3", -3);
- result.put(jobj);
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }finally{
- try {
- br.close();
- } catch (IOException ex) {
- Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- return result;
- }
-
- public double GetStandardDev(ArrayList<DateInfo> dateinfos,double mean)
- {
- double intsum = 0;
- int numperiods = dateinfos.size();
- for(DateInfo dinfo:dateinfos)
- {
- intsum+=Math.pow((dinfo.count - mean),2);
- }
-// System.out.println(Math.sqrt((double)intsum/timePeriodCounts.size()));
- return Math.sqrt((double)intsum/numperiods);
- }
-
- public double GetMean(ArrayList<DateInfo> dateinfos)
- {
- int numperiods = dateinfos.size();
- int sum = 0;
- for(DateInfo dinfo:dateinfos)
- {
- sum +=dinfo.count;
- }
-// System.out.println((double)sum/numPeriods);
- return ((double)sum/numperiods);
- }
-
- public static void main(String[] args)
- {
- ControlChartExample cce = new ControlChartExample();
- String infilename = DEF_INFILENAME;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- }
- System.out.println(cce.GenerateDataTrend(infilename));
- }
-
-}
diff --git a/src/Chapter5/trends/DateInfo.java b/src/Chapter5/trends/DateInfo.java
deleted file mode 100644
index 209f4a3..0000000
--- a/src/Chapter5/trends/DateInfo.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.util.Date;
-
-public class DateInfo implements Comparable
-{
- public Date d;
- public int count;
-
- public int compareTo(Object o) {
- DateInfo temp = (DateInfo) o;
- if(temp.d.after(this.d))
- {
- return -1;
- }
- else
- if(temp.d.before(this.d))
- {
- return 1;
- }
- else
- {
- return 0;
- }
- }
-}
diff --git a/src/Chapter5/trends/ExtractDatasetTrend.java b/src/Chapter5/trends/ExtractDatasetTrend.java
deleted file mode 100644
index dad7f27..0000000
--- a/src/Chapter5/trends/ExtractDatasetTrend.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class ExtractDatasetTrend
-{
- static final String DEF_INFILENAME = "ows.json";
- // Date pattern used to count the volume of tweets
- final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm");
-
- public JSONArray GenerateDataTrend(String inFilename)
- {
- BufferedReader br = null;
- JSONArray result = new JSONArray();
- HashMap<String,Integer> datecount = new HashMap<String,Integer>();
- try{
- br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- try {
- JSONObject jobj = new JSONObject(temp);
- long timestamp = jobj.getLong("timestamp");
- Date d = new Date(timestamp);
- String strdate = SDM.format(d);
- if(datecount.containsKey(strdate))
- {
- datecount.put(strdate, datecount.get(strdate)+1);
- }
- else
- {
- datecount.put(strdate, 1);
- }
- } catch (JSONException ex) {
- Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- /** DateInfo consists of a date string and the corresponding count.
- * It also implements a Comparator for sorting by date
- */
- ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>();
- Set<String> keys = datecount.keySet();
- for(String key:keys)
- {
- DateInfo dinfo = new DateInfo();
- try {
- dinfo.d = SDM.parse(key);
- } catch (ParseException ex) {
- ex.printStackTrace();
- continue;
- }
- dinfo.count = datecount.get(key);
- dinfos.add(dinfo);
- }
- Collections.sort(dinfos);
- // Format and return the date string and the corresponding count
- for(DateInfo dinfo:dinfos)
- {
- try{
- JSONObject jobj = new JSONObject();
- jobj.put("date", SDM.format(dinfo.d));
- jobj.put("count", dinfo.count);
- result.put(jobj);
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }finally{
- try {
- br.close();
- } catch (IOException ex) {
- Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- return result;
- }
-
- public static void main(String[] args)
- {
- ExtractDatasetTrend edt = new ExtractDatasetTrend();
-
- String infilename = DEF_INFILENAME;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- }
- System.out.println(edt.GenerateDataTrend(infilename));
- }
-
-}
diff --git a/src/Chapter5/trends/SparkLineExample.java b/src/Chapter5/trends/SparkLineExample.java
deleted file mode 100644
index 4a0164b..0000000
--- a/src/Chapter5/trends/SparkLineExample.java
+++ /dev/null
@@ -1,163 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class SparkLineExample
-{
- static final String DEF_INFILENAME = "ows.json";
- static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH");
-
- public JSONObject GenerateDataTrend(String inFilename, ArrayList<String> keywords)
- {
- BufferedReader br = null;
- JSONObject result = new JSONObject();
- HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>();
- try{
- br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- try {
- JSONObject jobj = new JSONObject(temp);
- String text = jobj.getString("text").toLowerCase();
- long timestamp = jobj.getLong("timestamp");
- Date d = new Date(timestamp);
- String strdate = SDM.format(d);
- for(String word:keywords)
- {
- if(text.contains(word))
- {
- HashMap<String,Integer> wordcount = new HashMap<String,Integer>();
- if(datecount.containsKey(strdate))
- {
- wordcount = datecount.get(strdate);
- }
- if(wordcount.containsKey(word))
- {
- wordcount.put(word, wordcount.get(word)+1);
- }
- else
- {
- wordcount.put(word, 1);
- }
- //update the wordcount for the specific date
- datecount.put(strdate, wordcount);
- }
- }
- } catch (JSONException ex) {
- Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- //sort the dates
- ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>();
- Set<String> keys = datecount.keySet();
- for(String key:keys)
- {
- TCDateInfo dinfo = new TCDateInfo();
- try {
- dinfo.d = SDM.parse(key);
- } catch (ParseException ex) {
- ex.printStackTrace();
- continue;
- }
- dinfo.wordcount = datecount.get(key);
- dinfos.add(dinfo);
- }
- Collections.sort(dinfos);
- JSONArray[] tseriesvals = new JSONArray[keywords.size()];
- for(int i=0;i<tseriesvals.length;i++)
- {
- tseriesvals[i] = new JSONArray();
- }
- //prepare the output
- for(TCDateInfo date:dinfos)
- {
- HashMap<String,Integer> wordcount = date.wordcount;
- int counter=0;
- for(String word:keywords)
- {
- if(wordcount.containsKey(word))
- {
- tseriesvals[counter].put(wordcount.get(word));
- }
- else
- {
- tseriesvals[counter].put(0);
- }
- counter++;
- }
- }
- int counter=0;
- for(String word:keywords)
- {
- try {
- result.put(word, tseriesvals[counter]);
- } catch (JSONException ex) {
- Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- counter++;
- }
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }finally{
- try {
- br.close();
- } catch (IOException ex) {
- Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- return result;
- }
-
- public static void main(String[] args)
- {
- SparkLineExample sle = new SparkLineExample();
- ArrayList<String> words = new ArrayList<String>();
- String infilename = DEF_INFILENAME;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- for(int i=1;i<args.length;i++)
- {
- if(args[i]!=null&&!args[i].isEmpty())
- {
- words.add(args[i]);
- }
- }
- }
- if(words.isEmpty())
- {
- words.add("#nypd");
- words.add("#ows");
- }
- System.out.println(sle.GenerateDataTrend(infilename,words));
- }
-
-}
diff --git a/src/Chapter5/trends/TCDateInfo.java b/src/Chapter5/trends/TCDateInfo.java
deleted file mode 100644
index 88450e9..0000000
--- a/src/Chapter5/trends/TCDateInfo.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.util.Date;
-import java.util.HashMap;
-
-public class TCDateInfo implements Comparable
-{
- public Date d;
- public HashMap<String,Integer> wordcount = new HashMap<String,Integer>();
-
- public int compareTo(Object o) {
- TCDateInfo temp = (TCDateInfo) o;
- if(temp.d.after(this.d))
- {
- return -1;
- }
- else
- if(temp.d.before(this.d))
- {
- return 1;
- }
- else
- {
- return 0;
- }
- }
-
-}
diff --git a/src/Chapter5/trends/TrendComparisonExample.java b/src/Chapter5/trends/TrendComparisonExample.java
deleted file mode 100644
index 20991cd..0000000
--- a/src/Chapter5/trends/TrendComparisonExample.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
- * @author shamanth
- */
-package Chapter5.trends;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.json.JSONArray;
-import org.json.JSONException;
-import org.json.JSONObject;
-
-public class TrendComparisonExample
-{
- static final String DEF_INFILENAME = "ows.json";
- static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm");
-
- public JSONArray GenerateDataTrend(String inFilename, ArrayList<String> keywords)
- {
- BufferedReader br = null;
- JSONArray result = new JSONArray();
- HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>();
- try{
- br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
- String temp = "";
- while((temp = br.readLine())!=null)
- {
- try {
- JSONObject jobj = new JSONObject(temp);
- String text = jobj.getString("text").toLowerCase();
- long timestamp = jobj.getLong("timestamp");
- Date d = new Date(timestamp);
- String strdate = SDM.format(d);
- for(String word:keywords)
- {
- if(text.contains(word))
- {
- HashMap<String,Integer> wordcount = new HashMap<String,Integer>();
- if(datecount.containsKey(strdate))
- {
- wordcount = datecount.get(strdate);
- }
- if(wordcount.containsKey(word))
- {
- wordcount.put(word, wordcount.get(word)+1);
- }
- else
- {
- wordcount.put(word, 1);
- }
- //update the wordcount for the specific date
- datecount.put(strdate, wordcount);
- }
- }
- } catch (JSONException ex) {
- Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- //sort the dates
- ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>();
- Set<String> keys = datecount.keySet();
- for(String key:keys)
- {
- TCDateInfo dinfo = new TCDateInfo();
- try {
- dinfo.d = SDM.parse(key);
- } catch (ParseException ex) {
- ex.printStackTrace();
- continue;
- }
- dinfo.wordcount = datecount.get(key);
- dinfos.add(dinfo);
- }
- Collections.sort(dinfos);
- //prepare the output
- for(TCDateInfo date:dinfos)
- {
- JSONObject item = new JSONObject();
- String strdate = SDM.format(date.d);
- try{
- item.put("date",strdate);
- HashMap<String,Integer> wordcount = date.wordcount;
- for(String word:keywords)
- {
- if(wordcount.containsKey(word))
- {
- item.put(word, wordcount.get(word));
- }
- else
- {
- item.put(word, 0);
- }
- }
- result.put(item);
- }catch(JSONException ex)
- {
- ex.printStackTrace();
- }
- }
- }catch(IOException ex)
- {
- ex.printStackTrace();
- }finally{
- try {
- br.close();
- } catch (IOException ex) {
- Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex);
- }
- }
- return result;
- }
-
- public static void main(String[] args)
- {
- TrendComparisonExample tce = new TrendComparisonExample();
- ArrayList<String> words = new ArrayList<String>();
- String infilename = DEF_INFILENAME;
- if(args!=null)
- {
- if(args.length>=1&&!args[0].isEmpty())
- {
- File fl = new File(args[0]);
- if(fl.exists())
- {
- infilename = args[0];
- }
- }
- for(int i=1;i<args.length;i++)
- {
- if(args[i]!=null&&!args[i].isEmpty())
- {
- words.add(args[i]);
- }
- }
- }
- if(words.isEmpty())
- {
- words.add("#nypd");
- words.add("#ows");
- }
- System.out.println(tce.GenerateDataTrend(infilename,words));
- }
-
-}