diff options
Diffstat (limited to 'src/Chapter5/trends')
-rw-r--r-- | src/Chapter5/trends/ControlChartExample.java | 144 | ||||
-rw-r--r-- | src/Chapter5/trends/DateInfo.java | 29 | ||||
-rw-r--r-- | src/Chapter5/trends/ExtractDatasetTrend.java | 120 | ||||
-rw-r--r-- | src/Chapter5/trends/SparkLineExample.java | 163 | ||||
-rw-r--r-- | src/Chapter5/trends/TCDateInfo.java | 31 | ||||
-rw-r--r-- | src/Chapter5/trends/TrendComparisonExample.java | 155 |
6 files changed, 0 insertions, 642 deletions
diff --git a/src/Chapter5/trends/ControlChartExample.java b/src/Chapter5/trends/ControlChartExample.java deleted file mode 100644 index 2df814f..0000000 --- a/src/Chapter5/trends/ControlChartExample.java +++ /dev/null @@ -1,144 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class ControlChartExample -{ - static final String DEF_INFILENAME = "ows.json"; - static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm"); - - public JSONArray GenerateDataTrend(String inFilename) - { - BufferedReader br = null; - JSONArray result = new JSONArray(); - HashMap<String,Integer> datecount = new HashMap<String,Integer>(); - try{ - br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try { - JSONObject jobj = new JSONObject(temp); - long timestamp = jobj.getLong("timestamp"); - Date d = new Date(timestamp); - String strdate = SDM.format(d); - if(datecount.containsKey(strdate)) - { - datecount.put(strdate, datecount.get(strdate)+1); - } - else - { - datecount.put(strdate, 1); - } - } catch (JSONException ex) { - Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>(); - Set<String> keys = datecount.keySet(); - for(String key:keys) - { - DateInfo dinfo = new DateInfo(); - try { - dinfo.d = SDM.parse(key); - } catch (ParseException ex) { - ex.printStackTrace(); - continue; - } - dinfo.count = datecount.get(key); - dinfos.add(dinfo); - } - double mean = this.GetMean(dinfos); - double stddev = this.GetStandardDev(dinfos, mean); - Collections.sort(dinfos); - //Normalize the trend by subtracting the mean and dividing by standard deviation to get a distribution with 0 mean and a standard deviation of 1 - for(DateInfo dinfo:dinfos) - { - try{ - JSONObject jobj = new JSONObject(); - jobj.put("date", SDM.format(dinfo.d)); - jobj.put("count", (dinfo.count-mean)/stddev); - jobj.put("mean", 0); - jobj.put("stdev+3", 3); - jobj.put("stdev-3", -3); - result.put(jobj); - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - return result; - } - - public double GetStandardDev(ArrayList<DateInfo> dateinfos,double mean) - { - double intsum = 0; - int numperiods = dateinfos.size(); - for(DateInfo dinfo:dateinfos) - { - intsum+=Math.pow((dinfo.count - mean),2); - } -// System.out.println(Math.sqrt((double)intsum/timePeriodCounts.size())); - return Math.sqrt((double)intsum/numperiods); - } - - public double GetMean(ArrayList<DateInfo> dateinfos) - { - int numperiods = dateinfos.size(); - int sum = 0; - for(DateInfo dinfo:dateinfos) - { - sum +=dinfo.count; - } -// System.out.println((double)sum/numPeriods); - return ((double)sum/numperiods); - } - - public static void main(String[] args) - { - ControlChartExample cce = new ControlChartExample(); - String infilename = DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - } - System.out.println(cce.GenerateDataTrend(infilename)); - } - -} diff --git a/src/Chapter5/trends/DateInfo.java b/src/Chapter5/trends/DateInfo.java deleted file mode 100644 index 209f4a3..0000000 --- a/src/Chapter5/trends/DateInfo.java +++ /dev/null @@ -1,29 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.util.Date; - -public class DateInfo implements Comparable -{ - public Date d; - public int count; - - public int compareTo(Object o) { - DateInfo temp = (DateInfo) o; - if(temp.d.after(this.d)) - { - return -1; - } - else - if(temp.d.before(this.d)) - { - return 1; - } - else - { - return 0; - } - } -} diff --git a/src/Chapter5/trends/ExtractDatasetTrend.java b/src/Chapter5/trends/ExtractDatasetTrend.java deleted file mode 100644 index dad7f27..0000000 --- a/src/Chapter5/trends/ExtractDatasetTrend.java +++ /dev/null @@ -1,120 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class ExtractDatasetTrend -{ - static final String DEF_INFILENAME = "ows.json"; - // Date pattern used to count the volume of tweets - final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm"); - - public JSONArray GenerateDataTrend(String inFilename) - { - BufferedReader br = null; - JSONArray result = new JSONArray(); - HashMap<String,Integer> datecount = new HashMap<String,Integer>(); - try{ - br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try { - JSONObject jobj = new JSONObject(temp); - long timestamp = jobj.getLong("timestamp"); - Date d = new Date(timestamp); - String strdate = SDM.format(d); - if(datecount.containsKey(strdate)) - { - datecount.put(strdate, datecount.get(strdate)+1); - } - else - { - datecount.put(strdate, 1); - } - } catch (JSONException ex) { - Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex); - } - } - /** DateInfo consists of a date string and the corresponding count. - * It also implements a Comparator for sorting by date - */ - ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>(); - Set<String> keys = datecount.keySet(); - for(String key:keys) - { - DateInfo dinfo = new DateInfo(); - try { - dinfo.d = SDM.parse(key); - } catch (ParseException ex) { - ex.printStackTrace(); - continue; - } - dinfo.count = datecount.get(key); - dinfos.add(dinfo); - } - Collections.sort(dinfos); - // Format and return the date string and the corresponding count - for(DateInfo dinfo:dinfos) - { - try{ - JSONObject jobj = new JSONObject(); - jobj.put("date", SDM.format(dinfo.d)); - jobj.put("count", dinfo.count); - result.put(jobj); - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex); - } - } - return result; - } - - public static void main(String[] args) - { - ExtractDatasetTrend edt = new ExtractDatasetTrend(); - - String infilename = DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - } - System.out.println(edt.GenerateDataTrend(infilename)); - } - -} diff --git a/src/Chapter5/trends/SparkLineExample.java b/src/Chapter5/trends/SparkLineExample.java deleted file mode 100644 index 4a0164b..0000000 --- a/src/Chapter5/trends/SparkLineExample.java +++ /dev/null @@ -1,163 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class SparkLineExample -{ - static final String DEF_INFILENAME = "ows.json"; - static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH"); - - public JSONObject GenerateDataTrend(String inFilename, ArrayList<String> keywords) - { - BufferedReader br = null; - JSONObject result = new JSONObject(); - HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>(); - try{ - br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try { - JSONObject jobj = new JSONObject(temp); - String text = jobj.getString("text").toLowerCase(); - long timestamp = jobj.getLong("timestamp"); - Date d = new Date(timestamp); - String strdate = SDM.format(d); - for(String word:keywords) - { - if(text.contains(word)) - { - HashMap<String,Integer> wordcount = new HashMap<String,Integer>(); - if(datecount.containsKey(strdate)) - { - wordcount = datecount.get(strdate); - } - if(wordcount.containsKey(word)) - { - wordcount.put(word, wordcount.get(word)+1); - } - else - { - wordcount.put(word, 1); - } - //update the wordcount for the specific date - datecount.put(strdate, wordcount); - } - } - } catch (JSONException ex) { - Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - //sort the dates - ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>(); - Set<String> keys = datecount.keySet(); - for(String key:keys) - { - TCDateInfo dinfo = new TCDateInfo(); - try { - dinfo.d = SDM.parse(key); - } catch (ParseException ex) { - ex.printStackTrace(); - continue; - } - dinfo.wordcount = datecount.get(key); - dinfos.add(dinfo); - } - Collections.sort(dinfos); - JSONArray[] tseriesvals = new JSONArray[keywords.size()]; - for(int i=0;i<tseriesvals.length;i++) - { - tseriesvals[i] = new JSONArray(); - } - //prepare the output - for(TCDateInfo date:dinfos) - { - HashMap<String,Integer> wordcount = date.wordcount; - int counter=0; - for(String word:keywords) - { - if(wordcount.containsKey(word)) - { - tseriesvals[counter].put(wordcount.get(word)); - } - else - { - tseriesvals[counter].put(0); - } - counter++; - } - } - int counter=0; - for(String word:keywords) - { - try { - result.put(word, tseriesvals[counter]); - } catch (JSONException ex) { - Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex); - } - counter++; - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - return result; - } - - public static void main(String[] args) - { - SparkLineExample sle = new SparkLineExample(); - ArrayList<String> words = new ArrayList<String>(); - String infilename = DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - for(int i=1;i<args.length;i++) - { - if(args[i]!=null&&!args[i].isEmpty()) - { - words.add(args[i]); - } - } - } - if(words.isEmpty()) - { - words.add("#nypd"); - words.add("#ows"); - } - System.out.println(sle.GenerateDataTrend(infilename,words)); - } - -} diff --git a/src/Chapter5/trends/TCDateInfo.java b/src/Chapter5/trends/TCDateInfo.java deleted file mode 100644 index 88450e9..0000000 --- a/src/Chapter5/trends/TCDateInfo.java +++ /dev/null @@ -1,31 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.util.Date; -import java.util.HashMap; - -public class TCDateInfo implements Comparable -{ - public Date d; - public HashMap<String,Integer> wordcount = new HashMap<String,Integer>(); - - public int compareTo(Object o) { - TCDateInfo temp = (TCDateInfo) o; - if(temp.d.after(this.d)) - { - return -1; - } - else - if(temp.d.before(this.d)) - { - return 1; - } - else - { - return 0; - } - } - -} diff --git a/src/Chapter5/trends/TrendComparisonExample.java b/src/Chapter5/trends/TrendComparisonExample.java deleted file mode 100644 index 20991cd..0000000 --- a/src/Chapter5/trends/TrendComparisonExample.java +++ /dev/null @@ -1,155 +0,0 @@ -/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University - * @author shamanth - */ -package Chapter5.trends; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Date; -import java.util.HashMap; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class TrendComparisonExample -{ - static final String DEF_INFILENAME = "ows.json"; - static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm"); - - public JSONArray GenerateDataTrend(String inFilename, ArrayList<String> keywords) - { - BufferedReader br = null; - JSONArray result = new JSONArray(); - HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>(); - try{ - br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); - String temp = ""; - while((temp = br.readLine())!=null) - { - try { - JSONObject jobj = new JSONObject(temp); - String text = jobj.getString("text").toLowerCase(); - long timestamp = jobj.getLong("timestamp"); - Date d = new Date(timestamp); - String strdate = SDM.format(d); - for(String word:keywords) - { - if(text.contains(word)) - { - HashMap<String,Integer> wordcount = new HashMap<String,Integer>(); - if(datecount.containsKey(strdate)) - { - wordcount = datecount.get(strdate); - } - if(wordcount.containsKey(word)) - { - wordcount.put(word, wordcount.get(word)+1); - } - else - { - wordcount.put(word, 1); - } - //update the wordcount for the specific date - datecount.put(strdate, wordcount); - } - } - } catch (JSONException ex) { - Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - //sort the dates - ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>(); - Set<String> keys = datecount.keySet(); - for(String key:keys) - { - TCDateInfo dinfo = new TCDateInfo(); - try { - dinfo.d = SDM.parse(key); - } catch (ParseException ex) { - ex.printStackTrace(); - continue; - } - dinfo.wordcount = datecount.get(key); - dinfos.add(dinfo); - } - Collections.sort(dinfos); - //prepare the output - for(TCDateInfo date:dinfos) - { - JSONObject item = new JSONObject(); - String strdate = SDM.format(date.d); - try{ - item.put("date",strdate); - HashMap<String,Integer> wordcount = date.wordcount; - for(String word:keywords) - { - if(wordcount.containsKey(word)) - { - item.put(word, wordcount.get(word)); - } - else - { - item.put(word, 0); - } - } - result.put(item); - }catch(JSONException ex) - { - ex.printStackTrace(); - } - } - }catch(IOException ex) - { - ex.printStackTrace(); - }finally{ - try { - br.close(); - } catch (IOException ex) { - Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex); - } - } - return result; - } - - public static void main(String[] args) - { - TrendComparisonExample tce = new TrendComparisonExample(); - ArrayList<String> words = new ArrayList<String>(); - String infilename = DEF_INFILENAME; - if(args!=null) - { - if(args.length>=1&&!args[0].isEmpty()) - { - File fl = new File(args[0]); - if(fl.exists()) - { - infilename = args[0]; - } - } - for(int i=1;i<args.length;i++) - { - if(args[i]!=null&&!args[i].isEmpty()) - { - words.add(args[i]); - } - } - } - if(words.isEmpty()) - { - words.add("#nypd"); - words.add("#ows"); - } - System.out.println(tce.GenerateDataTrend(infilename,words)); - } - -} |