summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore12
-rw-r--r--build.xml73
-rw-r--r--lib/CopyLibs/org-netbeans-modules-java-j2seproject-copylibstask.jarbin0 -> 22970 bytes
-rw-r--r--lib/collections-generic-4.01.jarbin0 -> 531557 bytes
-rw-r--r--lib/colt-1.2.0.jarbin0 -> 581945 bytes
-rw-r--r--lib/commons-codec-1.7.jarbin0 -> 259600 bytes
-rw-r--r--lib/commons-httpclient-3.1_1.jarbin0 -> 305001 bytes
-rw-r--r--lib/commons-lang-2.6.jarbin0 -> 284220 bytes
-rw-r--r--lib/commons-logging-1.1.1.jarbin0 -> 60841 bytes
-rw-r--r--lib/concurrent-1.3.4.jarbin0 -> 189284 bytes
-rw-r--r--lib/gson-2.2.4.jarbin0 -> 190418 bytes
-rw-r--r--lib/httpclient-4.2.1.jarbin0 -> 427022 bytes
-rw-r--r--lib/httpcore-4.2.1.jarbin0 -> 223374 bytes
-rw-r--r--lib/j3d-core-1.3.1.jarbin0 -> 2513498 bytes
-rw-r--r--lib/jfig-1.5.2.jarbin0 -> 48767 bytes
-rw-r--r--lib/json.jarbin0 -> 93396 bytes
-rw-r--r--lib/jung-3d-2.0.1.jarbin0 -> 73271 bytes
-rw-r--r--lib/jung-3d-demos-2.0.1.jarbin0 -> 3966 bytes
-rw-r--r--lib/jung-algorithms-2.0.1.jarbin0 -> 233113 bytes
-rw-r--r--lib/jung-api-2.0.1.jarbin0 -> 40975 bytes
-rw-r--r--lib/jung-graph-impl-2.0.1.jarbin0 -> 62329 bytes
-rw-r--r--lib/jung-io-2.0.1.jarbin0 -> 79372 bytes
-rw-r--r--lib/jung-jai-2.0.1.jarbin0 -> 20440 bytes
-rw-r--r--lib/jung-jai-samples-2.0.1.jarbin0 -> 46790 bytes
-rw-r--r--lib/jung-samples-2.0.1.jarbin0 -> 811841 bytes
-rw-r--r--lib/jung-visualization-2.0.1.jarbin0 -> 324398 bytes
-rw-r--r--lib/junit/junit-3.8.2-api.zipbin0 -> 72555 bytes
-rw-r--r--lib/junit/junit-3.8.2.jarbin0 -> 118932 bytes
-rw-r--r--lib/junit_4/junit-4.5-api.zipbin0 -> 184067 bytes
-rw-r--r--lib/junit_4/junit-4.5-src.jarbin0 -> 109014 bytes
-rw-r--r--lib/junit_4/junit-4.5.jarbin0 -> 196787 bytes
-rw-r--r--lib/log4j-1.2.15.jarbin0 -> 391834 bytes
-rw-r--r--lib/mallet-deps.jarbin0 -> 2644050 bytes
-rw-r--r--lib/mallet.jarbin0 -> 2125173 bytes
-rw-r--r--lib/nblibraries.properties14
-rw-r--r--lib/signpost-commonshttp4-1.2.1.2.jarbin0 -> 6512 bytes
-rw-r--r--lib/signpost-core-1.2.1.2.jarbin0 -> 45277 bytes
-rw-r--r--lib/stax-api-1.0.1.jarbin0 -> 26514 bytes
-rw-r--r--lib/vecmath-1.3.1.jarbin0 -> 289881 bytes
-rw-r--r--lib/wstx-asl-3.2.6.jarbin0 -> 520389 bytes
-rw-r--r--manifest.mf3
-rw-r--r--nbproject/build-impl.xml1415
-rw-r--r--nbproject/genfiles.properties8
-rw-r--r--nbproject/project.properties135
-rw-r--r--nbproject/project.xml16
-rw-r--r--src/Chapter2/Location/LocationTranslationExample.java124
-rw-r--r--src/Chapter2/openauthentication/OAuthExample.java79
-rw-r--r--src/Chapter2/restapi/RESTApiExample.java676
-rw-r--r--src/Chapter2/restapi/RESTSearchExample.java311
-rw-r--r--src/Chapter2/streamingapi/StreamingApiExample.java372
-rw-r--r--src/Chapter2/support/APIType.java12
-rw-r--r--src/Chapter2/support/InfoType.java12
-rw-r--r--src/Chapter2/support/Location.java28
-rw-r--r--src/Chapter2/support/OAuthTokenSecret.java38
-rw-r--r--src/Chapter4/GraphElements/RetweetEdge.java53
-rw-r--r--src/Chapter4/GraphElements/UserNode.java34
-rw-r--r--src/Chapter4/centrality/examples/BetweennessCentralityExample.java31
-rw-r--r--src/Chapter4/centrality/examples/EigenvectorCentralityExample.java36
-rw-r--r--src/Chapter4/centrality/examples/InDegreeCentralityExample.java30
-rw-r--r--src/Chapter4/centrality/examples/PageRankCentralityExample.java39
-rw-r--r--src/Chapter4/classification/bayes/Classification.java22
-rw-r--r--src/Chapter4/classification/bayes/NBCxv.java60
-rw-r--r--src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java264
-rw-r--r--src/Chapter4/classification/bayes/StopwordsList.java10
-rw-r--r--src/Chapter4/classification/bayes/TestNBC.java49
-rw-r--r--src/Chapter4/classification/bayes/WordCountPair.java34
-rw-r--r--src/Chapter4/graph/visualization/SimpleGraphViewer.java86
-rw-r--r--src/Chapter4/tweetlda/LDA.java89
-rw-r--r--src/Chapter4/tweetlda/PorterStemmer.java33
-rw-r--r--src/Chapter4/tweetlda/Stemmer.java428
-rw-r--r--src/Chapter4/util/BetweennessScorer.java25
-rw-r--r--src/Chapter4/util/EigenVectorScorer.java64
-rw-r--r--src/Chapter4/util/InDegreeScorer.java30
-rw-r--r--src/Chapter4/util/TweetFileProcessor.java76
-rw-r--r--src/Chapter4/util/TweetFileToGraph.java77
-rw-r--r--src/Chapter5/network/CreateD3Network.java716
-rw-r--r--src/Chapter5/network/ExtractUserTagNetwork.java173
-rw-r--r--src/Chapter5/support/DateInfo.java30
-rw-r--r--src/Chapter5/support/HashTagDS.java18
-rw-r--r--src/Chapter5/support/NetworkNode.java49
-rw-r--r--src/Chapter5/support/NodeIDComparator.java32
-rw-r--r--src/Chapter5/support/NodeSizeComparator.java29
-rw-r--r--src/Chapter5/support/ToNodeInfo.java23
-rw-r--r--src/Chapter5/support/Tweet.java21
-rw-r--r--src/Chapter5/text/EventSummaryExtractor.java269
-rw-r--r--src/Chapter5/text/ExtractTopKeywords.java151
-rw-r--r--src/Chapter5/trends/ControlChartExample.java144
-rw-r--r--src/Chapter5/trends/DateInfo.java29
-rw-r--r--src/Chapter5/trends/ExtractDatasetTrend.java120
-rw-r--r--src/Chapter5/trends/SparkLineExample.java163
-rw-r--r--src/Chapter5/trends/TCDateInfo.java31
-rw-r--r--src/Chapter5/trends/TrendComparisonExample.java155
-rw-r--r--src/utils/OAuthUtils.java21
-rw-r--r--src/utils/Tags.java52
-rw-r--r--src/utils/TextUtils.java212
-rw-r--r--streaming/streaming.config3
96 files changed, 7339 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bae604d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,12 @@
+/nbproject/private/
+/build/
+/dist/
+
+# JaCoCo test coverage tool
+.jacocoverage/
+jacoco.exec-*
+
+# Editor temp files, diff, etc.
+*~
+.*.sw?
+*.orig
diff --git a/build.xml b/build.xml
new file mode 100644
index 0000000..24aba47
--- /dev/null
+++ b/build.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- You may freely edit this file. See commented blocks below for -->
+<!-- some examples of how to customize the build. -->
+<!-- (If you delete it and reopen the project it will be recreated.) -->
+<!-- By default, only the Clean and Build commands use this build script. -->
+<!-- Commands such as Run, Debug, and Test only use this build script if -->
+<!-- the Compile on Save feature is turned off for the project. -->
+<!-- You can turn off the Compile on Save (or Deploy on Save) setting -->
+<!-- in the project's Project Properties dialog box.-->
+<project name="TwitterDataAnalytics" default="default" basedir=".">
+ <description>Builds, tests, and runs the project TwitterDataAnalytics.</description>
+ <import file="nbproject/build-impl.xml"/>
+ <!--
+
+ There exist several targets which are by default empty and which can be
+ used for execution of your tasks. These targets are usually executed
+ before and after some main targets. They are:
+
+ -pre-init: called before initialization of project properties
+ -post-init: called after initialization of project properties
+ -pre-compile: called before javac compilation
+ -post-compile: called after javac compilation
+ -pre-compile-single: called before javac compilation of single file
+ -post-compile-single: called after javac compilation of single file
+ -pre-compile-test: called before javac compilation of JUnit tests
+ -post-compile-test: called after javac compilation of JUnit tests
+ -pre-compile-test-single: called before javac compilation of single JUnit test
+ -post-compile-test-single: called after javac compilation of single JUnit test
+ -pre-jar: called before JAR building
+ -post-jar: called after JAR building
+ -post-clean: called after cleaning build products
+
+ (Targets beginning with '-' are not intended to be called on their own.)
+
+ Example of inserting an obfuscator after compilation could look like this:
+
+ <target name="-post-compile">
+ <obfuscate>
+ <fileset dir="${build.classes.dir}"/>
+ </obfuscate>
+ </target>
+
+ For a list of available properties, check the imported
+ nbproject/build-impl.xml file.
+
+
+ Another way to customize the build is by overriding existing main targets.
+ The targets of interest are:
+
+ -init-macrodef-javac: defines macro for javac compilation
+ -init-macrodef-junit: defines macro for junit execution
+ -init-macrodef-debug: defines macro for class debugging
+ -init-macrodef-java: defines macro for class execution
+ -do-jar: JAR building
+ run: execution of project
+ -javadoc-build: Javadoc generation
+ test-report: JUnit report generation
+
+ An example of overriding the target for project execution could look like this:
+
+ <target name="run" depends="TwitterDataAnalytics-impl.jar">
+ <exec dir="bin" executable="launcher.exe">
+ <arg file="${dist.jar}"/>
+ </exec>
+ </target>
+
+ Notice that the overridden target depends on the jar target and not only on
+ the compile target as the regular run target does. Again, for a list of available
+ properties which you can use, check the target you are overriding in the
+ nbproject/build-impl.xml file.
+
+ -->
+</project>
diff --git a/lib/CopyLibs/org-netbeans-modules-java-j2seproject-copylibstask.jar b/lib/CopyLibs/org-netbeans-modules-java-j2seproject-copylibstask.jar
new file mode 100644
index 0000000..ff1abcc
--- /dev/null
+++ b/lib/CopyLibs/org-netbeans-modules-java-j2seproject-copylibstask.jar
Binary files differ
diff --git a/lib/collections-generic-4.01.jar b/lib/collections-generic-4.01.jar
new file mode 100644
index 0000000..92d009c
--- /dev/null
+++ b/lib/collections-generic-4.01.jar
Binary files differ
diff --git a/lib/colt-1.2.0.jar b/lib/colt-1.2.0.jar
new file mode 100644
index 0000000..a7192f6
--- /dev/null
+++ b/lib/colt-1.2.0.jar
Binary files differ
diff --git a/lib/commons-codec-1.7.jar b/lib/commons-codec-1.7.jar
new file mode 100644
index 0000000..efa7f72
--- /dev/null
+++ b/lib/commons-codec-1.7.jar
Binary files differ
diff --git a/lib/commons-httpclient-3.1_1.jar b/lib/commons-httpclient-3.1_1.jar
new file mode 100644
index 0000000..7c59774
--- /dev/null
+++ b/lib/commons-httpclient-3.1_1.jar
Binary files differ
diff --git a/lib/commons-lang-2.6.jar b/lib/commons-lang-2.6.jar
new file mode 100644
index 0000000..98467d3
--- /dev/null
+++ b/lib/commons-lang-2.6.jar
Binary files differ
diff --git a/lib/commons-logging-1.1.1.jar b/lib/commons-logging-1.1.1.jar
new file mode 100644
index 0000000..8758a96
--- /dev/null
+++ b/lib/commons-logging-1.1.1.jar
Binary files differ
diff --git a/lib/concurrent-1.3.4.jar b/lib/concurrent-1.3.4.jar
new file mode 100644
index 0000000..551f347
--- /dev/null
+++ b/lib/concurrent-1.3.4.jar
Binary files differ
diff --git a/lib/gson-2.2.4.jar b/lib/gson-2.2.4.jar
new file mode 100644
index 0000000..9478253
--- /dev/null
+++ b/lib/gson-2.2.4.jar
Binary files differ
diff --git a/lib/httpclient-4.2.1.jar b/lib/httpclient-4.2.1.jar
new file mode 100644
index 0000000..1d52333
--- /dev/null
+++ b/lib/httpclient-4.2.1.jar
Binary files differ
diff --git a/lib/httpcore-4.2.1.jar b/lib/httpcore-4.2.1.jar
new file mode 100644
index 0000000..16d75e1
--- /dev/null
+++ b/lib/httpcore-4.2.1.jar
Binary files differ
diff --git a/lib/j3d-core-1.3.1.jar b/lib/j3d-core-1.3.1.jar
new file mode 100644
index 0000000..cfe6364
--- /dev/null
+++ b/lib/j3d-core-1.3.1.jar
Binary files differ
diff --git a/lib/jfig-1.5.2.jar b/lib/jfig-1.5.2.jar
new file mode 100644
index 0000000..d671f83
--- /dev/null
+++ b/lib/jfig-1.5.2.jar
Binary files differ
diff --git a/lib/json.jar b/lib/json.jar
new file mode 100644
index 0000000..5a93e51
--- /dev/null
+++ b/lib/json.jar
Binary files differ
diff --git a/lib/jung-3d-2.0.1.jar b/lib/jung-3d-2.0.1.jar
new file mode 100644
index 0000000..05c3f18
--- /dev/null
+++ b/lib/jung-3d-2.0.1.jar
Binary files differ
diff --git a/lib/jung-3d-demos-2.0.1.jar b/lib/jung-3d-demos-2.0.1.jar
new file mode 100644
index 0000000..10fd834
--- /dev/null
+++ b/lib/jung-3d-demos-2.0.1.jar
Binary files differ
diff --git a/lib/jung-algorithms-2.0.1.jar b/lib/jung-algorithms-2.0.1.jar
new file mode 100644
index 0000000..5b98f9c
--- /dev/null
+++ b/lib/jung-algorithms-2.0.1.jar
Binary files differ
diff --git a/lib/jung-api-2.0.1.jar b/lib/jung-api-2.0.1.jar
new file mode 100644
index 0000000..6dcac89
--- /dev/null
+++ b/lib/jung-api-2.0.1.jar
Binary files differ
diff --git a/lib/jung-graph-impl-2.0.1.jar b/lib/jung-graph-impl-2.0.1.jar
new file mode 100644
index 0000000..a64f6f7
--- /dev/null
+++ b/lib/jung-graph-impl-2.0.1.jar
Binary files differ
diff --git a/lib/jung-io-2.0.1.jar b/lib/jung-io-2.0.1.jar
new file mode 100644
index 0000000..4059dcd
--- /dev/null
+++ b/lib/jung-io-2.0.1.jar
Binary files differ
diff --git a/lib/jung-jai-2.0.1.jar b/lib/jung-jai-2.0.1.jar
new file mode 100644
index 0000000..feeb09d
--- /dev/null
+++ b/lib/jung-jai-2.0.1.jar
Binary files differ
diff --git a/lib/jung-jai-samples-2.0.1.jar b/lib/jung-jai-samples-2.0.1.jar
new file mode 100644
index 0000000..784cd88
--- /dev/null
+++ b/lib/jung-jai-samples-2.0.1.jar
Binary files differ
diff --git a/lib/jung-samples-2.0.1.jar b/lib/jung-samples-2.0.1.jar
new file mode 100644
index 0000000..838461d
--- /dev/null
+++ b/lib/jung-samples-2.0.1.jar
Binary files differ
diff --git a/lib/jung-visualization-2.0.1.jar b/lib/jung-visualization-2.0.1.jar
new file mode 100644
index 0000000..c611e77
--- /dev/null
+++ b/lib/jung-visualization-2.0.1.jar
Binary files differ
diff --git a/lib/junit/junit-3.8.2-api.zip b/lib/junit/junit-3.8.2-api.zip
new file mode 100644
index 0000000..6d792fd
--- /dev/null
+++ b/lib/junit/junit-3.8.2-api.zip
Binary files differ
diff --git a/lib/junit/junit-3.8.2.jar b/lib/junit/junit-3.8.2.jar
new file mode 100644
index 0000000..d835872
--- /dev/null
+++ b/lib/junit/junit-3.8.2.jar
Binary files differ
diff --git a/lib/junit_4/junit-4.5-api.zip b/lib/junit_4/junit-4.5-api.zip
new file mode 100644
index 0000000..5748c44
--- /dev/null
+++ b/lib/junit_4/junit-4.5-api.zip
Binary files differ
diff --git a/lib/junit_4/junit-4.5-src.jar b/lib/junit_4/junit-4.5-src.jar
new file mode 100644
index 0000000..18774a5
--- /dev/null
+++ b/lib/junit_4/junit-4.5-src.jar
Binary files differ
diff --git a/lib/junit_4/junit-4.5.jar b/lib/junit_4/junit-4.5.jar
new file mode 100644
index 0000000..83f8bc7
--- /dev/null
+++ b/lib/junit_4/junit-4.5.jar
Binary files differ
diff --git a/lib/log4j-1.2.15.jar b/lib/log4j-1.2.15.jar
new file mode 100644
index 0000000..c930a6a
--- /dev/null
+++ b/lib/log4j-1.2.15.jar
Binary files differ
diff --git a/lib/mallet-deps.jar b/lib/mallet-deps.jar
new file mode 100644
index 0000000..05517df
--- /dev/null
+++ b/lib/mallet-deps.jar
Binary files differ
diff --git a/lib/mallet.jar b/lib/mallet.jar
new file mode 100644
index 0000000..fb8fef5
--- /dev/null
+++ b/lib/mallet.jar
Binary files differ
diff --git a/lib/nblibraries.properties b/lib/nblibraries.properties
new file mode 100644
index 0000000..52864ae
--- /dev/null
+++ b/lib/nblibraries.properties
@@ -0,0 +1,14 @@
+libs.CopyLibs.classpath=\
+ ${base}/CopyLibs/org-netbeans-modules-java-j2seproject-copylibstask.jar
+libs.CopyLibs.displayName=CopyLibs Task
+libs.CopyLibs.prop-version=2.0
+libs.junit.classpath=\
+ ${base}/junit/junit-3.8.2.jar
+libs.junit.javadoc=\
+ ${base}/junit/junit-3.8.2-api.zip
+libs.junit_4.classpath=\
+ ${base}/junit_4/junit-4.5.jar
+libs.junit_4.javadoc=\
+ ${base}/junit_4/junit-4.5-api.zip
+libs.junit_4.src=\
+ ${base}/junit_4/junit-4.5-src.jar
diff --git a/lib/signpost-commonshttp4-1.2.1.2.jar b/lib/signpost-commonshttp4-1.2.1.2.jar
new file mode 100644
index 0000000..fd37cfa
--- /dev/null
+++ b/lib/signpost-commonshttp4-1.2.1.2.jar
Binary files differ
diff --git a/lib/signpost-core-1.2.1.2.jar b/lib/signpost-core-1.2.1.2.jar
new file mode 100644
index 0000000..8871730
--- /dev/null
+++ b/lib/signpost-core-1.2.1.2.jar
Binary files differ
diff --git a/lib/stax-api-1.0.1.jar b/lib/stax-api-1.0.1.jar
new file mode 100644
index 0000000..d9a1665
--- /dev/null
+++ b/lib/stax-api-1.0.1.jar
Binary files differ
diff --git a/lib/vecmath-1.3.1.jar b/lib/vecmath-1.3.1.jar
new file mode 100644
index 0000000..fc2244b
--- /dev/null
+++ b/lib/vecmath-1.3.1.jar
Binary files differ
diff --git a/lib/wstx-asl-3.2.6.jar b/lib/wstx-asl-3.2.6.jar
new file mode 100644
index 0000000..aee5f0c
--- /dev/null
+++ b/lib/wstx-asl-3.2.6.jar
Binary files differ
diff --git a/manifest.mf b/manifest.mf
new file mode 100644
index 0000000..1574df4
--- /dev/null
+++ b/manifest.mf
@@ -0,0 +1,3 @@
+Manifest-Version: 1.0
+X-COMMENT: Main-Class will be added automatically by build
+
diff --git a/nbproject/build-impl.xml b/nbproject/build-impl.xml
new file mode 100644
index 0000000..7e854aa
--- /dev/null
+++ b/nbproject/build-impl.xml
@@ -0,0 +1,1415 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+*** GENERATED FROM project.xml - DO NOT EDIT ***
+*** EDIT ../build.xml INSTEAD ***
+
+For the purpose of easier reading the script
+is divided into following sections:
+
+ - initialization
+ - compilation
+ - jar
+ - execution
+ - debugging
+ - javadoc
+ - test compilation
+ - test execution
+ - test debugging
+ - applet
+ - cleanup
+
+ -->
+<project xmlns:j2seproject1="http://www.netbeans.org/ns/j2se-project/1" xmlns:j2seproject3="http://www.netbeans.org/ns/j2se-project/3" xmlns:jaxrpc="http://www.netbeans.org/ns/j2se-project/jax-rpc" basedir=".." default="default" name="TwitterDataAnalytics-impl">
+ <fail message="Please build using Ant 1.8.0 or higher.">
+ <condition>
+ <not>
+ <antversion atleast="1.8.0"/>
+ </not>
+ </condition>
+ </fail>
+ <target depends="test,jar,javadoc" description="Build and test whole project." name="default"/>
+ <!--
+ ======================
+ INITIALIZATION SECTION
+ ======================
+ -->
+ <target name="-pre-init">
+ <!-- Empty placeholder for easier customization. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <target depends="-pre-init" name="-init-private">
+ <property file="nbproject/private/config.properties"/>
+ <property file="nbproject/private/configs/${config}.properties"/>
+ <property file="nbproject/private/private.properties"/>
+ </target>
+ <target name="-pre-init-libraries">
+ <property location=".\lib\nblibraries.properties" name="libraries.path"/>
+ <dirname file="${libraries.path}" property="libraries.dir.nativedirsep"/>
+ <pathconvert dirsep="/" property="libraries.dir">
+ <path path="${libraries.dir.nativedirsep}"/>
+ </pathconvert>
+ <basename file="${libraries.path}" property="libraries.basename" suffix=".properties"/>
+ <available file="${libraries.dir}/${libraries.basename}-private.properties" property="private.properties.available"/>
+ </target>
+ <target depends="-pre-init-libraries" if="private.properties.available" name="-init-private-libraries">
+ <loadproperties encoding="ISO-8859-1" srcfile="${libraries.dir}/${libraries.basename}-private.properties">
+ <filterchain>
+ <replacestring from="$${base}" to="${libraries.dir}"/>
+ <escapeunicode/>
+ </filterchain>
+ </loadproperties>
+ </target>
+ <target depends="-pre-init,-init-private,-init-private-libraries" name="-init-libraries">
+ <loadproperties encoding="ISO-8859-1" srcfile="${libraries.path}">
+ <filterchain>
+ <replacestring from="$${base}" to="${libraries.dir}"/>
+ <escapeunicode/>
+ </filterchain>
+ </loadproperties>
+ </target>
+ <target depends="-pre-init,-init-private,-init-libraries" name="-init-user">
+ <property file="${user.properties.file}"/>
+ <!-- The two properties below are usually overridden -->
+ <!-- by the active platform. Just a fallback. -->
+ <property name="default.javac.source" value="1.4"/>
+ <property name="default.javac.target" value="1.4"/>
+ </target>
+ <target depends="-pre-init,-init-private,-init-libraries,-init-user" name="-init-project">
+ <property file="nbproject/configs/${config}.properties"/>
+ <property file="nbproject/project.properties"/>
+ </target>
+ <target depends="-pre-init,-init-private,-init-libraries,-init-user,-init-project,-init-macrodef-property" name="-do-init">
+ <property name="platform.java" value="${java.home}/bin/java"/>
+ <available file="${manifest.file}" property="manifest.available"/>
+ <condition property="splashscreen.available">
+ <and>
+ <not>
+ <equals arg1="${application.splash}" arg2="" trim="true"/>
+ </not>
+ <available file="${application.splash}"/>
+ </and>
+ </condition>
+ <condition property="main.class.available">
+ <and>
+ <isset property="main.class"/>
+ <not>
+ <equals arg1="${main.class}" arg2="" trim="true"/>
+ </not>
+ </and>
+ </condition>
+ <condition property="profile.available">
+ <and>
+ <isset property="javac.profile"/>
+ <length length="0" string="${javac.profile}" when="greater"/>
+ <matches pattern="1\.[89](\..*)?" string="${javac.source}"/>
+ </and>
+ </condition>
+ <condition property="do.archive">
+ <not>
+ <istrue value="${jar.archive.disabled}"/>
+ </not>
+ </condition>
+ <condition property="do.mkdist">
+ <and>
+ <isset property="do.archive"/>
+ <isset property="libs.CopyLibs.classpath"/>
+ <not>
+ <istrue value="${mkdist.disabled}"/>
+ </not>
+ </and>
+ </condition>
+ <condition property="do.archive+manifest.available">
+ <and>
+ <isset property="manifest.available"/>
+ <istrue value="${do.archive}"/>
+ </and>
+ </condition>
+ <condition property="do.archive+main.class.available">
+ <and>
+ <isset property="main.class.available"/>
+ <istrue value="${do.archive}"/>
+ </and>
+ </condition>
+ <condition property="do.archive+splashscreen.available">
+ <and>
+ <isset property="splashscreen.available"/>
+ <istrue value="${do.archive}"/>
+ </and>
+ </condition>
+ <condition property="do.archive+profile.available">
+ <and>
+ <isset property="profile.available"/>
+ <istrue value="${do.archive}"/>
+ </and>
+ </condition>
+ <condition property="have.tests">
+ <or/>
+ </condition>
+ <condition property="have.sources">
+ <or>
+ <available file="${src.dir}"/>
+ </or>
+ </condition>
+ <condition property="netbeans.home+have.tests">
+ <and>
+ <isset property="netbeans.home"/>
+ <isset property="have.tests"/>
+ </and>
+ </condition>
+ <condition property="no.javadoc.preview">
+ <and>
+ <isset property="javadoc.preview"/>
+ <isfalse value="${javadoc.preview}"/>
+ </and>
+ </condition>
+ <property name="run.jvmargs" value=""/>
+ <property name="run.jvmargs.ide" value=""/>
+ <property name="javac.compilerargs" value=""/>
+ <property name="work.dir" value="${basedir}"/>
+ <condition property="no.deps">
+ <and>
+ <istrue value="${no.dependencies}"/>
+ </and>
+ </condition>
+ <property name="javac.debug" value="true"/>
+ <property name="javadoc.preview" value="true"/>
+ <property name="application.args" value=""/>
+ <property name="source.encoding" value="${file.encoding}"/>
+ <property name="runtime.encoding" value="${source.encoding}"/>
+ <condition property="javadoc.encoding.used" value="${javadoc.encoding}">
+ <and>
+ <isset property="javadoc.encoding"/>
+ <not>
+ <equals arg1="${javadoc.encoding}" arg2=""/>
+ </not>
+ </and>
+ </condition>
+ <property name="javadoc.encoding.used" value="${source.encoding}"/>
+ <property name="includes" value="**"/>
+ <property name="excludes" value=""/>
+ <property name="do.depend" value="false"/>
+ <condition property="do.depend.true">
+ <istrue value="${do.depend}"/>
+ </condition>
+ <path id="endorsed.classpath.path" path="${endorsed.classpath}"/>
+ <condition else="" property="endorsed.classpath.cmd.line.arg" value="-Xbootclasspath/p:'${toString:endorsed.classpath.path}'">
+ <and>
+ <isset property="endorsed.classpath"/>
+ <not>
+ <equals arg1="${endorsed.classpath}" arg2="" trim="true"/>
+ </not>
+ </and>
+ </condition>
+ <condition else="" property="javac.profile.cmd.line.arg" value="-profile ${javac.profile}">
+ <isset property="profile.available"/>
+ </condition>
+ <condition else="false" property="jdkBug6558476">
+ <and>
+ <matches pattern="1\.[56]" string="${java.specification.version}"/>
+ <not>
+ <os family="unix"/>
+ </not>
+ </and>
+ </condition>
+ <property name="javac.fork" value="${jdkBug6558476}"/>
+ <property name="jar.index" value="false"/>
+ <property name="jar.index.metainf" value="${jar.index}"/>
+ <property name="copylibs.rebase" value="true"/>
+ <available file="${meta.inf.dir}/persistence.xml" property="has.persistence.xml"/>
+ <condition property="junit.available">
+ <or>
+ <available classname="org.junit.Test" classpath="${run.test.classpath}"/>
+ <available classname="junit.framework.Test" classpath="${run.test.classpath}"/>
+ </or>
+ </condition>
+ <condition property="testng.available">
+ <available classname="org.testng.annotations.Test" classpath="${run.test.classpath}"/>
+ </condition>
+ <condition property="junit+testng.available">
+ <and>
+ <istrue value="${junit.available}"/>
+ <istrue value="${testng.available}"/>
+ </and>
+ </condition>
+ <condition else="testng" property="testng.mode" value="mixed">
+ <istrue value="${junit+testng.available}"/>
+ </condition>
+ <condition else="" property="testng.debug.mode" value="-mixed">
+ <istrue value="${junit+testng.available}"/>
+ </condition>
+ </target>
+ <target name="-post-init">
+ <!-- Empty placeholder for easier customization. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <target depends="-pre-init,-init-private,-init-libraries,-init-user,-init-project,-do-init" name="-init-check">
+ <fail unless="src.dir">Must set src.dir</fail>
+ <fail unless="build.dir">Must set build.dir</fail>
+ <fail unless="dist.dir">Must set dist.dir</fail>
+ <fail unless="build.classes.dir">Must set build.classes.dir</fail>
+ <fail unless="dist.javadoc.dir">Must set dist.javadoc.dir</fail>
+ <fail unless="build.test.classes.dir">Must set build.test.classes.dir</fail>
+ <fail unless="build.test.results.dir">Must set build.test.results.dir</fail>
+ <fail unless="build.classes.excludes">Must set build.classes.excludes</fail>
+ <fail unless="dist.jar">Must set dist.jar</fail>
+ </target>
+ <target name="-init-macrodef-property">
+ <macrodef name="property" uri="http://www.netbeans.org/ns/j2se-project/1">
+ <attribute name="name"/>
+ <attribute name="value"/>
+ <sequential>
+ <property name="@{name}" value="${@{value}}"/>
+ </sequential>
+ </macrodef>
+ </target>
+ <target depends="-init-ap-cmdline-properties" if="ap.supported.internal" name="-init-macrodef-javac-with-processors">
+ <macrodef name="javac" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${src.dir}" name="srcdir"/>
+ <attribute default="${build.classes.dir}" name="destdir"/>
+ <attribute default="${javac.classpath}" name="classpath"/>
+ <attribute default="${javac.processorpath}" name="processorpath"/>
+ <attribute default="${build.generated.sources.dir}/ap-source-output" name="apgeneratedsrcdir"/>
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="${javac.debug}" name="debug"/>
+ <attribute default="${empty.dir}" name="sourcepath"/>
+ <attribute default="${empty.dir}" name="gensrcdir"/>
+ <element name="customize" optional="true"/>
+ <sequential>
+ <property location="${build.dir}/empty" name="empty.dir"/>
+ <mkdir dir="${empty.dir}"/>
+ <mkdir dir="@{apgeneratedsrcdir}"/>
+ <javac debug="@{debug}" deprecation="${javac.deprecation}" destdir="@{destdir}" encoding="${source.encoding}" excludes="@{excludes}" fork="${javac.fork}" includeantruntime="false" includes="@{includes}" source="${javac.source}" sourcepath="@{sourcepath}" srcdir="@{srcdir}" target="${javac.target}" tempdir="${java.io.tmpdir}">
+ <src>
+ <dirset dir="@{gensrcdir}" erroronmissingdir="false">
+ <include name="*"/>
+ </dirset>
+ </src>
+ <classpath>
+ <path path="@{classpath}"/>
+ </classpath>
+ <compilerarg line="${endorsed.classpath.cmd.line.arg}"/>
+ <compilerarg line="${javac.profile.cmd.line.arg}"/>
+ <compilerarg line="${javac.compilerargs}"/>
+ <compilerarg value="-processorpath"/>
+ <compilerarg path="@{processorpath}:${empty.dir}"/>
+ <compilerarg line="${ap.processors.internal}"/>
+ <compilerarg line="${annotation.processing.processor.options}"/>
+ <compilerarg value="-s"/>
+ <compilerarg path="@{apgeneratedsrcdir}"/>
+ <compilerarg line="${ap.proc.none.internal}"/>
+ <customize/>
+ </javac>
+ </sequential>
+ </macrodef>
+ </target>
+ <target depends="-init-ap-cmdline-properties" name="-init-macrodef-javac-without-processors" unless="ap.supported.internal">
+ <macrodef name="javac" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${src.dir}" name="srcdir"/>
+ <attribute default="${build.classes.dir}" name="destdir"/>
+ <attribute default="${javac.classpath}" name="classpath"/>
+ <attribute default="${javac.processorpath}" name="processorpath"/>
+ <attribute default="${build.generated.sources.dir}/ap-source-output" name="apgeneratedsrcdir"/>
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="${javac.debug}" name="debug"/>
+ <attribute default="${empty.dir}" name="sourcepath"/>
+ <attribute default="${empty.dir}" name="gensrcdir"/>
+ <element name="customize" optional="true"/>
+ <sequential>
+ <property location="${build.dir}/empty" name="empty.dir"/>
+ <mkdir dir="${empty.dir}"/>
+ <javac debug="@{debug}" deprecation="${javac.deprecation}" destdir="@{destdir}" encoding="${source.encoding}" excludes="@{excludes}" fork="${javac.fork}" includeantruntime="false" includes="@{includes}" source="${javac.source}" sourcepath="@{sourcepath}" srcdir="@{srcdir}" target="${javac.target}" tempdir="${java.io.tmpdir}">
+ <src>
+ <dirset dir="@{gensrcdir}" erroronmissingdir="false">
+ <include name="*"/>
+ </dirset>
+ </src>
+ <classpath>
+ <path path="@{classpath}"/>
+ </classpath>
+ <compilerarg line="${endorsed.classpath.cmd.line.arg}"/>
+ <compilerarg line="${javac.profile.cmd.line.arg}"/>
+ <compilerarg line="${javac.compilerargs}"/>
+ <customize/>
+ </javac>
+ </sequential>
+ </macrodef>
+ </target>
+ <target depends="-init-macrodef-javac-with-processors,-init-macrodef-javac-without-processors" name="-init-macrodef-javac">
+ <macrodef name="depend" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${src.dir}" name="srcdir"/>
+ <attribute default="${build.classes.dir}" name="destdir"/>
+ <attribute default="${javac.classpath}" name="classpath"/>
+ <sequential>
+ <depend cache="${build.dir}/depcache" destdir="@{destdir}" excludes="${excludes}" includes="${includes}" srcdir="@{srcdir}">
+ <classpath>
+ <path path="@{classpath}"/>
+ </classpath>
+ </depend>
+ </sequential>
+ </macrodef>
+ <macrodef name="force-recompile" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${build.classes.dir}" name="destdir"/>
+ <sequential>
+ <fail unless="javac.includes">Must set javac.includes</fail>
+ <pathconvert pathsep="${line.separator}" property="javac.includes.binary">
+ <path>
+ <filelist dir="@{destdir}" files="${javac.includes}"/>
+ </path>
+ <globmapper from="*.java" to="*.class"/>
+ </pathconvert>
+ <tempfile deleteonexit="true" property="javac.includesfile.binary"/>
+ <echo file="${javac.includesfile.binary}" message="${javac.includes.binary}"/>
+ <delete>
+ <files includesfile="${javac.includesfile.binary}"/>
+ </delete>
+ <delete>
+ <fileset file="${javac.includesfile.binary}"/>
+ </delete>
+ </sequential>
+ </macrodef>
+ </target>
+ <target if="${junit.available}" name="-init-macrodef-junit-init">
+ <condition else="false" property="nb.junit.batch" value="true">
+ <and>
+ <istrue value="${junit.available}"/>
+ <not>
+ <isset property="test.method"/>
+ </not>
+ </and>
+ </condition>
+ <condition else="false" property="nb.junit.single" value="true">
+ <and>
+ <istrue value="${junit.available}"/>
+ <isset property="test.method"/>
+ </and>
+ </condition>
+ </target>
+ <target name="-init-test-properties">
+ <property name="test.binaryincludes" value="&lt;nothing&gt;"/>
+ <property name="test.binarytestincludes" value=""/>
+ <property name="test.binaryexcludes" value=""/>
+ </target>
+ <target if="${nb.junit.single}" name="-init-macrodef-junit-single" unless="${nb.junit.batch}">
+ <!-- Single-test variant of the j2seproject3 junit macro: runs one test class, optionally restricted to given methods. -->
+ <macrodef name="junit" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <!-- The includes and excludes attributes are declared but not referenced in this variant. -->
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="**" name="testincludes"/>
+ <attribute default="" name="testmethods"/>
+ <!-- Optional nested content supplied by the caller; it is expanded inside the junit task below. -->
+ <element name="customize" optional="true"/>
+ <sequential>
+ <!-- Default fork mode; a junit.forkmode value defined earlier takes precedence. -->
+ <property name="junit.forkmode" value="perTest"/>
+ <junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" showoutput="true" tempdir="${build.dir}">
+ <test methods="@{testmethods}" name="@{testincludes}" todir="${build.test.results.dir}"/>
+ <!-- Forwards every property named test-sys-prop.X to the test JVM as system property X. -->
+ <syspropertyset>
+ <propertyref prefix="test-sys-prop."/>
+ <mapper from="test-sys-prop.*" to="*" type="glob"/>
+ </syspropertyset>
+ <formatter type="brief" usefile="false"/>
+ <formatter type="xml"/>
+ <jvmarg value="-ea"/>
+ <customize/>
+ </junit>
+ </sequential>
+ </macrodef>
+ </target>
+ <target depends="-init-test-properties" if="${nb.junit.batch}" name="-init-macrodef-junit-batch" unless="${nb.junit.single}">
+ <!-- Batch variant of the j2seproject3 junit macro: runs every class file selected from build.test.classes.dir. -->
+ <macrodef name="junit" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <!-- Only the excludes attribute is referenced in the body; includes, testincludes and testmethods are declared but unused here. -->
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="**" name="testincludes"/>
+ <attribute default="" name="testmethods"/>
+ <!-- Optional nested content supplied by the caller; it is expanded inside the junit task below. -->
+ <element name="customize" optional="true"/>
+ <sequential>
+ <!-- Default fork mode; a junit.forkmode value defined earlier takes precedence. -->
+ <property name="junit.forkmode" value="perTest"/>
+ <junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" showoutput="true" tempdir="${build.dir}">
+ <batchtest todir="${build.test.results.dir}">
+ <!-- Selection is driven by the test.binary* properties plus the excludes attribute and the excludes property. -->
+ <fileset dir="${build.test.classes.dir}" excludes="@{excludes},${excludes},${test.binaryexcludes}" includes="${test.binaryincludes}">
+ <filename name="${test.binarytestincludes}"/>
+ </fileset>
+ </batchtest>
+ <!-- Forwards every property named test-sys-prop.X to the test JVM as system property X. -->
+ <syspropertyset>
+ <propertyref prefix="test-sys-prop."/>
+ <mapper from="test-sys-prop.*" to="*" type="glob"/>
+ </syspropertyset>
+ <formatter type="brief" usefile="false"/>
+ <formatter type="xml"/>
+ <jvmarg value="-ea"/>
+ <customize/>
+ </junit>
+ </sequential>
+ </macrodef>
+ </target>
+ <!-- Aggregator: computes the JUnit mode flags, then defines whichever junit macro variant applies. -->
+ <target name="-init-macrodef-junit"
+ if="${junit.available}"
+ depends="-init-macrodef-junit-init,-init-macrodef-junit-single, -init-macrodef-junit-batch"/>
+ <target if="${testng.available}" name="-init-macrodef-testng">
+ <!-- Defines the j2seproject3 testng macro, which loads the TestNG Ant task and runs it against build.test.classes.dir. -->
+ <macrodef name="testng" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <!-- The includes and excludes attributes are declared but not referenced in the body. -->
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="**" name="testincludes"/>
+ <attribute default="" name="testmethods"/>
+ <element name="customize" optional="true"/>
+ <sequential>
+ <!-- testng.methods.arg is "class.method" when test.method is set, otherwise the empty string. -->
+ <condition else="" property="testng.methods.arg" value="@{testincludes}.@{testmethods}">
+ <isset property="test.method"/>
+ </condition>
+ <!-- Empty resource collection, referenced below through classfilesetref. -->
+ <union id="test.set"/>
+ <taskdef classname="org.testng.TestNGAntTask" classpath="${run.test.classpath}" name="testng"/>
+ <testng classfilesetref="test.set" failureProperty="tests.failed" listeners="org.testng.reporters.VerboseReporter" methods="${testng.methods.arg}" mode="${testng.mode}" outputdir="${build.test.results.dir}" suitename="TwitterDataAnalytics" testname="TestNG tests" workingDir="${work.dir}">
+ <xmlfileset dir="${build.test.classes.dir}" includes="@{testincludes}"/>
+ <!-- Passes every property named test-sys-prop.X to the task under the name X. -->
+ <propertyset>
+ <propertyref prefix="test-sys-prop."/>
+ <mapper from="test-sys-prop.*" to="*" type="glob"/>
+ </propertyset>
+ <customize/>
+ </testng>
+ </sequential>
+ </macrodef>
+ </target>
+ <target name="-init-macrodef-test-impl">
+ <!-- Unconditional definition of the test-impl macro: accepts the common test attributes and only prints a message. -->
+ <!-- The JUnit and TestNG impl targets define a macro with the same name and URI when their framework is available. -->
+ <macrodef name="test-impl" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="**" name="testincludes"/>
+ <attribute default="" name="testmethods"/>
+ <!-- Implicit element: nested content of the caller is accepted but not used by this variant. -->
+ <element implicit="true" name="customize" optional="true"/>
+ <sequential>
+ <echo>No tests executed.</echo>
+ </sequential>
+ </macrodef>
+ </target>
+ <target depends="-init-macrodef-junit" if="${junit.available}" name="-init-macrodef-junit-impl">
+ <!-- JUnit definition of the test-impl macro: forwards all attributes and nested content to the j2seproject3 junit macro. -->
+ <macrodef name="test-impl" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="**" name="testincludes"/>
+ <attribute default="" name="testmethods"/>
+ <!-- Implicit element: everything nested in the caller is treated as the customize content. -->
+ <element implicit="true" name="customize" optional="true"/>
+ <sequential>
+ <j2seproject3:junit excludes="@{excludes}" includes="@{includes}" testincludes="@{testincludes}" testmethods="@{testmethods}">
+ <customize/>
+ </j2seproject3:junit>
+ </sequential>
+ </macrodef>
+ </target>
+ <target depends="-init-macrodef-testng" if="${testng.available}" name="-init-macrodef-testng-impl">
+ <!-- TestNG definition of the test-impl macro: forwards all attributes and nested content to the j2seproject3 testng macro. -->
+ <macrodef name="test-impl" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="**" name="testincludes"/>
+ <attribute default="" name="testmethods"/>
+ <!-- Implicit element: everything nested in the caller is treated as the customize content. -->
+ <element implicit="true" name="customize" optional="true"/>
+ <sequential>
+ <j2seproject3:testng excludes="@{excludes}" includes="@{includes}" testincludes="@{testincludes}" testmethods="@{testmethods}">
+ <customize/>
+ </j2seproject3:testng>
+ </sequential>
+ </macrodef>
+ </target>
+ <target depends="-init-macrodef-test-impl,-init-macrodef-junit-impl,-init-macrodef-testng-impl" name="-init-macrodef-test">
+ <!-- Defines the framework-neutral j2seproject3 test macro, which delegates to whichever test-impl macro is in effect. -->
+ <macrodef name="test" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="**" name="testincludes"/>
+ <attribute default="" name="testmethods"/>
+ <sequential>
+ <j2seproject3:test-impl excludes="@{excludes}" includes="@{includes}" testincludes="@{testincludes}" testmethods="@{testmethods}">
+ <!-- Supplies the test runtime classpath and the endorsed, project and IDE JVM arguments to the underlying task. -->
+ <customize>
+ <classpath>
+ <path path="${run.test.classpath}"/>
+ </classpath>
+ <jvmarg line="${endorsed.classpath.cmd.line.arg}"/>
+ <jvmarg line="${run.jvmargs}"/>
+ <jvmarg line="${run.jvmargs.ide}"/>
+ </customize>
+ </j2seproject3:test-impl>
+ </sequential>
+ </macrodef>
+ </target>
+ <target if="${junit.available}" name="-init-macrodef-junit-debug" unless="${nb.junit.batch}">
+ <!-- Single-test variant of the j2seproject3 junit-debug macro: same as the single junit macro plus JDWP debug arguments. -->
+ <macrodef name="junit-debug" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <!-- The includes and excludes attributes are declared but not referenced in this variant. -->
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="**" name="testincludes"/>
+ <attribute default="" name="testmethods"/>
+ <element name="customize" optional="true"/>
+ <sequential>
+ <!-- Default fork mode; a junit.forkmode value defined earlier takes precedence. -->
+ <property name="junit.forkmode" value="perTest"/>
+ <junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" showoutput="true" tempdir="${build.dir}">
+ <test methods="@{testmethods}" name="@{testincludes}" todir="${build.test.results.dir}"/>
+ <!-- Forwards every property named test-sys-prop.X to the test JVM as system property X. -->
+ <syspropertyset>
+ <propertyref prefix="test-sys-prop."/>
+ <mapper from="test-sys-prop.*" to="*" type="glob"/>
+ </syspropertyset>
+ <formatter type="brief" usefile="false"/>
+ <formatter type="xml"/>
+ <jvmarg value="-ea"/>
+ <!-- Debugger wiring: uses the debug-args-line, debug-transport and jpda.address properties. -->
+ <jvmarg line="${debug-args-line}"/>
+ <jvmarg value="-Xrunjdwp:transport=${debug-transport},address=${jpda.address}"/>
+ <customize/>
+ </junit>
+ </sequential>
+ </macrodef>
+ </target>
+ <target depends="-init-test-properties" if="${nb.junit.batch}" name="-init-macrodef-junit-debug-batch">
+ <!-- Batch variant of the j2seproject3 junit-debug macro: same as the batch junit macro plus JDWP debug arguments. -->
+ <macrodef name="junit-debug" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <!-- Only the excludes attribute is referenced in the body; includes, testincludes and testmethods are declared but unused here. -->
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="**" name="testincludes"/>
+ <attribute default="" name="testmethods"/>
+ <element name="customize" optional="true"/>
+ <sequential>
+ <!-- Default fork mode; a junit.forkmode value defined earlier takes precedence. -->
+ <property name="junit.forkmode" value="perTest"/>
+ <junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" showoutput="true" tempdir="${build.dir}">
+ <batchtest todir="${build.test.results.dir}">
+ <!-- Selection is driven by the test.binary* properties plus the excludes attribute and the excludes property. -->
+ <fileset dir="${build.test.classes.dir}" excludes="@{excludes},${excludes},${test.binaryexcludes}" includes="${test.binaryincludes}">
+ <filename name="${test.binarytestincludes}"/>
+ </fileset>
+ </batchtest>
+ <!-- Forwards every property named test-sys-prop.X to the test JVM as system property X. -->
+ <syspropertyset>
+ <propertyref prefix="test-sys-prop."/>
+ <mapper from="test-sys-prop.*" to="*" type="glob"/>
+ </syspropertyset>
+ <formatter type="brief" usefile="false"/>
+ <formatter type="xml"/>
+ <jvmarg value="-ea"/>
+ <!-- Debugger wiring: uses the debug-args-line, debug-transport and jpda.address properties. -->
+ <jvmarg line="${debug-args-line}"/>
+ <jvmarg value="-Xrunjdwp:transport=${debug-transport},address=${jpda.address}"/>
+ <customize/>
+ </junit>
+ </sequential>
+ </macrodef>
+ </target>
+ <target depends="-init-macrodef-junit-debug,-init-macrodef-junit-debug-batch" if="${junit.available}" name="-init-macrodef-junit-debug-impl">
+ <!-- Defines the test-debug-impl macro, which forwards all attributes and nested content to the j2seproject3 junit-debug macro. -->
+ <macrodef name="test-debug-impl" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="**" name="testincludes"/>
+ <attribute default="" name="testmethods"/>
+ <!-- Implicit element: everything nested in the caller is treated as the customize content. -->
+ <element implicit="true" name="customize" optional="true"/>
+ <sequential>
+ <j2seproject3:junit-debug excludes="@{excludes}" includes="@{includes}" testincludes="@{testincludes}" testmethods="@{testmethods}">
+ <customize/>
+ </j2seproject3:junit-debug>
+ </sequential>
+ </macrodef>
+ </target>
+ <target if="${testng.available}" name="-init-macrodef-testng-debug">
+ <!-- Defines the j2seproject3 testng-debug macro, which runs org.testng.TestNG through the j2seproject3 debug macro. -->
+ <macrodef name="testng-debug" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${main.class}" name="testClass"/>
+ <attribute default="" name="testMethod"/>
+ <element name="customize2" optional="true"/>
+ <sequential>
+ <!-- Selects "-methods class.method" when test.method is set, otherwise "-testclass class". -->
+ <condition else="-testclass @{testClass}" property="test.class.or.method" value="-methods @{testClass}.@{testMethod}">
+ <isset property="test.method"/>
+ </condition>
+ <!-- When testClass matches .*\.xml it is passed as the only argument; otherwise suite, test name and the class or method selector are passed. -->
+ <condition else="-suitename TwitterDataAnalytics -testname @{testClass} ${test.class.or.method}" property="testng.cmd.args" value="@{testClass}">
+ <matches pattern=".*\.xml" string="@{testClass}"/>
+ </condition>
+ <!-- Start from an empty results directory. -->
+ <delete dir="${build.test.results.dir}" quiet="true"/>
+ <mkdir dir="${build.test.results.dir}"/>
+ <j2seproject3:debug classname="org.testng.TestNG" classpath="${debug.test.classpath}">
+ <customize>
+ <customize2/>
+ <jvmarg value="-ea"/>
+ <arg line="${testng.debug.mode}"/>
+ <arg line="-d ${build.test.results.dir}"/>
+ <arg line="-listener org.testng.reporters.VerboseReporter"/>
+ <arg line="${testng.cmd.args}"/>
+ </customize>
+ </j2seproject3:debug>
+ </sequential>
+ </macrodef>
+ </target>
+ <target depends="-init-macrodef-testng-debug" if="${testng.available}" name="-init-macrodef-testng-debug-impl">
+ <!-- Defines the testng-debug-impl macro, which forwards testClass, testMethod and nested content to the testng-debug macro. -->
+ <macrodef name="testng-debug-impl" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${main.class}" name="testClass"/>
+ <attribute default="" name="testMethod"/>
+ <!-- Implicit element: everything nested in the caller is treated as the customize2 content. -->
+ <element implicit="true" name="customize2" optional="true"/>
+ <sequential>
+ <j2seproject3:testng-debug testClass="@{testClass}" testMethod="@{testMethod}">
+ <customize2/>
+ </j2seproject3:testng-debug>
+ </sequential>
+ </macrodef>
+ </target>
+ <target depends="-init-macrodef-junit-debug-impl" if="${junit.available}" name="-init-macrodef-test-debug-junit">
+ <!-- JUnit definition of the j2seproject3 test-debug macro; it delegates to the test-debug-impl macro. -->
+ <macrodef name="test-debug" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="**" name="testincludes"/>
+ <attribute default="" name="testmethods"/>
+ <!-- testClass and testMethod are declared but not referenced in this JUnit variant. -->
+ <attribute default="${main.class}" name="testClass"/>
+ <attribute default="" name="testMethod"/>
+ <sequential>
+ <j2seproject3:test-debug-impl excludes="@{excludes}" includes="@{includes}" testincludes="@{testincludes}" testmethods="@{testmethods}">
+ <!-- Supplies the test runtime classpath and the endorsed, project and IDE JVM arguments to the underlying task. -->
+ <customize>
+ <classpath>
+ <path path="${run.test.classpath}"/>
+ </classpath>
+ <jvmarg line="${endorsed.classpath.cmd.line.arg}"/>
+ <jvmarg line="${run.jvmargs}"/>
+ <jvmarg line="${run.jvmargs.ide}"/>
+ </customize>
+ </j2seproject3:test-debug-impl>
+ </sequential>
+ </macrodef>
+ </target>
+ <target depends="-init-macrodef-testng-debug-impl" if="${testng.available}" name="-init-macrodef-test-debug-testng">
+ <!-- TestNG definition of the j2seproject3 test-debug macro; it delegates to the testng-debug-impl macro. -->
+ <macrodef name="test-debug" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <!-- includes, excludes, testincludes and testmethods are declared but not referenced in this TestNG variant. -->
+ <attribute default="${includes}" name="includes"/>
+ <attribute default="${excludes}" name="excludes"/>
+ <attribute default="**" name="testincludes"/>
+ <attribute default="" name="testmethods"/>
+ <attribute default="${main.class}" name="testClass"/>
+ <attribute default="" name="testMethod"/>
+ <sequential>
+ <j2seproject3:testng-debug-impl testClass="@{testClass}" testMethod="@{testMethod}">
+ <customize2>
+ <!-- Forwards every property named test-sys-prop.X to the debugged JVM as system property X. -->
+ <syspropertyset>
+ <propertyref prefix="test-sys-prop."/>
+ <mapper from="test-sys-prop.*" to="*" type="glob"/>
+ </syspropertyset>
+ </customize2>
+ </j2seproject3:testng-debug-impl>
+ </sequential>
+ </macrodef>
+ </target>
+ <!-- Aggregator: runs the JUnit and TestNG targets that define the test-debug macro. -->
+ <target name="-init-macrodef-test-debug"
+ depends="-init-macrodef-test-debug-junit,-init-macrodef-test-debug-testng"/>
+ <!--
+ Profiling section used by NetBeans versions before 7.2; consider it deprecated.
+ -->
+ <!-- Aggregator for legacy profiling setup; only runs when profiler.info.jvmargs.agent is set. -->
+ <target name="profile-init"
+ if="profiler.info.jvmargs.agent"
+ depends="-profile-pre-init, init, -profile-post-init, -profile-init-macrodef-profile, -profile-init-check"/>
+ <target name="-profile-pre-init" if="profiler.info.jvmargs.agent">
+ <!-- Intentionally empty hook; profile-init runs it before init. -->
+ <!-- Override this target in the ../build.xml file to customize. -->
+ </target>
+ <target name="-profile-post-init" if="profiler.info.jvmargs.agent">
+ <!-- Intentionally empty hook; profile-init runs it after init. -->
+ <!-- Override this target in the ../build.xml file to customize. -->
+ </target>
+ <target if="profiler.info.jvmargs.agent" name="-profile-init-macrodef-profile">
+ <!-- Helper macro: sets the property given by "name" to the value of the environment property env.VALUE. -->
+ <macrodef name="resolve">
+ <attribute name="name"/>
+ <attribute name="value"/>
+ <sequential>
+ <property name="@{name}" value="${env.@{value}}"/>
+ </sequential>
+ </macrodef>
+ <!-- Runs a class in a forked JVM (profiler.info.jvm) with the profiler agent arguments attached. -->
+ <macrodef name="profile">
+ <attribute default="${main.class}" name="classname"/>
+ <element name="customize" optional="true"/>
+ <sequential>
+ <!-- Load the environment as env.* properties, then capture the current value of the variable named by profiler.info.pathvar. -->
+ <property environment="env"/>
+ <resolve name="profiler.current.path" value="${profiler.info.pathvar}"/>
+ <java classname="@{classname}" dir="${profiler.info.dir}" fork="true" jvm="${profiler.info.jvm}">
+ <jvmarg line="${endorsed.classpath.cmd.line.arg}"/>
+ <jvmarg value="${profiler.info.jvmargs.agent}"/>
+ <jvmarg line="${profiler.info.jvmargs}"/>
+ <!-- Puts the agent path in front of the captured value of that environment variable. -->
+ <env key="${profiler.info.pathvar}" path="${profiler.info.agentpath}:${profiler.current.path}"/>
+ <arg line="${application.args}"/>
+ <classpath>
+ <path path="${run.classpath}"/>
+ </classpath>
+ <!-- Forwards every property named run-sys-prop.X to the JVM as system property X. -->
+ <syspropertyset>
+ <propertyref prefix="run-sys-prop."/>
+ <mapper from="run-sys-prop.*" to="*" type="glob"/>
+ </syspropertyset>
+ <customize/>
+ </java>
+ </sequential>
+ </macrodef>
+ </target>
+ <!-- Fails the build when the properties required for legacy profiling are missing. -->
+ <target name="-profile-init-check"
+ if="profiler.info.jvmargs.agent"
+ depends="-profile-pre-init, init, -profile-post-init, -profile-init-macrodef-profile">
+ <fail unless="profiler.info.jvm">Must set JVM to use for profiling in profiler.info.jvm</fail>
+ <fail unless="profiler.info.jvmargs.agent">Must set profiler agent JVM arguments in profiler.info.jvmargs.agent</fail>
+ </target>
+ <!--
+ end of pre NB7.2 profiling section
+ -->
+ <target depends="-init-debug-args" name="-init-macrodef-nbjpda">
+ <!-- Wraps the nbjpdastart task: the address it reports is stored in jpda.address, and debug-transport selects the transport. -->
+ <macrodef name="nbjpdastart" uri="http://www.netbeans.org/ns/j2se-project/1">
+ <attribute default="${main.class}" name="name"/>
+ <attribute default="${debug.classpath}" name="classpath"/>
+ <attribute default="" name="stopclassname"/>
+ <sequential>
+ <nbjpdastart addressproperty="jpda.address" name="@{name}" stopclassname="@{stopclassname}" transport="${debug-transport}">
+ <classpath>
+ <path path="@{classpath}"/>
+ </classpath>
+ </nbjpdastart>
+ </sequential>
+ </macrodef>
+ <!-- Wraps the nbjpdareload task: passes class files from "dir" selected by fix.classes and by the pattern built from fix.includes. -->
+ <macrodef name="nbjpdareload" uri="http://www.netbeans.org/ns/j2se-project/1">
+ <attribute default="${build.classes.dir}" name="dir"/>
+ <sequential>
+ <nbjpdareload>
+ <fileset dir="@{dir}" includes="${fix.classes}">
+ <include name="${fix.includes}*.class"/>
+ </fileset>
+ </nbjpdareload>
+ </sequential>
+ </macrodef>
+ </target>
+ <target name="-init-debug-args">
+ <!-- Computes debug-args-line and debug-transport, which the debug and junit-debug macros pass to the debugged JVM. -->
+ <!-- The value ends without a closing quote character; the checks below only match on its leading part. -->
+ <property name="version-output" value="java version &quot;${ant.java.version}"/>
+ <!-- Set when ant.java.version starts with 1.0, 1.1, 1.2 or 1.3. -->
+ <condition property="have-jdk-older-than-1.4">
+ <or>
+ <contains string="${version-output}" substring="java version &quot;1.0"/>
+ <contains string="${version-output}" substring="java version &quot;1.1"/>
+ <contains string="${version-output}" substring="java version &quot;1.2"/>
+ <contains string="${version-output}" substring="java version &quot;1.3"/>
+ </or>
+ </condition>
+ <!-- Older JDKs get the extra -Xnoagent and -Djava.compiler=none arguments; otherwise only -Xdebug is used. -->
+ <condition else="-Xdebug" property="debug-args-line" value="-Xdebug -Xnoagent -Djava.compiler=none">
+ <istrue value="${have-jdk-older-than-1.4}"/>
+ </condition>
+ <!-- OS default transport: dt_shmem on the windows OS family, dt_socket elsewhere. -->
+ <condition else="dt_socket" property="debug-transport-by-os" value="dt_shmem">
+ <os family="windows"/>
+ </condition>
+ <!-- An explicitly set debug.transport property overrides the OS default. -->
+ <condition else="${debug-transport-by-os}" property="debug-transport" value="${debug.transport}">
+ <isset property="debug.transport"/>
+ </condition>
+ </target>
+ <target depends="-init-debug-args" name="-init-macrodef-debug">
+ <!-- Defines the j2seproject3 debug macro: runs a class in a forked JVM in work.dir with JDWP arguments attached. -->
+ <macrodef name="debug" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${main.class}" name="classname"/>
+ <attribute default="${debug.classpath}" name="classpath"/>
+ <element name="customize" optional="true"/>
+ <sequential>
+ <java classname="@{classname}" dir="${work.dir}" fork="true">
+ <jvmarg line="${endorsed.classpath.cmd.line.arg}"/>
+ <!-- Debugger wiring: uses the debug-args-line, debug-transport and jpda.address properties. -->
+ <jvmarg line="${debug-args-line}"/>
+ <jvmarg value="-Xrunjdwp:transport=${debug-transport},address=${jpda.address}"/>
+ <!-- runtime.encoding is used both for file.encoding and for the redirected input, output and error streams. -->
+ <jvmarg value="-Dfile.encoding=${runtime.encoding}"/>
+ <redirector errorencoding="${runtime.encoding}" inputencoding="${runtime.encoding}" outputencoding="${runtime.encoding}"/>
+ <jvmarg line="${run.jvmargs}"/>
+ <jvmarg line="${run.jvmargs.ide}"/>
+ <classpath>
+ <path path="@{classpath}"/>
+ </classpath>
+ <!-- Forwards every property named run-sys-prop.X to the JVM as system property X. -->
+ <syspropertyset>
+ <propertyref prefix="run-sys-prop."/>
+ <mapper from="run-sys-prop.*" to="*" type="glob"/>
+ </syspropertyset>
+ <customize/>
+ </java>
+ </sequential>
+ </macrodef>
+ </target>
+ <target name="-init-macrodef-java">
+ <!-- Defines the j2seproject1 java macro: runs a class in a forked JVM in work.dir. -->
+ <macrodef name="java" uri="http://www.netbeans.org/ns/j2se-project/1">
+ <attribute default="${main.class}" name="classname"/>
+ <attribute default="${run.classpath}" name="classpath"/>
+ <!-- NOTE(review): the jvm attribute is declared but never referenced in the body below. -->
+ <attribute default="jvm" name="jvm"/>
+ <element name="customize" optional="true"/>
+ <sequential>
+ <java classname="@{classname}" dir="${work.dir}" fork="true">
+ <jvmarg line="${endorsed.classpath.cmd.line.arg}"/>
+ <!-- runtime.encoding is used both for file.encoding and for the redirected input, output and error streams. -->
+ <jvmarg value="-Dfile.encoding=${runtime.encoding}"/>
+ <redirector errorencoding="${runtime.encoding}" inputencoding="${runtime.encoding}" outputencoding="${runtime.encoding}"/>
+ <jvmarg line="${run.jvmargs}"/>
+ <jvmarg line="${run.jvmargs.ide}"/>
+ <classpath>
+ <path path="@{classpath}"/>
+ </classpath>
+ <!-- Forwards every property named run-sys-prop.X to the JVM as system property X. -->
+ <syspropertyset>
+ <propertyref prefix="run-sys-prop."/>
+ <mapper from="run-sys-prop.*" to="*" type="glob"/>
+ </syspropertyset>
+ <customize/>
+ </java>
+ </sequential>
+ </macrodef>
+ </target>
+ <target name="-init-macrodef-copylibs">
+ <!-- Defines the j2seproject3 copylibs macro: builds dist.jar with the CopyLibs task and a generated Class-Path manifest entry. -->
+ <macrodef name="copylibs" uri="http://www.netbeans.org/ns/j2se-project/3">
+ <attribute default="${manifest.file}" name="manifest"/>
+ <element name="customize" optional="true"/>
+ <sequential>
+ <!-- Resolve build.classes.dir to an absolute location so it can be mapped out of the runtime classpath. -->
+ <property location="${build.classes.dir}" name="build.classes.dir.resolved"/>
+ <pathconvert property="run.classpath.without.build.classes.dir">
+ <path path="${run.classpath}"/>
+ <map from="${build.classes.dir.resolved}" to=""/>
+ </pathconvert>
+ <!-- Class-Path value: each remaining entry reduced to its file name, spaces replaced by %20, prefixed with lib/, separated by spaces. -->
+ <pathconvert pathsep=" " property="jar.classpath">
+ <path path="${run.classpath.without.build.classes.dir}"/>
+ <chainedmapper>
+ <flattenmapper/>
+ <filtermapper>
+ <replacestring from=" " to="%20"/>
+ </filtermapper>
+ <globmapper from="*" to="lib/*"/>
+ </chainedmapper>
+ </pathconvert>
+ <taskdef classname="org.netbeans.modules.java.j2seproject.copylibstask.CopyLibs" classpath="${libs.CopyLibs.classpath}" name="copylibs"/>
+ <copylibs compress="${jar.compress}" excludeFromCopy="${copylibs.excludes}" index="${jar.index}" indexMetaInf="${jar.index.metainf}" jarfile="${dist.jar}" manifest="@{manifest}" rebase="${copylibs.rebase}" runtimeclasspath="${run.classpath.without.build.classes.dir}">
+ <fileset dir="${build.classes.dir}" excludes="${dist.archive.excludes}"/>
+ <manifest>
+ <attribute name="Class-Path" value="${jar.classpath}"/>
+ <customize/>
+ </manifest>
+ </copylibs>
+ </sequential>
+ </macrodef>
+ </target>
+ <target name="-init-presetdef-jar">
+ <!-- Defines the j2seproject1 jar preset: a jar task preconfigured to write dist.jar from build.classes.dir, minus dist.archive.excludes. -->
+ <presetdef name="jar" uri="http://www.netbeans.org/ns/j2se-project/1">
+ <jar compress="${jar.compress}" index="${jar.index}" jarfile="${dist.jar}">
+ <j2seproject1:fileset dir="${build.classes.dir}" excludes="${dist.archive.excludes}"/>
+ </jar>
+ </presetdef>
+ </target>
+ <target name="-init-ap-cmdline-properties">
+ <!-- Fallback values for the annotation processing settings; values defined earlier take precedence. -->
+ <property name="annotation.processing.enabled" value="true"/>
+ <property name="annotation.processing.processors.list" value=""/>
+ <property name="annotation.processing.processor.options" value=""/>
+ <property name="annotation.processing.run.all.processors" value="true"/>
+ <property name="javac.processorpath" value="${javac.classpath}"/>
+ <property name="javac.test.processorpath" value="${javac.test.classpath}"/>
+ <!-- ap.supported.internal is set unless javac.source is 1.0 through 1.5 (optionally followed by a dotted suffix). -->
+ <condition property="ap.supported.internal" value="true">
+ <not>
+ <matches pattern="1\.[0-5](\..*)?" string="${javac.source}"/>
+ </not>
+ </condition>
+ </target>
+ <target depends="-init-ap-cmdline-properties" if="ap.supported.internal" name="-init-ap-cmdline-supported">
+ <!-- ap.processors.internal: "-processor LIST" when annotation.processing.run.all.processors is false, otherwise empty. -->
+ <condition else="" property="ap.processors.internal" value="-processor ${annotation.processing.processors.list}">
+ <isfalse value="${annotation.processing.run.all.processors}"/>
+ </condition>
+ <!-- ap.proc.none.internal: "-proc:none" when annotation.processing.enabled is false, otherwise empty. -->
+ <condition else="" property="ap.proc.none.internal" value="-proc:none">
+ <isfalse value="${annotation.processing.enabled}"/>
+ </condition>
+ </target>
+ <target name="-init-ap-cmdline"
+ depends="-init-ap-cmdline-properties,-init-ap-cmdline-supported">
+ <!-- Defaults ap.cmd.line.internal to the empty string when nothing has defined it yet. -->
+ <property value="" name="ap.cmd.line.internal"/>
+ </target>
+ <!-- Aggregator: runs all property initialisation, checks and macro/preset definitions listed in depends, in that order. -->
+ <target name="init"
+ depends="-pre-init,-init-private,-init-libraries,-init-user,-init-project,-do-init,-post-init,-init-check,-init-macrodef-property,-init-macrodef-javac,-init-macrodef-test,-init-macrodef-test-debug,-init-macrodef-nbjpda,-init-macrodef-debug,-init-macrodef-java,-init-presetdef-jar,-init-ap-cmdline"/>
+ <!--
+ ===================
+ COMPILATION SECTION
+ ===================
+ -->
+ <target name="-deps-jar-init" unless="built-jar.properties">
+ <!-- Only runs when built-jar.properties is not yet defined: points it at build.dir and removes any stale file. -->
+ <property name="built-jar.properties" location="${build.dir}/built-jar.properties"/>
+ <delete quiet="true" file="${built-jar.properties}"/>
+ </target>
+ <target name="-warn-already-built-jar" if="already.built.jar.${basedir}">
+ <!-- Emits a warning when the property keyed by this project's basedir is present. -->
+ <echo message="Cycle detected: TwitterDataAnalytics was already built" level="warn"/>
+ </target>
+ <target depends="init,-deps-jar-init" name="deps-jar" unless="no.deps">
+ <!-- Records this project in the built-jar.properties file; skipped entirely when no.deps is set. -->
+ <mkdir dir="${build.dir}"/>
+ <touch file="${built-jar.properties}" verbose="false"/>
+ <!-- Load the entries recorded so far as already.built.jar.* properties, then warn if this basedir is among them. -->
+ <property file="${built-jar.properties}" prefix="already.built.jar."/>
+ <antcall target="-warn-already-built-jar"/>
+ <!-- Add an entry keyed by this project's basedir (with an empty value). -->
+ <propertyfile file="${built-jar.properties}">
+ <entry key="${basedir}" value=""/>
+ </propertyfile>
+ </target>
+ <!-- Aggregator: checks for the automatic-build marker and cleans when it is present. -->
+ <target name="-verify-automatic-build"
+ depends="init,-check-automatic-build,-clean-after-automatic-build"/>
+ <target name="-check-automatic-build" depends="init">
+ <!-- Sets netbeans.automatic.build when the marker file exists in build.classes.dir. -->
+ <available property="netbeans.automatic.build" file="${build.classes.dir}/.netbeans_automatic_build"/>
+ </target>
+ <target name="-clean-after-automatic-build" if="netbeans.automatic.build" depends="init">
+ <!-- Invokes the clean target when netbeans.automatic.build is set. -->
+ <antcall target="clean"/>
+ </target>
+ <target name="-pre-pre-compile" depends="init,deps-jar">
+ <!-- Makes sure the class output directory exists. -->
+ <mkdir dir="${build.classes.dir}"/>
+ </target>
+ <target name="-pre-compile">
+ <!-- Empty placeholder for easier customization; the compile target lists it before the target that does the compilation. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <target if="do.depend.true" name="-compile-depend">
+ <!-- Runs the j2seproject3 depend macro over src.dir plus every direct subdirectory of build.generated.sources.dir. -->
+ <!-- erroronmissingdir is false, so a missing generated sources directory is tolerated. -->
+ <pathconvert property="build.generated.subdirs">
+ <dirset dir="${build.generated.sources.dir}" erroronmissingdir="false">
+ <include name="*"/>
+ </dirset>
+ </pathconvert>
+ <j2seproject3:depend srcdir="${src.dir}:${build.generated.subdirs}"/>
+ </target>
+ <target depends="init,deps-jar,-pre-pre-compile,-pre-compile, -copy-persistence-xml,-compile-depend" if="have.sources" name="-do-compile">
+ <!-- Compiles the sources through the j2seproject3 javac macro; only runs when have.sources is set. -->
+ <j2seproject3:javac gensrcdir="${build.generated.sources.dir}"/>
+ <!-- Copies the files of src.dir selected by includes, minus build.classes.excludes and excludes, into build.classes.dir. -->
+ <copy todir="${build.classes.dir}">
+ <fileset dir="${src.dir}" excludes="${build.classes.excludes},${excludes}" includes="${includes}"/>
+ </copy>
+ </target>
+ <target name="-copy-persistence-xml" if="has.persistence.xml">
+ <!-- Copies persistence.xml and orm.xml from meta.inf.dir into META-INF of the class output directory. -->
+ <mkdir dir="${build.classes.dir}/META-INF"/>
+ <copy todir="${build.classes.dir}/META-INF">
+ <fileset includes="persistence.xml orm.xml" dir="${meta.inf.dir}"/>
+ </copy>
+ </target>
+ <target name="-post-compile">
+ <!-- Empty placeholder for easier customization; the compile target lists it after the target that does the compilation. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <!-- Public entry point for compilation; all work is done by the targets listed in depends. -->
+ <target name="compile"
+ description="Compile project."
+ depends="init,deps-jar,-verify-automatic-build,-pre-pre-compile,-pre-compile,-do-compile,-post-compile"/>
+ <target name="-pre-compile-single">
+ <!-- Empty placeholder for easier customization; compile-single lists it before the single-file compilation target. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <target depends="init,deps-jar,-pre-pre-compile" name="-do-compile-single">
+ <!-- Compiles only the files named by javac.includes; fails when that property is not set. -->
+ <fail unless="javac.includes">Must select some files in the IDE or set javac.includes</fail>
+ <!-- force-recompile runs before javac; judging by the macro body it deletes the matching class files first. -->
+ <j2seproject3:force-recompile/>
+ <j2seproject3:javac excludes="" gensrcdir="${build.generated.sources.dir}" includes="${javac.includes}" sourcepath="${src.dir}"/>
+ </target>
+ <target name="-post-compile-single">
+ <!-- Empty placeholder for easier customization; compile-single lists it after the single-file compilation target. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <!-- Entry point for compiling selected files; all work is done by the targets listed in depends. -->
+ <target name="compile-single"
+ depends="init,deps-jar,-verify-automatic-build,-pre-pre-compile,-pre-compile-single,-do-compile-single,-post-compile-single"/>
+ <!--
+ ====================
+ JAR BUILDING SECTION
+ ====================
+ -->
+ <target name="-pre-pre-jar" depends="init">
+ <!-- Creates the directory that will contain dist.jar. -->
+ <dirname property="dist.jar.dir" file="${dist.jar}"/>
+ <mkdir dir="${dist.jar.dir}"/>
+ </target>
+ <target name="-pre-jar">
+ <!-- Empty placeholder for easier customization; the jar targets list it before the manifest and archive steps. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <target name="-do-jar-create-manifest" if="do.archive" unless="manifest.available" depends="init">
+ <!-- No manifest available: start from an empty temporary manifest file in build.dir. -->
+ <tempfile property="tmp.manifest.file" destdir="${build.dir}" deleteonexit="true"/>
+ <touch verbose="false" file="${tmp.manifest.file}"/>
+ </target>
+ <target name="-do-jar-copy-manifest" if="do.archive+manifest.available" depends="init">
+ <!-- Manifest available: work on a temporary copy of manifest.file in build.dir. -->
+ <tempfile property="tmp.manifest.file" destdir="${build.dir}" deleteonexit="true"/>
+ <copy tofile="${tmp.manifest.file}" file="${manifest.file}"/>
+ </target>
+ <target name="-do-jar-set-mainclass"
+ if="do.archive+main.class.available"
+ depends="init,-do-jar-create-manifest,-do-jar-copy-manifest">
+ <!-- Writes the Main-Class attribute into the temporary manifest. -->
+ <manifest mode="update" file="${tmp.manifest.file}">
+ <attribute value="${main.class}" name="Main-Class"/>
+ </manifest>
+ </target>
+ <target name="-do-jar-set-profile"
+ if="do.archive+profile.available"
+ depends="init,-do-jar-create-manifest,-do-jar-copy-manifest">
+ <!-- Writes the Profile attribute (from javac.profile) into the temporary manifest. -->
+ <manifest mode="update" file="${tmp.manifest.file}">
+ <attribute value="${javac.profile}" name="Profile"/>
+ </manifest>
+ </target>
+ <target depends="init,-do-jar-create-manifest,-do-jar-copy-manifest" if="do.archive+splashscreen.available" name="-do-jar-set-splashscreen">
+ <!-- Bundles the splash image under META-INF of the class output directory and records it in the temporary manifest. -->
+ <basename file="${application.splash}" property="splashscreen.basename"/>
+ <mkdir dir="${build.classes.dir}/META-INF"/>
+ <!-- failonerror is false: a failed copy does not abort the build, and the manifest entry below is still written. -->
+ <copy failonerror="false" file="${application.splash}" todir="${build.classes.dir}/META-INF"/>
+ <manifest file="${tmp.manifest.file}" mode="update">
+ <attribute name="SplashScreen-Image" value="META-INF/${splashscreen.basename}"/>
+ </manifest>
+ </target>
+ <target depends="init,-init-macrodef-copylibs,compile,-pre-pre-jar,-pre-jar,-do-jar-create-manifest,-do-jar-copy-manifest,-do-jar-set-mainclass,-do-jar-set-profile,-do-jar-set-splashscreen" if="do.mkdist" name="-do-jar-copylibs">
+ <!-- Builds the jar through the j2seproject3 copylibs macro using the temporary manifest; only runs when do.mkdist is set. -->
+ <j2seproject3:copylibs manifest="${tmp.manifest.file}"/>
+ <!-- Prints a usage hint containing the absolute location of dist.jar. -->
+ <echo level="info">To run this application from the command line without Ant, try:</echo>
+ <property location="${dist.jar}" name="dist.jar.resolved"/>
+ <echo level="info">java -jar "${dist.jar.resolved}"</echo>
+ </target>
+ <target depends="init,compile,-pre-pre-jar,-pre-jar,-do-jar-create-manifest,-do-jar-copy-manifest,-do-jar-set-mainclass,-do-jar-set-profile,-do-jar-set-splashscreen" if="do.archive" name="-do-jar-jar" unless="do.mkdist">
+ <!-- Builds the jar through the j2seproject1 jar preset; runs when do.archive is set and do.mkdist is not. -->
+ <j2seproject1:jar manifest="${tmp.manifest.file}"/>
+ <property location="${build.classes.dir}" name="build.classes.dir.resolved"/>
+ <property location="${dist.jar}" name="dist.jar.resolved"/>
+ <!-- Runtime classpath with the class output directory replaced by the jar, used in the usage hint below. -->
+ <pathconvert property="run.classpath.with.dist.jar">
+ <path path="${run.classpath}"/>
+ <map from="${build.classes.dir.resolved}" to="${dist.jar.resolved}"/>
+ </pathconvert>
+ <!-- With a main class the hint is echoed at info level; otherwise an empty message is echoed at debug level. -->
+ <condition else="" property="jar.usage.message" value="To run this application from the command line without Ant, try:${line.separator}${platform.java} -cp ${run.classpath.with.dist.jar} ${main.class}">
+ <isset property="main.class.available"/>
+ </condition>
+ <condition else="debug" property="jar.usage.level" value="info">
+ <isset property="main.class.available"/>
+ </condition>
+ <echo level="${jar.usage.level}" message="${jar.usage.message}"/>
+ </target>
+ <target name="-do-jar-delete-manifest" if="do.archive" depends="-do-jar-copylibs">
+ <!-- Removes the temporary manifest file. -->
+ <delete>
+ <fileset file="${tmp.manifest.file}"/>
+ </delete>
+ </target>
+ <!-- Aggregator for the jar build path that ends in the plain jar step followed by manifest cleanup. -->
+ <target name="-do-jar-without-libraries"
+ depends="init,compile,-pre-pre-jar,-pre-jar,-do-jar-create-manifest,-do-jar-copy-manifest,-do-jar-set-mainclass,-do-jar-set-profile,-do-jar-set-splashscreen,-do-jar-jar,-do-jar-delete-manifest"/>
+ <!-- Aggregator for the jar build path that ends in the copylibs step followed by manifest cleanup. -->
+ <target name="-do-jar-with-libraries"
+ depends="init,compile,-pre-pre-jar,-pre-jar,-do-jar-create-manifest,-do-jar-copy-manifest,-do-jar-set-mainclass,-do-jar-set-profile,-do-jar-set-splashscreen,-do-jar-copylibs,-do-jar-delete-manifest"/>
+ <target name="-post-jar">
+ <!-- Empty placeholder for easier customization; the jar targets list it after the archive steps. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <!-- Aggregator: lists both jar build paths; their conditional steps decide which one produces the archive. -->
+ <target name="-do-jar"
+ depends="init,compile,-pre-jar,-do-jar-without-libraries,-do-jar-with-libraries,-post-jar"/>
+ <!-- Public entry point for building the JAR; all work is done by the targets listed in depends. -->
+ <target name="jar"
+ description="Build JAR."
+ depends="init,compile,-pre-jar,-do-jar,-post-jar"/>
+ <!--
+ =================
+ EXECUTION SECTION
+ =================
+ -->
+ <target name="run" description="Run a main class." depends="init,compile">
+ <!-- Runs the default class of the j2seproject1 java macro, passing application.args as program arguments. -->
+ <j2seproject1:java>
+ <customize>
+ <arg line="${application.args}"/>
+ </customize>
+ </j2seproject1:java>
+ </target>
+ <target name="-do-not-recompile">
+ <!-- Defines javac.includes.binary as the empty string. -->
+ <!-- NOTE(review): presumably this pre-empts the pathconvert in the force-recompile macro so nothing gets deleted; confirm against callers. -->
+ <property name="javac.includes.binary" value=""/>
+ </target>
+ <target name="run-single" depends="init,compile-single">
+ <!-- Runs the class named by run.class; fails when that property is not set. -->
+ <fail unless="run.class">Must select one file in the IDE or set run.class</fail>
+ <j2seproject1:java classname="${run.class}"/>
+ </target>
+ <target name="run-test-with-main" depends="init,compile-test-single">
+ <!-- Runs the class named by run.class on the test runtime classpath; fails when run.class is not set. -->
+ <fail unless="run.class">Must select one file in the IDE or set run.class</fail>
+ <j2seproject1:java classpath="${run.test.classpath}" classname="${run.class}"/>
+ </target>
+ <!--
+ =================
+ DEBUGGING SECTION
+ =================
+ -->
+ <target name="-debug-start-debugger" if="netbeans.home" depends="init">
+ <!-- Starts the IDE debugger session, named after debug.class. -->
+ <j2seproject1:nbjpdastart name="${debug.class}"/>
+ </target>
+ <target name="-debug-start-debugger-main-test" if="netbeans.home" depends="init">
+ <!-- Starts the IDE debugger session, named after debug.class, using the test debug classpath. -->
+ <j2seproject1:nbjpdastart name="${debug.class}" classpath="${debug.test.classpath}"/>
+ </target>
+ <target name="-debug-start-debuggee" depends="init,compile">
+ <!-- Launches the default class of the j2seproject3 debug macro, passing application.args as program arguments. -->
+ <j2seproject3:debug>
+ <customize>
+ <arg line="${application.args}"/>
+ </customize>
+ </j2seproject3:debug>
+ </target>
+ <target depends="init,compile,-debug-start-debugger,-debug-start-debuggee" description="Debug project in IDE." if="netbeans.home" name="debug"/>
+ <target depends="init" if="netbeans.home" name="-debug-start-debugger-stepinto">
+ <j2seproject1:nbjpdastart stopclassname="${main.class}"/>
+ </target>
+ <target depends="init,compile,-debug-start-debugger-stepinto,-debug-start-debuggee" if="netbeans.home" name="debug-stepinto"/>
+ <target depends="init,compile-single" if="netbeans.home" name="-debug-start-debuggee-single">
+ <fail unless="debug.class">Must select one file in the IDE or set debug.class</fail>
+ <j2seproject3:debug classname="${debug.class}"/>
+ </target>
+ <target depends="init,compile-single,-debug-start-debugger,-debug-start-debuggee-single" if="netbeans.home" name="debug-single"/>
+ <target depends="init,compile-test-single" if="netbeans.home" name="-debug-start-debuggee-main-test">
+ <fail unless="debug.class">Must select one file in the IDE or set debug.class</fail>
+ <j2seproject3:debug classname="${debug.class}" classpath="${debug.test.classpath}"/>
+ </target>
+ <target depends="init,compile-test-single,-debug-start-debugger-main-test,-debug-start-debuggee-main-test" if="netbeans.home" name="debug-test-with-main"/>
+ <target depends="init" name="-pre-debug-fix">
+ <fail unless="fix.includes">Must set fix.includes</fail>
+ <property name="javac.includes" value="${fix.includes}.java"/>
+ </target>
+ <target depends="init,-pre-debug-fix,compile-single" if="netbeans.home" name="-do-debug-fix">
+ <j2seproject1:nbjpdareload/>
+ </target>
+ <target depends="init,-pre-debug-fix,-do-debug-fix" if="netbeans.home" name="debug-fix"/>
+ <!--
+ =================
+ PROFILING SECTION
+ =================
+ -->
+ <!--
+ pre NB7.2 profiler integration
+ -->
+ <target depends="profile-init,compile" description="Profile a project in the IDE." if="profiler.info.jvmargs.agent" name="-profile-pre72">
+ <fail unless="netbeans.home">This target only works when run from inside the NetBeans IDE.</fail>
+ <nbprofiledirect>
+ <classpath>
+ <path path="${run.classpath}"/>
+ </classpath>
+ </nbprofiledirect>
+ <profile/>
+ </target>
+ <target depends="profile-init,compile-single" description="Profile a selected class in the IDE." if="profiler.info.jvmargs.agent" name="-profile-single-pre72">
+ <fail unless="profile.class">Must select one file in the IDE or set profile.class</fail>
+ <fail unless="netbeans.home">This target only works when run from inside the NetBeans IDE.</fail>
+ <nbprofiledirect>
+ <classpath>
+ <path path="${run.classpath}"/>
+ </classpath>
+ </nbprofiledirect>
+ <profile classname="${profile.class}"/>
+ </target>
+ <target depends="profile-init,compile-single" if="profiler.info.jvmargs.agent" name="-profile-applet-pre72">
+ <fail unless="netbeans.home">This target only works when run from inside the NetBeans IDE.</fail>
+ <nbprofiledirect>
+ <classpath>
+ <path path="${run.classpath}"/>
+ </classpath>
+ </nbprofiledirect>
+ <profile classname="sun.applet.AppletViewer">
+ <customize>
+ <arg value="${applet.url}"/>
+ </customize>
+ </profile>
+ </target>
+ <target depends="profile-init,compile-test-single" if="profiler.info.jvmargs.agent" name="-profile-test-single-pre72">
+ <fail unless="netbeans.home">This target only works when run from inside the NetBeans IDE.</fail>
+ <nbprofiledirect>
+ <classpath>
+ <path path="${run.test.classpath}"/>
+ </classpath>
+ </nbprofiledirect>
+ <junit dir="${profiler.info.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" jvm="${profiler.info.jvm}" showoutput="true">
+ <env key="${profiler.info.pathvar}" path="${profiler.info.agentpath}:${profiler.current.path}"/>
+ <jvmarg value="${profiler.info.jvmargs.agent}"/>
+ <jvmarg line="${profiler.info.jvmargs}"/>
+ <test name="${profile.class}"/>
+ <classpath>
+ <path path="${run.test.classpath}"/>
+ </classpath>
+ <syspropertyset>
+ <propertyref prefix="test-sys-prop."/>
+ <mapper from="test-sys-prop.*" to="*" type="glob"/>
+ </syspropertyset>
+ <formatter type="brief" usefile="false"/>
+ <formatter type="xml"/>
+ </junit>
+ </target>
+ <!--
+ end of pre NB72 profiling section
+ -->
+ <target if="netbeans.home" name="-profile-check">
+ <condition property="profiler.configured">
+ <or>
+ <contains casesensitive="true" string="${run.jvmargs.ide}" substring="-agentpath:"/>
+ <contains casesensitive="true" string="${run.jvmargs.ide}" substring="-javaagent:"/>
+ </or>
+ </condition>
+ </target>
+ <target depends="-profile-check,-profile-pre72" description="Profile a project in the IDE." if="profiler.configured" name="profile" unless="profiler.info.jvmargs.agent">
+ <startprofiler/>
+ <antcall target="run"/>
+ </target>
+ <target depends="-profile-check,-profile-single-pre72" description="Profile a selected class in the IDE." if="profiler.configured" name="profile-single" unless="profiler.info.jvmargs.agent">
+ <fail unless="run.class">Must select one file in the IDE or set run.class</fail>
+ <startprofiler/>
+ <antcall target="run-single"/>
+ </target>
+ <target depends="-profile-test-single-pre72" description="Profile a selected test in the IDE." name="profile-test-single"/>
+ <target depends="-profile-check" description="Profile a selected test in the IDE." if="profiler.configured" name="profile-test" unless="profiler.info.jvmargs">
+ <fail unless="test.includes">Must select some files in the IDE or set test.includes</fail>
+ <startprofiler/>
+ <antcall target="test-single"/>
+ </target>
+ <target depends="-profile-check" description="Profile a selected class in the IDE." if="profiler.configured" name="profile-test-with-main">
+ <fail unless="run.class">Must select one file in the IDE or set run.class</fail>
+ <startprofiler/>
+ <antcall target="run-test-with-main"/> <!-- delegates to the run-test-with-main target, which runs ${run.class} on the test classpath -->
+ </target>
+ <target depends="-profile-check,-profile-applet-pre72" if="profiler.configured" name="profile-applet" unless="profiler.info.jvmargs.agent">
+ <fail unless="applet.url">Must select one file in the IDE or set applet.url</fail>
+ <startprofiler/>
+ <antcall target="run-applet"/>
+ </target>
+ <!--
+ ===============
+ JAVADOC SECTION
+ ===============
+ -->
+ <target depends="init" if="have.sources" name="-javadoc-build">
+ <mkdir dir="${dist.javadoc.dir}"/>
+ <condition else="" property="javadoc.endorsed.classpath.cmd.line.arg" value="-J${endorsed.classpath.cmd.line.arg}">
+ <and>
+ <isset property="endorsed.classpath.cmd.line.arg"/>
+ <not>
+ <equals arg1="${endorsed.classpath.cmd.line.arg}" arg2=""/>
+ </not>
+ </and>
+ </condition>
+ <javadoc additionalparam="${javadoc.additionalparam}" author="${javadoc.author}" charset="UTF-8" destdir="${dist.javadoc.dir}" docencoding="UTF-8" encoding="${javadoc.encoding.used}" failonerror="true" noindex="${javadoc.noindex}" nonavbar="${javadoc.nonavbar}" notree="${javadoc.notree}" private="${javadoc.private}" source="${javac.source}" splitindex="${javadoc.splitindex}" use="${javadoc.use}" useexternalfile="true" version="${javadoc.version}" windowtitle="${javadoc.windowtitle}">
+ <classpath>
+ <path path="${javac.classpath}"/>
+ </classpath>
+ <fileset dir="${src.dir}" excludes="*.java,${excludes}" includes="${includes}">
+ <filename name="**/*.java"/>
+ </fileset>
+ <fileset dir="${build.generated.sources.dir}" erroronmissingdir="false">
+ <include name="**/*.java"/>
+ <exclude name="*.java"/>
+ </fileset>
+ <arg line="${javadoc.endorsed.classpath.cmd.line.arg}"/>
+ </javadoc>
+ <copy todir="${dist.javadoc.dir}">
+ <fileset dir="${src.dir}" excludes="${excludes}" includes="${includes}">
+ <filename name="**/doc-files/**"/>
+ </fileset>
+ <fileset dir="${build.generated.sources.dir}" erroronmissingdir="false">
+ <include name="**/doc-files/**"/>
+ </fileset>
+ </copy>
+ </target>
+ <target depends="init,-javadoc-build" if="netbeans.home" name="-javadoc-browse" unless="no.javadoc.preview">
+ <nbbrowse file="${dist.javadoc.dir}/index.html"/>
+ </target>
+ <target depends="init,-javadoc-build,-javadoc-browse" description="Build Javadoc." name="javadoc"/>
+ <!--
+ =========================
+ TEST COMPILATION SECTION
+ =========================
+ -->
+ <target depends="init,compile" if="have.tests" name="-pre-pre-compile-test">
+ <mkdir dir="${build.test.classes.dir}"/>
+ </target>
+ <target name="-pre-compile-test">
+ <!-- Empty placeholder for easier customization. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <target if="do.depend.true" name="-compile-test-depend">
+ <j2seproject3:depend classpath="${javac.test.classpath}" destdir="${build.test.classes.dir}" srcdir=""/>
+ </target>
+ <target depends="init,deps-jar,compile,-pre-pre-compile-test,-pre-compile-test,-compile-test-depend" if="have.tests" name="-do-compile-test">
+ <j2seproject3:javac apgeneratedsrcdir="${build.test.classes.dir}" classpath="${javac.test.classpath}" debug="true" destdir="${build.test.classes.dir}" processorpath="${javac.test.processorpath}" srcdir=""/>
+ <copy todir="${build.test.classes.dir}"/>
+ </target>
+ <target name="-post-compile-test">
+ <!-- Empty placeholder for easier customization. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <target depends="init,compile,-pre-pre-compile-test,-pre-compile-test,-do-compile-test,-post-compile-test" name="compile-test"/>
+ <target name="-pre-compile-test-single">
+ <!-- Empty placeholder for easier customization. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <target depends="init,deps-jar,compile,-pre-pre-compile-test,-pre-compile-test-single" if="have.tests" name="-do-compile-test-single">
+ <fail unless="javac.includes">Must select some files in the IDE or set javac.includes</fail>
+ <j2seproject3:force-recompile destdir="${build.test.classes.dir}"/>
+ <j2seproject3:javac apgeneratedsrcdir="${build.test.classes.dir}" classpath="${javac.test.classpath}" debug="true" destdir="${build.test.classes.dir}" excludes="" includes="${javac.includes}" processorpath="${javac.test.processorpath}" sourcepath="" srcdir=""/>
+ <copy todir="${build.test.classes.dir}"/>
+ </target>
+ <target name="-post-compile-test-single">
+ <!-- Empty placeholder for easier customization. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <target depends="init,compile,-pre-pre-compile-test,-pre-compile-test-single,-do-compile-test-single,-post-compile-test-single" name="compile-test-single"/>
+ <!--
+ =======================
+ TEST EXECUTION SECTION
+ =======================
+ -->
+ <target depends="init" if="have.tests" name="-pre-test-run">
+ <mkdir dir="${build.test.results.dir}"/>
+ </target>
+ <target depends="init,compile-test,-pre-test-run" if="have.tests" name="-do-test-run">
+ <j2seproject3:test testincludes="**/*Test.java"/>
+ </target>
+ <target depends="init,compile-test,-pre-test-run,-do-test-run" if="have.tests" name="-post-test-run">
+ <fail if="tests.failed" unless="ignore.failing.tests">Some tests failed; see details above.</fail>
+ </target>
+ <target depends="init" if="have.tests" name="test-report"/>
+ <target depends="init" if="netbeans.home+have.tests" name="-test-browse"/>
+ <target depends="init,compile-test,-pre-test-run,-do-test-run,test-report,-post-test-run,-test-browse" description="Run unit tests." name="test"/>
+ <target depends="init" if="have.tests" name="-pre-test-run-single">
+ <mkdir dir="${build.test.results.dir}"/>
+ </target>
+ <target depends="init,compile-test-single,-pre-test-run-single" if="have.tests" name="-do-test-run-single">
+ <fail unless="test.includes">Must select some files in the IDE or set test.includes</fail>
+ <j2seproject3:test excludes="" includes="${test.includes}" testincludes="${test.includes}"/>
+ </target>
+ <target depends="init,compile-test-single,-pre-test-run-single,-do-test-run-single" if="have.tests" name="-post-test-run-single">
+ <fail if="tests.failed" unless="ignore.failing.tests">Some tests failed; see details above.</fail>
+ </target>
+ <target depends="init,compile-test-single,-pre-test-run-single,-do-test-run-single,-post-test-run-single" description="Run single unit test." name="test-single"/>
+ <target depends="init,compile-test-single,-pre-test-run-single" if="have.tests" name="-do-test-run-single-method">
+ <fail unless="test.class">Must select some files in the IDE or set test.class</fail>
+ <fail unless="test.method">Must select some method in the IDE or set test.method</fail>
+ <j2seproject3:test excludes="" includes="${javac.includes}" testincludes="${test.class}" testmethods="${test.method}"/>
+ </target>
+ <target depends="init,compile-test-single,-pre-test-run-single,-do-test-run-single-method" if="have.tests" name="-post-test-run-single-method">
+ <fail if="tests.failed" unless="ignore.failing.tests">Some tests failed; see details above.</fail>
+ </target>
+ <target depends="init,compile-test-single,-pre-test-run-single,-do-test-run-single-method,-post-test-run-single-method" description="Run single unit test." name="test-single-method"/>
+ <!--
+ =======================
+ TEST DEBUGGING SECTION
+ =======================
+ -->
+ <target depends="init,compile-test-single,-pre-test-run-single" if="have.tests" name="-debug-start-debuggee-test">
+ <fail unless="test.class">Must select one file in the IDE or set test.class</fail>
+ <j2seproject3:test-debug excludes="" includes="${javac.includes}" testClass="${test.class}" testincludes="${javac.includes}"/>
+ </target>
+ <target depends="init,compile-test-single,-pre-test-run-single" if="have.tests" name="-debug-start-debuggee-test-method">
+ <fail unless="test.class">Must select one file in the IDE or set test.class</fail>
+ <fail unless="test.method">Must select some method in the IDE or set test.method</fail>
+ <j2seproject3:test-debug excludes="" includes="${javac.includes}" testClass="${test.class}" testMethod="${test.method}" testincludes="${test.class}" testmethods="${test.method}"/>
+ </target>
+ <target depends="init,compile-test" if="netbeans.home+have.tests" name="-debug-start-debugger-test">
+ <j2seproject1:nbjpdastart classpath="${debug.test.classpath}" name="${test.class}"/>
+ </target>
+ <target depends="init,compile-test-single,-debug-start-debugger-test,-debug-start-debuggee-test" name="debug-test"/>
+ <target depends="init,compile-test-single,-debug-start-debugger-test,-debug-start-debuggee-test-method" name="debug-test-method"/>
+ <target depends="init,-pre-debug-fix,compile-test-single" if="netbeans.home" name="-do-debug-fix-test">
+ <j2seproject1:nbjpdareload dir="${build.test.classes.dir}"/>
+ </target>
+ <target depends="init,-pre-debug-fix,-do-debug-fix-test" if="netbeans.home" name="debug-fix-test"/>
+ <!--
+ =========================
+ APPLET EXECUTION SECTION
+ =========================
+ -->
+ <target depends="init,compile-single" name="run-applet">
+ <fail unless="applet.url">Must select one file in the IDE or set applet.url</fail>
+ <j2seproject1:java classname="sun.applet.AppletViewer">
+ <customize>
+ <arg value="${applet.url}"/>
+ </customize>
+ </j2seproject1:java>
+ </target>
+ <!--
+ =========================
+ APPLET DEBUGGING SECTION
+ =========================
+ -->
+ <target depends="init,compile-single" if="netbeans.home" name="-debug-start-debuggee-applet">
+ <fail unless="applet.url">Must select one file in the IDE or set applet.url</fail>
+ <j2seproject3:debug classname="sun.applet.AppletViewer">
+ <customize>
+ <arg value="${applet.url}"/>
+ </customize>
+ </j2seproject3:debug>
+ </target>
+ <target depends="init,compile-single,-debug-start-debugger,-debug-start-debuggee-applet" if="netbeans.home" name="debug-applet"/>
+ <!--
+ ===============
+ CLEANUP SECTION
+ ===============
+ -->
+ <target name="-deps-clean-init" unless="built-clean.properties">
+ <property location="${build.dir}/built-clean.properties" name="built-clean.properties"/>
+ <delete file="${built-clean.properties}" quiet="true"/>
+ </target>
+ <target if="already.built.clean.${basedir}" name="-warn-already-built-clean">
+ <echo level="warn" message="Cycle detected: TwitterDataAnalytics was already built"/>
+ </target>
+ <target depends="init,-deps-clean-init" name="deps-clean" unless="no.deps">
+ <mkdir dir="${build.dir}"/>
+ <touch file="${built-clean.properties}" verbose="false"/>
+ <property file="${built-clean.properties}" prefix="already.built.clean."/>
+ <antcall target="-warn-already-built-clean"/>
+ <propertyfile file="${built-clean.properties}">
+ <entry key="${basedir}" value=""/>
+ </propertyfile>
+ </target>
+ <target depends="init" name="-do-clean">
+ <delete dir="${build.dir}"/>
+ <delete dir="${dist.dir}" followsymlinks="false" includeemptydirs="true"/>
+ </target>
+ <target name="-post-clean">
+ <!-- Empty placeholder for easier customization. -->
+ <!-- You can override this target in the ../build.xml file. -->
+ </target>
+ <target depends="init,deps-clean,-do-clean,-post-clean" description="Clean build products." name="clean"/>
+ <target name="-check-call-dep">
+ <property file="${call.built.properties}" prefix="already.built."/>
+ <condition property="should.call.dep">
+ <and>
+ <not>
+ <isset property="already.built.${call.subproject}"/>
+ </not>
+ <available file="${call.script}"/>
+ </and>
+ </condition>
+ </target>
+ <target depends="-check-call-dep" if="should.call.dep" name="-maybe-call-dep">
+ <ant antfile="${call.script}" inheritall="false" target="${call.target}">
+ <propertyset>
+ <propertyref prefix="transfer."/>
+ <mapper from="transfer.*" to="*" type="glob"/>
+ </propertyset>
+ </ant>
+ </target>
+</project>
diff --git a/nbproject/genfiles.properties b/nbproject/genfiles.properties
new file mode 100644
index 0000000..b42a5d3
--- /dev/null
+++ b/nbproject/genfiles.properties
@@ -0,0 +1,8 @@
+build.xml.data.CRC32=72787bde
+build.xml.script.CRC32=57d18e43
+build.xml.stylesheet.CRC32=8064a381@1.68.1.46
+# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml.
+# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you.
+nbproject/build-impl.xml.data.CRC32=72787bde
+nbproject/build-impl.xml.script.CRC32=4304d30d
+nbproject/build-impl.xml.stylesheet.CRC32=5a01deb7@1.68.1.46
diff --git a/nbproject/project.properties b/nbproject/project.properties
new file mode 100644
index 0000000..e32b494
--- /dev/null
+++ b/nbproject/project.properties
@@ -0,0 +1,135 @@
+annotation.processing.enabled=true
+annotation.processing.enabled.in.editor=false
+annotation.processing.processors.list=
+annotation.processing.run.all.processors=true
+annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
+application.title=TwitterDataAnalytics
+application.vendor=skumar34
+build.classes.dir=${build.dir}/classes
+build.classes.excludes=**/*.java,**/*.form
+# This directory is removed when the project is cleaned:
+build.dir=build
+build.generated.dir=${build.dir}/generated
+build.generated.sources.dir=${build.dir}/generated-sources
+# Only compile against the classpath explicitly listed here:
+build.sysclasspath=ignore
+build.test.classes.dir=${build.dir}/test/classes
+build.test.results.dir=${build.dir}/test/results
+# Uncomment to specify the preferred debugger connection transport:
+#debug.transport=dt_socket
+debug.classpath=\
+ ${run.classpath}
+debug.test.classpath=\
+ ${run.test.classpath}
+# This directory is removed when the project is cleaned:
+dist.dir=dist
+dist.jar=${dist.dir}/TwitterDataAnalytics.jar
+dist.javadoc.dir=${dist.dir}/javadoc
+endorsed.classpath=
+excludes=
+file.reference.collections-generic-4.01.jar=lib/collections-generic-4.01.jar
+file.reference.colt-1.2.0.jar=lib/colt-1.2.0.jar
+file.reference.commons-codec-1.7.jar=lib/commons-codec-1.7.jar
+file.reference.commons-httpclient-3.1_1.jar=lib/commons-httpclient-3.1_1.jar
+file.reference.commons-lang-2.6.jar=lib/commons-lang-2.6.jar
+file.reference.commons-logging-1.1.1.jar=lib/commons-logging-1.1.1.jar
+file.reference.concurrent-1.3.4.jar=lib/concurrent-1.3.4.jar
+file.reference.gson-2.2.4.jar=lib/gson-2.2.4.jar
+file.reference.httpclient-4.2.1.jar=lib/httpclient-4.2.1.jar
+file.reference.httpcore-4.2.1.jar=lib/httpcore-4.2.1.jar
+file.reference.j3d-core-1.3.1.jar=lib/j3d-core-1.3.1.jar
+file.reference.jfig-1.5.2.jar=lib/jfig-1.5.2.jar
+file.reference.json.jar=lib/json.jar
+file.reference.jung-3d-2.0.1.jar=lib/jung-3d-2.0.1.jar
+file.reference.jung-3d-demos-2.0.1.jar=lib/jung-3d-demos-2.0.1.jar
+file.reference.jung-algorithms-2.0.1.jar=lib/jung-algorithms-2.0.1.jar
+file.reference.jung-api-2.0.1.jar=lib/jung-api-2.0.1.jar
+file.reference.jung-graph-impl-2.0.1.jar=lib/jung-graph-impl-2.0.1.jar
+file.reference.jung-io-2.0.1.jar=lib/jung-io-2.0.1.jar
+file.reference.jung-jai-2.0.1.jar=lib/jung-jai-2.0.1.jar
+file.reference.jung-jai-samples-2.0.1.jar=lib/jung-jai-samples-2.0.1.jar
+file.reference.jung-samples-2.0.1.jar=lib/jung-samples-2.0.1.jar
+file.reference.jung-visualization-2.0.1.jar=lib/jung-visualization-2.0.1.jar
+file.reference.log4j-1.2.15.jar=lib/log4j-1.2.15.jar
+file.reference.mallet-deps.jar=lib/mallet-deps.jar
+file.reference.mallet.jar=lib/mallet.jar
+file.reference.signpost-commonshttp4-1.2.1.2.jar=lib/signpost-commonshttp4-1.2.1.2.jar
+file.reference.signpost-core-1.2.1.2.jar=lib/signpost-core-1.2.1.2.jar
+file.reference.stax-api-1.0.1.jar=lib/stax-api-1.0.1.jar
+file.reference.TwitterDataAnalytics-src=src
+file.reference.vecmath-1.3.1.jar=lib/vecmath-1.3.1.jar
+file.reference.wstx-asl-3.2.6.jar=lib/wstx-asl-3.2.6.jar
+includes=**
+jar.compress=false
+javac.classpath=\
+ ${file.reference.collections-generic-4.01.jar}:\
+ ${file.reference.colt-1.2.0.jar}:\
+ ${file.reference.commons-codec-1.7.jar}:\
+ ${file.reference.commons-httpclient-3.1_1.jar}:\
+ ${file.reference.commons-lang-2.6.jar}:\
+ ${file.reference.commons-logging-1.1.1.jar}:\
+ ${file.reference.concurrent-1.3.4.jar}:\
+ ${file.reference.gson-2.2.4.jar}:\
+ ${file.reference.httpclient-4.2.1.jar}:\
+ ${file.reference.httpcore-4.2.1.jar}:\
+ ${file.reference.j3d-core-1.3.1.jar}:\
+ ${file.reference.jfig-1.5.2.jar}:\
+ ${file.reference.json.jar}:\
+ ${file.reference.jung-3d-2.0.1.jar}:\
+ ${file.reference.jung-3d-demos-2.0.1.jar}:\
+ ${file.reference.jung-algorithms-2.0.1.jar}:\
+ ${file.reference.jung-api-2.0.1.jar}:\
+ ${file.reference.jung-graph-impl-2.0.1.jar}:\
+ ${file.reference.jung-io-2.0.1.jar}:\
+ ${file.reference.jung-jai-2.0.1.jar}:\
+ ${file.reference.jung-jai-samples-2.0.1.jar}:\
+ ${file.reference.jung-samples-2.0.1.jar}:\
+ ${file.reference.jung-visualization-2.0.1.jar}:\
+ ${file.reference.log4j-1.2.15.jar}:\
+ ${file.reference.mallet-deps.jar}:\
+ ${file.reference.mallet.jar}:\
+ ${file.reference.signpost-commonshttp4-1.2.1.2.jar}:\
+ ${file.reference.signpost-core-1.2.1.2.jar}:\
+ ${file.reference.stax-api-1.0.1.jar}:\
+ ${file.reference.vecmath-1.3.1.jar}:\
+ ${file.reference.wstx-asl-3.2.6.jar}
+# Space-separated list of extra javac options
+javac.compilerargs=
+javac.deprecation=false
+javac.processorpath=\
+ ${javac.classpath}
+javac.source=1.7
+javac.target=1.7
+javac.test.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}
+javac.test.processorpath=\
+ ${javac.test.classpath}
+javadoc.additionalparam=
+javadoc.author=false
+javadoc.encoding=${source.encoding}
+javadoc.noindex=false
+javadoc.nonavbar=false
+javadoc.notree=false
+javadoc.private=false
+javadoc.splitindex=true
+javadoc.use=true
+javadoc.version=false
+javadoc.windowtitle=
+main.class=
+manifest.file=manifest.mf
+meta.inf.dir=${src.dir}/META-INF
+mkdist.disabled=false
+platform.active=default_platform
+run.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}
+# Space-separated list of JVM arguments used when running the project.
+# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value.
+# To set system properties for unit tests define test-sys-prop.name=value:
+run.jvmargs=
+run.test.classpath=\
+ ${javac.test.classpath}:\
+ ${build.test.classes.dir}
+source.encoding=UTF-8
+src.dir=${file.reference.TwitterDataAnalytics-src}
diff --git a/nbproject/project.xml b/nbproject/project.xml
new file mode 100644
index 0000000..c85b6f7
--- /dev/null
+++ b/nbproject/project.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://www.netbeans.org/ns/project/1">
+ <type>org.netbeans.modules.java.j2seproject</type>
+ <configuration>
+ <data xmlns="http://www.netbeans.org/ns/j2se-project/3">
+ <name>TwitterDataAnalytics</name>
+ <source-roots>
+ <root id="src.dir"/>
+ </source-roots>
+ <test-roots/>
+ </data>
+ <libraries xmlns="http://www.netbeans.org/ns/ant-project-libraries/1">
+ <definitions>.\lib\nblibraries.properties</definitions>
+ </libraries>
+ </configuration>
+</project>
diff --git a/src/Chapter2/Location/LocationTranslationExample.java b/src/Chapter2/Location/LocationTranslationExample.java
new file mode 100644
index 0000000..69178dc
--- /dev/null
+++ b/src/Chapter2/Location/LocationTranslationExample.java
@@ -0,0 +1,124 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter2.Location;
+
+import Chapter2.support.Location;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLEncoder;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.json.JSONArray;
+import org.json.JSONException;
+
+/**
+ * Translates free-text location names into coordinates through the MapQuest-hosted Nominatim search API.
+ */
+public class LocationTranslationExample
+{
+    private static final Logger LOGGER = Logger.getLogger(LocationTranslationExample.class.getName());
+
+    /**
+     * Translates a location string to coordinates using the Nominatim service.
+     * @param loc free-text location name; null or empty yields null
+     * @return the coordinates of the first search result, or null if the lookup failed or returned no results
+     */
+    public Location TranslateLoc(String loc)
+    {
+        if(loc==null||loc.isEmpty()) {
+            return null;
+        }
+        String encodedLoc;
+        try {
+            //Step 1: Encode the location name
+            encodedLoc = URLEncoder.encode(loc, "UTF-8");
+        } catch (UnsupportedEncodingException ex) {
+            //without an encoded name there is nothing meaningful to query, so give up here
+            LOGGER.log(Level.SEVERE, null, ex);
+            return null;
+        }
+        //Step 2: Create a get request to MapQuest API with the name of the location
+        String url= "http://open.mapquestapi.com/nominatim/v1/search?q="+encodedLoc+"&format=json";
+        String page = ReadHTML(url);
+        if(page==null) {
+            return null;
+        }
+        try {
+            JSONArray results = new JSONArray(page);
+            if(results.length()>0) {
+                //Step 3: Read and extract the coordinates of the location as a JSONObject
+                return new Location(results.getJSONObject(0).getDouble("lat"),results.getJSONObject(0).getDouble("lon"));
+            }
+        } catch (JSONException ex) {
+            LOGGER.log(Level.SEVERE, null, ex);
+        }
+        return null;
+    }
+
+    /**
+     * Fetches the body of a URL as text.
+     * @param url address to fetch; the connection is cast to HttpURLConnection, so it must be an http(s) URL
+     * @return the response body with a newline before every line, or null if the URL is malformed,
+     *         the server answers with status 400 or above, or an I/O error occurs
+     */
+    public String ReadHTML(String url)
+    {
+        URL theURL;
+        try {
+            theURL = new URL(url);
+        } catch (MalformedURLException e) {
+            //report the offending string; theURL is still unassigned at this point
+            System.out.println("Bad URL: " + url);
+            return null;
+        }
+        try {
+            HttpURLConnection huc = (HttpURLConnection) theURL.openConnection();
+            //give up quickly when the service cannot be reached
+            huc.setConnectTimeout(2000);
+            huc.setRequestProperty("User-Agent", "Mozilla/4.5");
+            //Set your email address in the request so MapQuest knows how to reach you in the event of problems
+            huc.setRequestProperty("Email", "twitterdataanalytics@gmail.com");
+            //any client or server error status means there is no usable body to parse
+            if(huc.getResponseCode()>=400) {
+                return null;
+            }
+            StringBuilder page = new StringBuilder();
+            //try-with-resources closes the stream even when reading fails part-way
+            try (BufferedReader bRead = new BufferedReader(new InputStreamReader(huc.getInputStream(), "UTF-8"))) {
+                String line;
+                while((line = bRead.readLine())!=null) {
+                    page.append('\n').append(line);
+                }
+            }
+            return page.toString();
+        } catch (IOException e) {
+            LOGGER.log(Level.WARNING, "ReadHTML IO Error for " + url, e);
+            return null;
+        }
+    }
+
+    /**
+     * Translates each command-line argument and prints the result.
+     * @param args location names to translate
+     */
+    public static void main(String[] args)
+    {
+        if(args==null) {
+            return;
+        }
+        LocationTranslationExample lte = new LocationTranslationExample();
+        for(String arg : args) {
+            Location found = lte.TranslateLoc(arg);
+            //TranslateLoc returns null on any failure, so guard before printing
+            System.out.println(found!=null ? found.toString() : "No coordinates found for: " + arg);
+        }
+    }
+}
diff --git a/src/Chapter2/openauthentication/OAuthExample.java b/src/Chapter2/openauthentication/OAuthExample.java
new file mode 100644
index 0000000..9b2ec7a
--- /dev/null
+++ b/src/Chapter2/openauthentication/OAuthExample.java
@@ -0,0 +1,79 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter2.openauthentication;
+
+import Chapter2.support.OAuthTokenSecret;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import oauth.signpost.OAuth;
+import oauth.signpost.OAuthConsumer;
+import oauth.signpost.OAuthProvider;
+import oauth.signpost.basic.DefaultOAuthProvider;
+import oauth.signpost.commonshttp.CommonsHttpOAuthConsumer;
+import oauth.signpost.exception.OAuthCommunicationException;
+import oauth.signpost.exception.OAuthExpectationFailedException;
+import oauth.signpost.exception.OAuthMessageSignerException;
+import oauth.signpost.exception.OAuthNotAuthorizedException;
+import utils.OAuthUtils;
+
+public class OAuthExample
+{
+ public OAuthTokenSecret GetUserAccessKeySecret()
+ {
+ try {
+ //consumer key for Twitter Data Analytics application
+ if(OAuthUtils.CONSUMER_KEY.isEmpty())
+ {
+ System.out.println("Register an application and copy the consumer key into the configuration file.");
+ return null;
+ }
+ if(OAuthUtils.CONSUMER_SECRET.isEmpty())
+ {
+ System.out.println("Register an application and copy the consumer secret into the configuration file.");
+ return null;
+ }
+ OAuthConsumer consumer = new CommonsHttpOAuthConsumer(OAuthUtils.CONSUMER_KEY,OAuthUtils.CONSUMER_SECRET);
+ OAuthProvider provider = new DefaultOAuthProvider(OAuthUtils.REQUEST_TOKEN_URL, OAuthUtils.ACCESS_TOKEN_URL, OAuthUtils.AUTHORIZE_URL);
+ String authUrl = provider.retrieveRequestToken(consumer, OAuth.OUT_OF_BAND);
+ System.out.println("Now visit:\n" + authUrl + "\n and grant this app authorization");
+ System.out.println("Enter the PIN code and hit ENTER when you're done:");
+ BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
+ String pin = br.readLine();
+ System.out.println("Fetching access token from Twitter");
+ provider.retrieveAccessToken(consumer,pin);
+ String accesstoken = consumer.getToken();
+ String accesssecret = consumer.getTokenSecret();
+ OAuthTokenSecret tokensecret = new OAuthTokenSecret(accesstoken,accesssecret);
+ return tokensecret;
+ } catch (OAuthNotAuthorizedException ex) {
+ ex.printStackTrace();
+ } catch (OAuthMessageSignerException ex) {
+ ex.printStackTrace();
+ } catch (OAuthExpectationFailedException ex) {
+ ex.printStackTrace();
+ } catch (OAuthCommunicationException ex) {
+ ex.printStackTrace();
+ } catch(IOException ex)
+ {
+ ex.printStackTrace();
+ }
+ return null;
+ }
+
+ public static OAuthTokenSecret DEBUGUserAccessSecret()
+ {
+ String accesstoken = "1262619914-tcCPB1SyXy3BMuui9OAhprcPmqg3z2csSjDSCNY";
+ String accesssecret = "cXXO0qFLBjLXGtE97pnf5Vv1RZGxZ2FZ97wCYiaVU";
+ OAuthTokenSecret tokensecret = new OAuthTokenSecret(accesstoken,accesssecret);
+ return tokensecret;
+ }
+
+ public static void main(String[] args)
+ {
+ OAuthExample aue = new OAuthExample();
+ OAuthTokenSecret tokensecret = aue.GetUserAccessKeySecret();
+ System.out.println(tokensecret.toString());
+ }
+}
diff --git a/src/Chapter2/restapi/RESTApiExample.java b/src/Chapter2/restapi/RESTApiExample.java
new file mode 100644
index 0000000..9ceb88b
--- /dev/null
+++ b/src/Chapter2/restapi/RESTApiExample.java
@@ -0,0 +1,676 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter2.restapi;
+
+import Chapter2.support.APIType;
+import Chapter2.support.OAuthTokenSecret;
+import Chapter2.openauthentication.OAuthExample;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import oauth.signpost.OAuthConsumer;
+import oauth.signpost.basic.DefaultOAuthConsumer;
+import oauth.signpost.exception.OAuthCommunicationException;
+import oauth.signpost.exception.OAuthExpectationFailedException;
+import oauth.signpost.exception.OAuthMessageSignerException;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+public class RESTApiExample
+{
+ // writer for the output file that stores the collected user information
+ BufferedWriter OutFileWriter;
+ // user access token and secret used to build the OAuth consumer
+ OAuthTokenSecret OAuthTokens;
+ /**
+ * name of the file containing a list of users
+ */
+ final String DEF_FILENAME = "users.txt";
+ // default name of the output file
+ final String DEF_OUTFILENAME = "restapiresults.json";
+ // usernames loaded by ReadUsers
+ ArrayList<String> Usernames = new ArrayList<String>();
+ // consumer used to sign every API request made by this class
+ OAuthConsumer Consumer;
+
+    /**
+     * Builds an OAuthConsumer from the application's consumer key and secret
+     * and attaches the user access token and secret held in OAuthTokens.
+     * @return the configured consumer
+     */
+    public OAuthConsumer GetConsumer()
+    {
+        OAuthConsumer signingConsumer = new DefaultOAuthConsumer(
+                utils.OAuthUtils.CONSUMER_KEY,
+                utils.OAuthUtils.CONSUMER_SECRET);
+        signingConsumer.setTokenWithSecret(
+                OAuthTokens.getAccessToken(),
+                OAuthTokens.getAccessSecret());
+        return signingConsumer;
+    }
+
+    /**
+     * Reads the given file as UTF-8 text and adds every non-empty line
+     * to Usernames. I/O errors are printed and otherwise ignored.
+     * @param filename path of the file listing the users to be crawled
+     */
+    public void ReadUsers(String filename)
+    {
+        BufferedReader br = null;
+        try {
+            br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8"));
+            String temp;
+            while((temp = br.readLine())!=null)
+            {
+                if(!temp.isEmpty())
+                {
+                    Usernames.add(temp);
+                }
+            }
+        } catch (IOException ex) {
+            ex.printStackTrace();
+        }
+        finally{
+            // br is still null when the file could not be opened
+            if(br!=null)
+            {
+                try {
+                    br.close();
+                } catch (IOException ex) {
+                    ex.printStackTrace();
+                }
+            }
+        }
+    }
+
+    /**
+     * Stores the user access token and secret in OAuthTokens.
+     * The hard-coded debug credentials are used at present; the
+     * authorization flow is kept below, commented out.
+     */
+    public void LoadTwitterToken()
+    {
+        //Un-comment before release
+//        OAuthExample oae = new OAuthExample();
+//        OAuthTokens = oae.GetUserAccessKeySecret();
+        //Remove before release
+        this.OAuthTokens = OAuthExample.DEBUGUserAccessSecret();
+    }
+
+ /**
+ * Loads the access token, builds the signing consumer and prints the
+ * application's rate limit status.
+ * The file-driven crawl (read users, fetch per-user data, write results)
+ * is currently disabled: everything below the rate limit call is commented out.
+ * @param args not read by the active code
+ */
+ public static void main(String[] args)
+ {
+ RESTApiExample rae = new RESTApiExample();
+ rae.LoadTwitterToken();
+ rae.Consumer = rae.GetConsumer();
+// System.out.println(rae.GetStatuses("twtanalyticsbk"));
+ System.out.println(rae.GetRateLimitStatus());
+// int apicode = InfoType.PROFILE_INFO;
+// String infilename = rae.DEF_FILENAME;
+// String outfilename = rae.DEF_OUTFILENAME;
+// if(args!=null)
+// {
+// if(args.length>2)
+// {
+// apicode = Integer.parseInt(args[2]);
+// outfilename = args[1];
+// infilename = args[0];
+// }
+// if(args.length>1)
+// {
+// outfilename = args[1];
+// infilename = args[0];
+// }
+// else
+// if(args.length>0)
+// {
+// infilename = args[0];
+// }
+// }
+// rae.InitializeWriters(outfilename);
+// rae.ReadUsers(infilename);
+// if(apicode!=InfoType.PROFILE_INFO&&apicode!=InfoType.FOLLOWER_INFO&&apicode!=InfoType.FRIEND_INFO&&apicode!=InfoType.STATUSES_INFO)
+// {
+// System.out.println("Invalid API type: Use 0 for Profile, 1 for Followers, 2 for Friends, and 3 for Statuses");
+// System.exit(0);
+// }
+// if(rae.Usernames.size()>0)
+// {
+// //TO-DO: Print the possible API types and get user selection to crawl the users.
+// rae.LoadTwitterToken();
+// for(String user:rae.Usernames)
+// {
+// if(apicode==InfoType.PROFILE_INFO)
+// {
+// JSONObject jobj = rae.GetProfile(user);
+// if(jobj!=null&&jobj.length()==0)
+// {
+// rae.WriteToFile(user, jobj.toString());
+// }
+// }
+// else
+// if(apicode==InfoType.FRIEND_INFO)
+// {
+// JSONArray statusarr = rae.GetFriends(user);
+// if(statusarr.length()>0)
+// {
+// rae.WriteToFile(user, statusarr.toString());
+// }
+// }
+// else
+// if(apicode == InfoType.FOLLOWER_INFO)
+// {
+// JSONArray statusarr = rae.GetFollowers(user);
+// if(statusarr.length()>0)
+// {
+// rae.WriteToFile(user, statusarr.toString());
+// }
+// }
+// else
+// if(apicode == InfoType.STATUSES_INFO)
+// {
+// JSONArray statusarr = rae.GetStatuses(user);
+// if(statusarr.length()>0)
+// {
+// rae.GetStatuses(user);
+// }
+// }
+// }
+// }
+//// now you can close the files as all the threads have finished
+// rae.CleanupAfterFinish();
+ }
+
+    /**
+     * Retrieves the rate limit status of the application
+     * @return the parsed JSON response, or null when the request, the
+     *         signing or the parsing fails (the failure is logged)
+     */
+    public JSONObject GetRateLimitStatus()
+    {
+        BufferedReader bRead = null;
+        try{
+            URL url = new URL("https://api.twitter.com/1.1/application/rate_limit_status.json");
+            HttpURLConnection huc = (HttpURLConnection) url.openConnection();
+            huc.setReadTimeout(5000);
+            Consumer.sign(huc);
+            huc.connect();
+            // decode explicitly as UTF-8 rather than with the platform default charset
+            bRead = new BufferedReader(new InputStreamReader(huc.getInputStream(), "UTF-8"));
+            StringBuilder page = new StringBuilder();
+            String temp;
+            while((temp = bRead.readLine())!=null)
+            {
+                page.append(temp);
+            }
+            return (new JSONObject(page.toString()));
+        } catch (JSONException ex) {
+            Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
+        } catch (OAuthCommunicationException ex) {
+            Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
+        } catch (OAuthMessageSignerException ex) {
+            Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
+        } catch (OAuthExpectationFailedException ex) {
+            Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
+        }catch(IOException ex)
+        {
+            Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
+        }
+        finally{
+            // close the reader on every path, including failures while reading
+            if(bRead!=null)
+            {
+                try {
+                    bRead.close();
+                } catch (IOException ex) {
+                    Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Opens the output file in append mode and stores the writer in
+     * OutFileWriter; the file is created first when it does not exist.
+     * I/O errors are printed and otherwise ignored.
+     * @param outFilename name of the output file
+     */
+    public void InitializeWriters(String outFilename) {
+        try {
+            File target = new File(outFilename);
+            if(!target.exists())
+            {
+                target.createNewFile();
+            }
+            // UTF-8 is used so that Unicode characters in the data are not lost
+            FileOutputStream appendStream = new FileOutputStream(outFilename,true);
+            OutFileWriter = new BufferedWriter(new OutputStreamWriter(appendStream,"UTF-8"));
+        } catch (IOException ex) {
+            ex.printStackTrace();
+        }
+    }
+
+    /**
+     * Close the opened filewriter to save the data.
+     * Does nothing when no writer has been opened.
+     */
+    public void CleanupAfterFinish()
+    {
+        // the writer stays null when it was never initialized or failed to open
+        if(OutFileWriter==null)
+        {
+            return;
+        }
+        try {
+            OutFileWriter.close();
+        } catch (IOException ex) {
+            Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
+        }
+    }
+
+    /**
+     * Appends one record followed by a line separator to the output file.
+     * I/O errors are printed and otherwise ignored.
+     * @param user name of the user the data belongs to (not used by this method)
+     * @param data the retrieved information in JSON
+     */
+    public void WriteToFile(String user, String data)
+    {
+        try
+        {
+            BufferedWriter out = OutFileWriter;
+            out.write(data);
+            out.newLine();
+        } catch (IOException ex) {
+            ex.printStackTrace();
+        }
+    }
+
+    /**
+     * Retrieves the profile information of the user.
+     * The request is retried a bounded number of times after a server error
+     * (500/502/503) or an exhausted rate limit (429); a 404 or 401 ends the
+     * attempt immediately.
+     * @param username of the user whose profile needs to be retrieved
+     * @return the profile information as a JSONObject, or null when it could not be retrieved
+     */
+    public JSONObject GetProfile(String username)
+    {
+        // bounded so that a persistent error cannot keep this method looping
+        final int maxAttempts = 3;
+        JSONObject profile = null;
+        try {
+            System.out.println("Processing profile of "+username);
+            for(int attempt=0;attempt<maxAttempts;attempt++)
+            {
+                // Step 1: Create a fresh request; a connection cannot be reused after a failure
+                URL url = new URL("https://api.twitter.com/1.1/users/show.json?screen_name="+username);
+                HttpURLConnection huc = (HttpURLConnection) url.openConnection();
+                huc.setReadTimeout(5000);
+                // Step 2: Sign the request using the OAuth Secret
+                Consumer.sign(huc);
+                huc.connect();
+                BufferedReader bRead = null;
+                try {
+                    int code = huc.getResponseCode();
+                    if(code==404||code==401)
+                    {
+                        System.out.println(huc.getResponseMessage());
+                        break;
+                    }
+                    if(code==500||code==502||code==503)
+                    {
+                        System.out.println(huc.getResponseMessage());
+                        Thread.sleep(3000);
+                        continue;
+                    }
+                    // Step 3: If the requests have been exhausted, then wait until the quota is renewed
+                    if(code==429)
+                    {
+                        Thread.sleep(this.GetWaitTime("/users/show/:id"));
+                        continue;
+                    }
+                    // Step 4: Read and parse the response body
+                    bRead = new BufferedReader(new InputStreamReader(huc.getInputStream(), "UTF-8"));
+                    StringBuilder content = new StringBuilder();
+                    String temp;
+                    while((temp = bRead.readLine())!=null)
+                    {
+                        content.append(temp);
+                    }
+                    profile = new JSONObject(content.toString());
+                    break;
+                } catch (JSONException ex) {
+                    ex.printStackTrace();
+                    break;
+                } catch (InterruptedException ex) {
+                    ex.printStackTrace();
+                    // keep the interrupt visible to the caller and stop retrying
+                    Thread.currentThread().interrupt();
+                    break;
+                } finally {
+                    if(bRead!=null)
+                    {
+                        try {
+                            bRead.close();
+                        } catch (IOException ex) {
+                            ex.printStackTrace();
+                        }
+                    }
+                    huc.disconnect();
+                }
+            }
+        } catch (OAuthCommunicationException ex) {
+            ex.printStackTrace();
+        } catch (OAuthMessageSignerException ex) {
+            ex.printStackTrace();
+        } catch (OAuthExpectationFailedException ex) {
+            ex.printStackTrace();
+        } catch (IOException ex) {
+            ex.printStackTrace();
+        }
+        return profile;
+    }
+
+    /**
+     * Retrieves the followers of a user, one page per request, until the
+     * returned cursor is 0 or a page comes back empty.
+     * Server errors (500/502/503/504) and an exhausted rate limit (429) are
+     * retried after a pause; a 400/404, an unparsable page or an interrupt
+     * ends the crawl and returns what has been collected so far.
+     * @param username the name of the user whose followers need to be retrieved
+     * @return a list of user objects corresponding to the followers of the user
+     */
+    public JSONArray GetFollowers(String username)
+    {
+        JSONArray followers = new JSONArray();
+        try {
+            System.out.println(" followers user = "+username);
+            long cursor = -1;
+            // the loop ends once the cursor becomes 0
+            while(cursor!=0)
+            {
+                // Step 1: Create the API request using the supplied username
+                URL url = new URL("https://api.twitter.com/1.1/followers/list.json?screen_name="+username+"&cursor=" + cursor);
+                HttpURLConnection huc = (HttpURLConnection) url.openConnection();
+                huc.setReadTimeout(5000);
+                // Step 2: Sign the request using the OAuth Secret
+                Consumer.sign(huc);
+                huc.connect();
+                BufferedReader bRead = null;
+                try {
+                    int code = huc.getResponseCode();
+                    if(code==400||code==404)
+                    {
+                        System.out.println(huc.getResponseMessage());
+                        break;
+                    }
+                    if(code==500||code==502||code==503||code==504)
+                    {
+                        System.out.println(huc.getResponseMessage());
+                        Thread.sleep(3000);
+                        continue;
+                    }
+                    // Step 3: If the requests have been exhausted, then wait until the quota is renewed
+                    if(code==429)
+                    {
+                        Thread.sleep(this.GetWaitTime("/followers/list"));
+                        continue;
+                    }
+                    // Step 4: Retrieve the followers list from Twitter
+                    bRead = new BufferedReader(new InputStreamReader(huc.getInputStream(), "UTF-8"));
+                    StringBuilder content = new StringBuilder();
+                    String temp;
+                    while((temp = bRead.readLine())!=null)
+                    {
+                        content.append(temp);
+                    }
+                    JSONObject jobj = new JSONObject(content.toString());
+                    // Step 5: Retrieve the token for the next request
+                    cursor = jobj.getLong("next_cursor");
+                    JSONArray idlist = jobj.getJSONArray("users");
+                    if(idlist.length()==0)
+                    {
+                        break;
+                    }
+                    for(int i=0;i<idlist.length();i++)
+                    {
+                        followers.put(idlist.getJSONObject(i));
+                    }
+                } catch (JSONException ex) {
+                    Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
+                    // an unparsable page may leave the cursor unchanged; stop instead of re-requesting it forever
+                    break;
+                } catch (InterruptedException ex) {
+                    Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
+                    // keep the interrupt visible to the caller and stop crawling
+                    Thread.currentThread().interrupt();
+                    break;
+                } finally {
+                    if(bRead!=null)
+                    {
+                        try {
+                            bRead.close();
+                        } catch (IOException ex) {
+                            ex.printStackTrace();
+                        }
+                    }
+                    huc.disconnect();
+                }
+            }
+        } catch (OAuthCommunicationException ex) {
+            ex.printStackTrace();
+        } catch (OAuthMessageSignerException ex) {
+            ex.printStackTrace();
+        } catch (OAuthExpectationFailedException ex) {
+            ex.printStackTrace();
+        } catch (IOException ex) {
+            ex.printStackTrace();
+        }
+        return followers;
+    }
+
+    /**
+     * Retrieves the status messages of a user, 200 per request, paging
+     * backwards with max_id until an empty page is returned.
+     * Server errors (500/502/503) and an exhausted rate limit (429) are
+     * retried after a pause; a 400/404, an unparsable page, a page that
+     * does not advance max_id, or an interrupt ends the crawl.
+     * @param username the name of the user whose status messages need to be retrieved
+     * @return a list of status messages
+     */
+    public JSONArray GetStatuses(String username)
+    {
+        //Get the maximum number of tweets possible in a single page 200
+        int tweetcount = 200;
+        //Include include_rts because it is counted towards the limit anyway.
+        boolean include_rts = true;
+        JSONArray statuses = new JSONArray();
+        try {
+            System.out.println("Processing status messages of "+username);
+            long maxid = 0;
+            while(true)
+            {
+                URL url = null;
+                if(maxid==0)
+                {
+                    url = new URL("https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=" + username+"&include_rts="+include_rts+"&count="+tweetcount);
+                }
+                else
+                {
+                    //use max_id to get the tweets in the next page. Use max_id-1 to avoid getting redundant tweets.
+                    url = new URL("https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=" + username+"&include_rts="+include_rts+"&count="+tweetcount+"&max_id="+(maxid-1));
+                }
+                HttpURLConnection huc = (HttpURLConnection) url.openConnection();
+                huc.setReadTimeout(5000);
+                Consumer.sign(huc);
+                huc.connect();
+                BufferedReader bRead = null;
+                try {
+                    int code = huc.getResponseCode();
+                    if(code==400||code==404)
+                    {
+                        System.out.println(code);
+                        break;
+                    }
+                    if(code==500||code==502||code==503)
+                    {
+                        System.out.println(code);
+                        Thread.sleep(3000);
+                        // retry with a new connection instead of reading the error response
+                        continue;
+                    }
+                    // If the requests have been exhausted, then wait until the quota is renewed
+                    if(code==429)
+                    {
+                        Thread.sleep(this.GetWaitTime("/statuses/user_timeline"));
+                        continue;
+                    }
+                    bRead = new BufferedReader(new InputStreamReader(huc.getInputStream(), "UTF-8"));
+                    StringBuilder content = new StringBuilder();
+                    String temp;
+                    while((temp = bRead.readLine())!=null)
+                    {
+                        content.append(temp);
+                    }
+                    JSONArray statusarr = new JSONArray(content.toString());
+                    if(statusarr.length()==0)
+                    {
+                        break;
+                    }
+                    long previousmaxid = maxid;
+                    for(int i=0;i<statusarr.length();i++)
+                    {
+                        JSONObject jobj = statusarr.getJSONObject(i);
+                        statuses.put(jobj);
+                        //Get the max_id to get the next batch of tweets
+                        if(!jobj.isNull("id"))
+                        {
+                            maxid = jobj.getLong("id");
+                        }
+                    }
+                    // without a new max_id the next request would return the same page again
+                    if(maxid==previousmaxid)
+                    {
+                        break;
+                    }
+                } catch (JSONException ex) {
+                    ex.printStackTrace();
+                    // max_id cannot advance past an unparsable page; stop instead of looping on it
+                    break;
+                } catch (InterruptedException ex) {
+                    Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex);
+                    // keep the interrupt visible to the caller and stop crawling
+                    Thread.currentThread().interrupt();
+                    break;
+                } finally {
+                    if(bRead!=null)
+                    {
+                        try {
+                            bRead.close();
+                        } catch (IOException ex) {
+                            ex.printStackTrace();
+                        }
+                    }
+                    huc.disconnect();
+                }
+            }
+            System.out.println(statuses.length());
+        } catch (OAuthCommunicationException ex) {
+            ex.printStackTrace();
+        } catch (OAuthMessageSignerException ex) {
+            ex.printStackTrace();
+        } catch (OAuthExpectationFailedException ex) {
+            ex.printStackTrace();
+        } catch (IOException ex) {
+            ex.printStackTrace();
+        }
+        return statuses;
+    }
+
+    /**
+     * Retrieves the friends of a user, one page per request, until the
+     * returned cursor is 0 or a page comes back empty.
+     * Server errors (500/502/503) and an exhausted rate limit (429) are
+     * retried after a pause; a 400/401/404, an unparsable page or an
+     * interrupt ends the crawl and returns what has been collected so far.
+     * @param username the name of the user whose friends need to be fetched
+     * @return a list of user objects who are friends of the user
+     */
+    public JSONArray GetFriends(String username)
+    {
+        JSONArray friends = new JSONArray();
+        try {
+            System.out.println("Processing friends of "+username);
+            long cursor = -1;
+            // the loop ends once the cursor becomes 0
+            while(cursor!=0)
+            {
+                // Step 1: Create the API request using the supplied username
+                URL url = new URL("https://api.twitter.com/1.1/friends/list.json?screen_name="+username+"&cursor="+cursor);
+                HttpURLConnection huc = (HttpURLConnection) url.openConnection();
+                huc.setReadTimeout(5000);
+                //Step 2: Sign the request using the OAuth Secret
+                Consumer.sign(huc);
+                huc.connect();
+                BufferedReader bRead = null;
+                try {
+                    int code = huc.getResponseCode();
+                    if(code==400||code==401||code==404)
+                    {
+                        System.out.println(huc.getResponseMessage());
+                        break;
+                    }
+                    if(code==500||code==502||code==503)
+                    {
+                        System.out.println(huc.getResponseMessage());
+                        Thread.sleep(3000);
+                        continue;
+                    }
+                    // Step 3: If the requests have been exhausted, then wait until the quota is renewed
+                    if(code==429)
+                    {
+                        Thread.sleep(this.GetWaitTime("/friends/list"));
+                        continue;
+                    }
+                    // Step 4: Retrieve the friends list from Twitter
+                    bRead = new BufferedReader(new InputStreamReader(huc.getInputStream(), "UTF-8"));
+                    StringBuilder content = new StringBuilder();
+                    String temp;
+                    while((temp = bRead.readLine())!=null)
+                    {
+                        content.append(temp);
+                    }
+                    JSONObject jobj = new JSONObject(content.toString());
+                    // Step 5: Retrieve the token for the next request
+                    cursor = jobj.getLong("next_cursor");
+                    JSONArray userlist = jobj.getJSONArray("users");
+                    if(userlist.length()==0)
+                    {
+                        break;
+                    }
+                    for(int i=0;i<userlist.length();i++)
+                    {
+                        friends.put(userlist.get(i));
+                    }
+                } catch (JSONException ex) {
+                    ex.printStackTrace();
+                    // an unparsable page may leave the cursor unchanged; stop instead of re-requesting it forever
+                    break;
+                } catch (InterruptedException ex) {
+                    ex.printStackTrace();
+                    // keep the interrupt visible to the caller and stop crawling
+                    Thread.currentThread().interrupt();
+                    break;
+                } finally {
+                    if(bRead!=null)
+                    {
+                        try {
+                            bRead.close();
+                        } catch (IOException ex) {
+                            ex.printStackTrace();
+                        }
+                    }
+                    huc.disconnect();
+                }
+            }
+        } catch (OAuthCommunicationException ex) {
+            ex.printStackTrace();
+        } catch (OAuthMessageSignerException ex) {
+            ex.printStackTrace();
+        } catch (OAuthExpectationFailedException ex) {
+            ex.printStackTrace();
+        } catch (IOException ex) {
+            ex.printStackTrace();
+        }
+        return friends;
+    }
+
+    /**
+     * Retrieves the wait time if the API Rate Limit has been hit.
+     * The limit entry for the given API is looked up in the rate limit
+     * status; when at most one request remains, the delay until the
+     * reported reset time is returned.
+     * @param api the name of the API currently being used
+     * @return the number of milliseconds to wait before initiating a new request;
+     *         0 when no wait is needed or the status could not be read
+     */
+    public long GetWaitTime(String api)
+    {
+        JSONObject jobj = this.GetRateLimitStatus();
+        if(jobj!=null)
+        {
+            try {
+                if(!jobj.isNull("resources"))
+                {
+                    JSONObject resourcesobj = jobj.getJSONObject("resources");
+                    JSONObject apilimit = null;
+                    if(api.equals(APIType.USER_TIMELINE))
+                    {
+                        JSONObject statusobj = resourcesobj.getJSONObject("statuses");
+                        apilimit = statusobj.getJSONObject(api);
+                    }
+                    else
+                    if(api.equals(APIType.FOLLOWERS))
+                    {
+                        JSONObject followersobj = resourcesobj.getJSONObject("followers");
+                        apilimit = followersobj.getJSONObject(api);
+                    }
+                    else
+                    if(api.equals(APIType.FRIENDS))
+                    {
+                        JSONObject friendsobj = resourcesobj.getJSONObject("friends");
+                        apilimit = friendsobj.getJSONObject(api);
+                    }
+                    else
+                    if(api.equals(APIType.USER_PROFILE))
+                    {
+                        JSONObject userobj = resourcesobj.getJSONObject("users");
+                        apilimit = userobj.getJSONObject(api);
+                    }
+                    // an API name that matches none of the known types has no limit entry
+                    if(apilimit==null)
+                    {
+                        return 0;
+                    }
+                    int numremhits = apilimit.getInt("remaining");
+                    if(numremhits<=1)
+                    {
+                        // "reset" is an absolute time in epoch seconds, so the
+                        // delay is its distance from the current time
+                        long resetmillis = apilimit.getLong("reset")*1000; //convert to milliseconds
+                        long waittime = resetmillis-System.currentTimeMillis();
+                        // never hand a negative value to Thread.sleep
+                        return waittime>0?waittime:0;
+                    }
+                }
+            } catch (JSONException ex) {
+                ex.printStackTrace();
+            }
+        }
+        return 0;
+    }
+}
diff --git a/src/Chapter2/restapi/RESTSearchExample.java b/src/Chapter2/restapi/RESTSearchExample.java
new file mode 100644
index 0000000..510661c
--- /dev/null
+++ b/src/Chapter2/restapi/RESTSearchExample.java
@@ -0,0 +1,311 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter2.restapi;
+
+import Chapter2.support.OAuthTokenSecret;
+import Chapter2.openauthentication.OAuthExample;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import oauth.signpost.OAuthConsumer;
+import oauth.signpost.basic.DefaultOAuthConsumer;
+import oauth.signpost.exception.OAuthCommunicationException;
+import oauth.signpost.exception.OAuthExpectationFailedException;
+import oauth.signpost.exception.OAuthMessageSignerException;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONTokener;
+
+public class RESTSearchExample
+{
+ // writer for the file that stores the search results
+ BufferedWriter OutFileWriter;
+ // user access token and secret used to build the OAuth consumer
+ OAuthTokenSecret OAuthTokens;
+ // consumer used to sign the search request
+ OAuthConsumer Consumer;
+ // query used by main when no search terms are given on the command line
+ String query = "#protest";
+ // name of the output file used by main
+ String DEF_FILENAME = "searchresults.json";
+
+    /**
+     * Builds an OAuthConsumer from the application's consumer key and secret
+     * and attaches the user access token and secret held in OAuthTokens.
+     * @return the configured consumer
+     */
+    public OAuthConsumer GetConsumer()
+    {
+        OAuthConsumer signingConsumer = new DefaultOAuthConsumer(
+                utils.OAuthUtils.CONSUMER_KEY,
+                utils.OAuthUtils.CONSUMER_SECRET);
+        signingConsumer.setTokenWithSecret(
+                OAuthTokens.getAccessToken(),
+                OAuthTokens.getAccessSecret());
+        return signingConsumer;
+    }
+
+    /**
+     * Stores the user access token and secret in OAuthTokens.
+     * The hard-coded debug credentials are used at present; the
+     * authorization flow is kept below, commented out.
+     */
+    public void LoadTwitterToken()
+    {
+        //Un-comment before release
+//        OAuthExample oae = new OAuthExample();
+//        OAuthTokens = oae.GetUserAccessKeySecret();
+        //Remove before release
+        this.OAuthTokens = OAuthExample.DEBUGUserAccessSecret();
+    }
+
+    /**
+     * Fetches tweets matching a query (up to 100 per request).
+     * The request is retried a bounded number of times after a server error
+     * (500/502/503) or an exhausted rate limit (429); a 400 or 404 ends the
+     * search immediately.
+     * @param query for which tweets need to be fetched
+     * @return an array of status objects, or null when the search failed
+     */
+    public JSONArray GetSearchResults(String query)
+    {
+        // bounded so that a persistent error cannot keep this method looping
+        final int maxAttempts = 3;
+        try{
+            //construct the request url
+            String URL_PARAM_SEPERATOR = "&";
+            StringBuilder url = new StringBuilder();
+            url.append("https://api.twitter.com/1.1/search/tweets.json?q=");
+            //query needs to be encoded
+            url.append(URLEncoder.encode(query, "UTF-8"));
+            url.append(URL_PARAM_SEPERATOR);
+            url.append("count=100");
+            URL navurl = new URL(url.toString());
+            for(int attempt=0;attempt<maxAttempts;attempt++)
+            {
+                // a fresh connection per attempt; one cannot be reused after a failure
+                HttpURLConnection huc = (HttpURLConnection) navurl.openConnection();
+                huc.setReadTimeout(5000);
+                Consumer.sign(huc);
+                huc.connect();
+                BufferedReader bRead = null;
+                try {
+                    int code = huc.getResponseCode();
+                    if(code==400||code==404)
+                    {
+                        System.out.println(huc.getResponseMessage());
+                        return null;
+                    }
+                    if(code==429)
+                    {
+                        System.out.println(huc.getResponseMessage());
+                        // wait on the limit of the search API, which is the one being called
+                        Thread.sleep(this.GetWaitTime("/search/tweets"));
+                        continue;
+                    }
+                    if(code==500||code==502||code==503)
+                    {
+                        System.out.println(huc.getResponseMessage());
+                        Thread.sleep(2000);
+                        continue;
+                    }
+                    bRead = new BufferedReader(new InputStreamReader(huc.getInputStream(), "UTF-8"));
+                    String temp;
+                    StringBuilder page = new StringBuilder();
+                    while( (temp = bRead.readLine())!=null)
+                    {
+                        page.append(temp);
+                    }
+                    JSONObject json = new JSONObject(new JSONTokener(page.toString()));
+                    return json.getJSONArray("statuses");
+                } catch (JSONException ex) {
+                    Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+                    return null;
+                } catch (InterruptedException ex) {
+                    Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+                    // keep the interrupt visible to the caller and stop retrying
+                    Thread.currentThread().interrupt();
+                    return null;
+                } finally {
+                    if(bRead!=null)
+                    {
+                        try {
+                            bRead.close();
+                        } catch (IOException ex) {
+                            ex.printStackTrace();
+                        }
+                    }
+                    huc.disconnect();
+                }
+            }
+        } catch (OAuthCommunicationException ex) {
+            Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+        } catch (OAuthMessageSignerException ex) {
+            Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+        } catch (OAuthExpectationFailedException ex) {
+            Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+        }catch(IOException ex)
+        {
+            ex.printStackTrace();
+        }
+        return null;
+    }
+
+    /**
+     * Retrieves the rate limit status of the application
+     * @return the parsed JSON response, or null when the request, the
+     *         signing or the parsing fails (the failure is logged)
+     */
+    public JSONObject GetRateLimitStatus()
+    {
+        BufferedReader bRead = null;
+        try{
+            URL url = new URL("https://api.twitter.com/1.1/application/rate_limit_status.json");
+            HttpURLConnection huc = (HttpURLConnection) url.openConnection();
+            huc.setReadTimeout(5000);
+            // GetConsumer builds the same key/secret + token consumer this method needs
+            OAuthConsumer consumer = this.GetConsumer();
+            consumer.sign(huc);
+            huc.connect();
+            // decode explicitly as UTF-8 rather than with the platform default charset
+            bRead = new BufferedReader(new InputStreamReader(huc.getInputStream(), "UTF-8"));
+            StringBuilder page = new StringBuilder();
+            String temp;
+            while((temp = bRead.readLine())!=null)
+            {
+                page.append(temp);
+            }
+            return (new JSONObject(page.toString()));
+        } catch (JSONException ex) {
+            Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+        } catch (OAuthCommunicationException ex) {
+            Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+        } catch (OAuthMessageSignerException ex) {
+            Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+        } catch (OAuthExpectationFailedException ex) {
+            Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+        }catch(IOException ex)
+        {
+            Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+        }
+        finally{
+            // close the reader on every path, including failures while reading
+            if(bRead!=null)
+            {
+                try {
+                    bRead.close();
+                } catch (IOException ex) {
+                    Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+                }
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Opens the output file in append mode and stores the writer in
+     * OutFileWriter; the file is created first when it does not exist.
+     * I/O errors are printed and otherwise ignored.
+     * @param outFilename name of the output file
+     */
+    public void InitializeWriters(String outFilename) {
+        try {
+            File resultsFile = new File(outFilename);
+            if(!resultsFile.exists())
+            {
+                resultsFile.createNewFile();
+            }
+            // UTF-8 is used so that Unicode characters in the data are not lost
+            OutputStreamWriter utf8Out = new OutputStreamWriter(new FileOutputStream(outFilename,true),"UTF-8");
+            OutFileWriter = new BufferedWriter(utf8Out);
+        } catch (IOException ex) {
+            ex.printStackTrace();
+        }
+    }
+
+    /**
+     * Close the opened filewriter to save the data.
+     * Does nothing when no writer has been opened.
+     */
+    public void CleanupAfterFinish()
+    {
+        // the writer stays null when it was never initialized or failed to open
+        if(OutFileWriter==null)
+        {
+            return;
+        }
+        try {
+            OutFileWriter.close();
+        } catch (IOException ex) {
+            Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+        }
+    }
+
+    /**
+     * Writes every element of the array to the output file, one JSON object
+     * per line. An element that cannot be read as a JSON object is logged and
+     * skipped; an I/O error is printed and ends the writing.
+     * @param searchResults the search results to be written
+     */
+    public void WriteToFile(JSONArray searchResults)
+    {
+        try
+        {
+            int index = 0;
+            while(index<searchResults.length())
+            {
+                try {
+                    String line = searchResults.getJSONObject(index).toString();
+                    OutFileWriter.write(line);
+                    OutFileWriter.newLine();
+                } catch (JSONException ex) {
+                    Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex);
+                }
+                index++;
+            }
+        } catch (IOException ex) {
+            ex.printStackTrace();
+        }
+    }
+
+    /**
+     * Retrieves the wait time if the API Rate Limit has been hit.
+     * The limit entry is looked up under the resource family named by the
+     * first path segment of the API (e.g. "/search/tweets" under "search");
+     * when at most one request remains, the delay until the reported reset
+     * time is returned.
+     * @param api the name of the API currently being used
+     * @return the number of milliseconds to wait before initiating a new request;
+     *         0 when no wait is needed or the status could not be read
+     */
+    public long GetWaitTime(String api)
+    {
+        JSONObject jobj = this.GetRateLimitStatus();
+        if(jobj!=null)
+        {
+            try {
+                if(!jobj.isNull("resources"))
+                {
+                    JSONObject resourcesobj = jobj.getJSONObject("resources");
+                    // derive the family from the API path instead of assuming "statuses"
+                    String family = api.startsWith("/")?api.substring(1):api;
+                    int slash = family.indexOf('/');
+                    if(slash>=0)
+                    {
+                        family = family.substring(0, slash);
+                    }
+                    JSONObject familyobj = resourcesobj.getJSONObject(family);
+                    JSONObject apilimit = familyobj.getJSONObject(api);
+                    int numremhits = apilimit.getInt("remaining");
+                    if(numremhits<=1)
+                    {
+                        // "reset" is an absolute time in epoch seconds, so the
+                        // delay is its distance from the current time
+                        long resetmillis = apilimit.getLong("reset")*1000; //convert to milliseconds
+                        long waittime = resetmillis-System.currentTimeMillis();
+                        // never hand a negative value to Thread.sleep
+                        return waittime>0?waittime:0;
+                    }
+                }
+            } catch (JSONException ex) {
+                ex.printStackTrace();
+            }
+        }
+        return 0;
+    }
+
+    /**
+     * Creates an OR search query from the supplied terms
+     * @param queryTerms the terms to combine, in order
+     * @return a String formatted as term1 OR term2; empty when no terms are given
+     */
+    public String CreateORQuery(ArrayList<String> queryTerms)
+    {
+        String OR_Operator = " OR ";
+        StringBuilder querystr = new StringBuilder();
+        boolean first = true;
+        for(String term:queryTerms)
+        {
+            // the operator goes between terms, so it precedes every term but the first
+            if(!first)
+            {
+                querystr.append(OR_Operator);
+            }
+            querystr.append(term);
+            first = false;
+        }
+        return querystr.toString();
+    }
+
+    /**
+     * Searches for the terms given on the command line (combined with OR),
+     * or for the default query when none are given, and appends the results
+     * to the default output file.
+     * @param args the search terms; may be empty
+     */
+    public static void main(String[] args)
+    {
+        RESTSearchExample rse = new RESTSearchExample();
+        ArrayList<String> queryterms = new ArrayList<String>();
+        String outfilename = rse.DEF_FILENAME;
+        if(args!=null&&args.length>0)
+        {
+            for(int i=0;i<args.length;i++)
+            {
+                queryterms.add(args[i]);
+            }
+        }
+        else
+        {
+            // fall back to the default query so the search is never empty
+            queryterms.add(rse.query);
+        }
+        rse.LoadTwitterToken();
+        rse.Consumer = rse.GetConsumer();
+        System.out.println(rse.GetRateLimitStatus());
+        rse.InitializeWriters(outfilename);
+        try {
+            JSONArray results = rse.GetSearchResults(rse.CreateORQuery(queryterms));
+            if(results!=null)
+            {
+                rse.WriteToFile(results);
+            }
+        } finally {
+            // close the writer even when the search or the writing throws
+            rse.CleanupAfterFinish();
+        }
+    }
+}
diff --git a/src/Chapter2/streamingapi/StreamingApiExample.java b/src/Chapter2/streamingapi/StreamingApiExample.java
new file mode 100644
index 0000000..8abfff4
--- /dev/null
+++ b/src/Chapter2/streamingapi/StreamingApiExample.java
@@ -0,0 +1,372 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter2.streamingapi;
+
+import Chapter2.support.OAuthTokenSecret;
+import Chapter2.openauthentication.OAuthExample;
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import oauth.signpost.OAuthConsumer;
+import oauth.signpost.commonshttp.CommonsHttpOAuthConsumer;
+import oauth.signpost.exception.OAuthCommunicationException;
+import oauth.signpost.exception.OAuthExpectationFailedException;
+import oauth.signpost.exception.OAuthMessageSignerException;
+import org.apache.commons.httpclient.HttpStatus;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.HttpClient;
+import org.apache.http.HttpEntity;
+import org.apache.http.NameValuePair;
+import org.apache.http.client.entity.UrlEncodedFormEntity;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.message.BasicNameValuePair;
+import org.apache.http.params.CoreConnectionPNames;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.json.JSONTokener;
+import utils.OAuthUtils;
+
+public class StreamingApiExample
+{
+ OAuthTokenSecret OAuthToken;
+ final int RECORDS_TO_PROCESS = 1000;
+ final int MAX_GEOBOXES = 25;
+ final int MAX_KEYWORDS = 400;
+ final int MAX_USERS = 5000;
+ HashSet<String> Keywords;
+ HashSet<String> Geoboxes;
+ HashSet<String> Userids;
+ final String CONFIG_FILE_PATH = "streaming/streaming.config";
+ final String DEF_OUTPATH = "streaming/";
+
+ /**
+  * Loads the Twitter access token and secret for a user into OAuthToken.
+  * The debug credentials from OAuthExample.DEBUGUserAccessSecret() are used; the
+  * alternative is new OAuthExample().GetUserAccessKeySecret().
+  */
+ public void LoadTwitterToken()
+ {
+     this.OAuthToken = OAuthExample.DEBUGUserAccessSecret();
+ }
+
+ /**
+ * Creates a connection to the Streaming Filter API
+ * @param baseUrl the URL for Twitter Filter API
+ * @param outFilePath Location to place the exported file
+ */
+ public void CreateStreamingConnection(String baseUrl, String outFilePath)
+ {
+ HttpClient httpClient = new DefaultHttpClient();
+ httpClient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, new Integer(90000));
+ //Step 1: Initialize OAuth Consumer
+ OAuthConsumer consumer = new CommonsHttpOAuthConsumer(OAuthUtils.CONSUMER_KEY,OAuthUtils.CONSUMER_SECRET);
+ consumer.setTokenWithSecret(OAuthToken.getAccessToken(),OAuthToken.getAccessSecret());
+ //Step 2: Create a new HTTP POST request and set parameters
+ HttpPost httppost = new HttpPost(baseUrl);
+ try {
+ httppost.setEntity(new UrlEncodedFormEntity(CreateRequestBody(), "UTF-8"));
+ } catch (UnsupportedEncodingException ex) {
+ ex.printStackTrace();
+ }
+ try {
+ //Step 3: Sign the request
+ consumer.sign(httppost);
+ } catch (OAuthMessageSignerException ex) {
+ ex.printStackTrace();
+ } catch (OAuthExpectationFailedException ex) {
+ ex.printStackTrace();
+ } catch (OAuthCommunicationException ex) {
+ ex.printStackTrace();
+ }
+ HttpResponse response;
+ InputStream is = null;
+ try {
+ //Step 4: Connect to the API
+ response = httpClient.execute(httppost);
+ if (response.getStatusLine().getStatusCode()!= HttpStatus.SC_OK)
+ {
+ throw new IOException("Got status " +response.getStatusLine().getStatusCode());
+ }
+ else
+ {
+ System.out.println(OAuthToken.getAccessToken()+ ": Processing from " + baseUrl);
+ HttpEntity entity = response.getEntity();
+ try {
+ is = entity.getContent();
+ } catch (IOException ex) {
+ ex.printStackTrace();
+ } catch (IllegalStateException ex) {
+ ex.printStackTrace();
+ }
+ //Step 5: Process the incoming Tweet Stream
+ this.ProcessTwitterStream(is, outFilePath);
+ }
+ } catch (IOException ex) {
+ ex.printStackTrace();
+ }finally {
+ // Abort the method, otherwise releaseConnection() will
+ // attempt to finish reading the never-ending response.
+ // These methods do not throw exceptions.
+ if(is!=null)
+ {
+ try {
+ is.close();
+ } catch (IOException ex) {
+ ex.printStackTrace();
+ }
+ }
+ }
+ }
+
+ /**
+  * Processes a stream of tweets and writes them to files one tweet per line. Each tweet here is
+  * represented by a JSON document. Tweets are buffered and written in batches of
+  * RECORDS_TO_PROCESS to a new file named outFilePath + "tweets_" + current millis + ".json";
+  * a final partial batch is written when the stream ends. The method returns once the stream
+  * has no more input or can no longer be read. Errors are printed, none are thrown.
+  * @param is input stream already connected to the streaming API; null is ignored
+  * @param outFilePath prefix (normally a directory ending in a separator) for the output files
+  */
+ public void ProcessTwitterStream(InputStream is, String outFilePath)
+ {
+     if (is == null)
+     {
+         return;
+     }
+     ArrayList<JSONObject> rawtweets = new ArrayList<JSONObject>();
+     int nooftweetsuploaded = 0;
+     try {
+         JSONTokener jsonTokener = new JSONTokener(new InputStreamReader(is, "UTF-8"));
+         // Looping on the tokener state instead of while(true) keeps a closed stream from
+         // producing the same JSONException forever.
+         while (HasMoreInput(jsonTokener)) {
+             try {
+                 rawtweets.add(new JSONObject(jsonTokener));
+             } catch (JSONException ex) {
+                 // a failed parse still consumes input, so the loop makes progress
+                 ex.printStackTrace();
+                 continue;
+             }
+             if (rawtweets.size() >= RECORDS_TO_PROCESS)
+             {
+                 nooftweetsuploaded += rawtweets.size();
+                 WriteTweetBatch(rawtweets, outFilePath);
+                 rawtweets.clear();
+                 System.out.println("Written "+nooftweetsuploaded+" records so far");
+             }
+         }
+         if (!rawtweets.isEmpty())
+         {
+             nooftweetsuploaded += rawtweets.size();
+             WriteTweetBatch(rawtweets, outFilePath);
+             rawtweets.clear();
+             System.out.println("Written "+nooftweetsuploaded+" records so far");
+         }
+     } catch (IOException ex) {
+         ex.printStackTrace();
+     }
+ }
+
+ /**
+  * Reports whether the tokener can still deliver characters; a read failure counts as "no".
+  */
+ private boolean HasMoreInput(JSONTokener jsonTokener)
+ {
+     try {
+         return jsonTokener.more();
+     } catch (JSONException ex) {
+         ex.printStackTrace();
+         return false;
+     }
+ }
+
+ /**
+  * Writes the given tweets, one JSON document per line, to a new UTF-8 file and closes it.
+  */
+ private void WriteTweetBatch(List<JSONObject> tweets, String outFilePath) throws IOException
+ {
+     String filename = outFilePath + "tweets_" + System.currentTimeMillis() + ".json";
+     BufferedWriter bwrite = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), "UTF-8"));
+     try {
+         for (JSONObject jobj : tweets) {
+             bwrite.write(jobj.toString());
+             bwrite.newLine();
+         }
+     } finally {
+         bwrite.close();
+     }
+ }
+
+ public static void main(String[] args)
+ {
+ StreamingApiExample sae = new StreamingApiExample();
+ sae.LoadTwitterToken();
+ //load parameters from a TSV file
+ String filename = sae.CONFIG_FILE_PATH;
+ String outfilepath = sae.DEF_OUTPATH;
+ if(args!=null)
+ {
+ if(args.length>0)
+ {
+ filename = args[0];
+ }
+ if(args.length>1)
+ {
+ File fl = new File(args[1]);
+ if(fl.exists()&&fl.isDirectory())
+ {
+ outfilepath = args[1];
+ }
+ }
+ }
+ sae.ReadParameters(filename);
+ sae.CreateStreamingConnection("https://stream.twitter.com/1.1/statuses/filter.json", outfilepath);
+ }
+
+ /**
+  * Reads the file and loads the parameters to be crawled. The first three non-empty lines are
+  * read as tab separated values: line 1 holds keywords, line 2 geoboxes and line 3 user ids.
+  * Anything after the third non-empty line is ignored. If the file cannot be opened or read
+  * the error is printed and whatever was loaded up to that point is kept.
+  * @param filename path of the UTF-8 encoded parameter file
+  */
+ public void ReadParameters(String filename)
+ {
+     if(Userids==null)
+     {
+         Userids = new HashSet<String>();
+     }
+     if(Geoboxes==null)
+     {
+         Geoboxes = new HashSet<String>();
+     }
+     if(Keywords==null)
+     {
+         Keywords = new HashSet<String>();
+     }
+     BufferedReader br = null;
+     try {
+         br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8"));
+         String line;
+         int count = 1;
+         while(count<=3 && (line = br.readLine())!=null)
+         {
+             if(line.isEmpty())
+             {
+                 continue;
+             }
+             // a HashSet already drops duplicate values within the line
+             HashSet<String> values = new HashSet<String>();
+             for(String value:line.split("\t"))
+             {
+                 values.add(value);
+             }
+             if(count==1)
+             {
+                 FilterKeywords(values);
+             }
+             else if(count==2)
+             {
+                 FilterGeoboxes(values);
+             }
+             else
+             {
+                 FilterUserids(values);
+             }
+             count++;
+         }
+     } catch (IOException ex) {
+         ex.printStackTrace();
+     }
+     finally{
+         // br stays null when the file could not be opened
+         if(br!=null)
+         {
+             try {
+                 br.close();
+             } catch (IOException ex) {
+                 ex.printStackTrace();
+             }
+         }
+     }
+ }
+
+ /**
+  * Adds the supplied user ids to Userids until it holds MAX_USERS entries; the rest are ignored.
+  * Which ids are ignored is unspecified because the input set has no defined iteration order.
+  * @param userids candidate user ids, may be null
+  */
+ private void FilterUserids(HashSet<String> userids)
+ {
+     if(userids==null)
+     {
+         return;
+     }
+     for(String id:userids)
+     {
+         if(Userids.size()>=MAX_USERS)
+         {
+             break;
+         }
+         Userids.add(id);
+     }
+ }
+
+ /**
+  * Adds the supplied geoboxes to Geoboxes until it holds MAX_GEOBOXES entries; the rest are ignored.
+  * Which boxes are ignored is unspecified because the input set has no defined iteration order.
+  * @param geoboxes candidate geoboxes, may be null
+  */
+ private void FilterGeoboxes(HashSet<String> geoboxes)
+ {
+     if(geoboxes==null)
+     {
+         return;
+     }
+     for(String box:geoboxes)
+     {
+         if(Geoboxes.size()>=MAX_GEOBOXES)
+         {
+             break;
+         }
+         Geoboxes.add(box);
+     }
+ }
+ /**
+  * Keep only the maximum permitted number of parameters for a connection, ignoring the rest:
+  * keywords are added to Keywords until it holds MAX_KEYWORDS entries. Which keywords are
+  * ignored is unspecified because the input set has no defined iteration order.
+  * This can be extended to create multiple sets to be crawled by different threads.
+  * @param hashtags candidate keywords, may be null
+  */
+ private void FilterKeywords(HashSet<String> hashtags)
+ {
+     if(hashtags==null)
+     {
+         return;
+     }
+     for(String tag:hashtags)
+     {
+         if(Keywords.size()>=MAX_KEYWORDS)
+         {
+             break;
+         }
+         Keywords.add(tag);
+     }
+ }
+
+ /**
+  * Builds the form parameters of the filter request: "follow", "locations" and "track" are
+  * added, in that order, for each of Userids, Geoboxes and Keywords that is non-empty.
+  * Each parameter that is added is also echoed to standard output.
+  */
+ private List<NameValuePair> CreateRequestBody()
+ {
+     List<NameValuePair> body = new ArrayList<NameValuePair>();
+     boolean haveUsers = Userids != null && !Userids.isEmpty();
+     if (haveUsers) {
+         body.add(CreateNameValuePair("follow", Userids));
+         System.out.println("userids = "+Userids);
+     }
+     boolean haveBoxes = Geoboxes != null && !Geoboxes.isEmpty();
+     if (haveBoxes) {
+         body.add(CreateNameValuePair("locations", Geoboxes));
+         System.out.println("locations = "+Geoboxes);
+     }
+     boolean haveKeywords = Keywords != null && !Keywords.isEmpty();
+     if (haveKeywords) {
+         body.add(CreateNameValuePair("track", Keywords));
+         System.out.println("keywords = "+Keywords);
+     }
+     return body;
+ }
+
+ /**
+  * Joins the items with commas and pairs the result with the given parameter name.
+  * @param name parameter name
+  * @param items values to join, in the collection's iteration order
+  * @return a BasicNameValuePair of name and the comma separated items
+  */
+ private NameValuePair CreateNameValuePair(String name, Collection<String> items)
+ {
+     StringBuilder joined = new StringBuilder();
+     String separator = "";
+     for (String item : items) {
+         joined.append(separator).append(item);
+         separator = ",";
+     }
+     String value = joined.toString();
+     return new BasicNameValuePair(name, value);
+ }
+}
diff --git a/src/Chapter2/support/APIType.java b/src/Chapter2/support/APIType.java
new file mode 100644
index 0000000..94449f8
--- /dev/null
+++ b/src/Chapter2/support/APIType.java
@@ -0,0 +1,12 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter2.support;
+
+/**
+ * Path constants naming the REST endpoints used by the examples.
+ * The fields are final so that no caller can reassign a shared constant.
+ */
+public class APIType
+{
+    public static final String USER_TIMELINE = "/statuses/user_timeline";
+    public static final String FOLLOWERS = "/followers/list";
+    public static final String FRIENDS = "/friends/list";
+    public static final String USER_PROFILE = "/users/show";
+}
diff --git a/src/Chapter2/support/InfoType.java b/src/Chapter2/support/InfoType.java
new file mode 100644
index 0000000..42b0334
--- /dev/null
+++ b/src/Chapter2/support/InfoType.java
@@ -0,0 +1,12 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter2.support;
+
+/**
+ * Integer codes identifying the kind of user information being handled.
+ */
+public class InfoType
+{
+    /** Profile information. */
+    public static final int PROFILE_INFO = 0;
+    /** Follower information. */
+    public static final int FOLLOWER_INFO = 1;
+    /** Friend information. */
+    public static final int FRIEND_INFO = 2;
+    /** Status (tweet) information. */
+    public static final int STATUSES_INFO = 3;
+}
diff --git a/src/Chapter2/support/Location.java b/src/Chapter2/support/Location.java
new file mode 100644
index 0000000..7f6234f
--- /dev/null
+++ b/src/Chapter2/support/Location.java
@@ -0,0 +1,28 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package Chapter2.support;
+
+/**
+ * A latitude/longitude pair with public, mutable fields.
+ *
+ * @author shamanth
+ */
+public class Location
+{
+    public Double latitude;
+    public Double longitude;
+
+    /**
+     * @param lat latitude value, stored as given
+     * @param lng longitude value, stored as given
+     */
+    public Location(Double lat,Double lng)
+    {
+        this.latitude = lat;
+        this.longitude = lng;
+    }
+
+    /**
+     * @return the text "Latitude: " + latitude + " & Longitude: " + longitude
+     */
+    @Override
+    public String toString()
+    {
+        StringBuilder text = new StringBuilder("Latitude: ");
+        text.append(latitude);
+        text.append(" & Longitude: ");
+        text.append(longitude);
+        return text.toString();
+    }
+}
diff --git a/src/Chapter2/support/OAuthTokenSecret.java b/src/Chapter2/support/OAuthTokenSecret.java
new file mode 100644
index 0000000..8fee4a8
--- /dev/null
+++ b/src/Chapter2/support/OAuthTokenSecret.java
@@ -0,0 +1,38 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter2.support;
+
+/**
+ * Holds a user's OAuth access token together with the matching access secret.
+ */
+public class OAuthTokenSecret
+{
+    String UserAccessToken;
+    String UserAccessSecret;
+
+    /**
+     * @param token the access token
+     * @param secret the access secret
+     */
+    public OAuthTokenSecret(String token,String secret)
+    {
+        this.setAccessToken(token);
+        this.setAccessSecret(secret);
+    }
+
+    public String getAccessToken() {
+        return this.UserAccessToken;
+    }
+
+    public void setAccessToken(String AccessToken) {
+        UserAccessToken = AccessToken;
+    }
+
+    public String getAccessSecret() {
+        return this.UserAccessSecret;
+    }
+
+    public void setAccessSecret(String AccessSecret) {
+        UserAccessSecret = AccessSecret;
+    }
+
+    /**
+     * @return both values as readable text; note that this includes the secret
+     */
+    @Override
+    public String toString()
+    {
+        String token = getAccessToken();
+        String secret = getAccessSecret();
+        return "Access Token: "+token+" Access Secret: "+secret;
+    }
+}
diff --git a/src/Chapter4/GraphElements/RetweetEdge.java b/src/Chapter4/GraphElements/RetweetEdge.java
new file mode 100644
index 0000000..83836a0
--- /dev/null
+++ b/src/Chapter4/GraphElements/RetweetEdge.java
@@ -0,0 +1,53 @@
+package GraphElements;
+
+
+/**
+ * A directed edge between two user nodes carrying a retweet counter that starts at 1.
+ * Two edges are equal when both end points are equal; the counter is not compared.
+ */
+public class RetweetEdge {
+    private UserNode to, from;
+    private int retweetCount;
+
+    public RetweetEdge(UserNode to, UserNode from){
+        this.to = to;
+        this.from = from;
+        this.retweetCount = 1;
+    }
+
+    /** Adds one to the retweet counter. */
+    public void incrementRTCount(){
+        retweetCount += 1;
+    }
+
+    public UserNode getTo() {
+        return this.to;
+    }
+    public void setTo(UserNode to) {
+        this.to = to;
+    }
+    public UserNode getFrom() {
+        return this.from;
+    }
+    public void setFrom(UserNode from) {
+        this.from = from;
+    }
+    public int getRetweetCount() {
+        return this.retweetCount;
+    }
+    public void setRetweetCount(int retweetCount) {
+        this.retweetCount = retweetCount;
+    }
+
+    @Override
+    public boolean equals(Object maybeEdge){
+        if(!(maybeEdge instanceof RetweetEdge)){
+            return false;
+        }
+        RetweetEdge other = (RetweetEdge) maybeEdge;
+        return other.to.equals(to) && other.from.equals(from);
+    }
+
+    /** @return the edge rendered as from + " -> " + to */
+    @Override
+    public String toString(){
+        return from + " -> " + to;
+    }
+
+    /** @return the hash of the text form, so equal end points give equal hashes */
+    @Override
+    public int hashCode(){
+        String text = toString();
+        return text.hashCode();
+    }
+}
diff --git a/src/Chapter4/GraphElements/UserNode.java b/src/Chapter4/GraphElements/UserNode.java
new file mode 100644
index 0000000..fba4419
--- /dev/null
+++ b/src/Chapter4/GraphElements/UserNode.java
@@ -0,0 +1,34 @@
+package GraphElements;
+
+
+
+public class UserNode {
+ private String username;
+
+ public UserNode(String username){
+ this.username = username;
+ }
+
+ public String getUsername() {
+ return username;
+ }
+
+ public void setUsername(String username) {
+ this.username = username;
+ }
+
+ public boolean equals(Object un){
+ if(un instanceof UserNode){
+ return username.equals(((UserNode)un).username);
+ }
+ return false;
+ }
+
+ public String toString(){
+ return username;
+ }
+
+ public int hashCode(){
+ return username.hashCode();
+ }
+}
diff --git a/src/Chapter4/centrality/examples/BetweennessCentralityExample.java b/src/Chapter4/centrality/examples/BetweennessCentralityExample.java
new file mode 100644
index 0000000..ab9f7e6
--- /dev/null
+++ b/src/Chapter4/centrality/examples/BetweennessCentralityExample.java
@@ -0,0 +1,31 @@
+package centrality.examples;
+
+import Chapter4.util.TweetFileToGraph;
+import java.io.File;
+import GraphElements.RetweetEdge;
+import GraphElements.UserNode;
+import edu.uci.ics.jung.algorithms.importance.BetweennessCentrality;
+import edu.uci.ics.jung.graph.DirectedGraph;
+
+/**
+ * Builds the retweet network from a tweet file and prints betweenness centrality rankings.
+ */
+public class BetweennessCentralityExample {
+    /**
+     * @param args args[0] (optional) is the tweet file; "synthetic_retweet_network.json" is used otherwise
+     */
+    public static void main(String[] args){
+        String path = (args.length > 0) ? args[0] : "synthetic_retweet_network.json";
+        File tweetFile = new File(path);
+
+        DirectedGraph<UserNode, RetweetEdge> retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile);
+
+        //rank the graph by betweenness centrality and print the result
+        BetweennessCentrality<UserNode, RetweetEdge> ranker =
+            new BetweennessCentrality<UserNode, RetweetEdge>(retweetGraph);
+        ranker.evaluate();
+        ranker.printRankings(true, true);
+    }
+}
diff --git a/src/Chapter4/centrality/examples/EigenvectorCentralityExample.java b/src/Chapter4/centrality/examples/EigenvectorCentralityExample.java
new file mode 100644
index 0000000..172dd16
--- /dev/null
+++ b/src/Chapter4/centrality/examples/EigenvectorCentralityExample.java
@@ -0,0 +1,36 @@
+package centrality.examples;
+
+import Chapter4.util.TweetFileToGraph;
+import java.io.File;
+import GraphElements.RetweetEdge;
+import GraphElements.UserNode;
+import edu.uci.ics.jung.algorithms.scoring.EigenvectorCentrality;
+import edu.uci.ics.jung.graph.DirectedGraph;
+
+/**
+ * Builds the retweet network from a tweet file and prints each user's eigenvector centrality score.
+ */
+public class EigenvectorCentralityExample {
+    /**
+     * @param args args[0] (optional) is the tweet file; "synthetic_retweet_network.json" is used otherwise
+     */
+    public static void main(String[] args){
+        String path = (args.length > 0) ? args[0] : "synthetic_retweet_network.json";
+        File tweetFile = new File(path);
+
+        DirectedGraph<UserNode, RetweetEdge> retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile);
+
+        EigenvectorCentrality<UserNode, RetweetEdge> scorer =
+            new EigenvectorCentrality<UserNode, RetweetEdge>(retweetGraph);
+        scorer.evaluate();
+
+        //one line per vertex: "<user> - <score>"
+        for(UserNode user : retweetGraph.getVertices()){
+            System.out.println(user + " - " + scorer.getVertexScore(user));
+        }
+    }
+}
diff --git a/src/Chapter4/centrality/examples/InDegreeCentralityExample.java b/src/Chapter4/centrality/examples/InDegreeCentralityExample.java
new file mode 100644
index 0000000..6a027ac
--- /dev/null
+++ b/src/Chapter4/centrality/examples/InDegreeCentralityExample.java
@@ -0,0 +1,30 @@
+package Chapter4.centrality.examples;
+
+import Chapter4.util.TweetFileToGraph;
+import java.io.File;
+import GraphElements.RetweetEdge;
+import GraphElements.UserNode;
+import edu.uci.ics.jung.graph.DirectedGraph;
+
+public class InDegreeCentralityExample {
+
+ public static void main(String[] args){
+
+ File tweetFile;
+
+ if(args.length > 0){
+ tweetFile = new File(args[0]);
+ }
+ else{
+ tweetFile = new File("synthetic_retweet_network.json");
+ }
+
+ DirectedGraph<UserNode, RetweetEdge> retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile);
+
+ //calculate the betweenness centrality
+ for(UserNode node : retweetGraph.getVertices()){
+ System.out.println(node + " - " + retweetGraph.getInEdges(node).size());
+ }
+
+ }
+}
diff --git a/src/Chapter4/centrality/examples/PageRankCentralityExample.java b/src/Chapter4/centrality/examples/PageRankCentralityExample.java
new file mode 100644
index 0000000..dd44efd
--- /dev/null
+++ b/src/Chapter4/centrality/examples/PageRankCentralityExample.java
@@ -0,0 +1,39 @@
+package Chapter4.centrality.examples;
+
+import Chapter4.util.TweetFileToGraph;
+import java.io.File;
+import GraphElements.RetweetEdge;
+import GraphElements.UserNode;
+import edu.uci.ics.jung.algorithms.scoring.PageRank;
+import edu.uci.ics.jung.graph.DirectedGraph;
+
+/**
+ * Builds the retweet network from a tweet file and prints each user's PageRank score.
+ */
+public class PageRankCentralityExample {
+    /**
+     * @param args args[0] (optional) is the tweet file; "synthetic_retweet_network.json" is used otherwise
+     */
+    public static void main(String[] args){
+        String path = (args.length > 0) ? args[0] : "synthetic_retweet_network.json";
+        File tweetFile = new File(path);
+
+        DirectedGraph<UserNode, RetweetEdge> retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile);
+
+        //NOTE(review): the second argument (.5) is presumably JUNG's random-jump probability -- confirm
+        PageRank<UserNode, RetweetEdge> ranker = new PageRank<UserNode, RetweetEdge>(retweetGraph, .5);
+        ranker.evaluate();
+
+        //one line per vertex: "<user> - <score>"
+        for(UserNode user : retweetGraph.getVertices()){
+            System.out.println(user + " - " + ranker.getVertexScore(user));
+        }
+    }
+}
diff --git a/src/Chapter4/classification/bayes/Classification.java b/src/Chapter4/classification/bayes/Classification.java
new file mode 100644
index 0000000..ea9aba7
--- /dev/null
+++ b/src/Chapter4/classification/bayes/Classification.java
@@ -0,0 +1,22 @@
+package Chapter4.classification.bayes;
+
+public class Classification {
+ private String label;
+ private double confidence;
+
+ public Classification(String label, double confidence){
+ this.label = label;
+ this.confidence = confidence;
+ }
+
+ public String getLabel() {
+ return label;
+ }
+ public double getConfidence() {
+ return confidence;
+ }
+
+ public String toString(){
+ return "(" + label + ", " + confidence + ")";
+ }
+}
diff --git a/src/Chapter4/classification/bayes/NBCxv.java b/src/Chapter4/classification/bayes/NBCxv.java
new file mode 100644
index 0000000..5c48e28
--- /dev/null
+++ b/src/Chapter4/classification/bayes/NBCxv.java
@@ -0,0 +1,60 @@
+package Chapter4.classification.bayes;
+
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+
+import com.google.gson.JsonObject;
+import com.google.gson.JsonStreamParser;
+
+/**
+ * Cross validation driver for the sentiment classifier: reads the "text" of every JSON document
+ * in a file, then three times splits the texts randomly into 5 buckets and trains one classifier
+ * per held-out bucket. The evaluation of the held-out bucket is not implemented yet.
+ */
+public class NBCxv {
+    /**
+     * @param args args[0] (optional) is the input file; "owsemoticons.json" is used otherwise
+     */
+    public static void main(String[] args){
+
+        String filename = args.length >= 1 ? args[0] : "owsemoticons.json";
+
+        ArrayList<String> allTexts = new ArrayList<String>();
+
+        FileReader reader = null;
+        try {
+            //read the file and collect the text of each document
+            reader = new FileReader(filename);
+            JsonStreamParser parser = new JsonStreamParser(reader);
+            JsonObject elem;
+            while (parser.hasNext()) {
+                elem = parser.next().getAsJsonObject();
+                allTexts.add(elem.get("text").getAsString());
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+        } finally {
+            //release the file handle whether or not reading succeeded
+            if (reader != null) {
+                try {
+                    reader.close();
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+            }
+        }
+
+        //do 5-fold cross validation 3 times
+        Map<Integer, ArrayList<String>> buckets;
+        int bucketIdx;
+        NaiveBayesSentimentClassifier nbsc;
+        for(int i = 0; i < 3; i++){
+
+            //randomly split the texts into 5 buckets
+            buckets = new HashMap<Integer, ArrayList<String>>();
+            //initialize the 5 buckets
+            for(int j = 0; j < 5; j++) buckets.put(j, new ArrayList<String>());
+            for(String text : allTexts){
+                bucketIdx = (int) (Math.random()*5);
+                buckets.get(bucketIdx).add(text);
+            }
+
+            for(int j = 0; j < 5; j++){
+                //use all but j as the training, use j as the test.
+                nbsc = new NaiveBayesSentimentClassifier();
+                for(int k = 0; k < 5; k++){
+                    if(k != j){
+                        nbsc.trainInstances(buckets.get(k));
+                    }
+                }
+                //TODO: test with bucket j
+
+            }
+        }
+
+    }
+}
diff --git a/src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java b/src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java
new file mode 100644
index 0000000..923416c
--- /dev/null
+++ b/src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java
@@ -0,0 +1,264 @@
+package Chapter4.classification.bayes;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.StringTokenizer;
+
+/**
+ * This class performs both the training and classification steps of a Naive Bayes Classifier.
+ *
+ */
+public class NaiveBayesSentimentClassifier {
+ //the possible sentiment labels
+ private static final String[] SENTIMENT_LABELS = {"happy", "sad"};
+ //the tokens to look for in labeling the sentiment.
+ private static final String[] HAPPY_SMILEYS = {":)", ";)", ":D", ":-)", ":o)", ":-D"};
+ private static final String[] SAD_SMILEYS = {":(", ":-(", ":'(", ":'-(", "D:"};
+ //store these as a set for faster retrieval
+ private static final Set<String> HAPPY_SMILEY_SET = new HashSet<String>(Arrays.asList(HAPPY_SMILEYS));
+ private static final Set<String> SAD_SMILEY_SET = new HashSet<String>(Arrays.asList(SAD_SMILEYS));
+
+ //counter for the number of times each word has been associated with each sentiment.
+ private Map<String, Integer[]> sentOccurs;
+ //counter for the number of times we've seen each sentiment.
+ private Integer[] sentCount;
+
+ /**
+  * Creates an untrained classifier: no words recorded and a zero count for every sentiment label.
+  */
+ public NaiveBayesSentimentClassifier(){
+     sentOccurs = new HashMap<String, Integer[]>();
+     sentCount = new Integer[SENTIMENT_LABELS.length];
+     Arrays.fill(sentCount, Integer.valueOf(0));
+ }
+
+ /**
+  * Tokenize a string. The text is split on whitespace; every piece is lower-cased and
+  * stripped of all non-letter characters. Pieces that end up empty or that appear in
+  * StopwordsList.stopwordsSet are dropped.
+  * @param text
+  *            The piece of text
+  * @return
+  *            The remaining words, in their original order.
+  */
+ private List<String> getTokens(String text){
+     List<String> kept = new ArrayList<String>();
+     StringTokenizer pieces = new StringTokenizer(text);
+
+     while(pieces.hasMoreTokens()){
+         String lowered = pieces.nextToken().toLowerCase();
+
+         StringBuilder letters = new StringBuilder();
+         for(int i = 0; i < lowered.length(); i++){
+             char ch = lowered.charAt(i);
+             if(Character.isLetter(ch)){
+                 letters.append(ch);
+             }
+         }
+
+         String word = letters.toString();
+         if(word.length() == 0 || StopwordsList.stopwordsSet.contains(word)){
+             continue;
+         }
+         kept.add(word);
+     }
+
+     return kept;
+ }
+
+ /**
+  * Trains on one tweet. Only tweets carrying a "label" (an emoticon recognised by
+  * extractLabel) contribute; their tokens are added to the counters for that label.
+  * @param tweetText
+  *            The text of the document to check.
+  */
+ public void trainInstance(String tweetText){
+     int tweetLabel = extractLabel(tweetText);
+     if(tweetLabel == -1){
+         //no smiley, so there is nothing to learn from this tweet
+         return;
+     }
+     updateClassifier(getTokens(tweetText), tweetLabel);
+ }
+
+ /**
+  * Lists the recorded words for one sentiment, one "word:score" pair per line, in the order
+  * defined by WordCountPair's compareTo. The score is the square root of the number of times
+  * the word was counted for that sentiment.
+  * @param sentIndex
+  *            Index of the sentiment whose counts are listed.
+  * @param topN
+  *            Maximum number of lines; zero or a negative value lists every word.
+  * @return
+  *            The listing as a single String.
+  */
+ public String printWordOccurs(int sentIndex, int topN){
+     WordCountPair[] ranked = new WordCountPair[sentOccurs.size()];
+     int next = 0;
+     for(Map.Entry<String, Integer[]> entry : sentOccurs.entrySet()){
+         double score = Math.sqrt(entry.getValue()[sentIndex] * 1.0);
+         ranked[next++] = new WordCountPair(entry.getKey(), score);
+     }
+
+     Arrays.sort(ranked);
+
+     int limit = ranked.length;
+     if(topN > 0 && topN < limit){
+         limit = topN;
+     }
+
+     StringBuilder listing = new StringBuilder();
+     for(int i = 0; i < limit; i++){
+         String word = ranked[i].getWord();
+         double frac = ranked[i].getCount();
+         listing.append(word).append(":").append(frac).append("\n");
+     }
+     return listing.toString();
+ }
+
+ /**
+  * Trains on every text of the list, in order, by calling trainInstance on each.
+  * @param tweetTexts
+  *            The texts to train on.
+  */
+ public void trainInstances(List<String> tweetTexts){
+     Iterator<String> texts = tweetTexts.iterator();
+     while(texts.hasNext()){
+         trainInstance(texts.next());
+     }
+ }
+
+ /**
+  * Classify a tweet as happy or sad using only its word tokens. The score of every label is
+  * printed to standard output as "index -> score". The label with the highest score wins
+  * (the first one on ties) and the confidence is that score minus the sum of the other scores.
+  * @param tweetText
+  *            The text of the tweet
+  * @return
+  *            A Classification object holding the winning label and the confidence.
+  */
+ public Classification classify(String tweetText){
+     List<String> tokens = getTokens(tweetText);
+     double[] labelProbs = new double[SENTIMENT_LABELS.length];
+     int best = 0;
+     for(int label = 0; label < labelProbs.length; label++){
+         labelProbs[label] = calcLabelProb(tokens, label);
+         System.out.println(label + " -> " + labelProbs[label] );
+         if(labelProbs[label] > labelProbs[best]){
+             best = label;
+         }
+     }
+     //zero the winner so that summing the vector yields the total of the other labels
+     double winner = labelProbs[best];
+     labelProbs[best] = 0;
+     double conf = winner - sumVector(labelProbs);
+
+     return new Classification(SENTIMENT_LABELS[best], conf);
+ }
+
+ /**
+  * Finds the first whitespace separated token that is a known smiley.
+  * @param tweetText
+  *            The text to scan.
+  * @return
+  *            0 for a happy smiley, 1 for a sad smiley, -1 when the text has neither.
+  */
+ private int extractLabel(String tweetText){
+     StringTokenizer pieces = new StringTokenizer(tweetText);
+     int label = -1;
+     while(label == -1 && pieces.hasMoreTokens()){
+         String piece = pieces.nextToken();
+         if(HAPPY_SMILEY_SET.contains(piece)){
+             label = 0;
+         }
+         else if(SAD_SMILEY_SET.contains(piece)){
+             label = 1;
+         }
+     }
+     return label;
+ }
+
+ /**
+  * Adds one labelled piece of text to the counters: every token's count for the
+  * given sentiment goes up by one, and so does the document count of that sentiment.
+  * @param tokens
+  *            The tokens in the tweet.
+  * @param sentIndex
+  *            The sentiment label.
+  */
+ private void updateClassifier(List<String> tokens, int sentIndex){
+     for(String token : tokens){
+         Integer[] counts = sentOccurs.get(token);
+         if(counts != null){
+             counts[sentIndex]++;
+             continue;
+         }
+         //first sighting of this word: start from zero for both sentiments
+         Integer[] fresh = {0, 0};
+         fresh[sentIndex]++;
+         sentOccurs.put(token, fresh);
+     }
+     //update the overall document count
+     sentCount[sentIndex]++;
+ }
+
+ /**
+  * Scores the tweet for a given label. For every token seen in training the running
+  * product is multiplied by (share of the word's count under this label) * P(label) / P(word),
+  * where P(label) is the label's share of trained documents and P(word) is the word's share
+  * of all counted word occurrences.
+  * NOTE(review): the result is not normalised and can exceed 1 -- confirm the intended formula.
+  * @param tokens
+  *            The tokens in the tweet.
+  * @param sentIndex
+  *            The label we are testing.
+  * @return
+  *            The score for the label indicated by "sentIndex", or 0.0 when nothing has been
+  *            trained or none of the tokens is known.
+  */
+ private double calcLabelProb(List<String> tokens, int sentIndex){
+
+     int cSum = sumVector(sentCount);
+     if(cSum == 0){
+         //nothing trained yet, so no token can be known either
+         return 0.0;
+     }
+     double pClass = sentCount[sentIndex] / (double) cSum;
+
+     //total number of counted word occurrences, accumulated over all words
+     long totalWordCount = 0;
+     for(Integer[] wordCt : sentOccurs.values()){
+         totalWordCount += sumVector(wordCt);
+     }
+
+     double p = 1.0;
+     boolean foundOne = false;
+     for(String token : tokens){
+         Integer[] probs = sentOccurs.get(token);
+         if(probs == null){
+             continue;
+         }
+         foundOne = true;
+         int wordTotal = sumVector(probs);
+         double pWordGivenClass = probs[sentIndex] / (double) wordTotal;
+         //floating point division: the integer form truncated this to 0 or 1
+         double pWord = wordTotal / (double) totalWordCount;
+         p *= pWordGivenClass * pClass / pWord;
+     }
+     return foundOne ? p : 0.0;
+ }
+
+ /**
+  * Adds up all values of a 1D array of doubles, front to back.
+  * @param vector
+  *            The 1D array to sum.
+  * @return
+  *            The sum, 0.0 for an empty array.
+  */
+ private double sumVector(double[] vector){
+     double total = 0.0;
+     for(int i = 0; i < vector.length; i++){
+         total += vector[i];
+     }
+     return total;
+ }
+
+ /**
+  * Adds up all values of a 1D array of Integers, front to back.
+  * @param vector
+  *            The 1D array to sum; its elements must not be null.
+  * @return
+  *            The sum, 0 for an empty array.
+  */
+ private int sumVector(Integer[] vector){
+     int total = 0;
+     for(int i = 0; i < vector.length; i++){
+         total += vector[i].intValue();
+     }
+     return total;
+ }
+}
diff --git a/src/Chapter4/classification/bayes/StopwordsList.java b/src/Chapter4/classification/bayes/StopwordsList.java
new file mode 100644
index 0000000..06edd5a
--- /dev/null
+++ b/src/Chapter4/classification/bayes/StopwordsList.java
@@ -0,0 +1,10 @@
+package Chapter4.classification.bayes;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * The stopwords ignored by the tokenizer, exposed as a read-only set for fast lookup.
+ */
+public class StopwordsList {
+    private static final String[] stopwords = {
+        "a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at",
+        "be", "because", "been", "before", "being", "below", "between", "both", "but", "by",
+        "can", "did", "do", "does", "doing", "don", "down", "during", "each", "few", "for", "from", "further",
+        "get", "had", "has", "have", "having", "he", "her", "here", "hers", "herself", "him", "himself", "his", "how",
+        "i", "if", "im", "i'm", "in", "into", "is", "it", "its", "itself", "just",
+        "me", "more", "most", "my", "myself", "no", "nor", "not", "now",
+        "of", "off", "on", "once", "only", "or", "other", "our", "ours", "ourselves", "out", "over", "own",
+        "rt", "s", "same", "she", "should", "so", "some", "such",
+        "t", "than", "that", "the", "their", "theirs", "them", "themselves", "then", "there", "these", "they",
+        "this", "those", "through", "to", "too", "under", "until", "up", "us", "very",
+        "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom", "why", "will", "with",
+        "you", "your", "yours", "yourself", "yourselves"};
+    //wrapped as unmodifiable so that no caller can alter the shared set
+    public static final Set<String> stopwordsSet =
+        java.util.Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(stopwords)));
+}
diff --git a/src/Chapter4/classification/bayes/TestNBC.java b/src/Chapter4/classification/bayes/TestNBC.java
new file mode 100644
index 0000000..7e0e743
--- /dev/null
+++ b/src/Chapter4/classification/bayes/TestNBC.java
@@ -0,0 +1,49 @@
+package Chapter4.classification.bayes;
+
+import java.io.FileReader;
+import java.io.IOException;
+
+import com.google.gson.JsonObject;
+import com.google.gson.JsonStreamParser;
+
+public class TestNBC {
+    /**
+     * Trains a NaiveBayesSentimentClassifier on the "text" field of every JSON object
+     * in the input file, prints the word-occurrence dictionaries for labels 0 and 1
+     * (top 25 each), and finally classifies one sample sentence.
+     *
+     * @param args optional; args[0] is the input file name (defaults to "owsemoticons.json")
+     */
+    public static void main(String[] args){
+
+        String filename = args.length >= 1 ? args[0] : "owsemoticons.json";
+
+        //initialize the sentiment classifier
+        NaiveBayesSentimentClassifier nbsc = new NaiveBayesSentimentClassifier();
+
+        // Declared outside the try so the finally block can release the file handle.
+        FileReader reader = null;
+        try {
+            //read the file, and train each document
+            reader = new FileReader(filename);
+            JsonStreamParser parser = new JsonStreamParser(reader);
+            JsonObject elem;
+            String text;
+            while (parser.hasNext()) {
+                elem = parser.next().getAsJsonObject();
+                text = elem.get("text").getAsString();
+                nbsc.trainInstance(text);
+            }
+
+            //print out the positive and negative dictionary
+            System.out.println("=== Positive Dictionary ===");
+            System.out.println(nbsc.printWordOccurs(0, 25));
+            System.out.println("=== Negative Dictionary ===");
+            System.out.println(nbsc.printWordOccurs(1, 25));
+
+            //now go through and classify each line as positive or negative
+//            parser = new JsonStreamParser(new FileReader(filename));
+//            while (parser.hasNext()) {
+//                elem = parser.next().getAsJsonObject();
+//                text = elem.get("text").getAsString();
+//                Classification c = nbsc.classify(text);
+//                System.out.println(c + " -> " + text);
+//            }
+            System.out.println(nbsc.classify("I love new york"));
+
+        } catch (IOException e) {
+            e.printStackTrace();
+        } finally {
+            // The original never closed the reader; close it on every exit path.
+            if (reader != null) {
+                try {
+                    reader.close();
+                } catch (IOException ignored) {
+                    // Nothing useful can be done if closing a read-only file fails.
+                }
+            }
+        }
+
+    }
+}
diff --git a/src/Chapter4/classification/bayes/WordCountPair.java b/src/Chapter4/classification/bayes/WordCountPair.java
new file mode 100644
index 0000000..b96be92
--- /dev/null
+++ b/src/Chapter4/classification/bayes/WordCountPair.java
@@ -0,0 +1,34 @@
+package Chapter4.classification.bayes;
+
+/**
+ * A word together with a (possibly fractional) count.
+ * The natural ordering is descending by count, so sorting puts the most frequent words first.
+ */
+public class WordCountPair implements Comparable<WordCountPair>{
+
+    private String word;
+    private double count;
+
+    public WordCountPair(String word, double count){
+        this.word = word;
+        this.count = count;
+    }
+
+    /**
+     * Orders pairs by descending count, breaking ties by ascending word.
+     *
+     * The previous implementation never returned 0 and answered 1 in both directions
+     * for equal counts, which violates the Comparable contract and can make
+     * sort routines fail or behave unpredictably. The word tie-break keeps distinct
+     * words with the same count distinguishable (e.g. inside a sorted set).
+     *
+     * @param arg0 the pair to compare against
+     * @return negative if this pair sorts first, positive if it sorts after, 0 if both
+     *         count and word are equal
+     */
+    @Override
+    public int compareTo(WordCountPair arg0) {
+        int byCount = Double.compare(arg0.count, count);
+        if (byCount != 0) {
+            return byCount;
+        }
+        // Null-safe word comparison: a null word sorts before any non-null word.
+        if (word == null) {
+            return arg0.word == null ? 0 : -1;
+        }
+        if (arg0.word == null) {
+            return 1;
+        }
+        return word.compareTo(arg0.word);
+    }
+
+    public String getWord() {
+        return word;
+    }
+
+    public void setWord(String word) {
+        this.word = word;
+    }
+
+    public double getCount() {
+        return count;
+    }
+
+    /**
+     * Replaces the count. The parameter is an int (kept for existing callers) and is
+     * widened to the double field.
+     */
+    public void setCount(int count) {
+        this.count = count;
+    }
+
+}
diff --git a/src/Chapter4/graph/visualization/SimpleGraphViewer.java b/src/Chapter4/graph/visualization/SimpleGraphViewer.java
new file mode 100644
index 0000000..7cb46e4
--- /dev/null
+++ b/src/Chapter4/graph/visualization/SimpleGraphViewer.java
@@ -0,0 +1,86 @@
+package chapter4.graph.visualization;
+
+import Chapter4.util.TweetFileToGraph;
+import java.awt.Dimension;
+import java.awt.Shape;
+import java.awt.geom.Ellipse2D;
+import java.io.File;
+
+import javax.swing.JFrame;
+
+import org.apache.commons.collections15.Transformer;
+import GraphElements.RetweetEdge;
+import GraphElements.UserNode;
+import edu.uci.ics.jung.algorithms.layout.KKLayout;
+import edu.uci.ics.jung.algorithms.layout.Layout;
+import edu.uci.ics.jung.algorithms.scoring.EigenvectorCentrality;
+import edu.uci.ics.jung.graph.DirectedGraph;
+import edu.uci.ics.jung.visualization.BasicVisualizationServer;
+
+public class SimpleGraphViewer {
+    /**
+     * Loads a retweet network from a tweet file, scores every vertex with eigenvector
+     * centrality, and shows the graph in a Swing window with vertices sized by score.
+     *
+     * @param args optional; args[0] is the tweet file (defaults to "synthetic_retweet_network.json")
+     */
+    public static void main(String[] args){
+
+        File tweetFile;
+
+        if(args.length > 0){
+            tweetFile = new File(args[0]);
+        }
+        else{
+            tweetFile = new File("synthetic_retweet_network.json");
+        }
+
+        DirectedGraph<UserNode, RetweetEdge> retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile);
+
+        /*
+         * Converts a node to its string representation
+         */
+        Transformer<UserNode, String> stringer = new Transformer<UserNode, String>(){
+            public String transform(UserNode n){
+                return n.toString();
+            }
+        };
+
+        /*
+         * Calculate the centrality
+         */
+        // Eigenvector centrality is the active measure; the commented lines are drop-in alternatives.
+//        final InDegreeScorer<UserNode> centralityScore = new InDegreeScorer<UserNode>(retweetGraph);
+//        final BetweennessCentrality<UserNode, RetweetEdge> centralityScore = new BetweennessCentrality<UserNode, RetweetEdge>(retweetGraph);
+//        final PageRank<UserNode, RetweetEdge> centralityScore = new PageRank<UserNode, RetweetEdge>(retweetGraph, 0.85);
+        final EigenvectorCentrality<UserNode, RetweetEdge> centralityScore = new EigenvectorCentrality<UserNode, RetweetEdge>(retweetGraph);
+        centralityScore.evaluate();
+
+        // Largest score over all vertices, used to normalise the vertex radius below.
+        double centralityMax = 0.0f;
+        for(UserNode node : retweetGraph.getVertices()){
+            centralityMax = Math.max(centralityMax, centralityScore.getVertexScore(node));
+        }
+        final double centralityMaxFinal = centralityMax;
+
+        /*
+         * Sizes a node by some centrality measure
+         */
+        Transformer<UserNode, Shape> shaper = new Transformer<UserNode, Shape>(){
+            public Shape transform(UserNode n){
+                double score = centralityScore.getVertexScore(n);
+                System.out.println("User: " + n.getUsername() + " Cent: " + score + " Max: " + centralityMaxFinal);
+                // When every score is 0 the ratio would be 0/0 = NaN, and Math.max(NaN, 5)
+                // stays NaN, producing an invalid ellipse. Fall back to the minimum size instead.
+                double radius = centralityMaxFinal > 0 ? 50 * score / centralityMaxFinal : 0.0;
+                radius = Math.max(radius, 5.0f);
+                float fRadius = (float) radius;
+                // Centre the ellipse on the vertex position.
+                return new Ellipse2D.Float(-fRadius/2, -fRadius/2, fRadius, fRadius);
+            }
+        };
+
+        Layout<UserNode, RetweetEdge> layout = new KKLayout<UserNode, RetweetEdge>(retweetGraph);
+        layout.setSize(new Dimension(500, 500));
+
+        BasicVisualizationServer<UserNode, RetweetEdge> vv = new BasicVisualizationServer<UserNode, RetweetEdge>(layout);
+        vv.setPreferredSize(new Dimension(550, 550));
+        vv.getRenderContext().setVertexLabelTransformer(stringer);
+        vv.getRenderContext().setVertexShapeTransformer(shaper);
+
+        JFrame jframe = new JFrame("Simple Graph View");
+        jframe.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+        jframe.getContentPane().add(vv);
+        jframe.pack();
+        jframe.setVisible(true);
+    }
+}
diff --git a/src/Chapter4/tweetlda/LDA.java b/src/Chapter4/tweetlda/LDA.java
new file mode 100644
index 0000000..ca7f9a3
--- /dev/null
+++ b/src/Chapter4/tweetlda/LDA.java
@@ -0,0 +1,89 @@
+package tweetlda;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.TreeSet;
+import java.util.regex.Pattern;
+
+import org.json.JSONObject;
+
+import cc.mallet.pipe.CharSequence2TokenSequence;
+import cc.mallet.pipe.CharSequenceLowercase;
+import cc.mallet.pipe.Pipe;
+import cc.mallet.pipe.SerialPipes;
+import cc.mallet.pipe.TokenSequence2FeatureSequence;
+import cc.mallet.pipe.TokenSequenceRemoveStopwords;
+import cc.mallet.pipe.iterator.StringArrayIterator;
+import cc.mallet.topics.ParallelTopicModel;
+import cc.mallet.types.Alphabet;
+import cc.mallet.types.IDSorter;
+import cc.mallet.types.InstanceList;
+
+/**
+ * Runs Mallet's parallel LDA topic model over the "text" field of a file containing
+ * one JSON object per line, then prints the top words of every topic with their
+ * weight normalised by the topic's total weight.
+ */
+public class LDA {
+
+    private static final String STOP_WORDS = "stopwords.txt";
+    private static final int ITERATIONS = 100;
+    private static final int THREADS = 4;
+    private static final int NUM_TOPICS = 25;
+    // Number of top-weighted words printed per topic.
+    private static final int NUM_WORDS_TO_ANALYZE = 25;
+
+    /**
+     * @param args optional; args[0] is the input file (defaults to "testows.json")
+     * @throws Exception on any I/O, JSON or model-estimation failure
+     */
+    public static void main(String[] args) throws Exception {
+        ArrayList<Pipe> pipeList = new ArrayList<Pipe>();
+        File stopwords = new File(STOP_WORDS);
+
+        String inputFileName = args.length >= 1 ? args[0] : "testows.json";
+
+        File inputFile = new File(inputFileName);
+
+        // Lowercase, tokenize, remove stopwords, stem, and convert to features
+        pipeList.add((Pipe) new CharSequenceLowercase());
+        pipeList.add((Pipe) new CharSequence2TokenSequence(Pattern.compile("\\p{L}[\\p{L}\\p{P}]+\\p{L}")));
+        pipeList.add((Pipe) new TokenSequenceRemoveStopwords(stopwords, "UTF-8", false, false, false));
+        pipeList.add((Pipe) new PorterStemmer());
+        pipeList.add((Pipe) new TokenSequence2FeatureSequence());
+
+        InstanceList instances = new InstanceList(new SerialPipes(pipeList));
+
+        // Collect the text of every line that has a "text" field. The reader is closed
+        // in a finally block; the original left the file handle open.
+        LinkedList<String> textList = new LinkedList<String>();
+        BufferedReader fileReader = new BufferedReader(new FileReader(inputFile));
+        try {
+            String line;
+            while((line = fileReader.readLine()) != null){
+                JSONObject elem = new JSONObject(line);
+                if(elem.has("text")){
+                    textList.add(elem.getString("text"));
+                }
+            }
+        } finally {
+            fileReader.close();
+        }
+
+        instances.addThruPipe(new StringArrayIterator(textList.toArray(new String[textList.size()])));
+
+        ParallelTopicModel model = new ParallelTopicModel(NUM_TOPICS);
+        model.addInstances(instances);
+        model.setNumThreads(THREADS);
+        model.setNumIterations(ITERATIONS);
+        model.estimate();
+
+        // The data alphabet maps word IDs to strings
+        Alphabet dataAlphabet = instances.getDataAlphabet();
+
+        int topicIdx=0;
+        StringBuilder sb;
+        for (TreeSet<IDSorter> set : model.getSortedWords()) {
+            sb = new StringBuilder().append(topicIdx);
+            sb.append(" - ");
+            int j = 0;
+            // First pass: total weight of the topic, used as the normalising constant.
+            double sum = 0.0;
+            for (IDSorter s : set) {
+                sum += s.getWeight();
+            }
+            // Second pass: emit "word:normalisedWeight" for the leading words only.
+            for (IDSorter s : set) {
+                sb.append(dataAlphabet.lookupObject(s.getID())).append(":").append(s.getWeight() / sum).append(", ");
+                if (++j >= NUM_WORDS_TO_ANALYZE) break;
+            }
+            System.out.println(sb.append("\n").toString());
+            topicIdx++;
+        }
+    }
+}
diff --git a/src/Chapter4/tweetlda/PorterStemmer.java b/src/Chapter4/tweetlda/PorterStemmer.java
new file mode 100644
index 0000000..1a7149e
--- /dev/null
+++ b/src/Chapter4/tweetlda/PorterStemmer.java
@@ -0,0 +1,33 @@
+package tweetlda;
+
+import cc.mallet.pipe.Pipe;
+import cc.mallet.types.Instance;
+import cc.mallet.types.TokenSequence;
+
+/**
+ * Mallet pipe that replaces the text of every token with its Porter stem.
+ */
+public class PorterStemmer extends Pipe {
+
+    private static final long serialVersionUID = 154100332101873830L;
+
+    /**
+     * Stems each token of the carrier's TokenSequence in place.
+     *
+     * @param carrier instance whose data is a TokenSequence
+     * @return the same instance, with every token's text replaced by its stem
+     */
+    public Instance pipe(Instance carrier){
+        TokenSequence tokens = (TokenSequence) carrier.getData();
+
+        int position = 0;
+        while(position < tokens.size()){
+            String original = tokens.get(position).getText();
+            tokens.get(position).setText(stemLetters(original));
+            position++;
+        }
+        carrier.setData(tokens);
+
+        return carrier;
+    }
+
+    /**
+     * Feeds only the letter characters of the word to a fresh Stemmer and returns the stem.
+     */
+    private static String stemLetters(String word){
+        Stemmer stemmer = new Stemmer();
+        for(int c = 0; c < word.length(); c++){
+            char ch = word.charAt(c);
+            if(Character.isLetter(ch)){
+                stemmer.add(ch);
+            }
+        }
+        stemmer.stem();
+        return stemmer.toString();
+    }
+
+}
diff --git a/src/Chapter4/tweetlda/Stemmer.java b/src/Chapter4/tweetlda/Stemmer.java
new file mode 100644
index 0000000..f06dfc6
--- /dev/null
+++ b/src/Chapter4/tweetlda/Stemmer.java
@@ -0,0 +1,428 @@
+package tweetlda;
+
+
+/*
+
+ Porter stemmer in Java. The original paper is in
+
+ Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+ no. 3, pp 130-137,
+
+ See also http://www.tartarus.org/~martin/PorterStemmer
+
+ History:
+
+ Release 1
+
+ Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
+ The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
+ is then out outside the bounds of b.
+
+ Release 2
+
+ Similarly,
+
+ Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
+ 'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
+ b[j] is then outside the bounds of b.
+
+ Release 3
+
+ Considerably revised 4/9/00 in the light of many helpful suggestions
+ from Brian Goetz of Quiotix Corporation (brian@quiotix.com).
+
+ Release 4
+
+*/
+
+import java.io.*;
+
+/**
+ * Stemmer, implementing the Porter Stemming Algorithm
+ *
+ * The Stemmer class transforms a word into its root form. The input
+ * word can be provided a character at time (by calling add()), or at once
+ * by calling one of the various stem(something) methods.
+ */
+
+class Stemmer
+{  private char[] b;
+   private int i,     /* offset into b */
+               i_end, /* offset to end of stemmed word */
+               j, k;
+   private static final int INC = 50;
+                     /* unit of size whereby b is increased */
+   public Stemmer()
+   {  b = new char[INC];
+      i = 0;
+      i_end = 0;
+   }
+
+   /**
+    * Add a character to the word being stemmed.  When you are finished
+    * adding characters, you can call stem(void) to stem the word.
+    */
+
+   public void add(char ch)
+   {  if (i == b.length)
+      {  char[] new_b = new char[i+INC];
+         for (int c = 0; c < i; c++) new_b[c] = b[c];
+         b = new_b;
+      }
+      b[i++] = ch;
+   }
+
+
+   /** Adds wLen characters to the word being stemmed contained in a portion
+    * of a char[] array. This is like repeated calls of add(char ch), but
+    * faster.
+    */
+
+   public void add(char[] w, int wLen)
+   {  if (i+wLen >= b.length)
+      {  char[] new_b = new char[i+wLen+INC];
+         for (int c = 0; c < i; c++) new_b[c] = b[c];
+         b = new_b;
+      }
+      for (int c = 0; c < wLen; c++) b[i++] = w[c];
+   }
+
+   /**
+    * After a word has been stemmed, it can be retrieved by toString(),
+    * or a reference to the internal buffer can be retrieved by getResultBuffer
+    * and getResultLength (which is generally more efficient.)
+    */
+   public String toString() { return new String(b,0,i_end); }
+
+   /**
+    * Returns the length of the word resulting from the stemming process.
+    */
+   public int getResultLength() { return i_end; }
+
+   /**
+    * Returns a reference to a character buffer containing the results of
+    * the stemming process.  You also need to consult getResultLength()
+    * to determine the length of the result.
+    */
+   public char[] getResultBuffer() { return b; }
+
+   /* cons(i) is true <=> b[i] is a consonant. */
+
+   private final boolean cons(int i)
+   {  switch (b[i])
+      {  case 'a': case 'e': case 'i': case 'o': case 'u': return false;
+         case 'y': return (i==0) ? true : !cons(i-1);
+         default: return true;
+      }
+   }
+
+   /* m() measures the number of consonant sequences between 0 and j. if c is
+      a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
+      presence,
+
+         <c><v>       gives 0
+         <c>vc<v>     gives 1
+         <c>vcvc<v>   gives 2
+         <c>vcvcvc<v> gives 3
+         ....
+   */
+
+   private final int m()
+   {  int n = 0;
+      int i = 0;
+      while(true)
+      {  if (i > j) return n;
+         if (! cons(i)) break; i++;
+      }
+      i++;
+      while(true)
+      {  while(true)
+         {  if (i > j) return n;
+            if (cons(i)) break;
+            i++;
+         }
+         i++;
+         n++;
+         while(true)
+         {  if (i > j) return n;
+            if (! cons(i)) break;
+            i++;
+         }
+         i++;
+      }
+   }
+
+   /* vowelinstem() is true <=> 0,...j contains a vowel */
+
+   private final boolean vowelinstem()
+   {  int i; for (i = 0; i <= j; i++) if (! cons(i)) return true;
+      return false;
+   }
+
+   /* doublec(j) is true <=> j,(j-1) contain a double consonant. */
+
+   private final boolean doublec(int j)
+   {  if (j < 1) return false;
+      if (b[j] != b[j-1]) return false;
+      return cons(j);
+   }
+
+   /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
+      and also if the second c is not w,x or y. this is used when trying to
+      restore an e at the end of a short word. e.g.
+
+         cav(e), lov(e), hop(e), crim(e), but
+         snow, box, tray.
+
+   */
+
+   private final boolean cvc(int i)
+   {  if (i < 2 || !cons(i) || cons(i-1) || !cons(i-2)) return false;
+      {  int ch = b[i];
+         if (ch == 'w' || ch == 'x' || ch == 'y') return false;
+      }
+      return true;
+   }
+
+   /* ends(s) is true <=> 0,...k ends with the string s. On success it also
+      sets j to the index just before the suffix. */
+
+   private final boolean ends(String s)
+   {  int l = s.length();
+      int o = k-l+1;
+      if (o < 0) return false;
+      for (int i = 0; i < l; i++) if (b[o+i] != s.charAt(i)) return false;
+      j = k-l;
+      return true;
+   }
+
+   /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
+      k. */
+
+   private final void setto(String s)
+   {  int l = s.length();
+      int o = j+1;
+      for (int i = 0; i < l; i++) b[o+i] = s.charAt(i);
+      k = j+l;
+   }
+
+   /* r(s) is used further down. */
+
+   private final void r(String s) { if (m() > 0) setto(s); }
+
+   /* step1() gets rid of plurals and -ed or -ing. e.g.
+
+          caresses  ->  caress
+          ponies    ->  poni
+          ties      ->  ti
+          caress    ->  caress
+          cats      ->  cat
+
+          feed      ->  feed
+          agreed    ->  agree
+          disabled  ->  disable
+
+          matting   ->  mat
+          mating    ->  mate
+          meeting   ->  meet
+          milling   ->  mill
+          messing   ->  mess
+
+          meetings  ->  meet
+
+   */
+
+   private final void step1()
+   {  if (b[k] == 's')
+      {  if (ends("sses")) k -= 2; else
+         if (ends("ies")) setto("i"); else
+         if (b[k-1] != 's') k--;
+      }
+      if (ends("eed")) { if (m() > 0) k--; } else
+      if ((ends("ed") || ends("ing")) && vowelinstem())
+      {  k = j;
+         if (ends("at")) setto("ate"); else
+         if (ends("bl")) setto("ble"); else
+         if (ends("iz")) setto("ize"); else
+         if (doublec(k))
+         {  k--;
+            {  int ch = b[k];
+               if (ch == 'l' || ch == 's' || ch == 'z') k++;
+            }
+         }
+         else if (m() == 1 && cvc(k)) setto("e");
+      }
+   }
+
+   /* step2() turns terminal y to i when there is another vowel in the stem. */
+
+   private final void step2() { if (ends("y") && vowelinstem()) b[k] = 'i'; }
+
+   /* step3() maps double suffices to single ones. so -ization ( = -ize plus
+      -ation) maps to -ize etc. note that the string before the suffix must give
+      m() > 0. */
+
+   private final void step3() { if (k == 0) return; /* For Bug 1 */ switch (b[k-1])
+   {
+       case 'a': if (ends("ational")) { r("ate"); break; }
+                 if (ends("tional")) { r("tion"); break; }
+                 break;
+       case 'c': if (ends("enci")) { r("ence"); break; }
+                 if (ends("anci")) { r("ance"); break; }
+                 break;
+       case 'e': if (ends("izer")) { r("ize"); break; }
+                 break;
+       case 'l': if (ends("bli")) { r("ble"); break; }
+                 if (ends("alli")) { r("al"); break; }
+                 if (ends("entli")) { r("ent"); break; }
+                 if (ends("eli")) { r("e"); break; }
+                 if (ends("ousli")) { r("ous"); break; }
+                 break;
+       case 'o': if (ends("ization")) { r("ize"); break; }
+                 if (ends("ation")) { r("ate"); break; }
+                 if (ends("ator")) { r("ate"); break; }
+                 break;
+       case 's': if (ends("alism")) { r("al"); break; }
+                 if (ends("iveness")) { r("ive"); break; }
+                 if (ends("fulness")) { r("ful"); break; }
+                 if (ends("ousness")) { r("ous"); break; }
+                 break;
+       case 't': if (ends("aliti")) { r("al"); break; }
+                 if (ends("iviti")) { r("ive"); break; }
+                 if (ends("biliti")) { r("ble"); break; }
+                 break;
+       case 'g': if (ends("logi")) { r("log"); break; }
+   } }
+
+   /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
+
+   private final void step4() { switch (b[k])
+   {
+       case 'e': if (ends("icate")) { r("ic"); break; }
+                 if (ends("ative")) { r(""); break; }
+                 if (ends("alize")) { r("al"); break; }
+                 break;
+       case 'i': if (ends("iciti")) { r("ic"); break; }
+                 break;
+       case 'l': if (ends("ical")) { r("ic"); break; }
+                 if (ends("ful")) { r(""); break; }
+                 break;
+       case 's': if (ends("ness")) { r(""); break; }
+                 break;
+   } }
+
+   /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
+
+   private final void step5()
+   {   if (k == 0) return; /* for Bug 1 */ switch (b[k-1])
+       {  case 'a': if (ends("al")) break; return;
+          case 'c': if (ends("ance")) break;
+                    if (ends("ence")) break; return;
+          case 'e': if (ends("er")) break; return;
+          case 'i': if (ends("ic")) break; return;
+          case 'l': if (ends("able")) break;
+                    if (ends("ible")) break; return;
+          case 'n': if (ends("ant")) break;
+                    if (ends("ement")) break;
+                    if (ends("ment")) break;
+                    /* element etc. not stripped before the m */
+                    if (ends("ent")) break; return;
+          case 'o': if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) break;
+                                    /* j >= 0 fixes Bug 2 */
+                    if (ends("ou")) break; return;
+                    /* takes care of -ous */
+          case 's': if (ends("ism")) break; return;
+          case 't': if (ends("ate")) break;
+                    if (ends("iti")) break; return;
+          case 'u': if (ends("ous")) break; return;
+          case 'v': if (ends("ive")) break; return;
+          case 'z': if (ends("ize")) break; return;
+          default: return;
+       }
+       if (m() > 1) k = j;
+   }
+
+   /* step6() removes a final -e if m() > 1. */
+
+   private final void step6()
+   {  j = k;
+      if (b[k] == 'e')
+      {  int a = m();
+         if (a > 1 || a == 1 && !cvc(k-1)) k--;
+      }
+      if (b[k] == 'l' && doublec(k) && m() > 1) k--;
+   }
+
+   /** Stem the word placed into the Stemmer buffer through calls to add().
+    * This method returns nothing; retrieve the result with
+    * getResultLength()/getResultBuffer() or toString().
+    * Words of fewer than three characters are left unchanged. After the call
+    * the input offset is reset to 0, so the same Stemmer can be fed a new word.
+    */
+   public void stem()
+   {  k = i - 1;
+      if (k > 1) { step1(); step2(); step3(); step4(); step5(); step6(); }
+      i_end = k+1; i = 0;
+   }
+
+   /** Test program for demonstrating the Stemmer.  It reads text from a
+    * a list of files, stems each word, and writes the result to standard
+    * output. Note that the word stemmed is expected to be in lower case:
+    * forcing lower case must be done outside the Stemmer class.
+    * Usage: Stemmer file-name file-name ...
+    */
+   public static void main(String[] args)
+   {
+      char[] w = new char[501];
+      Stemmer s = new Stemmer();
+      for (int i = 0; i < args.length; i++)
+      try
+      {
+         FileInputStream in = new FileInputStream(args[i]);
+
+         try
+         { while(true)
+
+           {  int ch = in.read();
+              if (Character.isLetter((char) ch))
+              {
+                 int j = 0;
+                 while(true)
+                 {  ch = Character.toLowerCase((char) ch);
+                    w[j] = (char) ch;
+                    if (j < 500) j++;
+                    ch = in.read();
+                    if (!Character.isLetter((char) ch))
+                    {
+                       /* to test add(char ch) */
+                       for (int c = 0; c < j; c++) s.add(w[c]);
+
+                       /* or, to test add(char[] w, int j) */
+                       /* s.add(w, j); */
+
+                       s.stem();
+                       {  String u;
+
+                          /* and now, to test toString() : */
+                          u = s.toString();
+
+                          /* to test getResultBuffer(), getResultLength() : */
+                          /* u = new String(s.getResultBuffer(), 0, s.getResultLength()); */
+
+                          System.out.print(u);
+                       }
+                       break;
+                    }
+                 }
+              }
+              if (ch < 0) break;
+              System.out.print((char)ch);
+           }
+         }
+         catch (IOException e)
+         {  System.out.println("error reading " + args[i]);
+            break;
+         }
+         finally
+         {  /* release the file handle on every exit path; the stream was
+               previously left open for each input file */
+            try { in.close(); }
+            catch (IOException ignored) { /* nothing useful to do on close failure */ }
+         }
+      }
+      catch (FileNotFoundException e)
+      {  System.out.println("file " + args[i] + " not found");
+         break;
+      }
+   }
+}
diff --git a/src/Chapter4/util/BetweennessScorer.java b/src/Chapter4/util/BetweennessScorer.java
new file mode 100644
index 0000000..0926d34
--- /dev/null
+++ b/src/Chapter4/util/BetweennessScorer.java
@@ -0,0 +1,25 @@
+package util;
+
+import GraphElements.RetweetEdge;
+import GraphElements.UserNode;
+import edu.uci.ics.jung.algorithms.scoring.VertexScorer;
+import edu.uci.ics.jung.algorithms.shortestpath.DijkstraShortestPath;
+import edu.uci.ics.jung.graph.Graph;
+import edu.uci.ics.jung.graph.Hypergraph;
+
+/**
+ * Unfinished vertex scorer intended to compute betweenness centrality.
+ * Only the shortest-path helper is constructed so far; no score is calculated.
+ */
+public class BetweennessScorer implements VertexScorer<UserNode, Double>{
+
+    /**
+     * @param graph graph to score; it is cast to Graph, so any other Hypergraph
+     *              implementation fails with a ClassCastException
+     */
+    public BetweennessScorer(Hypergraph<UserNode, RetweetEdge> graph){
+        /*
+         * Step 1: Calculate the shortest path between each pair of nodes.
+         */
+        Graph<UserNode, RetweetEdge> pairwiseGraph = (Graph<UserNode, RetweetEdge>) graph;
+        DijkstraShortestPath<UserNode, RetweetEdge> paths =
+                new DijkstraShortestPath<UserNode, RetweetEdge>(pairwiseGraph);
+//        paths.getDistance(source, target);
+    }
+
+    /**
+     * Not implemented yet.
+     *
+     * @return always null
+     */
+    public Double getVertexScore(UserNode arg0) {
+        // TODO Auto-generated method stub
+        return null;
+    }
+
+}
diff --git a/src/Chapter4/util/EigenVectorScorer.java b/src/Chapter4/util/EigenVectorScorer.java
new file mode 100644
index 0000000..da0c1a8
--- /dev/null
+++ b/src/Chapter4/util/EigenVectorScorer.java
@@ -0,0 +1,64 @@
+package Chapter4.util;
+
+import GraphElements.RetweetEdge;
+import GraphElements.UserNode;
+import cern.colt.matrix.DoubleMatrix2D;
+import cern.colt.matrix.impl.SparseDoubleMatrix2D;
+import cern.colt.matrix.linalg.EigenvalueDecomposition;
+import edu.uci.ics.jung.algorithms.scoring.VertexScorer;
+import edu.uci.ics.jung.graph.Hypergraph;
+
+/**
+ * This is a Jung Node Scorer that computes the Eigenvector Centrality for each node.
+ */
+public class EigenVectorScorer implements VertexScorer<UserNode, Double> {
+
+    // Vertices in a fixed order; index i is row/column i of the adjacency matrix.
+    private UserNode[] users;
+    // Eigenvector matrix V returned by the decomposition.
+    private DoubleMatrix2D eigenVectors;
+    // Column of V selected as the dominant eigenvector.
+    private int dominantEigenvectorIdx;
+
+    /**
+     * Builds the adjacency matrix of the graph, decomposes it, and remembers which
+     * eigenvector belongs to the largest diagonal entry of the eigenvalue matrix.
+     *
+     * @param graph the graph whose vertices will be scored
+     */
+    public EigenVectorScorer(Hypergraph<UserNode, RetweetEdge> graph){
+        users = new UserNode[graph.getVertexCount()];
+        graph.getVertices().toArray(users);
+        final int size = users.length;
+
+        /* Step 1: Create the adjacency matrix.
+         *
+         * An adjacency matrix is a matrix with N rows and N columns,
+         * where N is the number of nodes in the network.
+         * An entry is set to 1 when the graph reports an edge built from
+         * node i and node j, and 0 otherwise.
+         */
+        SparseDoubleMatrix2D adjacency = new SparseDoubleMatrix2D(size, size);
+        for(int row = 0; row < size; row++){
+            for(int col = 0; col < size; col++){
+                boolean linked = graph.containsEdge(new RetweetEdge(users[row], users[col]));
+                adjacency.setQuick(row, col, linked ? 1 : 0);
+            }
+        }
+
+        /* Step 2: Find the principal eigenvector.
+         * For more information on eigen-decomposition please see
+         * http://mathworld.wolfram.com/EigenDecomposition.html
+         */
+        EigenvalueDecomposition decomposition = new EigenvalueDecomposition(adjacency);
+        DoubleMatrix2D eigenVals = decomposition.getD();
+        eigenVectors = decomposition.getV();
+
+        // Scan the diagonal of D for the largest entry; the first one wins on ties.
+        dominantEigenvectorIdx = 0;
+        int candidate = 1;
+        while(candidate < eigenVals.columns()){
+            if(eigenVals.getQuick(candidate, candidate) >
+                    eigenVals.getQuick(dominantEigenvectorIdx, dominantEigenvectorIdx)){
+                dominantEigenvectorIdx = candidate;
+            }
+            candidate++;
+        }
+    }
+
+    /**
+     * @return the absolute value of the vertex's component in the dominant eigenvector,
+     *         or null when the vertex was not part of the graph at construction time
+     */
+    public Double getVertexScore(UserNode arg0) {
+        int position = 0;
+        for(UserNode user : users){
+            if(user.equals(arg0)){
+                return Math.abs(eigenVectors.getQuick(position, dominantEigenvectorIdx));
+            }
+            position++;
+        }
+        return null;
+    }
+
+}
diff --git a/src/Chapter4/util/InDegreeScorer.java b/src/Chapter4/util/InDegreeScorer.java
new file mode 100644
index 0000000..014adc6
--- /dev/null
+++ b/src/Chapter4/util/InDegreeScorer.java
@@ -0,0 +1,30 @@
+package Chapter4.util;
+
+import edu.uci.ics.jung.algorithms.scoring.VertexScorer;
+import edu.uci.ics.jung.graph.Hypergraph;
+
+/**
+ * This is a Jung Node Scorer that computes the
+ * In-Degree Centrality of nodes.
+ */
+public class InDegreeScorer<T> implements VertexScorer<T, Double>{
+
+    // JUNG graph whose vertices are scored.
+    private Hypergraph<T, ?> graph;
+
+    /**
+     * Creates a scorer bound to one graph.
+     * @param graph
+     * The graph whose vertices will be scored.
+     */
+    public InDegreeScorer(Hypergraph<T, ?> graph){
+        this.graph = graph;
+    }
+
+    /**
+     * @return The number of incoming edges of the vertex, as a Double.
+     */
+    public Double getVertexScore(T node) {
+        int incoming = graph.getInEdges(node).size();
+        return Double.valueOf(incoming);
+    }
+} \ No newline at end of file
diff --git a/src/Chapter4/util/TweetFileProcessor.java b/src/Chapter4/util/TweetFileProcessor.java
new file mode 100644
index 0000000..9b6b99c
--- /dev/null
+++ b/src/Chapter4/util/TweetFileProcessor.java
@@ -0,0 +1,76 @@
+package Chapter4.util;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.util.Iterator;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+/**
+ * Iterates over a UTF-8 text file that holds one JSON object per line.
+ * The next line is always read ahead, so hasNext() is a simple flag check.
+ * The underlying reader is closed as soon as the end of the file (or a read error) is reached.
+ */
+public class TweetFileProcessor implements Iterator<JSONObject>{
+
+    protected BufferedReader fileBuffer;
+    protected boolean endOfFile;
+    protected String nextLine;
+
+    /**
+     * Opens the file and reads the first line ahead. Failures are printed and leave
+     * the iterator empty rather than throwing.
+     *
+     * @param f the file to read
+     */
+    public TweetFileProcessor(File f){
+
+        endOfFile = false;
+
+        InputStreamReader isr;
+        BufferedReader br = null;
+        try {
+            isr = new InputStreamReader(new FileInputStream(f), "UTF-8");
+            br = new BufferedReader(isr);
+            nextLine = br.readLine();
+            // An empty file yields null straight away; without this check hasNext()
+            // reported true and next() tried to parse a null line.
+            if(nextLine == null){
+                endOfFile = true;
+            }
+        } catch (UnsupportedEncodingException e) {
+            e.printStackTrace();
+            endOfFile = true;
+        } catch (FileNotFoundException e) {
+            e.printStackTrace();
+            endOfFile = true;
+        } catch (IOException e) {
+            e.printStackTrace();
+            endOfFile = true;
+        }
+        finally{
+            fileBuffer = br;
+            if(endOfFile){
+                closeQuietly();
+            }
+        }
+    }
+
+    @Override
+    public boolean hasNext() {
+        return !endOfFile;
+    }
+
+    /**
+     * Parses the line that was read ahead and advances to the following one.
+     *
+     * @return the parsed object, or null when the line is not valid JSON (the error is logged)
+     * @throws java.util.NoSuchElementException if there are no more lines
+     */
+    @Override
+    public JSONObject next() {
+        if(endOfFile){
+            throw new java.util.NoSuchElementException("no more lines to read");
+        }
+        JSONObject obj = null;
+        try {
+            obj = new JSONObject(nextLine);
+        } catch (JSONException ex) {
+            Logger.getLogger(TweetFileProcessor.class.getName()).log(Level.SEVERE, null, ex);
+        }
+        try {
+            nextLine = fileBuffer.readLine();
+            if(nextLine == null){
+                endOfFile = true;
+            }
+        } catch (IOException e) {
+            e.printStackTrace();
+            // Stop iterating after a read failure; otherwise the stale line would be
+            // returned again on every call and a hasNext()/next() loop would never end.
+            endOfFile = true;
+        }
+        if(endOfFile){
+            closeQuietly();
+        }
+        return obj;
+    }
+
+    @Override
+    public void remove() throws UnsupportedOperationException{
+        throw new UnsupportedOperationException();
+    }
+
+    /** Closes the reader if it was opened, ignoring any failure while closing. */
+    private void closeQuietly(){
+        if(fileBuffer != null){
+            try {
+                fileBuffer.close();
+            } catch (IOException ignored) {
+                // Closing a read-only stream failed; there is nothing left to recover.
+            }
+        }
+    }
+}
diff --git a/src/Chapter4/util/TweetFileToGraph.java b/src/Chapter4/util/TweetFileToGraph.java
new file mode 100644
index 0000000..6cf2e3a
--- /dev/null
+++ b/src/Chapter4/util/TweetFileToGraph.java
@@ -0,0 +1,77 @@
+package Chapter4.util;
+
+import java.io.File;
+
+import GraphElements.RetweetEdge;
+import GraphElements.UserNode;
+
+import edu.uci.ics.jung.graph.DirectedGraph;
+import edu.uci.ics.jung.graph.DirectedSparseGraph;
+import edu.uci.ics.jung.graph.util.EdgeType;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+/**
+ * Some basic functionality to convert files collected
+ * in Chapter 2 to JUNG graphs.
+ */
+public class TweetFileToGraph {
+
+ /**
+ * Builds a directed retweet graph from a tweet file read through TweetFileProcessor.
+ * Tweets that could not be parsed (null) or that have no user.screen_name are skipped;
+ * tweets without a "retweeted_status" object are ignored as ordinary tweets.
+ * For every retweet both users become vertices and an edge is added with the
+ * endpoints (author, retweeted user).
+ *
+ * @param tweetFile
+ * The file holding the tweets.
+ * @return
+ * The graph that was built; it has no vertices when no retweets were found.
+ */
+ public static DirectedGraph<UserNode, RetweetEdge> getRetweetNetwork(File tweetFile){
+
+ JSONObject tmp;
+
+ TweetFileProcessor tfp = new TweetFileProcessor(tweetFile);
+ DirectedSparseGraph<UserNode, RetweetEdge> dsg = new DirectedSparseGraph<UserNode, RetweetEdge>();
+
+ while (tfp.hasNext()){
+ tmp = tfp.next();
+ // next() yields null for a line that is not valid JSON
+ if(tmp==null)
+ {
+ continue;
+ }
+ //get the author
+ String user=null;
+ try {
+ user = tmp.getJSONObject("user").getString("screen_name");
+ } catch (JSONException ex) {
+ Logger.getLogger(TweetFileToGraph.class.getName()).log(Level.SEVERE, null, ex);
+ }
+ if(user==null)
+ {
+ continue;
+ }
+ //get the retweeted user
+ try{
+ JSONObject retweet = tmp.getJSONObject("retweeted_status");
+ String retweeted_user = retweet.getJSONObject("user").getString("screen_name");
+
+ //make an edge or increment the weight if it exists.
+ UserNode toUser = new UserNode(retweeted_user);
+ UserNode fromUser = new UserNode(user);
+
+ dsg.addVertex(toUser);
+ dsg.addVertex(fromUser);
+
+ // NOTE(review): the edge object is built as (toUser, fromUser) while it is added
+ // below with endpoints (fromUser, toUser) -- confirm the constructor's argument order.
+ RetweetEdge edge = new RetweetEdge(toUser, fromUser);
+
+ // presumably containsEdge matches on RetweetEdge.equals(); verify against RetweetEdge
+ if(dsg.containsEdge(edge)){
+ dsg.findEdge(fromUser, toUser).incrementRTCount();
+ }
+ else{
+ dsg.addEdge(edge, fromUser, toUser);
+ }
+ // NOTE(review): by this point an equal edge is already in the graph in either
+ // branch, so this second addEdge looks redundant -- confirm before removing.
+ dsg.addEdge(edge, fromUser, toUser, EdgeType.DIRECTED);
+ }
+ catch(JSONException ex){
+ //the tweet is not a retweet. this is not a problem.
+ }
+
+
+ }
+
+ return dsg;
+ }
+}
diff --git a/src/Chapter5/network/CreateD3Network.java b/src/Chapter5/network/CreateD3Network.java
new file mode 100644
index 0000000..d4c25af
--- /dev/null
+++ b/src/Chapter5/network/CreateD3Network.java
@@ -0,0 +1,716 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package Chapter5.network;
+
+
+import Chapter5.support.HashTagDS;
+import Chapter5.support.NetworkNode;
+import Chapter5.support.NodeIDComparator;
+import Chapter5.support.NodeSizeComparator;
+import Chapter5.support.ToNodeInfo;
+import Chapter5.support.Tweet;
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import utils.TextUtils;
+
+/**
+ *
+ * @author shamanth
+ */
+public class CreateD3Network
+{
+ // Default input file name. NOTE(review): not referenced anywhere inside this class.
+ static final String DEF_INFILENAME = "ows.json";
+ // Regular expression for a textual retweet marker ("rt @username"); GetRTUsers compiles it case-insensitively.
+ private String RTPATTERN = "rt @[_a-zA-Z0-9]+";
+ // Size given to a node when it is created; it is incremented once for every retweet link that starts at the node.
+ private final int DEFAULT_NODE_SIZE = 0;
+// private final int NODE_COUNT_LIMIT = 1;
+ //private final String[] node_color_scheme = new String[]{"#FFFFD9","#EDF8B1","#C7E9B4","#7FCDBB","#41B6C4","#1D91C0","#225EA8","#253494","#081D58"};
+ //private final String[] node_color_scheme = new String[]{"#A6BDDB","#74A9CF","#3690C0","#0570B0","#045A8D","#023858"};
+
+ /**
+  * Extracts the users who have been retweeted using the RTPATTERN.
+  * The pattern is applied case-insensitively, so "RT @User", "rt @user" and
+  * "Rt @USER" all yield "user".
+  * @param text text of a tweet
+  * @return the lowercased user names in the order in which they appear in
+  *         the text; empty when the text contains no retweet marker
+  */
+ public ArrayList<String> GetRTUsers(String text)
+ {
+     Pattern p = Pattern.compile(RTPATTERN, Pattern.CASE_INSENSITIVE);
+     Matcher m = p.matcher(text);
+     ArrayList<String> rtusers = new ArrayList<String>();
+     while(m.find())
+     {
+         //the marker has to be stripped with the same case-insensitivity that was
+         //used to find it, otherwise "Rt @user" would be kept as the user name
+         String nuser = m.group().replaceAll("(?i)rt @", "");
+         rtusers.add(nuser.toLowerCase());
+     }
+     return rtusers;
+ }
+
+ /**
+  * Decides which category a tweet belongs to. Every category is described by
+  * a group of words/hashtags; each of them that occurs in the lowercased
+  * tweet gives one vote to its category and the category with the most
+  * votes wins.
+  * @param tweet text of the tweet
+  * @param usercategories the categories, addressed by their array index
+  * @return index of the category with the most votes, 0 when nothing matched
+  */
+ public int GetCategory(String tweet, HashTagDS[] usercategories)
+ {
+     HashMap<Integer,Integer> votes = new HashMap<Integer,Integer>();
+     String lowered = tweet.toLowerCase();
+     for(int c=0;c<usercategories.length;c++)
+     {
+         for(String tag:usercategories[c].tags)
+         {
+             if(!lowered.contains(tag))
+             {
+                 continue;
+             }
+             Integer sofar = votes.get(c);
+             votes.put(c, sofar==null ? 1 : sofar+1);
+         }
+     }
+     //by default the tweet will be in the first category
+     int winner = 0;
+     int winnerVotes = 0;
+     for(int c:votes.keySet())
+     {
+         int count = votes.get(c);
+         if(count>winnerVotes)
+         {
+             winnerVotes = count;
+             winner = c;
+         }
+     }
+     return winner;
+ }
+
+ /**
+  * Converts the input jsonobject containing category descriptions to an array for processing.
+  * Every key of the object is the name of a group; its value is an object
+  * holding the color of the group ("color") and its hashtags ("hts").
+  * @param hashtagcoll JSONObject containing the list of hashtags, color, and the topic information
+  * @return An array with one entry per group that could be read. The array is
+  *         empty when hashtagcoll is null; reading stops at the first
+  *         malformed group and the groups read so far are returned.
+  */
+ public HashTagDS[] ConvertJSONArrayToArray(JSONObject hashtagcoll)
+ {
+     ArrayList<HashTagDS> parsed = new ArrayList<HashTagDS>();
+     if(hashtagcoll==null)
+     {
+         return new HashTagDS[0];
+     }
+     try{
+         Iterator<?> keyit = hashtagcoll.keys();
+         while(keyit.hasNext())
+         {
+             //the key itself is the name of the group
+             String groupname = (String) keyit.next();
+             JSONObject tags = hashtagcoll.getJSONObject(groupname);
+             HashTagDS ht = new HashTagDS();
+             ht.groupname = groupname;
+             ht.color = tags.getString("color");
+             JSONArray tagjson = tags.getJSONArray("hts");
+             ht.tags = new String[tagjson.length()];
+             for(int i=0;i<tagjson.length();i++)
+             {
+                 ht.tags[i] = tagjson.getString(i);
+             }
+             //only completely read groups are kept, so the result never contains null slots
+             parsed.add(ht);
+         }
+     }catch(JSONException ex)
+     {
+         ex.printStackTrace();
+     }
+     return parsed.toArray(new HashTagDS[parsed.size()]);
+ }
+
+ /**
+  * Determines the category of a node from the tweets stored with it. Every
+  * tweet is assigned a category through GetCategory and the category that
+  * was assigned most often wins; a simple majority is sufficient.
+  * @param tnfs node whose tweets (data) are inspected
+  * @param hashtagarray the categories
+  * @return index of the winning category, -1 when the node has no tweets
+  */
+ public int GetMajorityTopicColor(NetworkNode tnfs,HashTagDS[] hashtagarray)
+ {
+     HashMap<Integer,Integer> tally = new HashMap<Integer,Integer>();
+     for(String tweet:tnfs.data)
+     {
+         int cat = this.GetCategory(tweet, hashtagarray);
+         Integer seen = tally.get(cat);
+         tally.put(cat, seen==null ? 1 : seen+1);
+     }
+     int winner = -1;
+     int winnerCount = 0;
+     for(int cat:tally.keySet())
+     {
+         int count = tally.get(cat);
+         //the first category is always taken, later ones only when they are strictly more frequent
+         if(winner==-1 || winnerCount<count)
+         {
+             winner = cat;
+             winnerCount = count;
+         }
+     }
+     return winner;
+ }
+
+ /**
+  * Takes as input a JSON file and reads through the file sequentially to process and create a retweet network from the tweets.
+  * The work is done in four steps: the retweet links are read from the file,
+  * every node gets its category and size group, the network is pruned to the
+  * nodes reachable from the num_nodes most retweeted users, and the remaining
+  * nodes are numbered and converted to the D3 structure.
+  * @param inFilename file with one tweet (JSON object) per line, read as UTF-8
+  * @param numNodeClasses number of size groups handed to ComputeGroupsSqrt
+  * @param hashtags category info containing hashtags
+  * @param num_nodes number of seed nodes to be included in the network
+  * @return a JSONObject consisting of nodes and links of the network
+  */
+ public JSONObject ConvertTweetsToDiffusionPath(String inFilename,int numNodeClasses,
+         JSONObject hashtags, int num_nodes)
+ {
+     HashTagDS[] hashtagarray = ConvertJSONArrayToArray(hashtags);
+     HashMap<String,NetworkNode> userconnections = readRetweetConnections(inFilename, hashtagarray);
+
+     ArrayList<NetworkNode> returnnodes = new ArrayList<NetworkNode>();
+     //its +1 because nodes with size 0 are not going to be used to calculate the class
+     int min = DEFAULT_NODE_SIZE+1;
+     int max = DEFAULT_NODE_SIZE+1;
+     for(NetworkNode n:userconnections.values())
+     {
+         int maxcat = GetMajorityTopicColor(n,hashtagarray);
+         n.catID = maxcat;
+         //-1 means that the node carries no tweets, there is no color to look up then
+         if(maxcat>=0)
+         {
+             n.catColor = hashtagarray[maxcat].color;
+         }
+         if(n.size>max)
+         {
+             max = n.size;
+         }
+         if(n.size<min)
+         {
+             min = n.size;
+         }
+         returnnodes.add(n);
+     }
+     //create node groups to assign unique colors to nodes in different Categories based upon the number of connections
+     ArrayList<NetworkNode> nodes = ComputeGroupsSqrt(returnnodes, max, min, numNodeClasses);
+     Collections.sort(nodes,Collections.reverseOrder(new NodeSizeComparator()));
+     //select how many nodes to show.
+     int nodes_to_visit = Math.min(num_nodes, nodes.size());
+
+     HashMap<String,NetworkNode> prunednodes = new HashMap<String,NetworkNode>();
+     HashMap<String,Integer> nodeidlist = new HashMap<String,Integer>();
+     int nodeid = 0; //node nodeid counter
+     for(int k=0;k<nodes_to_visit;k++)
+     {
+         NetworkNode nd = nodes.get(k);
+         nd.level = 0;
+         HashMap<String,NetworkNode> rtnodes = GetNextHopConnections(userconnections,nd,new HashMap<String,NetworkNode>());
+         for(String n:rtnodes.keySet())
+         {
+             if(!prunednodes.containsKey(n))
+             {
+                 NetworkNode newnode = rtnodes.get(n);
+                 //nodes of size 0 have never been retweeted and are left out
+                 if(newnode.size>0)
+                 {
+                     prunednodes.put(n, newnode);
+                     nodeidlist.put(n, nodeid++);
+                 }
+             }
+         }
+     }
+
+     /* We now have all the nodes of the network. Assign the sequential ids to
+      * the respective nodes. Simultaneously compact the links of every node to
+      * remove all links to nodes which have not been retweeted and are of size 0.
+      */
+     ArrayList<NetworkNode> finalnodes = new ArrayList<NetworkNode>();
+     for(NetworkNode nd:prunednodes.values())
+     {
+         nd.id = nodeidlist.get(nd.username);
+         ArrayList<ToNodeInfo> keptlinks = new ArrayList<ToNodeInfo>();
+         for(ToNodeInfo tnf: nd.tonodes)
+         {
+             //a target without an id has never been retweeted. the chain terminates here, so remove it
+             Integer targetid = nodeidlist.get(tnf.tousername);
+             if(targetid!=null)
+             {
+                 tnf.tonodeid = targetid;
+                 keptlinks.add(tnf);
+             }
+         }
+         //replace the list so that no links with stale target ids are left behind
+         nd.tonodes = keptlinks;
+         finalnodes.add(nd);
+     }
+     Collections.sort(finalnodes,new NodeIDComparator());
+     System.out.println(finalnodes.size());
+     for(NetworkNode node:finalnodes)
+     {
+         System.out.println(node.id+" "+node.username+" "+node.level+" "+node.size+" "+node.catColor+node.data.get(0));
+     }
+     return GetD3Structure(finalnodes);
+ }
+
+ /**
+  * Reads the tweets of a file and collects the retweet links between the users.
+  * Lines that are not valid JSON or have no text, tweets that match none of
+  * the topics and tweets that are not retweets are skipped.
+  * @param inFilename file with one tweet (JSON object) per line, read as UTF-8
+  * @param hashtagarray the topics
+  * @return the nodes of the network, keyed by the lowercased user name
+  */
+ private HashMap<String,NetworkNode> readRetweetConnections(String inFilename, HashTagDS[] hashtagarray)
+ {
+     HashMap<String,NetworkNode> userconnections = new HashMap<String,NetworkNode>();
+     BufferedReader br = null;
+     try{
+         br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
+         String temp;
+         while((temp = br.readLine())!=null)
+         {
+             JSONObject tweetobj;
+             String text;
+             try {
+                 tweetobj = new JSONObject(temp);
+                 text = TextUtils.GetCleanText(tweetobj.getString("text")).toLowerCase();
+             } catch (JSONException ex) {
+                 ex.printStackTrace();
+                 continue;
+             }
+             //Check that the tweet matches at least one of the topics
+             if(!matchesAnyTopic(text, hashtagarray))
+             {
+                 continue;
+             }
+             ArrayList<String> fromusers = extractRetweetedUsers(tweetobj, text);
+             if(fromusers.isEmpty())
+             {
+                 continue;
+             }
+             Tweet t = new Tweet();
+             t.text = TextUtils.RemoveRTElements(text);
+             if(!tweetobj.isNull("user"))
+             {
+                 try {
+                     t.user = tweetobj.getJSONObject("user").getString("screen_name").toLowerCase();
+                 } catch (JSONException ex) {
+                     Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex);
+                 }
+             }
+             //the category has to be known before its color can be looked up
+             t.catID = GetCategory(text, hashtagarray);
+             t.catColor = hashtagarray[t.catID].color;
+             addRetweetChain(userconnections, fromusers, t);
+         }
+     }catch(IOException ex)
+     {
+         ex.printStackTrace();
+     }finally{
+         //br is still null when the file could not be opened
+         if(br!=null)
+         {
+             try {
+                 br.close();
+             } catch (IOException ex) {
+                 Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex);
+             }
+         }
+     }
+     return userconnections;
+ }
+
+ /**
+  * Checks whether a text contains at least one tag of at least one topic.
+  */
+ private boolean matchesAnyTopic(String text, HashTagDS[] hashtagarray)
+ {
+     for(HashTagDS ht:hashtagarray)
+     {
+         for(String tg:ht.tags)
+         {
+             if(text.contains(tg))
+             {
+                 return true;
+             }
+         }
+     }
+     return false;
+ }
+
+ /**
+  * Finds the users a tweet was retweeted from: the author of the
+  * "retweeted_status" when the tweet has one, otherwise the users named by
+  * "RT @username" markers in the text. All names are lowercased.
+  */
+ private ArrayList<String> extractRetweetedUsers(JSONObject tweetobj, String text)
+ {
+     if(tweetobj.isNull("retweeted_status"))
+     {
+         //use the tweet text to retrieve the pattern "RT @username:"
+         return GetRTUsers(text);
+     }
+     ArrayList<String> fromusers = new ArrayList<String>();
+     try {
+         JSONObject rtstatus = tweetobj.getJSONObject("retweeted_status");
+         if(!rtstatus.isNull("user"))
+         {
+             JSONObject rtuserobj = rtstatus.getJSONObject("user");
+             //lowercased like every other user name that is used as a key
+             fromusers.add(rtuserobj.get("screen_name").toString().toLowerCase());
+         }
+     } catch (JSONException ex) {
+         Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex);
+     }
+     return fromusers;
+ }
+
+ /**
+  * Adds the links of one retweet chain to the network. The chain runs from
+  * the last user in fromusers over the earlier ones to the author of the
+  * tweet; nodes that do not exist yet are created.
+  */
+ private void addRetweetChain(HashMap<String,NetworkNode> userconnections, ArrayList<String> fromusers, Tweet t)
+ {
+     int cur_level = 0;
+     for(int i=fromusers.size()-1;i>=0;i--)
+     {
+         String fromname = fromusers.get(i);
+         //if this is the last user in the retweet sequence then use the user of the tweet as the next link,
+         //otherwise the next user of the chain
+         String touser = (i==0) ? t.user : fromusers.get(i-1);
+         //a tweet without an author has no node to link to, and don't add any selflinks
+         if(touser==null || fromname.equals(touser))
+         {
+             continue;
+         }
+         NetworkNode fromuser = userconnections.get(fromname);
+         if(fromuser==null)
+         {
+             //the from user was not found. add the node
+             fromuser = new NetworkNode();
+             fromuser.username = fromname;
+             fromuser.tonodes = new ArrayList<ToNodeInfo>();
+             fromuser.class_codes = new ArrayList<Integer>();
+             fromuser.size = DEFAULT_NODE_SIZE;
+             fromuser.level = cur_level;
+             fromuser.data = new ArrayList<String>();
+             fromuser.data.add(t.text);
+         }
+         //if to node is not in the list then create it
+         NetworkNode tonode = userconnections.get(touser);
+         if(tonode==null)
+         {
+             tonode = new NetworkNode();
+             tonode.username = touser;
+             tonode.tonodes= new ArrayList<ToNodeInfo>();
+             tonode.class_codes = new ArrayList<Integer>();
+             tonode.catID = t.catID;
+             tonode.catColor = t.catColor;
+             tonode.size = DEFAULT_NODE_SIZE;
+             tonode.data= new ArrayList<String>();
+             tonode.data.add(t.text);
+             tonode.level = cur_level+1;
+             userconnections.put(touser, tonode);
+         }
+         else
+         {
+             tonode.data.add(t.text);
+             //a to node is always one level below the node it is linked from
+             if(tonode.level<cur_level+1)
+             {
+                 tonode.level = cur_level+1;
+             }
+         }
+         ToNodeInfo inf = new ToNodeInfo();
+         inf.tonodeid = tonode.id;
+         inf.text = t.text;
+         inf.tousername = touser;
+         inf.catID = t.catID;
+         inf.catColor = t.catColor;
+         fromuser.tonodes.add(inf);
+         //update from node size
+         fromuser.size++;
+         //add back updated fromuser
+         userconnections.put(fromname, fromuser);
+         //update the level for next iteration
+         cur_level++;
+     }
+ }
+
+ /**
+  * Builds the D3 representation of the network: a JSONObject with the two
+  * JSONArrays "nodes" and "links". Every link of a node is additionally
+  * described in the "data" array of that node.
+  * @param finalnodes the nodes of the network
+  * @return the JSONObject holding "nodes" and "links"; a node whose
+  *         conversion fails with a JSONException is left out
+  */
+ public JSONObject GetD3Structure(ArrayList<NetworkNode> finalnodes)
+ {
+     JSONObject graph = new JSONObject();
+     JSONArray nodearray = new JSONArray();
+     JSONArray linkarray = new JSONArray();
+     for (NetworkNode node : finalnodes)
+     {
+         try {
+             //create adjacencies
+             JSONArray linkdetails = new JSONArray();
+             for (ToNodeInfo target : node.tonodes)
+             {
+                 JSONObject link = new JSONObject();
+                 link.put("source", node.id);
+                 link.put("target", target.tonodeid);
+                 //weight of the edge
+                 link.put("value", 1);
+                 //class code is a unique id corresponding to the text
+                 link.put("data", target.class_code);
+                 linkarray.put(link);
+                 //create a data object for the node
+                 JSONObject detail = new JSONObject();
+                 detail.put("tonodeid", target.tonodeid);
+                 detail.put("nodefrom", node.username);
+                 detail.put("nodeto", target.tousername);
+                 detail.put("tweet", target.text);
+                 linkdetails.put(detail);
+             }
+             //add node
+             JSONObject jsnode = new JSONObject();
+             jsnode.put("name", node.username);
+             jsnode.put("group", node.group);
+             jsnode.put("id", node.id);
+             jsnode.put("size", node.size);
+             jsnode.put("catColor", node.catColor);
+             jsnode.put("catID", node.catID);
+             jsnode.put("data", linkdetails);
+             jsnode.put("level", node.level);
+             nodearray.put(jsnode);
+         } catch (JSONException ex) {
+             ex.printStackTrace();
+         }
+     }
+     try {
+         graph.put("nodes", nodearray);
+         graph.put("links", linkarray);
+     } catch (JSONException ex) {
+         Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex);
+     }
+     return graph;
+ }
+
+ /**
+  * Recursively follows the links of a node to collect all nodes reachable
+  * from it. The level of every node is increased by one when it is visited.
+  * @param userconnections A map containing the usernames as keys and the node information as value
+  * @param cur_node Node currently being processed.
+  * @param newnodes The nodes reached so far; the current node and everything reachable from it is added to this map
+  * @return A map of the usernames and the node information for all nodes reachable
+  */
+ public HashMap<String,NetworkNode> GetNextHopConnections(HashMap<String,NetworkNode> userconnections,NetworkNode cur_node,HashMap<String,NetworkNode> newnodes)
+ {
+     cur_node.level++;
+     newnodes.put(cur_node.username,cur_node);
+     for(ToNodeInfo hop:cur_node.tonodes)
+     {
+         //nodes that have been reached before are not visited a second time
+         if(!newnodes.containsKey(hop.tousername))
+         {
+             newnodes = GetNextHopConnections(userconnections, userconnections.get(hop.tousername),newnodes);
+         }
+     }
+     return newnodes;
+ }
+
+ /**
+  * Assigns every node to a group using the square root binning technique.
+  * For a node of size x, y groups and a largest node of size max the group
+  * is ceil((sqrt(x)/sqrt(max))*y)-1; nodes of size 0 are put in group 0.
+  * @param nodes A list of nodes
+  * @param max The maximum size of a node
+  * @param min The minimum size of a node (not used by the computation)
+  * @param noofclasses Number of classes into which the nodes must be classified
+  * @return A new list with the same nodes, each carrying its group
+  */
+ public ArrayList<NetworkNode> ComputeGroupsSqrt(ArrayList<NetworkNode> nodes, int max, int min, int noofclasses)
+ {
+     ArrayList<NetworkNode> grouped = new ArrayList<NetworkNode>();
+     for(NetworkNode node:nodes)
+     {
+         int bin = 0;
+         if(node.size>0)
+         {
+             double share = Math.sqrt(node.size)/Math.sqrt(max);
+             bin = (int) Math.ceil(share*noofclasses)-1;
+         }
+         node.group = bin;
+         grouped.add(node);
+     }
+     return grouped;
+ }
+
+
+ //DEBUG use only: builds two sample groups and runs the conversion on a fixed local file
+ public static void main(String[] args)
+ {
+     try {
+         JSONArray firsttags = new JSONArray();
+         firsttags.put("zuccotti");
+         JSONObject firstgroup = new JSONObject();
+         firstgroup.put("color", "#800000");
+         firstgroup.put("hts", firsttags);
+
+         JSONArray secondtags = new JSONArray();
+         secondtags.put("#nypd");
+         JSONObject secondgroup = new JSONObject();
+         secondgroup.put("color", "#0FFF00");
+         secondgroup.put("hts", secondtags);
+
+         JSONObject groups = new JSONObject();
+         groups.put("Group 1", firstgroup);
+         groups.put("Group 2", secondgroup);
+
+         CreateD3Network cdn = new CreateD3Network();
+         String filename = "D:\\Twitter Data Analytics\\Data\\testows.json";
+         JSONObject nodes = cdn.ConvertTweetsToDiffusionPath(filename,7, groups,5);
+     } catch (JSONException ex) {
+         ex.printStackTrace();
+     }
+ }
+}
diff --git a/src/Chapter5/network/ExtractUserTagNetwork.java b/src/Chapter5/network/ExtractUserTagNetwork.java
new file mode 100644
index 0000000..43ae680
--- /dev/null
+++ b/src/Chapter5/network/ExtractUserTagNetwork.java
@@ -0,0 +1,173 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter5.network;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.HashMap;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+public class ExtractUserTagNetwork
+{
+
+ // Input file used by main when no existing file is named on the command line.
+ static final String DEF_INFILENAME = "ows.json";
+
+ /**
+  * Extracts a map of all the hashtags a user has used in his tweets resulting in a bipartite network. The frequency of each tag is also returned in the form of a map.
+  * The hashtags are taken from the "entities" of a tweet when it has them and
+  * are extracted from its text otherwise. Lines that are not valid JSON are
+  * skipped; a file that cannot be read yields the users collected so far.
+  * NOTE(review): tags taken from the entities carry no leading "#" while tags
+  * extracted from the text do - confirm which form the consumers expect.
+  * @param inFilename File containing a list of tweets as JSON objects, one per line, read as UTF-8
+  * @return A map containing the users (lowercased, prefixed with "@") as keys and a map containing the hashtags they use along with their frequency.
+  */
+ public HashMap<String,HashMap<String,Integer>> ExtractUserHashtagNetwork(String inFilename)
+ {
+     HashMap<String,HashMap<String,Integer>> usertagmap = new HashMap<String,HashMap<String,Integer>>();
+     BufferedReader br = null;
+     try{
+         br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
+         String temp;
+         while((temp = br.readLine())!=null)
+         {
+             try{
+                 JSONObject tweetobj = new JSONObject(temp);
+                 HashMap<String,Integer> tags = new HashMap<String,Integer>();
+                 if(!tweetobj.isNull("entities"))
+                 {
+                     JSONObject entities = tweetobj.getJSONObject("entities");
+                     try {
+                         JSONArray hashtags = entities.getJSONArray("hashtags");
+                         for(int i=0;i<hashtags.length();i++)
+                         {
+                             String tg = hashtags.getJSONObject(i).getString("text").toLowerCase();
+                             Integer count = tags.get(tg);
+                             tags.put(tg, count==null ? 1 : count+1);
+                         }
+                     }catch(JSONException ex)
+                     {
+                         //the user is still recorded with the tags read so far
+                         ex.printStackTrace();
+                     }
+                 }
+                 else if(!tweetobj.isNull("text"))
+                 {
+                     tags = ExtractHashTags(tweetobj.getString("text"));
+                 }
+                 if(!tweetobj.isNull("user"))
+                 {
+                     JSONObject userobj = tweetobj.getJSONObject("user");
+                     String username = "@"+userobj.getString("screen_name").toLowerCase();
+                     HashMap<String,Integer> usertags = usertagmap.get(username);
+                     if(usertags==null)
+                     {
+                         usertagmap.put(username, tags);
+                     }
+                     else
+                     {
+                         //add the counts of this tweet to the counts of the user
+                         for(String k:tags.keySet())
+                         {
+                             Integer count = usertags.get(k);
+                             usertags.put(k, count==null ? tags.get(k) : count+tags.get(k));
+                         }
+                     }
+                 }
+             }catch(JSONException ex)
+             {
+                 ex.printStackTrace();
+             }
+         }
+     }catch(IOException ex)
+     {
+         ex.printStackTrace();
+     }finally{
+         //br is still null when the file could not be opened
+         if(br!=null)
+         {
+             try {
+                 br.close();
+             } catch (IOException ex) {
+                 Logger.getLogger(ExtractUserTagNetwork.class.getName()).log(Level.SEVERE, null, ex);
+             }
+         }
+     }
+     return usertagmap;
+ }
+
+ /**
+  * Counts the hashtags mentioned in a tweet. A hashtag is a "#" followed by
+  * letters and digits; the tags are lowercased before they are counted.
+  * @param text text of the tweet
+  * @return A map containing the hashtags (including the "#") as keys and their frequency as value
+  */
+ public HashMap<String,Integer> ExtractHashTags(String text)
+ {
+     HashMap<String,Integer> counts = new HashMap<String,Integer>();
+     Matcher found = Pattern.compile("#[a-zA-Z0-9]+").matcher(text);
+     while(found.find())
+     {
+         String tag = found.group().toLowerCase();
+         Integer sofar = counts.get(tag);
+         counts.put(tag, sofar==null ? 1 : sofar+1);
+     }
+     return counts;
+ }
+
+ /**
+  * Prints every user followed by the hashtags he used and their frequency.
+  * The first argument names the input file; the default file is used when
+  * it is missing, empty or does not name an existing file.
+  */
+ public static void main(String[] args)
+ {
+     ExtractUserTagNetwork eutn = new ExtractUserTagNetwork();
+
+     String infilename = DEF_INFILENAME;
+     boolean namegiven = args!=null && args.length>=1 && !args[0].isEmpty();
+     if(namegiven && new File(args[0]).exists())
+     {
+         infilename = args[0];
+     }
+     HashMap<String, HashMap<String,Integer>> usertagmap = eutn.ExtractUserHashtagNetwork(infilename);
+     for(String user:usertagmap.keySet())
+     {
+         System.out.println(user);
+         HashMap<String,Integer> usertags = usertagmap.get(user);
+         for(String tag:usertags.keySet())
+         {
+             System.out.println(tag+","+usertags.get(tag));
+         }
+     }
+ }
+}
diff --git a/src/Chapter5/support/DateInfo.java b/src/Chapter5/support/DateInfo.java
new file mode 100644
index 0000000..9a32d4c
--- /dev/null
+++ b/src/Chapter5/support/DateInfo.java
@@ -0,0 +1,30 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter5.support;
+
+import java.util.Date;
+import java.util.HashMap;
+
+public class DateInfo implements Comparable
+{
+    public Date d;
+    public HashMap<String,Integer> catcounts = new HashMap<String,Integer>();
+
+    /**
+     * Compares on the date only and puts later dates first: the result is 1
+     * when the other date is after this one, -1 when it is before and 0
+     * when neither is the case.
+     * @param o the DateInfo to compare with
+     */
+    public int compareTo(Object o) {
+        Date other = ((DateInfo) o).d;
+        if(other.after(this.d))
+        {
+            return 1;
+        }
+        if(other.before(this.d))
+        {
+            return -1;
+        }
+        return 0;
+    }
+}
diff --git a/src/Chapter5/support/HashTagDS.java b/src/Chapter5/support/HashTagDS.java
new file mode 100644
index 0000000..b338b6d
--- /dev/null
+++ b/src/Chapter5/support/HashTagDS.java
@@ -0,0 +1,18 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package Chapter5.support;
+
+/**
+ * Plain data holder describing one category (group) of hashtags/words
+ * together with the color used to display it.
+ * @author shamanth
+ */
+public class HashTagDS
+{
+ // name of the group
+ public String groupname;
+ // the words/hashtags that make up the group
+ public String[] tags;
+ // color of the group, e.g. "#800000"
+ public String color;
+
+}
diff --git a/src/Chapter5/support/NetworkNode.java b/src/Chapter5/support/NetworkNode.java
new file mode 100644
index 0000000..4f662e8
--- /dev/null
+++ b/src/Chapter5/support/NetworkNode.java
@@ -0,0 +1,49 @@
+package Chapter5.support;
+
+
+import java.util.ArrayList;
+
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+/**
+ * A user of the retweet network together with the links that start at him.
+ * @author shamanth
+ */
+public class NetworkNode
+{
+    public int id;
+    public String username;
+    public int size;
+    public String catColor;
+    public int group;
+    public int catID;
+    public ArrayList<String> data;
+    public int level;
+    public ArrayList<Integer> class_codes;
+    public ArrayList<ToNodeInfo> tonodes;
+
+    /**
+     * Creates a shallow copy of this node: the lists (data, class_codes,
+     * tonodes) are shared with the original. The level is not copied and
+     * keeps its default value in the copy.
+     * @return the copy
+     */
+    public NetworkNode Copy()
+    {
+        NetworkNode duplicate = new NetworkNode();
+        duplicate.id = this.id;
+        duplicate.username = this.username;
+        duplicate.size = this.size;
+        duplicate.catColor = this.catColor;
+        duplicate.group = this.group;
+        duplicate.catID = this.catID;
+        duplicate.data = this.data;
+        duplicate.class_codes = this.class_codes;
+        duplicate.tonodes = this.tonodes;
+        return duplicate;
+    }
+}
diff --git a/src/Chapter5/support/NodeIDComparator.java b/src/Chapter5/support/NodeIDComparator.java
new file mode 100644
index 0000000..0b41ae7
--- /dev/null
+++ b/src/Chapter5/support/NodeIDComparator.java
@@ -0,0 +1,32 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package Chapter5.support;
+
+import java.util.Comparator;
+
+/**
+ * Orders NetworkNode objects by ascending id.
+ * @author shamanth
+ */
+public class NodeIDComparator implements Comparator
+{
+
+    /**
+     * @param o1 first NetworkNode
+     * @param o2 second NetworkNode
+     * @return 1, -1 or 0 when the id of o1 is greater than, less than or equal to the id of o2
+     */
+    public int compare(Object o1, Object o2) {
+        int first = ((NetworkNode) o1).id;
+        int second = ((NetworkNode) o2).id;
+        if(first==second)
+        {
+            return 0;
+        }
+        return first>second ? 1 : -1;
+    }
+
+}
diff --git a/src/Chapter5/support/NodeSizeComparator.java b/src/Chapter5/support/NodeSizeComparator.java
new file mode 100644
index 0000000..23ecb4e
--- /dev/null
+++ b/src/Chapter5/support/NodeSizeComparator.java
@@ -0,0 +1,29 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package Chapter5.support;
+import java.util.Comparator;
+
+/**
+ * Orders NetworkNode objects by ascending size.
+ * @author shamanth
+ */
+public class NodeSizeComparator implements Comparator
+{
+    /**
+     * @param o1 first NetworkNode
+     * @param o2 second NetworkNode
+     * @return 1, -1 or 0 when the size of o1 is greater than, less than or equal to the size of o2
+     */
+    public int compare(Object o1, Object o2)
+    {
+        int first = ((NetworkNode) o1).size;
+        int second = ((NetworkNode) o2).size;
+        if(first==second)
+        {
+            return 0;
+        }
+        return first>second ? 1 : -1;
+    }
+
+}
diff --git a/src/Chapter5/support/ToNodeInfo.java b/src/Chapter5/support/ToNodeInfo.java
new file mode 100644
index 0000000..725a10a
--- /dev/null
+++ b/src/Chapter5/support/ToNodeInfo.java
@@ -0,0 +1,23 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package Chapter5.support;
+
+/**
+ * Plain data holder describing one link of the retweet network as seen from
+ * the node it starts at: the node it leads to and the tweet behind it.
+ * @author shamanth
+ */
+public class ToNodeInfo
+{
+ // id of the node the link leads to
+ public int tonodeid;
+ // text of the tweet behind the link
+ public String text;
+ // user name of the node the link leads to
+ public String tousername;
+ // NOTE(review): presumably the publication date of the tweet; confirm where it is set
+ public String date;
+ // NOTE(review): described elsewhere as a unique id corresponding to the text; confirm where it is set
+ public int class_code;
+ // index of the category of the tweet
+ public int catID;
+ // color of the category of the tweet
+ public String catColor;
+ //this is the default direction invert option. If the library adds nodes to the adjacency then that should be set to true in the client side
+// public boolean direction = false;
+}
diff --git a/src/Chapter5/support/Tweet.java b/src/Chapter5/support/Tweet.java
new file mode 100644
index 0000000..be53166
--- /dev/null
+++ b/src/Chapter5/support/Tweet.java
@@ -0,0 +1,21 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package Chapter5.support;
+
+/**
+ * Plain data holder for the parts of a tweet that are used by the Chapter 5
+ * examples.
+ * @author shamanth
+ */
+public class Tweet {
+ // text of the tweet
+ public String text;
+ // NOTE(review): presumably the id of the tweet; confirm against the code that fills it
+ public long id;
+ // NOTE(review): presumably the latitude of the tweet location; confirm
+ public double lat;
+ // NOTE(review): presumably the longitude of the tweet location; confirm
+ public double lng;
+ // NOTE(review): presumably the publication date of the tweet; format to be confirmed
+ public String pubdate;
+ // screen name of the author
+ public String user;
+ // index of the category of the tweet
+ public int catID;
+ // color of the category of the tweet
+ public String catColor;
+}
diff --git a/src/Chapter5/text/EventSummaryExtractor.java b/src/Chapter5/text/EventSummaryExtractor.java
new file mode 100644
index 0000000..e76f42e
--- /dev/null
+++ b/src/Chapter5/text/EventSummaryExtractor.java
@@ -0,0 +1,269 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter5.text;
+
+import Chapter5.support.DateInfo;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+/**
+ * Counts, per hour, how many tweets mention each of a fixed set of word
+ * categories and packages the counts as a JSON object holding grid
+ * coordinates, axis labels and data values.
+ *
+ * Instances hold SimpleDateFormat fields, which are not thread-safe; do not
+ * share one instance between threads.
+ */
+public class EventSummaryExtractor
+{
+
+    final String DEF_INFILENAME = "ows.json";
+    HashMap<String,ArrayList<String>> CATEGORIES = new HashMap<String,ArrayList<String>>();
+    SimpleDateFormat twittersdm = new SimpleDateFormat("EEE MMM dd HH:mm:ss Z yyyy");
+    SimpleDateFormat dayhoursdm = new SimpleDateFormat("yyyy-MM-dd:HH");
+    SimpleDateFormat hoursdm = new SimpleDateFormat("HH");
+
+    private static final Logger LOGGER = Logger.getLogger(EventSummaryExtractor.class.getName());
+
+    /**
+     * Fills {@link #CATEGORIES} with the built-in category names and the
+     * lower-case words that indicate each category.
+     */
+    public void InitializeCategories()
+    {
+        addCategory("People", "protesters", "people");
+        addCategory("Police", "police", "cops", "nypd", "raid");
+        addCategory("Media", "press", "news", "media");
+        addCategory("Location", "nyc", "zucotti", "park");
+        addCategory("Judiciary", "judge", "eviction", "order", "court");
+    }
+
+    /** Registers one category with its indicator words, replacing any previous entry of the same name. */
+    private void addCategory(String name, String... indicatorWords)
+    {
+        ArrayList<String> words = new ArrayList<String>();
+        for(String word:indicatorWords)
+        {
+            words.add(word);
+        }
+        CATEGORIES.put(name, words);
+    }
+
+    /**
+     * Reads a file containing one JSON tweet per line and counts category
+     * mentions per hour. A tweet contributes at most one count to a category,
+     * no matter how many of that category's words it contains.
+     *
+     * @param filename path of the input file, read as UTF-8
+     * @return a JSONObject with the keys "axisxstep", "axisystep",
+     *         "xcoordinates", "ycoordinates", "axisxlabels", "axisylabels" and
+     *         "data"; an empty JSONObject if the file could not be read
+     */
+    public JSONObject ExtractCategoryTrends(String filename)
+    {
+        JSONObject result = new JSONObject();
+        BufferedReader br = null;
+        try {
+            br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8"));
+            HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>();
+            String line;
+            while((line = br.readLine())!=null)
+            {
+                try {
+                    JSONObject jobj = new JSONObject(line);
+                    Date d = resolveTweetDate(jobj);
+                    if(d==null)
+                    {
+                        //unusable "created_at" value: skip the tweet
+                        continue;
+                    }
+                    String datestr = dayhoursdm.format(d);
+                    String text = jobj.getString("text").toLowerCase();
+                    countCategoryMentions(text, datestr, datecount);
+                } catch (JSONException ex) {
+                    LOGGER.log(Level.SEVERE, null, ex);
+                }
+            }
+            fillResult(result, toSortedBuckets(datecount));
+        } catch (IOException ex) {
+            LOGGER.log(Level.SEVERE, null, ex);
+        } finally {
+            //close on every path, including read failures, so the file handle is never leaked
+            if(br!=null)
+            {
+                try {
+                    br.close();
+                } catch (IOException ex) {
+                    LOGGER.log(Level.SEVERE, null, ex);
+                }
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Determines the publication time of a tweet. "created_at" is preferred,
+     * then the numeric "timestamp"; if neither is present the current time is used.
+     *
+     * @return the resolved date, or null when "created_at" is present but empty or unparsable
+     */
+    private Date resolveTweetDate(JSONObject jobj)
+    {
+        if(!jobj.isNull("created_at"))
+        {
+            String time = "";
+            try {
+                time = jobj.getString("created_at");
+            } catch (JSONException ex) {
+                LOGGER.log(Level.SEVERE, null, ex);
+            }
+            if(time.isEmpty())
+            {
+                return null;
+            }
+            try {
+                return twittersdm.parse(time);
+            } catch (ParseException ex) {
+                return null;
+            }
+        }
+        if(!jobj.isNull("timestamp"))
+        {
+            //fall back to "now" when the timestamp cannot be read as a long
+            long time = new Date().getTime();
+            try {
+                time = jobj.getLong("timestamp");
+            } catch (JSONException ex) {
+                LOGGER.log(Level.SEVERE, null, ex);
+            }
+            return new Date(time);
+        }
+        return new Date();
+    }
+
+    /** Adds one to the count of every category that has at least one indicator word in the text. */
+    private void countCategoryMentions(String text, String datestr, HashMap<String,HashMap<String,Integer>> datecount)
+    {
+        for(String category:CATEGORIES.keySet())
+        {
+            for(String word:CATEGORIES.get(category))
+            {
+                if(text.contains(word))
+                {
+                    HashMap<String,Integer> categorycount = datecount.get(datestr);
+                    if(categorycount==null)
+                    {
+                        categorycount = new HashMap<String,Integer>();
+                        datecount.put(datestr, categorycount);
+                    }
+                    Integer seen = categorycount.get(category);
+                    categorycount.put(category, seen==null ? 1 : seen+1);
+                    //one hit is enough for this category; move on to the next one
+                    break;
+                }
+            }
+        }
+    }
+
+    /** Converts the per-hour map into DateInfo objects sorted in the reverse of their natural order. */
+    private ArrayList<DateInfo> toSortedBuckets(HashMap<String,HashMap<String,Integer>> datecount)
+    {
+        ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>();
+        for(String date:datecount.keySet())
+        {
+            try {
+                Date d = dayhoursdm.parse(date);
+                DateInfo info = new DateInfo();
+                info.d = d;
+                info.catcounts = datecount.get(date);
+                dinfos.add(info);
+            } catch (ParseException ex) {
+                LOGGER.log(Level.SEVERE, null, ex);
+            }
+        }
+        Collections.sort(dinfos, Collections.reverseOrder());
+        return dinfos;
+    }
+
+    /** Writes the axis steps, coordinates, labels and data values for the sorted buckets into result. */
+    private void fillResult(JSONObject result, ArrayList<DateInfo> dinfos)
+    {
+        Set<String> catkeys = CATEGORIES.keySet();
+        putQuietly(result, "axisxstep", dinfos.size()-1);
+        putQuietly(result, "axisystep", CATEGORIES.size()-1);
+        JSONArray xcoordinates = new JSONArray();
+        JSONArray ycoordinates = new JSONArray();
+        JSONArray axisxlabels = new JSONArray();
+        JSONArray axisylabels = new JSONArray();
+        JSONArray data = new JSONArray();
+        for(String key:catkeys)
+        {
+            axisylabels.put(key);
+        }
+        //the date index is the x coordinate, the category index is the y coordinate
+        int dateindex = 0;
+        for(DateInfo date:dinfos)
+        {
+            axisxlabels.put(hoursdm.format(date.d));
+            HashMap<String,Integer> catcounts = date.catcounts;
+            int categoryindex = 0;
+            for(String key:catkeys)
+            {
+                xcoordinates.put(dateindex);
+                ycoordinates.put(categoryindex++);
+                if(catcounts.containsKey(key))
+                {
+                    data.put(catcounts.get(key));
+                }
+                else
+                {
+                    data.put(0);
+                }
+            }
+            dateindex++;
+        }
+        putQuietly(result, "xcoordinates", xcoordinates);
+        putQuietly(result, "ycoordinates", ycoordinates);
+        putQuietly(result, "axisxlabels", axisxlabels);
+        putQuietly(result, "axisylabels", axisylabels);
+        putQuietly(result, "data", data);
+    }
+
+    /** Stores one key/value pair; a failure is logged and does not prevent the remaining keys from being written. */
+    private void putQuietly(JSONObject target, String key, Object value)
+    {
+        try {
+            target.put(key, value);
+        } catch (JSONException ex) {
+            LOGGER.log(Level.SEVERE, null, ex);
+        }
+    }
+
+    /**
+     * Command-line entry point. The optional first argument is the input file;
+     * it is used only if it exists, otherwise {@link #DEF_INFILENAME} is read.
+     */
+    public static void main(String[] args)
+    {
+        EventSummaryExtractor ese = new EventSummaryExtractor();
+        String infilename = ese.DEF_INFILENAME;
+        if(args!=null&&args.length>=1&&!args[0].isEmpty())
+        {
+            File fl = new File(args[0]);
+            if(fl.exists())
+            {
+                infilename = args[0];
+            }
+        }
+        ese.InitializeCategories();
+        System.out.println(ese.ExtractCategoryTrends(infilename).toString());
+    }
+}
diff --git a/src/Chapter5/text/ExtractTopKeywords.java b/src/Chapter5/text/ExtractTopKeywords.java
new file mode 100644
index 0000000..8ab412a
--- /dev/null
+++ b/src/Chapter5/text/ExtractTopKeywords.java
@@ -0,0 +1,151 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter5.text;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import utils.Tags;
+import utils.TextUtils;
+
+/**
+ * Computes the most frequent keywords in a file of tweets (one JSON object per line).
+ */
+public class ExtractTopKeywords
+{
+
+    static final String DEF_INFILENAME = "ows.json";
+    static final int DEF_K = 60;
+
+    private static final Logger LOGGER = Logger.getLogger(ExtractTopKeywords.class.getName());
+
+    /**
+     * Extracts the most frequently occurring keywords from the tweets by processing them sequentially.
+     * Tokenization, including any stopword handling, is delegated to the supplied TextUtils object.
+     * @param inFilename File containing a list of tweets as JSON objects, one per line, read as UTF-8
+     * @param K Count of the top keywords to return
+     * @param ignoreHashtags passed through to TextUtils.TokenizeText
+     * @param ignoreUsernames passed through to TextUtils.TokenizeText
+     * @param tu TextUtils object used to tokenize each tweet
+     * @return a JSONArray of at most K JSONObjects in descending order of frequency. Each object contains two
+     *         elements "text" and "size" referring to the word and its frequency. The array is empty if the
+     *         file could not be opened.
+     */
+    public JSONArray GetTopKeywords(String inFilename, int K, boolean ignoreHashtags, boolean ignoreUsernames, TextUtils tu)
+    {
+        HashMap<String, Integer> words = new HashMap<String,Integer>();
+        BufferedReader br = null;
+        try{
+            br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
+            String line;
+            while((line = br.readLine())!=null)
+            {
+                try{
+                    JSONObject tweetobj = new JSONObject(line);
+                    if(tweetobj.isNull("text"))
+                    {
+                        continue;
+                    }
+                    //lower-case and collapse runs of whitespace before tokenizing
+                    String text = tweetobj.getString("text").toLowerCase().replaceAll("\\s+", " ");
+                    // Step 1: Tokenize the tweet into individual words and add their frequencies to the corpus totals
+                    HashMap<String,Integer> tokens = tu.TokenizeText(text,ignoreHashtags,ignoreUsernames);
+                    for(String key:tokens.keySet())
+                    {
+                        Integer seen = words.get(key);
+                        if(seen==null)
+                        {
+                            words.put(key, tokens.get(key));
+                        }
+                        else
+                        {
+                            words.put(key, seen+tokens.get(key));
+                        }
+                    }
+                }catch(JSONException ex)
+                {
+                    LOGGER.log(Level.SEVERE, null, ex);
+                }
+            }
+        }catch(IOException ex)
+        {
+            LOGGER.log(Level.SEVERE, null, ex);
+        }finally{
+            //br is still null when the file could not be opened; closing it unguarded would throw a NullPointerException
+            if(br!=null)
+            {
+                try {
+                    br.close();
+                } catch (IOException ex) {
+                    LOGGER.log(Level.SEVERE, null, ex);
+                }
+            }
+        }
+        ArrayList<Tags> tags = new ArrayList<Tags>();
+        for(String key:words.keySet())
+        {
+            Tags tag = new Tags();
+            tag.setKey(key);
+            tag.setValue(words.get(key));
+            tags.add(tag);
+        }
+        // Step 2: Sort the words in descending order of frequency
+        Collections.sort(tags, Collections.reverseOrder());
+        JSONArray cloudwords = new JSONArray();
+        int numwords = Math.min(K, tags.size());
+        for(int i=0;i<numwords;i++)
+        {
+            Tags tag = tags.get(i);
+            try{
+                JSONObject wordfreq = new JSONObject();
+                wordfreq.put("text", tag.getKey());
+                wordfreq.put("size",tag.getValue());
+                cloudwords.put(wordfreq);
+            }catch(JSONException ex)
+            {
+                LOGGER.log(Level.SEVERE, null, ex);
+            }
+        }
+        return cloudwords;
+    }
+
+    /**
+     * Command-line entry point. args[0] (optional) is the input file, used only if it exists;
+     * args[1] (optional) is K. Hashtags are kept and usernames are ignored.
+     */
+    public static void main(String[] args)
+    {
+        ExtractTopKeywords etk = new ExtractTopKeywords();
+
+        //Initialize the TextUtils class which handles all the processing of text.
+        TextUtils tu = new TextUtils();
+        tu.LoadStopWords("C:/tweettracker/stopwords.txt");
+        String infilename = DEF_INFILENAME;
+        int K = DEF_K;
+        if(args!=null)
+        {
+            if(args.length>=1&&!args[0].isEmpty())
+            {
+                File fl = new File(args[0]);
+                if(fl.exists())
+                {
+                    infilename = args[0];
+                }
+            }
+            if(args.length>=2&&!args[1].isEmpty())
+            {
+                try{
+                    K = Integer.parseInt(args[1]);
+                }catch(NumberFormatException ex)
+                {
+                    //keep the default K when the argument is not a number
+                    LOGGER.log(Level.SEVERE, null, ex);
+                }
+            }
+        }
+        System.out.println(etk.GetTopKeywords(infilename, K, false,true,tu));
+    }
+
+}
diff --git a/src/Chapter5/trends/ControlChartExample.java b/src/Chapter5/trends/ControlChartExample.java
new file mode 100644
index 0000000..2df814f
--- /dev/null
+++ b/src/Chapter5/trends/ControlChartExample.java
@@ -0,0 +1,144 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter5.trends;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+public class ControlChartExample
+{
+ static final String DEF_INFILENAME = "ows.json";
+ static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm");
+
+ public JSONArray GenerateDataTrend(String inFilename)
+ {
+ BufferedReader br = null;
+ JSONArray result = new JSONArray();
+ HashMap<String,Integer> datecount = new HashMap<String,Integer>();
+ try{
+ br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
+ String temp = "";
+ while((temp = br.readLine())!=null)
+ {
+ try {
+ JSONObject jobj = new JSONObject(temp);
+ long timestamp = jobj.getLong("timestamp");
+ Date d = new Date(timestamp);
+ String strdate = SDM.format(d);
+ if(datecount.containsKey(strdate))
+ {
+ datecount.put(strdate, datecount.get(strdate)+1);
+ }
+ else
+ {
+ datecount.put(strdate, 1);
+ }
+ } catch (JSONException ex) {
+ Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex);
+ }
+ }
+ ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>();
+ Set<String> keys = datecount.keySet();
+ for(String key:keys)
+ {
+ DateInfo dinfo = new DateInfo();
+ try {
+ dinfo.d = SDM.parse(key);
+ } catch (ParseException ex) {
+ ex.printStackTrace();
+ continue;
+ }
+ dinfo.count = datecount.get(key);
+ dinfos.add(dinfo);
+ }
+ double mean = this.GetMean(dinfos);
+ double stddev = this.GetStandardDev(dinfos, mean);
+ Collections.sort(dinfos);
+ //Normalize the trend by subtracting the mean and dividing by standard deviation to get a distribution with 0 mean and a standard deviation of 1
+ for(DateInfo dinfo:dinfos)
+ {
+ try{
+ JSONObject jobj = new JSONObject();
+ jobj.put("date", SDM.format(dinfo.d));
+ jobj.put("count", (dinfo.count-mean)/stddev);
+ jobj.put("mean", 0);
+ jobj.put("stdev+3", 3);
+ jobj.put("stdev-3", -3);
+ result.put(jobj);
+ }catch(JSONException ex)
+ {
+ ex.printStackTrace();
+ }
+ }
+ }catch(IOException ex)
+ {
+ ex.printStackTrace();
+ }finally{
+ try {
+ br.close();
+ } catch (IOException ex) {
+ Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex);
+ }
+ }
+ return result;
+ }
+
+ public double GetStandardDev(ArrayList<DateInfo> dateinfos,double mean)
+ {
+ double intsum = 0;
+ int numperiods = dateinfos.size();
+ for(DateInfo dinfo:dateinfos)
+ {
+ intsum+=Math.pow((dinfo.count - mean),2);
+ }
+// System.out.println(Math.sqrt((double)intsum/timePeriodCounts.size()));
+ return Math.sqrt((double)intsum/numperiods);
+ }
+
+ public double GetMean(ArrayList<DateInfo> dateinfos)
+ {
+ int numperiods = dateinfos.size();
+ int sum = 0;
+ for(DateInfo dinfo:dateinfos)
+ {
+ sum +=dinfo.count;
+ }
+// System.out.println((double)sum/numPeriods);
+ return ((double)sum/numperiods);
+ }
+
+ public static void main(String[] args)
+ {
+ ControlChartExample cce = new ControlChartExample();
+ String infilename = DEF_INFILENAME;
+ if(args!=null)
+ {
+ if(args.length>=1&&!args[0].isEmpty())
+ {
+ File fl = new File(args[0]);
+ if(fl.exists())
+ {
+ infilename = args[0];
+ }
+ }
+ }
+ System.out.println(cce.GenerateDataTrend(infilename));
+ }
+
+}
diff --git a/src/Chapter5/trends/DateInfo.java b/src/Chapter5/trends/DateInfo.java
new file mode 100644
index 0000000..209f4a3
--- /dev/null
+++ b/src/Chapter5/trends/DateInfo.java
@@ -0,0 +1,29 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter5.trends;
+
+import java.util.Date;
+
+/**
+ * Pairs a date with a count and orders instances chronologically by date.
+ */
+public class DateInfo implements Comparable
+{
+    /** The date this entry refers to. */
+    public Date d;
+    /** The count recorded for {@link #d}. */
+    public int count;
+
+    /**
+     * Orders two entries by their dates, earliest first.
+     *
+     * @param o the entry to compare with; cast to DateInfo
+     * @return -1 if this date is earlier, 1 if it is later, 0 if both dates are equal
+     */
+    public int compareTo(Object o) {
+        final Date other = ((DateInfo) o).d;
+        if(this.d.before(other))
+        {
+            return -1;
+        }
+        if(this.d.after(other))
+        {
+            return 1;
+        }
+        return 0;
+    }
+}
diff --git a/src/Chapter5/trends/ExtractDatasetTrend.java b/src/Chapter5/trends/ExtractDatasetTrend.java
new file mode 100644
index 0000000..dad7f27
--- /dev/null
+++ b/src/Chapter5/trends/ExtractDatasetTrend.java
@@ -0,0 +1,120 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter5.trends;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+public class ExtractDatasetTrend
+{
+ static final String DEF_INFILENAME = "ows.json";
+ // Date pattern used to count the volume of tweets
+ final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm");
+
+ public JSONArray GenerateDataTrend(String inFilename)
+ {
+ BufferedReader br = null;
+ JSONArray result = new JSONArray();
+ HashMap<String,Integer> datecount = new HashMap<String,Integer>();
+ try{
+ br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
+ String temp = "";
+ while((temp = br.readLine())!=null)
+ {
+ try {
+ JSONObject jobj = new JSONObject(temp);
+ long timestamp = jobj.getLong("timestamp");
+ Date d = new Date(timestamp);
+ String strdate = SDM.format(d);
+ if(datecount.containsKey(strdate))
+ {
+ datecount.put(strdate, datecount.get(strdate)+1);
+ }
+ else
+ {
+ datecount.put(strdate, 1);
+ }
+ } catch (JSONException ex) {
+ Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex);
+ }
+ }
+ /** DateInfo consists of a date string and the corresponding count.
+ * It also implements a Comparator for sorting by date
+ */
+ ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>();
+ Set<String> keys = datecount.keySet();
+ for(String key:keys)
+ {
+ DateInfo dinfo = new DateInfo();
+ try {
+ dinfo.d = SDM.parse(key);
+ } catch (ParseException ex) {
+ ex.printStackTrace();
+ continue;
+ }
+ dinfo.count = datecount.get(key);
+ dinfos.add(dinfo);
+ }
+ Collections.sort(dinfos);
+ // Format and return the date string and the corresponding count
+ for(DateInfo dinfo:dinfos)
+ {
+ try{
+ JSONObject jobj = new JSONObject();
+ jobj.put("date", SDM.format(dinfo.d));
+ jobj.put("count", dinfo.count);
+ result.put(jobj);
+ }catch(JSONException ex)
+ {
+ ex.printStackTrace();
+ }
+ }
+ }catch(IOException ex)
+ {
+ ex.printStackTrace();
+ }finally{
+ try {
+ br.close();
+ } catch (IOException ex) {
+ Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex);
+ }
+ }
+ return result;
+ }
+
+ public static void main(String[] args)
+ {
+ ExtractDatasetTrend edt = new ExtractDatasetTrend();
+
+ String infilename = DEF_INFILENAME;
+ if(args!=null)
+ {
+ if(args.length>=1&&!args[0].isEmpty())
+ {
+ File fl = new File(args[0]);
+ if(fl.exists())
+ {
+ infilename = args[0];
+ }
+ }
+ }
+ System.out.println(edt.GenerateDataTrend(infilename));
+ }
+
+}
diff --git a/src/Chapter5/trends/SparkLineExample.java b/src/Chapter5/trends/SparkLineExample.java
new file mode 100644
index 0000000..4a0164b
--- /dev/null
+++ b/src/Chapter5/trends/SparkLineExample.java
@@ -0,0 +1,163 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter5.trends;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+/**
+ * Produces one hourly time series per keyword, counting the tweets whose text contains the keyword.
+ *
+ * SDM is a shared SimpleDateFormat, which is not thread-safe; do not call
+ * GenerateDataTrend concurrently.
+ */
+public class SparkLineExample
+{
+    static final String DEF_INFILENAME = "ows.json";
+    static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH");
+
+    private static final Logger LOGGER = Logger.getLogger(SparkLineExample.class.getName());
+
+    /**
+     * Reads a file with one JSON tweet per line and counts, per hour, the tweets whose lower-cased
+     * "text" contains each keyword. Hours in which no keyword matched are not part of the series.
+     *
+     * @param inFilename path of the input file, read as UTF-8
+     * @param keywords the keywords to track; matched as substrings of the lower-cased text
+     * @return a JSONObject mapping every keyword to a JSONArray of its hourly counts in
+     *         chronological order; empty if the file could not be read
+     */
+    public JSONObject GenerateDataTrend(String inFilename, ArrayList<String> keywords)
+    {
+        BufferedReader br = null;
+        JSONObject result = new JSONObject();
+        HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>();
+        try{
+            br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
+            String line;
+            while((line = br.readLine())!=null)
+            {
+                try {
+                    JSONObject jobj = new JSONObject(line);
+                    String text = jobj.getString("text").toLowerCase();
+                    String strdate = SDM.format(new Date(jobj.getLong("timestamp")));
+                    for(String word:keywords)
+                    {
+                        if(!text.contains(word))
+                        {
+                            continue;
+                        }
+                        HashMap<String,Integer> wordcount = datecount.get(strdate);
+                        if(wordcount==null)
+                        {
+                            wordcount = new HashMap<String,Integer>();
+                            datecount.put(strdate, wordcount);
+                        }
+                        Integer seen = wordcount.get(word);
+                        wordcount.put(word, seen==null ? 1 : seen+1);
+                    }
+                } catch (JSONException ex) {
+                    LOGGER.log(Level.SEVERE, null, ex);
+                }
+            }
+            //sort the dates
+            ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>();
+            for(String key:datecount.keySet())
+            {
+                TCDateInfo dinfo = new TCDateInfo();
+                try {
+                    dinfo.d = SDM.parse(key);
+                } catch (ParseException ex) {
+                    LOGGER.log(Level.SEVERE, null, ex);
+                    continue;
+                }
+                dinfo.wordcount = datecount.get(key);
+                dinfos.add(dinfo);
+            }
+            Collections.sort(dinfos);
+            //one series per keyword, indexed like the keywords list
+            JSONArray[] tseriesvals = new JSONArray[keywords.size()];
+            for(int i=0;i<tseriesvals.length;i++)
+            {
+                tseriesvals[i] = new JSONArray();
+            }
+            //prepare the output
+            for(TCDateInfo date:dinfos)
+            {
+                HashMap<String,Integer> wordcount = date.wordcount;
+                for(int k=0;k<keywords.size();k++)
+                {
+                    Integer seen = wordcount.get(keywords.get(k));
+                    tseriesvals[k].put(seen==null ? 0 : seen);
+                }
+            }
+            for(int k=0;k<keywords.size();k++)
+            {
+                try {
+                    result.put(keywords.get(k), tseriesvals[k]);
+                } catch (JSONException ex) {
+                    LOGGER.log(Level.SEVERE, null, ex);
+                }
+            }
+        }catch(IOException ex)
+        {
+            LOGGER.log(Level.SEVERE, null, ex);
+        }finally{
+            //br is still null when the file could not be opened; closing it unguarded would throw a NullPointerException
+            if(br!=null)
+            {
+                try {
+                    br.close();
+                } catch (IOException ex) {
+                    LOGGER.log(Level.SEVERE, null, ex);
+                }
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Command-line entry point. args[0] (optional) is the input file, used only if it exists;
+     * every further non-empty argument is a keyword. Without keywords "#nypd" and "#ows" are tracked.
+     */
+    public static void main(String[] args)
+    {
+        SparkLineExample sle = new SparkLineExample();
+        ArrayList<String> words = new ArrayList<String>();
+        String infilename = DEF_INFILENAME;
+        if(args!=null)
+        {
+            if(args.length>=1&&!args[0].isEmpty())
+            {
+                File fl = new File(args[0]);
+                if(fl.exists())
+                {
+                    infilename = args[0];
+                }
+            }
+            for(int i=1;i<args.length;i++)
+            {
+                if(args[i]!=null&&!args[i].isEmpty())
+                {
+                    words.add(args[i]);
+                }
+            }
+        }
+        if(words.isEmpty())
+        {
+            words.add("#nypd");
+            words.add("#ows");
+        }
+        System.out.println(sle.GenerateDataTrend(infilename,words));
+    }
+
+}
diff --git a/src/Chapter5/trends/TCDateInfo.java b/src/Chapter5/trends/TCDateInfo.java
new file mode 100644
index 0000000..88450e9
--- /dev/null
+++ b/src/Chapter5/trends/TCDateInfo.java
@@ -0,0 +1,31 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter5.trends;
+
+import java.util.Date;
+import java.util.HashMap;
+
+/**
+ * Pairs a date with per-word counts and orders instances chronologically by date.
+ */
+public class TCDateInfo implements Comparable
+{
+    /** The date this entry refers to. */
+    public Date d;
+    /** Count per word recorded for {@link #d}; starts out empty. */
+    public HashMap<String,Integer> wordcount = new HashMap<String,Integer>();
+
+    /**
+     * Orders two entries by their dates, earliest first.
+     *
+     * @param o the entry to compare with; cast to TCDateInfo
+     * @return -1 if this date is earlier, 1 if it is later, 0 if both dates are equal
+     */
+    public int compareTo(Object o) {
+        final Date other = ((TCDateInfo) o).d;
+        if(this.d.before(other))
+        {
+            return -1;
+        }
+        if(this.d.after(other))
+        {
+            return 1;
+        }
+        return 0;
+    }
+
+}
diff --git a/src/Chapter5/trends/TrendComparisonExample.java b/src/Chapter5/trends/TrendComparisonExample.java
new file mode 100644
index 0000000..20991cd
--- /dev/null
+++ b/src/Chapter5/trends/TrendComparisonExample.java
@@ -0,0 +1,155 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package Chapter5.trends;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Set;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+public class TrendComparisonExample
+{
+ static final String DEF_INFILENAME = "ows.json";
+ static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm");
+
+ public JSONArray GenerateDataTrend(String inFilename, ArrayList<String> keywords)
+ {
+ BufferedReader br = null;
+ JSONArray result = new JSONArray();
+ HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>();
+ try{
+ br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8"));
+ String temp = "";
+ while((temp = br.readLine())!=null)
+ {
+ try {
+ JSONObject jobj = new JSONObject(temp);
+ String text = jobj.getString("text").toLowerCase();
+ long timestamp = jobj.getLong("timestamp");
+ Date d = new Date(timestamp);
+ String strdate = SDM.format(d);
+ for(String word:keywords)
+ {
+ if(text.contains(word))
+ {
+ HashMap<String,Integer> wordcount = new HashMap<String,Integer>();
+ if(datecount.containsKey(strdate))
+ {
+ wordcount = datecount.get(strdate);
+ }
+ if(wordcount.containsKey(word))
+ {
+ wordcount.put(word, wordcount.get(word)+1);
+ }
+ else
+ {
+ wordcount.put(word, 1);
+ }
+ //update the wordcount for the specific date
+ datecount.put(strdate, wordcount);
+ }
+ }
+ } catch (JSONException ex) {
+ Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex);
+ }
+ }
+ //sort the dates
+ ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>();
+ Set<String> keys = datecount.keySet();
+ for(String key:keys)
+ {
+ TCDateInfo dinfo = new TCDateInfo();
+ try {
+ dinfo.d = SDM.parse(key);
+ } catch (ParseException ex) {
+ ex.printStackTrace();
+ continue;
+ }
+ dinfo.wordcount = datecount.get(key);
+ dinfos.add(dinfo);
+ }
+ Collections.sort(dinfos);
+ //prepare the output
+ for(TCDateInfo date:dinfos)
+ {
+ JSONObject item = new JSONObject();
+ String strdate = SDM.format(date.d);
+ try{
+ item.put("date",strdate);
+ HashMap<String,Integer> wordcount = date.wordcount;
+ for(String word:keywords)
+ {
+ if(wordcount.containsKey(word))
+ {
+ item.put(word, wordcount.get(word));
+ }
+ else
+ {
+ item.put(word, 0);
+ }
+ }
+ result.put(item);
+ }catch(JSONException ex)
+ {
+ ex.printStackTrace();
+ }
+ }
+ }catch(IOException ex)
+ {
+ ex.printStackTrace();
+ }finally{
+ try {
+ br.close();
+ } catch (IOException ex) {
+ Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex);
+ }
+ }
+ return result;
+ }
+
+ public static void main(String[] args)
+ {
+ TrendComparisonExample tce = new TrendComparisonExample();
+ ArrayList<String> words = new ArrayList<String>();
+ String infilename = DEF_INFILENAME;
+ if(args!=null)
+ {
+ if(args.length>=1&&!args[0].isEmpty())
+ {
+ File fl = new File(args[0]);
+ if(fl.exists())
+ {
+ infilename = args[0];
+ }
+ }
+ for(int i=1;i<args.length;i++)
+ {
+ if(args[i]!=null&&!args[i].isEmpty())
+ {
+ words.add(args[i]);
+ }
+ }
+ }
+ if(words.isEmpty())
+ {
+ words.add("#nypd");
+ words.add("#ows");
+ }
+ System.out.println(tce.GenerateDataTrend(infilename,words));
+ }
+
+}
diff --git a/src/utils/OAuthUtils.java b/src/utils/OAuthUtils.java
new file mode 100644
index 0000000..3cd5703
--- /dev/null
+++ b/src/utils/OAuthUtils.java
@@ -0,0 +1,21 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package utils;
+
+/**
+ * Holds the OAuth application credentials and the Twitter OAuth endpoint URLs as constants.
+ * NOTE(review): the consumer key and secret are committed in plain text. They look like
+ * real application credentials; consider revoking them and loading the values from
+ * configuration instead of source.
+ */
+public class OAuthUtils
+{
+ //Please replace the Consumer key and secret to the one representing your application.
+ public static final String CONSUMER_SECRET = "PPCTObQGbGm1gkNvdJiTPKhoTksG787RTBwardkbM";
+ public static final String CONSUMER_KEY = "L8CRRCUoRl3xcZ9bdrfUw";
+ // OAuth endpoints: request token, user authorization and access token.
+ public static final String REQUEST_TOKEN_URL = "https://twitter.com/oauth/request_token";
+ public static final String AUTHORIZE_URL = "https://twitter.com/oauth/authorize";
+ public static final String ACCESS_TOKEN_URL = "https://twitter.com/oauth/access_token";
+
+
+ //Use a JFIG file for all the configurations
+ /**
+  * Placeholder with an empty body; it currently reads nothing.
+  */
+ public void ReadApplicationIdentity()
+ {
+
+ }
+}
diff --git a/src/utils/Tags.java b/src/utils/Tags.java
new file mode 100644
index 0000000..f1a5b56
--- /dev/null
+++ b/src/utils/Tags.java
@@ -0,0 +1,52 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+package utils;
+
+/**
+ * A key paired with a numeric value. Instances are ordered by ascending value.
+ *
+ * @author skumar34
+ */
+public class Tags implements Comparable{
+    public String key;
+    public double value;
+
+    /** Creates a tag with a null key and a value of 0. */
+    public Tags()
+    {
+
+    }
+
+    /**
+     * @param key the key of the tag
+     * @param value the numeric value the tag is ordered by
+     */
+    public Tags(String key, double value) {
+        this.key = key;
+        this.value = value;
+    }
+
+    /**
+     * Orders tags by ascending value. Double.compare gives a consistent total order:
+     * NaN sorts after every other value instead of comparing as "equal" to everything,
+     * which would break the Comparable contract and can make sorting fail.
+     *
+     * @param obj the tag to compare with; cast to Tags
+     * @return a negative number, zero or a positive number if this value is smaller than,
+     *         equal to or larger than the other value
+     */
+    public int compareTo(Object obj)
+    {
+        Tags other = (Tags) obj;
+        return Double.compare(this.value, other.value);
+    }
+
+    public String getKey() {
+        return key;
+    }
+
+    public void setKey(String key) {
+        this.key = key;
+    }
+
+    public double getValue() {
+        return value;
+    }
+
+    public void setValue(double value) {
+        this.value = value;
+    }
+}
diff --git a/src/utils/TextUtils.java b/src/utils/TextUtils.java
new file mode 100644
index 0000000..764ce11
--- /dev/null
+++ b/src/utils/TextUtils.java
@@ -0,0 +1,212 @@
+/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University
+ * @author shamanth
+ */
+package utils;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Text helpers for tweets: stop-word loading, tokenizing, and detection or
+ * removal of retweet markers, URLs, hashtags and user mentions.
+ */
+public class TextUtils {
+
+    private static final Logger LOGGER = Logger.getLogger(TextUtils.class.getName());
+
+    // Regular expressions are compiled once instead of on every call.
+    private static final Pattern TOKEN_PUNCTUATION = Pattern.compile("[\"'.;,]");
+    private static final Pattern RETWEET_PREFIX = Pattern.compile("^rt @[a-z_0-9]+");
+    // The leading \b keeps words that merely end in "rt" (e.g. "start @bob") intact.
+    private static final Pattern RETWEET_MARKER = Pattern.compile("\\b(?:rt|RT) @[a-z_A-Z0-9]+");
+    private static final Pattern URL = Pattern.compile("https?://[a-zA-Z0-9\\./]+");
+    private static final Pattern HASHTAG = Pattern.compile("#[a-zA-Z_0-9]+");
+    private static final Pattern MENTION = Pattern.compile("@[a-zA-Z_0-9]+");
+
+    //holds a list of stop words to be removed when generating word clouds etc.
+    HashSet<String> STOPWORDS = new HashSet<String>();
+
+    // Regular expression on which TokenizeText splits its input.
+    String SEPARATOR = " ";
+
+    /**
+     * Loads stop words from a UTF-8 text file into {@link #STOPWORDS}.
+     * Each line may hold several comma-separated words; every word is trimmed
+     * and lower-cased before it is stored. I/O problems (including a missing
+     * file) are logged and not propagated.
+     *
+     * @param filename path of the stop-word file; {@code null} or empty is a no-op
+     */
+    public void LoadStopWords(String filename) {
+        if (filename == null || filename.isEmpty()) {
+            return;
+        }
+        BufferedReader reader = null;
+        try {
+            reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF8"));
+            String line;
+            while ((line = reader.readLine()) != null) {
+                for (String word : line.split(",")) {
+                    // Trim so that "a, b" yields "b" rather than " b", which no token could match.
+                    String stopWord = word.trim().toLowerCase();
+                    if (!stopWord.isEmpty()) {
+                        STOPWORDS.add(stopWord);
+                    }
+                }
+            }
+        } catch (IOException ex) {
+            // Also covers FileNotFoundException and UnsupportedEncodingException.
+            LOGGER.log(Level.SEVERE, null, ex);
+        } finally {
+            // The reader is still null when the file could not be opened.
+            if (reader != null) {
+                try {
+                    reader.close();
+                } catch (IOException ex) {
+                    LOGGER.log(Level.SEVERE, null, ex);
+                }
+            }
+        }
+    }
+
+    /**
+     * Splits a tweet/text on {@link #SEPARATOR} and counts the remaining tokens.
+     * Double quotes, single quotes, periods, semicolons and commas are stripped
+     * from each token first. Tokens are then dropped when they are two characters
+     * or shorter, start with {@code &} or {@code http}, or are stop words
+     * (checked case-insensitively, since stop words are stored lower-cased).
+     *
+     * @param text the text to tokenize
+     * @param ignoreHashtags when {@code true}, tokens starting with {@code #} are skipped
+     * @param ignoreUsernames when {@code true}, tokens starting with {@code @} are skipped
+     * @return a map from each kept token (original case) to its number of occurrences
+     */
+    public HashMap<String,Integer> TokenizeText(String text, boolean ignoreHashtags, boolean ignoreUsernames) {
+        HashMap<String,Integer> words = new HashMap<String,Integer>();
+        for (String rawToken : text.split(SEPARATOR)) {
+            String token = TOKEN_PUNCTUATION.matcher(rawToken).replaceAll("");
+            if (token.length() <= 2 || token.startsWith("&") || token.startsWith("http")
+                    || STOPWORDS.contains(token.toLowerCase())) {
+                continue;
+            }
+            if (ignoreHashtags && token.startsWith("#")) {
+                continue;
+            }
+            if (ignoreUsernames && token.startsWith("@")) {
+                continue;
+            }
+            Integer count = words.get(token);
+            words.put(token, count == null ? 1 : count + 1);
+        }
+        return words;
+    }
+
+    /**
+     * Checks whether the tweet is a retweet, i.e. whether the text starts with
+     * the pattern {@code rt @username}. Expects the tweet text to be in lowercase.
+     *
+     * @param text the lower-cased tweet text
+     * @return {@code true} if the text starts with a retweet marker
+     */
+    public static boolean IsTweetRT(String text) {
+        return RETWEET_PREFIX.matcher(text).find();
+    }
+
+    /**
+     * Checks whether the text contains an {@code http://} or {@code https://} hyperlink.
+     *
+     * @param text the text to inspect
+     * @return {@code true} if a hyperlink is found
+     */
+    public static boolean ContainsURL(String text) {
+        return URL.matcher(text).find();
+    }
+
+    /**
+     * Extracts the hashtags from the text. Hashtags consist of {@code #} followed
+     * by letters, digits or underscores; they are returned lower-cased, without
+     * duplicates, in order of first appearance.
+     *
+     * @param text the text to scan
+     * @return the distinct lower-cased hashtags, each including its leading {@code #}
+     */
+    public static ArrayList<String> GetHashTags(String text) {
+        ArrayList<String> tags = new ArrayList<String>();
+        Matcher matcher = HASHTAG.matcher(text);
+        while (matcher.find()) {
+            String tag = matcher.group().toLowerCase();
+            if (!tags.contains(tag)) {
+                tags.add(tag);
+            }
+        }
+        return tags;
+    }
+
+    /**
+     * Removes single quotes, double quotes, the literal {@code &quot;} and
+     * backslashes, replaces every line break (CRLF, LF or CR) with a space,
+     * and trims the result.
+     *
+     * @param text the text to clean
+     * @return the cleaned text
+     */
+    public static String GetCleanText(String text) {
+        text = text.replaceAll("'|\"|&quot;", "");
+        text = text.replaceAll("\\\\", "");
+        text = text.replaceAll("\r\n|\n|\r", " ");
+        text = text.trim();
+        return text;
+    }
+
+    /**
+     * Replaces every retweet marker ({@code rt @username} or {@code RT @username}
+     * starting at a word boundary) with a space, removes all colons, and trims
+     * the result.
+     *
+     * @param tweet the tweet text
+     * @return the text without retweet markers and colons
+     */
+    public static String RemoveRTElements(String tweet) {
+        String text = RETWEET_MARKER.matcher(tweet).replaceAll(" ");
+        text = text.replaceAll(":","");
+        return text.trim();
+    }
+
+    /**
+     * Removes all hashtags, URLs and usernames from the tweet text, and then the
+     * characters {@code : ? . ; < > ( )}. Surrounding whitespace is left as is.
+     *
+     * @param tweet the tweet text
+     * @return the text with those elements removed
+     */
+    public static String RemoveTwitterElements(String tweet) {
+        String stripped = HASHTAG.matcher(tweet).replaceAll("");
+        stripped = URL.matcher(stripped).replaceAll("");
+        stripped = MENTION.matcher(stripped).replaceAll("");
+        return stripped.replaceAll("[:?\\.;<>()]", "");
+    }
+
+}
diff --git a/streaming/streaming.config b/streaming/streaming.config
new file mode 100644
index 0000000..d6e27cb
--- /dev/null
+++ b/streaming/streaming.config
@@ -0,0 +1,3 @@
+#morsi #egypt #tahrir #june30 #scaf
+-118.79,32.49,-115.23,34.67
+15127356 20627637
\ No newline at end of file