diff options
96 files changed, 7339 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bae604d --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +/nbproject/private/ +/build/ +/dist/ + +# JaCoCO test coverage tool +.jacocoverage/ +jacoco.exec-* + +# Editor temp files, diff, etc. +*~ +.*.sw? +*.orig diff --git a/build.xml b/build.xml new file mode 100644 index 0000000..24aba47 --- /dev/null +++ b/build.xml @@ -0,0 +1,73 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- You may freely edit this file. See commented blocks below for --> +<!-- some examples of how to customize the build. --> +<!-- (If you delete it and reopen the project it will be recreated.) --> +<!-- By default, only the Clean and Build commands use this build script. --> +<!-- Commands such as Run, Debug, and Test only use this build script if --> +<!-- the Compile on Save feature is turned off for the project. --> +<!-- You can turn off the Compile on Save (or Deploy on Save) setting --> +<!-- in the project's Project Properties dialog box.--> +<project name="TwitterDataAnalytics" default="default" basedir="."> + <description>Builds, tests, and runs the project TwitterDataAnalytics.</description> + <import file="nbproject/build-impl.xml"/> + <!-- + + There exist several targets which are by default empty and which can be + used for execution of your tasks. These targets are usually executed + before and after some main targets. 
They are: + + -pre-init: called before initialization of project properties + -post-init: called after initialization of project properties + -pre-compile: called before javac compilation + -post-compile: called after javac compilation + -pre-compile-single: called before javac compilation of single file + -post-compile-single: called after javac compilation of single file + -pre-compile-test: called before javac compilation of JUnit tests + -post-compile-test: called after javac compilation of JUnit tests + -pre-compile-test-single: called before javac compilation of single JUnit test + -post-compile-test-single: called after javac compilation of single JUunit test + -pre-jar: called before JAR building + -post-jar: called after JAR building + -post-clean: called after cleaning build products + + (Targets beginning with '-' are not intended to be called on their own.) + + Example of inserting an obfuscator after compilation could look like this: + + <target name="-post-compile"> + <obfuscate> + <fileset dir="${build.classes.dir}"/> + </obfuscate> + </target> + + For list of available properties check the imported + nbproject/build-impl.xml file. + + + Another way to customize the build is by overriding existing main targets. 
+ The targets of interest are: + + -init-macrodef-javac: defines macro for javac compilation + -init-macrodef-junit: defines macro for junit execution + -init-macrodef-debug: defines macro for class debugging + -init-macrodef-java: defines macro for class execution + -do-jar: JAR building + run: execution of project + -javadoc-build: Javadoc generation + test-report: JUnit report generation + + An example of overriding the target for project execution could look like this: + + <target name="run" depends="TwitterDataAnalytics-impl.jar"> + <exec dir="bin" executable="launcher.exe"> + <arg file="${dist.jar}"/> + </exec> + </target> + + Notice that the overridden target depends on the jar target and not only on + the compile target as the regular run target does. Again, for a list of available + properties which you can use, check the target you are overriding in the + nbproject/build-impl.xml file. + + --> +</project> diff --git a/lib/CopyLibs/org-netbeans-modules-java-j2seproject-copylibstask.jar b/lib/CopyLibs/org-netbeans-modules-java-j2seproject-copylibstask.jar Binary files differnew file mode 100644 index 0000000..ff1abcc --- /dev/null +++ b/lib/CopyLibs/org-netbeans-modules-java-j2seproject-copylibstask.jar diff --git a/lib/collections-generic-4.01.jar b/lib/collections-generic-4.01.jar Binary files differnew file mode 100644 index 0000000..92d009c --- /dev/null +++ b/lib/collections-generic-4.01.jar diff --git a/lib/colt-1.2.0.jar b/lib/colt-1.2.0.jar Binary files differnew file mode 100644 index 0000000..a7192f6 --- /dev/null +++ b/lib/colt-1.2.0.jar diff --git a/lib/commons-codec-1.7.jar b/lib/commons-codec-1.7.jar Binary files differnew file mode 100644 index 0000000..efa7f72 --- /dev/null +++ b/lib/commons-codec-1.7.jar diff --git a/lib/commons-httpclient-3.1_1.jar b/lib/commons-httpclient-3.1_1.jar Binary files differnew file mode 100644 index 0000000..7c59774 --- /dev/null +++ b/lib/commons-httpclient-3.1_1.jar diff --git a/lib/commons-lang-2.6.jar 
b/lib/commons-lang-2.6.jar Binary files differnew file mode 100644 index 0000000..98467d3 --- /dev/null +++ b/lib/commons-lang-2.6.jar diff --git a/lib/commons-logging-1.1.1.jar b/lib/commons-logging-1.1.1.jar Binary files differnew file mode 100644 index 0000000..8758a96 --- /dev/null +++ b/lib/commons-logging-1.1.1.jar diff --git a/lib/concurrent-1.3.4.jar b/lib/concurrent-1.3.4.jar Binary files differnew file mode 100644 index 0000000..551f347 --- /dev/null +++ b/lib/concurrent-1.3.4.jar diff --git a/lib/gson-2.2.4.jar b/lib/gson-2.2.4.jar Binary files differnew file mode 100644 index 0000000..9478253 --- /dev/null +++ b/lib/gson-2.2.4.jar diff --git a/lib/httpclient-4.2.1.jar b/lib/httpclient-4.2.1.jar Binary files differnew file mode 100644 index 0000000..1d52333 --- /dev/null +++ b/lib/httpclient-4.2.1.jar diff --git a/lib/httpcore-4.2.1.jar b/lib/httpcore-4.2.1.jar Binary files differnew file mode 100644 index 0000000..16d75e1 --- /dev/null +++ b/lib/httpcore-4.2.1.jar diff --git a/lib/j3d-core-1.3.1.jar b/lib/j3d-core-1.3.1.jar Binary files differnew file mode 100644 index 0000000..cfe6364 --- /dev/null +++ b/lib/j3d-core-1.3.1.jar diff --git a/lib/jfig-1.5.2.jar b/lib/jfig-1.5.2.jar Binary files differnew file mode 100644 index 0000000..d671f83 --- /dev/null +++ b/lib/jfig-1.5.2.jar diff --git a/lib/json.jar b/lib/json.jar Binary files differnew file mode 100644 index 0000000..5a93e51 --- /dev/null +++ b/lib/json.jar diff --git a/lib/jung-3d-2.0.1.jar b/lib/jung-3d-2.0.1.jar Binary files differnew file mode 100644 index 0000000..05c3f18 --- /dev/null +++ b/lib/jung-3d-2.0.1.jar diff --git a/lib/jung-3d-demos-2.0.1.jar b/lib/jung-3d-demos-2.0.1.jar Binary files differnew file mode 100644 index 0000000..10fd834 --- /dev/null +++ b/lib/jung-3d-demos-2.0.1.jar diff --git a/lib/jung-algorithms-2.0.1.jar b/lib/jung-algorithms-2.0.1.jar Binary files differnew file mode 100644 index 0000000..5b98f9c --- /dev/null +++ b/lib/jung-algorithms-2.0.1.jar diff --git 
a/lib/jung-api-2.0.1.jar b/lib/jung-api-2.0.1.jar Binary files differnew file mode 100644 index 0000000..6dcac89 --- /dev/null +++ b/lib/jung-api-2.0.1.jar diff --git a/lib/jung-graph-impl-2.0.1.jar b/lib/jung-graph-impl-2.0.1.jar Binary files differnew file mode 100644 index 0000000..a64f6f7 --- /dev/null +++ b/lib/jung-graph-impl-2.0.1.jar diff --git a/lib/jung-io-2.0.1.jar b/lib/jung-io-2.0.1.jar Binary files differnew file mode 100644 index 0000000..4059dcd --- /dev/null +++ b/lib/jung-io-2.0.1.jar diff --git a/lib/jung-jai-2.0.1.jar b/lib/jung-jai-2.0.1.jar Binary files differnew file mode 100644 index 0000000..feeb09d --- /dev/null +++ b/lib/jung-jai-2.0.1.jar diff --git a/lib/jung-jai-samples-2.0.1.jar b/lib/jung-jai-samples-2.0.1.jar Binary files differnew file mode 100644 index 0000000..784cd88 --- /dev/null +++ b/lib/jung-jai-samples-2.0.1.jar diff --git a/lib/jung-samples-2.0.1.jar b/lib/jung-samples-2.0.1.jar Binary files differnew file mode 100644 index 0000000..838461d --- /dev/null +++ b/lib/jung-samples-2.0.1.jar diff --git a/lib/jung-visualization-2.0.1.jar b/lib/jung-visualization-2.0.1.jar Binary files differnew file mode 100644 index 0000000..c611e77 --- /dev/null +++ b/lib/jung-visualization-2.0.1.jar diff --git a/lib/junit/junit-3.8.2-api.zip b/lib/junit/junit-3.8.2-api.zip Binary files differnew file mode 100644 index 0000000..6d792fd --- /dev/null +++ b/lib/junit/junit-3.8.2-api.zip diff --git a/lib/junit/junit-3.8.2.jar b/lib/junit/junit-3.8.2.jar Binary files differnew file mode 100644 index 0000000..d835872 --- /dev/null +++ b/lib/junit/junit-3.8.2.jar diff --git a/lib/junit_4/junit-4.5-api.zip b/lib/junit_4/junit-4.5-api.zip Binary files differnew file mode 100644 index 0000000..5748c44 --- /dev/null +++ b/lib/junit_4/junit-4.5-api.zip diff --git a/lib/junit_4/junit-4.5-src.jar b/lib/junit_4/junit-4.5-src.jar Binary files differnew file mode 100644 index 0000000..18774a5 --- /dev/null +++ b/lib/junit_4/junit-4.5-src.jar diff --git 
a/lib/junit_4/junit-4.5.jar b/lib/junit_4/junit-4.5.jar Binary files differnew file mode 100644 index 0000000..83f8bc7 --- /dev/null +++ b/lib/junit_4/junit-4.5.jar diff --git a/lib/log4j-1.2.15.jar b/lib/log4j-1.2.15.jar Binary files differnew file mode 100644 index 0000000..c930a6a --- /dev/null +++ b/lib/log4j-1.2.15.jar diff --git a/lib/mallet-deps.jar b/lib/mallet-deps.jar Binary files differnew file mode 100644 index 0000000..05517df --- /dev/null +++ b/lib/mallet-deps.jar diff --git a/lib/mallet.jar b/lib/mallet.jar Binary files differnew file mode 100644 index 0000000..fb8fef5 --- /dev/null +++ b/lib/mallet.jar diff --git a/lib/nblibraries.properties b/lib/nblibraries.properties new file mode 100644 index 0000000..52864ae --- /dev/null +++ b/lib/nblibraries.properties @@ -0,0 +1,14 @@ +libs.CopyLibs.classpath=\ + ${base}/CopyLibs/org-netbeans-modules-java-j2seproject-copylibstask.jar +libs.CopyLibs.displayName=CopyLibs Task +libs.CopyLibs.prop-version=2.0 +libs.junit.classpath=\ + ${base}/junit/junit-3.8.2.jar +libs.junit.javadoc=\ + ${base}/junit/junit-3.8.2-api.zip +libs.junit_4.classpath=\ + ${base}/junit_4/junit-4.5.jar +libs.junit_4.javadoc=\ + ${base}/junit_4/junit-4.5-api.zip +libs.junit_4.src=\ + ${base}/junit_4/junit-4.5-src.jar diff --git a/lib/signpost-commonshttp4-1.2.1.2.jar b/lib/signpost-commonshttp4-1.2.1.2.jar Binary files differnew file mode 100644 index 0000000..fd37cfa --- /dev/null +++ b/lib/signpost-commonshttp4-1.2.1.2.jar diff --git a/lib/signpost-core-1.2.1.2.jar b/lib/signpost-core-1.2.1.2.jar Binary files differnew file mode 100644 index 0000000..8871730 --- /dev/null +++ b/lib/signpost-core-1.2.1.2.jar diff --git a/lib/stax-api-1.0.1.jar b/lib/stax-api-1.0.1.jar Binary files differnew file mode 100644 index 0000000..d9a1665 --- /dev/null +++ b/lib/stax-api-1.0.1.jar diff --git a/lib/vecmath-1.3.1.jar b/lib/vecmath-1.3.1.jar Binary files differnew file mode 100644 index 0000000..fc2244b --- /dev/null +++ b/lib/vecmath-1.3.1.jar 
diff --git a/lib/wstx-asl-3.2.6.jar b/lib/wstx-asl-3.2.6.jar Binary files differnew file mode 100644 index 0000000..aee5f0c --- /dev/null +++ b/lib/wstx-asl-3.2.6.jar diff --git a/manifest.mf b/manifest.mf new file mode 100644 index 0000000..1574df4 --- /dev/null +++ b/manifest.mf @@ -0,0 +1,3 @@ +Manifest-Version: 1.0
+X-COMMENT: Main-Class will be added automatically by build
+
diff --git a/nbproject/build-impl.xml b/nbproject/build-impl.xml new file mode 100644 index 0000000..7e854aa --- /dev/null +++ b/nbproject/build-impl.xml @@ -0,0 +1,1415 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- +*** GENERATED FROM project.xml - DO NOT EDIT *** +*** EDIT ../build.xml INSTEAD *** + +For the purpose of easier reading the script +is divided into following sections: + + - initialization + - compilation + - jar + - execution + - debugging + - javadoc + - test compilation + - test execution + - test debugging + - applet + - cleanup + + --> +<project xmlns:j2seproject1="http://www.netbeans.org/ns/j2se-project/1" xmlns:j2seproject3="http://www.netbeans.org/ns/j2se-project/3" xmlns:jaxrpc="http://www.netbeans.org/ns/j2se-project/jax-rpc" basedir=".." default="default" name="TwitterDataAnalytics-impl"> + <fail message="Please build using Ant 1.8.0 or higher."> + <condition> + <not> + <antversion atleast="1.8.0"/> + </not> + </condition> + </fail> + <target depends="test,jar,javadoc" description="Build and test whole project." name="default"/> + <!-- + ====================== + INITIALIZATION SECTION + ====================== + --> + <target name="-pre-init"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. 
--> + </target> + <target depends="-pre-init" name="-init-private"> + <property file="nbproject/private/config.properties"/> + <property file="nbproject/private/configs/${config}.properties"/> + <property file="nbproject/private/private.properties"/> + </target> + <target name="-pre-init-libraries"> + <property location=".\lib\nblibraries.properties" name="libraries.path"/> + <dirname file="${libraries.path}" property="libraries.dir.nativedirsep"/> + <pathconvert dirsep="/" property="libraries.dir"> + <path path="${libraries.dir.nativedirsep}"/> + </pathconvert> + <basename file="${libraries.path}" property="libraries.basename" suffix=".properties"/> + <available file="${libraries.dir}/${libraries.basename}-private.properties" property="private.properties.available"/> + </target> + <target depends="-pre-init-libraries" if="private.properties.available" name="-init-private-libraries"> + <loadproperties encoding="ISO-8859-1" srcfile="${libraries.dir}/${libraries.basename}-private.properties"> + <filterchain> + <replacestring from="$${base}" to="${libraries.dir}"/> + <escapeunicode/> + </filterchain> + </loadproperties> + </target> + <target depends="-pre-init,-init-private,-init-private-libraries" name="-init-libraries"> + <loadproperties encoding="ISO-8859-1" srcfile="${libraries.path}"> + <filterchain> + <replacestring from="$${base}" to="${libraries.dir}"/> + <escapeunicode/> + </filterchain> + </loadproperties> + </target> + <target depends="-pre-init,-init-private,-init-libraries" name="-init-user"> + <property file="${user.properties.file}"/> + <!-- The two properties below are usually overridden --> + <!-- by the active platform. Just a fallback. 
--> + <property name="default.javac.source" value="1.4"/> + <property name="default.javac.target" value="1.4"/> + </target> + <target depends="-pre-init,-init-private,-init-libraries,-init-user" name="-init-project"> + <property file="nbproject/configs/${config}.properties"/> + <property file="nbproject/project.properties"/> + </target> + <target depends="-pre-init,-init-private,-init-libraries,-init-user,-init-project,-init-macrodef-property" name="-do-init"> + <property name="platform.java" value="${java.home}/bin/java"/> + <available file="${manifest.file}" property="manifest.available"/> + <condition property="splashscreen.available"> + <and> + <not> + <equals arg1="${application.splash}" arg2="" trim="true"/> + </not> + <available file="${application.splash}"/> + </and> + </condition> + <condition property="main.class.available"> + <and> + <isset property="main.class"/> + <not> + <equals arg1="${main.class}" arg2="" trim="true"/> + </not> + </and> + </condition> + <condition property="profile.available"> + <and> + <isset property="javac.profile"/> + <length length="0" string="${javac.profile}" when="greater"/> + <matches pattern="1\.[89](\..*)?" 
string="${javac.source}"/> + </and> + </condition> + <condition property="do.archive"> + <not> + <istrue value="${jar.archive.disabled}"/> + </not> + </condition> + <condition property="do.mkdist"> + <and> + <isset property="do.archive"/> + <isset property="libs.CopyLibs.classpath"/> + <not> + <istrue value="${mkdist.disabled}"/> + </not> + </and> + </condition> + <condition property="do.archive+manifest.available"> + <and> + <isset property="manifest.available"/> + <istrue value="${do.archive}"/> + </and> + </condition> + <condition property="do.archive+main.class.available"> + <and> + <isset property="main.class.available"/> + <istrue value="${do.archive}"/> + </and> + </condition> + <condition property="do.archive+splashscreen.available"> + <and> + <isset property="splashscreen.available"/> + <istrue value="${do.archive}"/> + </and> + </condition> + <condition property="do.archive+profile.available"> + <and> + <isset property="profile.available"/> + <istrue value="${do.archive}"/> + </and> + </condition> + <condition property="have.tests"> + <or/> + </condition> + <condition property="have.sources"> + <or> + <available file="${src.dir}"/> + </or> + </condition> + <condition property="netbeans.home+have.tests"> + <and> + <isset property="netbeans.home"/> + <isset property="have.tests"/> + </and> + </condition> + <condition property="no.javadoc.preview"> + <and> + <isset property="javadoc.preview"/> + <isfalse value="${javadoc.preview}"/> + </and> + </condition> + <property name="run.jvmargs" value=""/> + <property name="run.jvmargs.ide" value=""/> + <property name="javac.compilerargs" value=""/> + <property name="work.dir" value="${basedir}"/> + <condition property="no.deps"> + <and> + <istrue value="${no.dependencies}"/> + </and> + </condition> + <property name="javac.debug" value="true"/> + <property name="javadoc.preview" value="true"/> + <property name="application.args" value=""/> + <property name="source.encoding" value="${file.encoding}"/> + <property 
name="runtime.encoding" value="${source.encoding}"/> + <condition property="javadoc.encoding.used" value="${javadoc.encoding}"> + <and> + <isset property="javadoc.encoding"/> + <not> + <equals arg1="${javadoc.encoding}" arg2=""/> + </not> + </and> + </condition> + <property name="javadoc.encoding.used" value="${source.encoding}"/> + <property name="includes" value="**"/> + <property name="excludes" value=""/> + <property name="do.depend" value="false"/> + <condition property="do.depend.true"> + <istrue value="${do.depend}"/> + </condition> + <path id="endorsed.classpath.path" path="${endorsed.classpath}"/> + <condition else="" property="endorsed.classpath.cmd.line.arg" value="-Xbootclasspath/p:'${toString:endorsed.classpath.path}'"> + <and> + <isset property="endorsed.classpath"/> + <not> + <equals arg1="${endorsed.classpath}" arg2="" trim="true"/> + </not> + </and> + </condition> + <condition else="" property="javac.profile.cmd.line.arg" value="-profile ${javac.profile}"> + <isset property="profile.available"/> + </condition> + <condition else="false" property="jdkBug6558476"> + <and> + <matches pattern="1\.[56]" string="${java.specification.version}"/> + <not> + <os family="unix"/> + </not> + </and> + </condition> + <property name="javac.fork" value="${jdkBug6558476}"/> + <property name="jar.index" value="false"/> + <property name="jar.index.metainf" value="${jar.index}"/> + <property name="copylibs.rebase" value="true"/> + <available file="${meta.inf.dir}/persistence.xml" property="has.persistence.xml"/> + <condition property="junit.available"> + <or> + <available classname="org.junit.Test" classpath="${run.test.classpath}"/> + <available classname="junit.framework.Test" classpath="${run.test.classpath}"/> + </or> + </condition> + <condition property="testng.available"> + <available classname="org.testng.annotations.Test" classpath="${run.test.classpath}"/> + </condition> + <condition property="junit+testng.available"> + <and> + <istrue 
value="${junit.available}"/> + <istrue value="${testng.available}"/> + </and> + </condition> + <condition else="testng" property="testng.mode" value="mixed"> + <istrue value="${junit+testng.available}"/> + </condition> + <condition else="" property="testng.debug.mode" value="-mixed"> + <istrue value="${junit+testng.available}"/> + </condition> + </target> + <target name="-post-init"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. --> + </target> + <target depends="-pre-init,-init-private,-init-libraries,-init-user,-init-project,-do-init" name="-init-check"> + <fail unless="src.dir">Must set src.dir</fail> + <fail unless="build.dir">Must set build.dir</fail> + <fail unless="dist.dir">Must set dist.dir</fail> + <fail unless="build.classes.dir">Must set build.classes.dir</fail> + <fail unless="dist.javadoc.dir">Must set dist.javadoc.dir</fail> + <fail unless="build.test.classes.dir">Must set build.test.classes.dir</fail> + <fail unless="build.test.results.dir">Must set build.test.results.dir</fail> + <fail unless="build.classes.excludes">Must set build.classes.excludes</fail> + <fail unless="dist.jar">Must set dist.jar</fail> + </target> + <target name="-init-macrodef-property"> + <macrodef name="property" uri="http://www.netbeans.org/ns/j2se-project/1"> + <attribute name="name"/> + <attribute name="value"/> + <sequential> + <property name="@{name}" value="${@{value}}"/> + </sequential> + </macrodef> + </target> + <target depends="-init-ap-cmdline-properties" if="ap.supported.internal" name="-init-macrodef-javac-with-processors"> + <macrodef name="javac" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${src.dir}" name="srcdir"/> + <attribute default="${build.classes.dir}" name="destdir"/> + <attribute default="${javac.classpath}" name="classpath"/> + <attribute default="${javac.processorpath}" name="processorpath"/> + <attribute 
default="${build.generated.sources.dir}/ap-source-output" name="apgeneratedsrcdir"/> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="${javac.debug}" name="debug"/> + <attribute default="${empty.dir}" name="sourcepath"/> + <attribute default="${empty.dir}" name="gensrcdir"/> + <element name="customize" optional="true"/> + <sequential> + <property location="${build.dir}/empty" name="empty.dir"/> + <mkdir dir="${empty.dir}"/> + <mkdir dir="@{apgeneratedsrcdir}"/> + <javac debug="@{debug}" deprecation="${javac.deprecation}" destdir="@{destdir}" encoding="${source.encoding}" excludes="@{excludes}" fork="${javac.fork}" includeantruntime="false" includes="@{includes}" source="${javac.source}" sourcepath="@{sourcepath}" srcdir="@{srcdir}" target="${javac.target}" tempdir="${java.io.tmpdir}"> + <src> + <dirset dir="@{gensrcdir}" erroronmissingdir="false"> + <include name="*"/> + </dirset> + </src> + <classpath> + <path path="@{classpath}"/> + </classpath> + <compilerarg line="${endorsed.classpath.cmd.line.arg}"/> + <compilerarg line="${javac.profile.cmd.line.arg}"/> + <compilerarg line="${javac.compilerargs}"/> + <compilerarg value="-processorpath"/> + <compilerarg path="@{processorpath}:${empty.dir}"/> + <compilerarg line="${ap.processors.internal}"/> + <compilerarg line="${annotation.processing.processor.options}"/> + <compilerarg value="-s"/> + <compilerarg path="@{apgeneratedsrcdir}"/> + <compilerarg line="${ap.proc.none.internal}"/> + <customize/> + </javac> + </sequential> + </macrodef> + </target> + <target depends="-init-ap-cmdline-properties" name="-init-macrodef-javac-without-processors" unless="ap.supported.internal"> + <macrodef name="javac" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${src.dir}" name="srcdir"/> + <attribute default="${build.classes.dir}" name="destdir"/> + <attribute default="${javac.classpath}" name="classpath"/> + <attribute 
default="${javac.processorpath}" name="processorpath"/> + <attribute default="${build.generated.sources.dir}/ap-source-output" name="apgeneratedsrcdir"/> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="${javac.debug}" name="debug"/> + <attribute default="${empty.dir}" name="sourcepath"/> + <attribute default="${empty.dir}" name="gensrcdir"/> + <element name="customize" optional="true"/> + <sequential> + <property location="${build.dir}/empty" name="empty.dir"/> + <mkdir dir="${empty.dir}"/> + <javac debug="@{debug}" deprecation="${javac.deprecation}" destdir="@{destdir}" encoding="${source.encoding}" excludes="@{excludes}" fork="${javac.fork}" includeantruntime="false" includes="@{includes}" source="${javac.source}" sourcepath="@{sourcepath}" srcdir="@{srcdir}" target="${javac.target}" tempdir="${java.io.tmpdir}"> + <src> + <dirset dir="@{gensrcdir}" erroronmissingdir="false"> + <include name="*"/> + </dirset> + </src> + <classpath> + <path path="@{classpath}"/> + </classpath> + <compilerarg line="${endorsed.classpath.cmd.line.arg}"/> + <compilerarg line="${javac.profile.cmd.line.arg}"/> + <compilerarg line="${javac.compilerargs}"/> + <customize/> + </javac> + </sequential> + </macrodef> + </target> + <target depends="-init-macrodef-javac-with-processors,-init-macrodef-javac-without-processors" name="-init-macrodef-javac"> + <macrodef name="depend" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${src.dir}" name="srcdir"/> + <attribute default="${build.classes.dir}" name="destdir"/> + <attribute default="${javac.classpath}" name="classpath"/> + <sequential> + <depend cache="${build.dir}/depcache" destdir="@{destdir}" excludes="${excludes}" includes="${includes}" srcdir="@{srcdir}"> + <classpath> + <path path="@{classpath}"/> + </classpath> + </depend> + </sequential> + </macrodef> + <macrodef name="force-recompile" uri="http://www.netbeans.org/ns/j2se-project/3"> 
+ <attribute default="${build.classes.dir}" name="destdir"/> + <sequential> + <fail unless="javac.includes">Must set javac.includes</fail> + <pathconvert pathsep="${line.separator}" property="javac.includes.binary"> + <path> + <filelist dir="@{destdir}" files="${javac.includes}"/> + </path> + <globmapper from="*.java" to="*.class"/> + </pathconvert> + <tempfile deleteonexit="true" property="javac.includesfile.binary"/> + <echo file="${javac.includesfile.binary}" message="${javac.includes.binary}"/> + <delete> + <files includesfile="${javac.includesfile.binary}"/> + </delete> + <delete> + <fileset file="${javac.includesfile.binary}"/> + </delete> + </sequential> + </macrodef> + </target> + <target if="${junit.available}" name="-init-macrodef-junit-init"> + <condition else="false" property="nb.junit.batch" value="true"> + <and> + <istrue value="${junit.available}"/> + <not> + <isset property="test.method"/> + </not> + </and> + </condition> + <condition else="false" property="nb.junit.single" value="true"> + <and> + <istrue value="${junit.available}"/> + <isset property="test.method"/> + </and> + </condition> + </target> + <target name="-init-test-properties"> + <property name="test.binaryincludes" value="<nothing>"/> + <property name="test.binarytestincludes" value=""/> + <property name="test.binaryexcludes" value=""/> + </target> + <target if="${nb.junit.single}" name="-init-macrodef-junit-single" unless="${nb.junit.batch}"> + <macrodef name="junit" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="**" name="testincludes"/> + <attribute default="" name="testmethods"/> + <element name="customize" optional="true"/> + <sequential> + <property name="junit.forkmode" value="perTest"/> + <junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" showoutput="true" 
tempdir="${build.dir}"> + <test methods="@{testmethods}" name="@{testincludes}" todir="${build.test.results.dir}"/> + <syspropertyset> + <propertyref prefix="test-sys-prop."/> + <mapper from="test-sys-prop.*" to="*" type="glob"/> + </syspropertyset> + <formatter type="brief" usefile="false"/> + <formatter type="xml"/> + <jvmarg value="-ea"/> + <customize/> + </junit> + </sequential> + </macrodef> + </target> + <target depends="-init-test-properties" if="${nb.junit.batch}" name="-init-macrodef-junit-batch" unless="${nb.junit.single}"> + <macrodef name="junit" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="**" name="testincludes"/> + <attribute default="" name="testmethods"/> + <element name="customize" optional="true"/> + <sequential> + <property name="junit.forkmode" value="perTest"/> + <junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" showoutput="true" tempdir="${build.dir}"> + <batchtest todir="${build.test.results.dir}"> + <fileset dir="${build.test.classes.dir}" excludes="@{excludes},${excludes},${test.binaryexcludes}" includes="${test.binaryincludes}"> + <filename name="${test.binarytestincludes}"/> + </fileset> + </batchtest> + <syspropertyset> + <propertyref prefix="test-sys-prop."/> + <mapper from="test-sys-prop.*" to="*" type="glob"/> + </syspropertyset> + <formatter type="brief" usefile="false"/> + <formatter type="xml"/> + <jvmarg value="-ea"/> + <customize/> + </junit> + </sequential> + </macrodef> + </target> + <target depends="-init-macrodef-junit-init,-init-macrodef-junit-single, -init-macrodef-junit-batch" if="${junit.available}" name="-init-macrodef-junit"/> + <target if="${testng.available}" name="-init-macrodef-testng"> + <macrodef name="testng" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${includes}" 
name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="**" name="testincludes"/> + <attribute default="" name="testmethods"/> + <element name="customize" optional="true"/> + <sequential> + <condition else="" property="testng.methods.arg" value="@{testincludes}.@{testmethods}"> + <isset property="test.method"/> + </condition> + <union id="test.set"/> + <taskdef classname="org.testng.TestNGAntTask" classpath="${run.test.classpath}" name="testng"/> + <testng classfilesetref="test.set" failureProperty="tests.failed" listeners="org.testng.reporters.VerboseReporter" methods="${testng.methods.arg}" mode="${testng.mode}" outputdir="${build.test.results.dir}" suitename="TwitterDataAnalytics" testname="TestNG tests" workingDir="${work.dir}"> + <xmlfileset dir="${build.test.classes.dir}" includes="@{testincludes}"/> + <propertyset> + <propertyref prefix="test-sys-prop."/> + <mapper from="test-sys-prop.*" to="*" type="glob"/> + </propertyset> + <customize/> + </testng> + </sequential> + </macrodef> + </target> + <target name="-init-macrodef-test-impl"> + <macrodef name="test-impl" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="**" name="testincludes"/> + <attribute default="" name="testmethods"/> + <element implicit="true" name="customize" optional="true"/> + <sequential> + <echo>No tests executed.</echo> + </sequential> + </macrodef> + </target> + <target depends="-init-macrodef-junit" if="${junit.available}" name="-init-macrodef-junit-impl"> + <macrodef name="test-impl" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="**" name="testincludes"/> + <attribute default="" name="testmethods"/> + <element implicit="true" name="customize" optional="true"/> + <sequential> + <j2seproject3:junit 
excludes="@{excludes}" includes="@{includes}" testincludes="@{testincludes}" testmethods="@{testmethods}"> + <customize/> + </j2seproject3:junit> + </sequential> + </macrodef> + </target> + <target depends="-init-macrodef-testng" if="${testng.available}" name="-init-macrodef-testng-impl"> + <macrodef name="test-impl" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="**" name="testincludes"/> + <attribute default="" name="testmethods"/> + <element implicit="true" name="customize" optional="true"/> + <sequential> + <j2seproject3:testng excludes="@{excludes}" includes="@{includes}" testincludes="@{testincludes}" testmethods="@{testmethods}"> + <customize/> + </j2seproject3:testng> + </sequential> + </macrodef> + </target> + <target depends="-init-macrodef-test-impl,-init-macrodef-junit-impl,-init-macrodef-testng-impl" name="-init-macrodef-test"> + <macrodef name="test" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="**" name="testincludes"/> + <attribute default="" name="testmethods"/> + <sequential> + <j2seproject3:test-impl excludes="@{excludes}" includes="@{includes}" testincludes="@{testincludes}" testmethods="@{testmethods}"> + <customize> + <classpath> + <path path="${run.test.classpath}"/> + </classpath> + <jvmarg line="${endorsed.classpath.cmd.line.arg}"/> + <jvmarg line="${run.jvmargs}"/> + <jvmarg line="${run.jvmargs.ide}"/> + </customize> + </j2seproject3:test-impl> + </sequential> + </macrodef> + </target> + <target if="${junit.available}" name="-init-macrodef-junit-debug" unless="${nb.junit.batch}"> + <macrodef name="junit-debug" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute 
default="**" name="testincludes"/> + <attribute default="" name="testmethods"/> + <element name="customize" optional="true"/> + <sequential> + <property name="junit.forkmode" value="perTest"/> + <junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" showoutput="true" tempdir="${build.dir}"> + <test methods="@{testmethods}" name="@{testincludes}" todir="${build.test.results.dir}"/> + <syspropertyset> + <propertyref prefix="test-sys-prop."/> + <mapper from="test-sys-prop.*" to="*" type="glob"/> + </syspropertyset> + <formatter type="brief" usefile="false"/> + <formatter type="xml"/> + <jvmarg value="-ea"/> + <jvmarg line="${debug-args-line}"/> + <jvmarg value="-Xrunjdwp:transport=${debug-transport},address=${jpda.address}"/> + <customize/> + </junit> + </sequential> + </macrodef> + </target> + <target depends="-init-test-properties" if="${nb.junit.batch}" name="-init-macrodef-junit-debug-batch"> + <macrodef name="junit-debug" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="**" name="testincludes"/> + <attribute default="" name="testmethods"/> + <element name="customize" optional="true"/> + <sequential> + <property name="junit.forkmode" value="perTest"/> + <junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" showoutput="true" tempdir="${build.dir}"> + <batchtest todir="${build.test.results.dir}"> + <fileset dir="${build.test.classes.dir}" excludes="@{excludes},${excludes},${test.binaryexcludes}" includes="${test.binaryincludes}"> + <filename name="${test.binarytestincludes}"/> + </fileset> + </batchtest> + <syspropertyset> + <propertyref prefix="test-sys-prop."/> + <mapper from="test-sys-prop.*" to="*" type="glob"/> + </syspropertyset> + <formatter type="brief" usefile="false"/> + <formatter 
type="xml"/> + <jvmarg value="-ea"/> + <jvmarg line="${debug-args-line}"/> + <jvmarg value="-Xrunjdwp:transport=${debug-transport},address=${jpda.address}"/> + <customize/> + </junit> + </sequential> + </macrodef> + </target> + <target depends="-init-macrodef-junit-debug,-init-macrodef-junit-debug-batch" if="${junit.available}" name="-init-macrodef-junit-debug-impl"> + <macrodef name="test-debug-impl" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="**" name="testincludes"/> + <attribute default="" name="testmethods"/> + <element implicit="true" name="customize" optional="true"/> + <sequential> + <j2seproject3:junit-debug excludes="@{excludes}" includes="@{includes}" testincludes="@{testincludes}" testmethods="@{testmethods}"> + <customize/> + </j2seproject3:junit-debug> + </sequential> + </macrodef> + </target> + <target if="${testng.available}" name="-init-macrodef-testng-debug"> + <macrodef name="testng-debug" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${main.class}" name="testClass"/> + <attribute default="" name="testMethod"/> + <element name="customize2" optional="true"/> + <sequential> + <condition else="-testclass @{testClass}" property="test.class.or.method" value="-methods @{testClass}.@{testMethod}"> + <isset property="test.method"/> + </condition> + <condition else="-suitename TwitterDataAnalytics -testname @{testClass} ${test.class.or.method}" property="testng.cmd.args" value="@{testClass}"> + <matches pattern=".*\.xml" string="@{testClass}"/> + </condition> + <delete dir="${build.test.results.dir}" quiet="true"/> + <mkdir dir="${build.test.results.dir}"/> + <j2seproject3:debug classname="org.testng.TestNG" classpath="${debug.test.classpath}"> + <customize> + <customize2/> + <jvmarg value="-ea"/> + <arg line="${testng.debug.mode}"/> + <arg line="-d ${build.test.results.dir}"/> + <arg line="-listener 
org.testng.reporters.VerboseReporter"/> + <arg line="${testng.cmd.args}"/> + </customize> + </j2seproject3:debug> + </sequential> + </macrodef> + </target> + <target depends="-init-macrodef-testng-debug" if="${testng.available}" name="-init-macrodef-testng-debug-impl"> + <macrodef name="testng-debug-impl" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${main.class}" name="testClass"/> + <attribute default="" name="testMethod"/> + <element implicit="true" name="customize2" optional="true"/> + <sequential> + <j2seproject3:testng-debug testClass="@{testClass}" testMethod="@{testMethod}"> + <customize2/> + </j2seproject3:testng-debug> + </sequential> + </macrodef> + </target> + <target depends="-init-macrodef-junit-debug-impl" if="${junit.available}" name="-init-macrodef-test-debug-junit"> + <macrodef name="test-debug" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="**" name="testincludes"/> + <attribute default="" name="testmethods"/> + <attribute default="${main.class}" name="testClass"/> + <attribute default="" name="testMethod"/> + <sequential> + <j2seproject3:test-debug-impl excludes="@{excludes}" includes="@{includes}" testincludes="@{testincludes}" testmethods="@{testmethods}"> + <customize> + <classpath> + <path path="${run.test.classpath}"/> + </classpath> + <jvmarg line="${endorsed.classpath.cmd.line.arg}"/> + <jvmarg line="${run.jvmargs}"/> + <jvmarg line="${run.jvmargs.ide}"/> + </customize> + </j2seproject3:test-debug-impl> + </sequential> + </macrodef> + </target> + <target depends="-init-macrodef-testng-debug-impl" if="${testng.available}" name="-init-macrodef-test-debug-testng"> + <macrodef name="test-debug" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${includes}" name="includes"/> + <attribute default="${excludes}" name="excludes"/> + <attribute default="**" 
name="testincludes"/> + <attribute default="" name="testmethods"/> + <attribute default="${main.class}" name="testClass"/> + <attribute default="" name="testMethod"/> + <sequential> + <j2seproject3:testng-debug-impl testClass="@{testClass}" testMethod="@{testMethod}"> + <customize2> + <syspropertyset> + <propertyref prefix="test-sys-prop."/> + <mapper from="test-sys-prop.*" to="*" type="glob"/> + </syspropertyset> + </customize2> + </j2seproject3:testng-debug-impl> + </sequential> + </macrodef> + </target> + <target depends="-init-macrodef-test-debug-junit,-init-macrodef-test-debug-testng" name="-init-macrodef-test-debug"/> + <!-- + pre NB7.2 profiling section; consider it deprecated + --> + <target depends="-profile-pre-init, init, -profile-post-init, -profile-init-macrodef-profile, -profile-init-check" if="profiler.info.jvmargs.agent" name="profile-init"/> + <target if="profiler.info.jvmargs.agent" name="-profile-pre-init"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. --> + </target> + <target if="profiler.info.jvmargs.agent" name="-profile-post-init"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. 
--> + </target> + <target if="profiler.info.jvmargs.agent" name="-profile-init-macrodef-profile"> + <macrodef name="resolve"> + <attribute name="name"/> + <attribute name="value"/> + <sequential> + <property name="@{name}" value="${env.@{value}}"/> + </sequential> + </macrodef> + <macrodef name="profile"> + <attribute default="${main.class}" name="classname"/> + <element name="customize" optional="true"/> + <sequential> + <property environment="env"/> + <resolve name="profiler.current.path" value="${profiler.info.pathvar}"/> + <java classname="@{classname}" dir="${profiler.info.dir}" fork="true" jvm="${profiler.info.jvm}"> + <jvmarg line="${endorsed.classpath.cmd.line.arg}"/> + <jvmarg value="${profiler.info.jvmargs.agent}"/> + <jvmarg line="${profiler.info.jvmargs}"/> + <env key="${profiler.info.pathvar}" path="${profiler.info.agentpath}:${profiler.current.path}"/> + <arg line="${application.args}"/> + <classpath> + <path path="${run.classpath}"/> + </classpath> + <syspropertyset> + <propertyref prefix="run-sys-prop."/> + <mapper from="run-sys-prop.*" to="*" type="glob"/> + </syspropertyset> + <customize/> + </java> + </sequential> + </macrodef> + </target> + <target depends="-profile-pre-init, init, -profile-post-init, -profile-init-macrodef-profile" if="profiler.info.jvmargs.agent" name="-profile-init-check"> + <fail unless="profiler.info.jvm">Must set JVM to use for profiling in profiler.info.jvm</fail> + <fail unless="profiler.info.jvmargs.agent">Must set profiler agent JVM arguments in profiler.info.jvmargs.agent</fail> + </target> + <!-- + end of pre NB7.2 profiling section + --> + <target depends="-init-debug-args" name="-init-macrodef-nbjpda"> + <macrodef name="nbjpdastart" uri="http://www.netbeans.org/ns/j2se-project/1"> + <attribute default="${main.class}" name="name"/> + <attribute default="${debug.classpath}" name="classpath"/> + <attribute default="" name="stopclassname"/> + <sequential> + <nbjpdastart addressproperty="jpda.address" name="@{name}" 
stopclassname="@{stopclassname}" transport="${debug-transport}"> + <classpath> + <path path="@{classpath}"/> + </classpath> + </nbjpdastart> + </sequential> + </macrodef> + <macrodef name="nbjpdareload" uri="http://www.netbeans.org/ns/j2se-project/1"> + <attribute default="${build.classes.dir}" name="dir"/> + <sequential> + <nbjpdareload> + <fileset dir="@{dir}" includes="${fix.classes}"> + <include name="${fix.includes}*.class"/> + </fileset> + </nbjpdareload> + </sequential> + </macrodef> + </target> + <target name="-init-debug-args"> + <!-- Derives debug-args-line and debug-transport. The quote that follows "java version" is written as the quot entity because it sits inside a double-quoted attribute value. --> + <property name="version-output" value="java version &quot;${ant.java.version}"/> + <condition property="have-jdk-older-than-1.4"> + <or> + <contains string="${version-output}" substring="java version &quot;1.0"/> + <contains string="${version-output}" substring="java version &quot;1.1"/> + <contains string="${version-output}" substring="java version &quot;1.2"/> + <contains string="${version-output}" substring="java version &quot;1.3"/> + </or> + </condition> + <condition else="-Xdebug" property="debug-args-line" value="-Xdebug -Xnoagent -Djava.compiler=none"> + <istrue value="${have-jdk-older-than-1.4}"/> + </condition> + <condition else="dt_socket" property="debug-transport-by-os" value="dt_shmem"> + <os family="windows"/> + </condition> + <condition else="${debug-transport-by-os}" property="debug-transport" value="${debug.transport}"> + <isset property="debug.transport"/> + </condition> + </target> + <target depends="-init-debug-args" name="-init-macrodef-debug"> + <macrodef name="debug" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${main.class}" name="classname"/> + <attribute default="${debug.classpath}" name="classpath"/> + <element name="customize" optional="true"/> + <sequential> + <java classname="@{classname}" dir="${work.dir}" fork="true"> + <jvmarg line="${endorsed.classpath.cmd.line.arg}"/> + <jvmarg line="${debug-args-line}"/> + <jvmarg value="-Xrunjdwp:transport=${debug-transport},address=${jpda.address}"/> + 
<jvmarg value="-Dfile.encoding=${runtime.encoding}"/> + <redirector errorencoding="${runtime.encoding}" inputencoding="${runtime.encoding}" outputencoding="${runtime.encoding}"/> + <jvmarg line="${run.jvmargs}"/> + <jvmarg line="${run.jvmargs.ide}"/> + <classpath> + <path path="@{classpath}"/> + </classpath> + <syspropertyset> + <propertyref prefix="run-sys-prop."/> + <mapper from="run-sys-prop.*" to="*" type="glob"/> + </syspropertyset> + <customize/> + </java> + </sequential> + </macrodef> + </target> + <target name="-init-macrodef-java"> + <macrodef name="java" uri="http://www.netbeans.org/ns/j2se-project/1"> + <attribute default="${main.class}" name="classname"/> + <attribute default="${run.classpath}" name="classpath"/> + <attribute default="jvm" name="jvm"/> + <element name="customize" optional="true"/> + <sequential> + <java classname="@{classname}" dir="${work.dir}" fork="true"> + <jvmarg line="${endorsed.classpath.cmd.line.arg}"/> + <jvmarg value="-Dfile.encoding=${runtime.encoding}"/> + <redirector errorencoding="${runtime.encoding}" inputencoding="${runtime.encoding}" outputencoding="${runtime.encoding}"/> + <jvmarg line="${run.jvmargs}"/> + <jvmarg line="${run.jvmargs.ide}"/> + <classpath> + <path path="@{classpath}"/> + </classpath> + <syspropertyset> + <propertyref prefix="run-sys-prop."/> + <mapper from="run-sys-prop.*" to="*" type="glob"/> + </syspropertyset> + <customize/> + </java> + </sequential> + </macrodef> + </target> + <target name="-init-macrodef-copylibs"> + <macrodef name="copylibs" uri="http://www.netbeans.org/ns/j2se-project/3"> + <attribute default="${manifest.file}" name="manifest"/> + <element name="customize" optional="true"/> + <sequential> + <property location="${build.classes.dir}" name="build.classes.dir.resolved"/> + <pathconvert property="run.classpath.without.build.classes.dir"> + <path path="${run.classpath}"/> + <map from="${build.classes.dir.resolved}" to=""/> + </pathconvert> + <pathconvert pathsep=" " 
property="jar.classpath"> + <path path="${run.classpath.without.build.classes.dir}"/> + <chainedmapper> + <flattenmapper/> + <filtermapper> + <replacestring from=" " to="%20"/> + </filtermapper> + <globmapper from="*" to="lib/*"/> + </chainedmapper> + </pathconvert> + <taskdef classname="org.netbeans.modules.java.j2seproject.copylibstask.CopyLibs" classpath="${libs.CopyLibs.classpath}" name="copylibs"/> + <copylibs compress="${jar.compress}" excludeFromCopy="${copylibs.excludes}" index="${jar.index}" indexMetaInf="${jar.index.metainf}" jarfile="${dist.jar}" manifest="@{manifest}" rebase="${copylibs.rebase}" runtimeclasspath="${run.classpath.without.build.classes.dir}"> + <fileset dir="${build.classes.dir}" excludes="${dist.archive.excludes}"/> + <manifest> + <attribute name="Class-Path" value="${jar.classpath}"/> + <customize/> + </manifest> + </copylibs> + </sequential> + </macrodef> + </target> + <target name="-init-presetdef-jar"> + <presetdef name="jar" uri="http://www.netbeans.org/ns/j2se-project/1"> + <jar compress="${jar.compress}" index="${jar.index}" jarfile="${dist.jar}"> + <j2seproject1:fileset dir="${build.classes.dir}" excludes="${dist.archive.excludes}"/> + </jar> + </presetdef> + </target> + <target name="-init-ap-cmdline-properties"> + <property name="annotation.processing.enabled" value="true"/> + <property name="annotation.processing.processors.list" value=""/> + <property name="annotation.processing.processor.options" value=""/> + <property name="annotation.processing.run.all.processors" value="true"/> + <property name="javac.processorpath" value="${javac.classpath}"/> + <property name="javac.test.processorpath" value="${javac.test.classpath}"/> + <condition property="ap.supported.internal" value="true"> + <not> + <matches pattern="1\.[0-5](\..*)?" 
string="${javac.source}"/> + </not> + </condition> + </target> + <target depends="-init-ap-cmdline-properties" if="ap.supported.internal" name="-init-ap-cmdline-supported"> + <condition else="" property="ap.processors.internal" value="-processor ${annotation.processing.processors.list}"> + <isfalse value="${annotation.processing.run.all.processors}"/> + </condition> + <condition else="" property="ap.proc.none.internal" value="-proc:none"> + <isfalse value="${annotation.processing.enabled}"/> + </condition> + </target> + <target depends="-init-ap-cmdline-properties,-init-ap-cmdline-supported" name="-init-ap-cmdline"> + <property name="ap.cmd.line.internal" value=""/> + </target> + <target depends="-pre-init,-init-private,-init-libraries,-init-user,-init-project,-do-init,-post-init,-init-check,-init-macrodef-property,-init-macrodef-javac,-init-macrodef-test,-init-macrodef-test-debug,-init-macrodef-nbjpda,-init-macrodef-debug,-init-macrodef-java,-init-presetdef-jar,-init-ap-cmdline" name="init"/> + <!-- + =================== + COMPILATION SECTION + =================== + --> + <target name="-deps-jar-init" unless="built-jar.properties"> + <property location="${build.dir}/built-jar.properties" name="built-jar.properties"/> + <delete file="${built-jar.properties}" quiet="true"/> + </target> + <target if="already.built.jar.${basedir}" name="-warn-already-built-jar"> + <echo level="warn" message="Cycle detected: TwitterDataAnalytics was already built"/> + </target> + <target depends="init,-deps-jar-init" name="deps-jar" unless="no.deps"> + <mkdir dir="${build.dir}"/> + <touch file="${built-jar.properties}" verbose="false"/> + <property file="${built-jar.properties}" prefix="already.built.jar."/> + <antcall target="-warn-already-built-jar"/> + <propertyfile file="${built-jar.properties}"> + <entry key="${basedir}" value=""/> + </propertyfile> + </target> + <target depends="init,-check-automatic-build,-clean-after-automatic-build" name="-verify-automatic-build"/> + <target 
depends="init" name="-check-automatic-build"> + <available file="${build.classes.dir}/.netbeans_automatic_build" property="netbeans.automatic.build"/> + </target> + <target depends="init" if="netbeans.automatic.build" name="-clean-after-automatic-build"> + <antcall target="clean"/> + </target> + <target depends="init,deps-jar" name="-pre-pre-compile"> + <mkdir dir="${build.classes.dir}"/> + </target> + <target name="-pre-compile"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. --> + </target> + <target if="do.depend.true" name="-compile-depend"> + <pathconvert property="build.generated.subdirs"> + <dirset dir="${build.generated.sources.dir}" erroronmissingdir="false"> + <include name="*"/> + </dirset> + </pathconvert> + <j2seproject3:depend srcdir="${src.dir}:${build.generated.subdirs}"/> + </target> + <target depends="init,deps-jar,-pre-pre-compile,-pre-compile, -copy-persistence-xml,-compile-depend" if="have.sources" name="-do-compile"> + <j2seproject3:javac gensrcdir="${build.generated.sources.dir}"/> + <copy todir="${build.classes.dir}"> + <fileset dir="${src.dir}" excludes="${build.classes.excludes},${excludes}" includes="${includes}"/> + </copy> + </target> + <target if="has.persistence.xml" name="-copy-persistence-xml"> + <mkdir dir="${build.classes.dir}/META-INF"/> + <copy todir="${build.classes.dir}/META-INF"> + <fileset dir="${meta.inf.dir}" includes="persistence.xml orm.xml"/> + </copy> + </target> + <target name="-post-compile"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. --> + </target> + <target depends="init,deps-jar,-verify-automatic-build,-pre-pre-compile,-pre-compile,-do-compile,-post-compile" description="Compile project." name="compile"/> + <target name="-pre-compile-single"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. 
--> + </target> + <target depends="init,deps-jar,-pre-pre-compile" name="-do-compile-single"> + <fail unless="javac.includes">Must select some files in the IDE or set javac.includes</fail> + <j2seproject3:force-recompile/> + <j2seproject3:javac excludes="" gensrcdir="${build.generated.sources.dir}" includes="${javac.includes}" sourcepath="${src.dir}"/> + </target> + <target name="-post-compile-single"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. --> + </target> + <target depends="init,deps-jar,-verify-automatic-build,-pre-pre-compile,-pre-compile-single,-do-compile-single,-post-compile-single" name="compile-single"/> + <!-- + ==================== + JAR BUILDING SECTION + ==================== + --> + <target depends="init" name="-pre-pre-jar"> + <dirname file="${dist.jar}" property="dist.jar.dir"/> + <mkdir dir="${dist.jar.dir}"/> + </target> + <target name="-pre-jar"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. 
--> + </target> + <target depends="init" if="do.archive" name="-do-jar-create-manifest" unless="manifest.available"> + <tempfile deleteonexit="true" destdir="${build.dir}" property="tmp.manifest.file"/> + <touch file="${tmp.manifest.file}" verbose="false"/> + </target> + <target depends="init" if="do.archive+manifest.available" name="-do-jar-copy-manifest"> + <tempfile deleteonexit="true" destdir="${build.dir}" property="tmp.manifest.file"/> + <copy file="${manifest.file}" tofile="${tmp.manifest.file}"/> + </target> + <target depends="init,-do-jar-create-manifest,-do-jar-copy-manifest" if="do.archive+main.class.available" name="-do-jar-set-mainclass"> + <manifest file="${tmp.manifest.file}" mode="update"> + <attribute name="Main-Class" value="${main.class}"/> + </manifest> + </target> + <target depends="init,-do-jar-create-manifest,-do-jar-copy-manifest" if="do.archive+profile.available" name="-do-jar-set-profile"> + <manifest file="${tmp.manifest.file}" mode="update"> + <attribute name="Profile" value="${javac.profile}"/> + </manifest> + </target> + <target depends="init,-do-jar-create-manifest,-do-jar-copy-manifest" if="do.archive+splashscreen.available" name="-do-jar-set-splashscreen"> + <basename file="${application.splash}" property="splashscreen.basename"/> + <mkdir dir="${build.classes.dir}/META-INF"/> + <copy failonerror="false" file="${application.splash}" todir="${build.classes.dir}/META-INF"/> + <manifest file="${tmp.manifest.file}" mode="update"> + <attribute name="SplashScreen-Image" value="META-INF/${splashscreen.basename}"/> + </manifest> + </target> + <target depends="init,-init-macrodef-copylibs,compile,-pre-pre-jar,-pre-jar,-do-jar-create-manifest,-do-jar-copy-manifest,-do-jar-set-mainclass,-do-jar-set-profile,-do-jar-set-splashscreen" if="do.mkdist" name="-do-jar-copylibs"> + <j2seproject3:copylibs manifest="${tmp.manifest.file}"/> + <echo level="info">To run this application from the command line without Ant, try:</echo> + <property 
location="${dist.jar}" name="dist.jar.resolved"/> + <echo level="info">java -jar "${dist.jar.resolved}"</echo> + </target> + <target depends="init,compile,-pre-pre-jar,-pre-jar,-do-jar-create-manifest,-do-jar-copy-manifest,-do-jar-set-mainclass,-do-jar-set-profile,-do-jar-set-splashscreen" if="do.archive" name="-do-jar-jar" unless="do.mkdist"> + <j2seproject1:jar manifest="${tmp.manifest.file}"/> + <property location="${build.classes.dir}" name="build.classes.dir.resolved"/> + <property location="${dist.jar}" name="dist.jar.resolved"/> + <pathconvert property="run.classpath.with.dist.jar"> + <path path="${run.classpath}"/> + <map from="${build.classes.dir.resolved}" to="${dist.jar.resolved}"/> + </pathconvert> + <condition else="" property="jar.usage.message" value="To run this application from the command line without Ant, try:${line.separator}${platform.java} -cp ${run.classpath.with.dist.jar} ${main.class}"> + <isset property="main.class.available"/> + </condition> + <condition else="debug" property="jar.usage.level" value="info"> + <isset property="main.class.available"/> + </condition> + <echo level="${jar.usage.level}" message="${jar.usage.message}"/> + </target> + <target depends="-do-jar-copylibs" if="do.archive" name="-do-jar-delete-manifest"> + <delete> + <fileset file="${tmp.manifest.file}"/> + </delete> + </target> + <target depends="init,compile,-pre-pre-jar,-pre-jar,-do-jar-create-manifest,-do-jar-copy-manifest,-do-jar-set-mainclass,-do-jar-set-profile,-do-jar-set-splashscreen,-do-jar-jar,-do-jar-delete-manifest" name="-do-jar-without-libraries"/> + <target depends="init,compile,-pre-pre-jar,-pre-jar,-do-jar-create-manifest,-do-jar-copy-manifest,-do-jar-set-mainclass,-do-jar-set-profile,-do-jar-set-splashscreen,-do-jar-copylibs,-do-jar-delete-manifest" name="-do-jar-with-libraries"/> + <target name="-post-jar"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. 
--> + </target> + <target depends="init,compile,-pre-jar,-do-jar-without-libraries,-do-jar-with-libraries,-post-jar" name="-do-jar"/> + <target depends="init,compile,-pre-jar,-do-jar,-post-jar" description="Build JAR." name="jar"/> + <!-- + ================= + EXECUTION SECTION + ================= + --> + <target depends="init,compile" description="Run a main class." name="run"> + <j2seproject1:java> + <customize> + <arg line="${application.args}"/> + </customize> + </j2seproject1:java> + </target> + <target name="-do-not-recompile"> + <property name="javac.includes.binary" value=""/> + </target> + <target depends="init,compile-single" name="run-single"> + <fail unless="run.class">Must select one file in the IDE or set run.class</fail> + <j2seproject1:java classname="${run.class}"/> + </target> + <target depends="init,compile-test-single" name="run-test-with-main"> + <fail unless="run.class">Must select one file in the IDE or set run.class</fail> + <j2seproject1:java classname="${run.class}" classpath="${run.test.classpath}"/> + </target> + <!-- + ================= + DEBUGGING SECTION + ================= + --> + <target depends="init" if="netbeans.home" name="-debug-start-debugger"> + <j2seproject1:nbjpdastart name="${debug.class}"/> + </target> + <target depends="init" if="netbeans.home" name="-debug-start-debugger-main-test"> + <j2seproject1:nbjpdastart classpath="${debug.test.classpath}" name="${debug.class}"/> + </target> + <target depends="init,compile" name="-debug-start-debuggee"> + <j2seproject3:debug> + <customize> + <arg line="${application.args}"/> + </customize> + </j2seproject3:debug> + </target> + <target depends="init,compile,-debug-start-debugger,-debug-start-debuggee" description="Debug project in IDE." 
if="netbeans.home" name="debug"/> + <target depends="init" if="netbeans.home" name="-debug-start-debugger-stepinto"> + <j2seproject1:nbjpdastart stopclassname="${main.class}"/> + </target> + <target depends="init,compile,-debug-start-debugger-stepinto,-debug-start-debuggee" if="netbeans.home" name="debug-stepinto"/> + <target depends="init,compile-single" if="netbeans.home" name="-debug-start-debuggee-single"> + <fail unless="debug.class">Must select one file in the IDE or set debug.class</fail> + <j2seproject3:debug classname="${debug.class}"/> + </target> + <target depends="init,compile-single,-debug-start-debugger,-debug-start-debuggee-single" if="netbeans.home" name="debug-single"/> + <target depends="init,compile-test-single" if="netbeans.home" name="-debug-start-debuggee-main-test"> + <fail unless="debug.class">Must select one file in the IDE or set debug.class</fail> + <j2seproject3:debug classname="${debug.class}" classpath="${debug.test.classpath}"/> + </target> + <target depends="init,compile-test-single,-debug-start-debugger-main-test,-debug-start-debuggee-main-test" if="netbeans.home" name="debug-test-with-main"/> + <target depends="init" name="-pre-debug-fix"> + <fail unless="fix.includes">Must set fix.includes</fail> + <property name="javac.includes" value="${fix.includes}.java"/> + </target> + <target depends="init,-pre-debug-fix,compile-single" if="netbeans.home" name="-do-debug-fix"> + <j2seproject1:nbjpdareload/> + </target> + <target depends="init,-pre-debug-fix,-do-debug-fix" if="netbeans.home" name="debug-fix"/> + <!-- + ================= + PROFILING SECTION + ================= + --> + <!-- + pre NB7.2 profiler integration + --> + <target depends="profile-init,compile" description="Profile a project in the IDE." 
if="profiler.info.jvmargs.agent" name="-profile-pre72"> + <fail unless="netbeans.home">This target only works when run from inside the NetBeans IDE.</fail> + <nbprofiledirect> + <classpath> + <path path="${run.classpath}"/> + </classpath> + </nbprofiledirect> + <profile/> + </target> + <target depends="profile-init,compile-single" description="Profile a selected class in the IDE." if="profiler.info.jvmargs.agent" name="-profile-single-pre72"> + <fail unless="profile.class">Must select one file in the IDE or set profile.class</fail> + <fail unless="netbeans.home">This target only works when run from inside the NetBeans IDE.</fail> + <nbprofiledirect> + <classpath> + <path path="${run.classpath}"/> + </classpath> + </nbprofiledirect> + <profile classname="${profile.class}"/> + </target> + <target depends="profile-init,compile-single" if="profiler.info.jvmargs.agent" name="-profile-applet-pre72"> + <fail unless="netbeans.home">This target only works when run from inside the NetBeans IDE.</fail> + <nbprofiledirect> + <classpath> + <path path="${run.classpath}"/> + </classpath> + </nbprofiledirect> + <profile classname="sun.applet.AppletViewer"> + <customize> + <arg value="${applet.url}"/> + </customize> + </profile> + </target> + <target depends="profile-init,compile-test-single" if="profiler.info.jvmargs.agent" name="-profile-test-single-pre72"> + <fail unless="netbeans.home">This target only works when run from inside the NetBeans IDE.</fail> + <nbprofiledirect> + <classpath> + <path path="${run.test.classpath}"/> + </classpath> + </nbprofiledirect> + <junit dir="${profiler.info.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" jvm="${profiler.info.jvm}" showoutput="true"> + <env key="${profiler.info.pathvar}" path="${profiler.info.agentpath}:${profiler.current.path}"/> + <jvmarg value="${profiler.info.jvmargs.agent}"/> + <jvmarg line="${profiler.info.jvmargs}"/> + <test name="${profile.class}"/> + <classpath> + <path 
path="${run.test.classpath}"/> + </classpath> + <syspropertyset> + <propertyref prefix="test-sys-prop."/> + <mapper from="test-sys-prop.*" to="*" type="glob"/> + </syspropertyset> + <formatter type="brief" usefile="false"/> + <formatter type="xml"/> + </junit> + </target> + <!-- + end of pre NB72 profiling section + --> + <target if="netbeans.home" name="-profile-check"> + <condition property="profiler.configured"> + <or> + <contains casesensitive="true" string="${run.jvmargs.ide}" substring="-agentpath:"/> + <contains casesensitive="true" string="${run.jvmargs.ide}" substring="-javaagent:"/> + </or> + </condition> + </target> + <target depends="-profile-check,-profile-pre72" description="Profile a project in the IDE." if="profiler.configured" name="profile" unless="profiler.info.jvmargs.agent"> + <startprofiler/> + <antcall target="run"/> + </target> + <target depends="-profile-check,-profile-single-pre72" description="Profile a selected class in the IDE." if="profiler.configured" name="profile-single" unless="profiler.info.jvmargs.agent"> + <fail unless="run.class">Must select one file in the IDE or set run.class</fail> + <startprofiler/> + <antcall target="run-single"/> + </target> + <target depends="-profile-test-single-pre72" description="Profile a selected test in the IDE." name="profile-test-single"/> + <target depends="-profile-check" description="Profile a selected test in the IDE." if="profiler.configured" name="profile-test" unless="profiler.info.jvmargs"> + <fail unless="test.includes">Must select some files in the IDE or set test.includes</fail> + <startprofiler/> + <antcall target="test-single"/> + </target> + <target depends="-profile-check" description="Profile a selected class in the IDE." 
if="profiler.configured" name="profile-test-with-main"> + <!-- Fails fast when run.class is unset, then starts the profiler and delegates to the run-test-with-main target. --> + <fail unless="run.class">Must select one file in the IDE or set run.class</fail> + <startprofiler/> + <antcall target="run-test-with-main"/> + </target> + <target depends="-profile-check,-profile-applet-pre72" if="profiler.configured" name="profile-applet" unless="profiler.info.jvmargs.agent"> + <fail unless="applet.url">Must select one file in the IDE or set applet.url</fail> + <startprofiler/> + <antcall target="run-applet"/> + </target> + <!-- + =============== + JAVADOC SECTION + =============== + --> + <target depends="init" if="have.sources" name="-javadoc-build"> + <mkdir dir="${dist.javadoc.dir}"/> + <condition else="" property="javadoc.endorsed.classpath.cmd.line.arg" value="-J${endorsed.classpath.cmd.line.arg}"> + <and> + <isset property="endorsed.classpath.cmd.line.arg"/> + <not> + <equals arg1="${endorsed.classpath.cmd.line.arg}" arg2=""/> + </not> + </and> + </condition> + <javadoc additionalparam="${javadoc.additionalparam}" author="${javadoc.author}" charset="UTF-8" destdir="${dist.javadoc.dir}" docencoding="UTF-8" encoding="${javadoc.encoding.used}" failonerror="true" noindex="${javadoc.noindex}" nonavbar="${javadoc.nonavbar}" notree="${javadoc.notree}" private="${javadoc.private}" source="${javac.source}" splitindex="${javadoc.splitindex}" use="${javadoc.use}" useexternalfile="true" version="${javadoc.version}" windowtitle="${javadoc.windowtitle}"> + <classpath> + <path path="${javac.classpath}"/> + </classpath> + <fileset dir="${src.dir}" excludes="*.java,${excludes}" includes="${includes}"> + <filename name="**/*.java"/> + </fileset> + <fileset dir="${build.generated.sources.dir}" erroronmissingdir="false"> + <include name="**/*.java"/> + <exclude name="*.java"/> + </fileset> + <arg line="${javadoc.endorsed.classpath.cmd.line.arg}"/> + </javadoc> + <copy todir="${dist.javadoc.dir}"> + <fileset dir="${src.dir}" excludes="${excludes}" includes="${includes}"> + <filename 
name="**/doc-files/**"/> + </fileset> + <fileset dir="${build.generated.sources.dir}" erroronmissingdir="false"> + <include name="**/doc-files/**"/> + </fileset> + </copy> + </target> + <target depends="init,-javadoc-build" if="netbeans.home" name="-javadoc-browse" unless="no.javadoc.preview"> + <nbbrowse file="${dist.javadoc.dir}/index.html"/> + </target> + <target depends="init,-javadoc-build,-javadoc-browse" description="Build Javadoc." name="javadoc"/> + <!-- + ========================= + TEST COMPILATION SECTION + ========================= + --> + <target depends="init,compile" if="have.tests" name="-pre-pre-compile-test"> + <mkdir dir="${build.test.classes.dir}"/> + </target> + <target name="-pre-compile-test"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. --> + </target> + <target if="do.depend.true" name="-compile-test-depend"> + <j2seproject3:depend classpath="${javac.test.classpath}" destdir="${build.test.classes.dir}" srcdir=""/> + </target> + <target depends="init,deps-jar,compile,-pre-pre-compile-test,-pre-compile-test,-compile-test-depend" if="have.tests" name="-do-compile-test"> + <j2seproject3:javac apgeneratedsrcdir="${build.test.classes.dir}" classpath="${javac.test.classpath}" debug="true" destdir="${build.test.classes.dir}" processorpath="${javac.test.processorpath}" srcdir=""/> + <copy todir="${build.test.classes.dir}"/> + </target> + <target name="-post-compile-test"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. --> + </target> + <target depends="init,compile,-pre-pre-compile-test,-pre-compile-test,-do-compile-test,-post-compile-test" name="compile-test"/> + <target name="-pre-compile-test-single"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. 
--> + </target> + <target depends="init,deps-jar,compile,-pre-pre-compile-test,-pre-compile-test-single" if="have.tests" name="-do-compile-test-single"> + <fail unless="javac.includes">Must select some files in the IDE or set javac.includes</fail> + <j2seproject3:force-recompile destdir="${build.test.classes.dir}"/> + <j2seproject3:javac apgeneratedsrcdir="${build.test.classes.dir}" classpath="${javac.test.classpath}" debug="true" destdir="${build.test.classes.dir}" excludes="" includes="${javac.includes}" processorpath="${javac.test.processorpath}" sourcepath="" srcdir=""/> + <copy todir="${build.test.classes.dir}"/> + </target> + <target name="-post-compile-test-single"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. --> + </target> + <target depends="init,compile,-pre-pre-compile-test,-pre-compile-test-single,-do-compile-test-single,-post-compile-test-single" name="compile-test-single"/> + <!-- + ======================= + TEST EXECUTION SECTION + ======================= + --> + <target depends="init" if="have.tests" name="-pre-test-run"> + <mkdir dir="${build.test.results.dir}"/> + </target> + <target depends="init,compile-test,-pre-test-run" if="have.tests" name="-do-test-run"> + <j2seproject3:test testincludes="**/*Test.java"/> + </target> + <target depends="init,compile-test,-pre-test-run,-do-test-run" if="have.tests" name="-post-test-run"> + <fail if="tests.failed" unless="ignore.failing.tests">Some tests failed; see details above.</fail> + </target> + <target depends="init" if="have.tests" name="test-report"/> + <target depends="init" if="netbeans.home+have.tests" name="-test-browse"/> + <target depends="init,compile-test,-pre-test-run,-do-test-run,test-report,-post-test-run,-test-browse" description="Run unit tests." 
name="test"/> + <target depends="init" if="have.tests" name="-pre-test-run-single"> + <mkdir dir="${build.test.results.dir}"/> + </target> + <target depends="init,compile-test-single,-pre-test-run-single" if="have.tests" name="-do-test-run-single"> + <fail unless="test.includes">Must select some files in the IDE or set test.includes</fail> + <j2seproject3:test excludes="" includes="${test.includes}" testincludes="${test.includes}"/> + </target> + <target depends="init,compile-test-single,-pre-test-run-single,-do-test-run-single" if="have.tests" name="-post-test-run-single"> + <fail if="tests.failed" unless="ignore.failing.tests">Some tests failed; see details above.</fail> + </target> + <target depends="init,compile-test-single,-pre-test-run-single,-do-test-run-single,-post-test-run-single" description="Run single unit test." name="test-single"/> + <target depends="init,compile-test-single,-pre-test-run-single" if="have.tests" name="-do-test-run-single-method"> + <fail unless="test.class">Must select some files in the IDE or set test.class</fail> + <fail unless="test.method">Must select some method in the IDE or set test.method</fail> + <j2seproject3:test excludes="" includes="${javac.includes}" testincludes="${test.class}" testmethods="${test.method}"/> + </target> + <target depends="init,compile-test-single,-pre-test-run-single,-do-test-run-single-method" if="have.tests" name="-post-test-run-single-method"> + <fail if="tests.failed" unless="ignore.failing.tests">Some tests failed; see details above.</fail> + </target> + <target depends="init,compile-test-single,-pre-test-run-single,-do-test-run-single-method,-post-test-run-single-method" description="Run single unit test." 
name="test-single-method"/> + <!-- + ======================= + TEST DEBUGGING SECTION + ======================= + --> + <target depends="init,compile-test-single,-pre-test-run-single" if="have.tests" name="-debug-start-debuggee-test"> + <fail unless="test.class">Must select one file in the IDE or set test.class</fail> + <j2seproject3:test-debug excludes="" includes="${javac.includes}" testClass="${test.class}" testincludes="${javac.includes}"/> + </target> + <target depends="init,compile-test-single,-pre-test-run-single" if="have.tests" name="-debug-start-debuggee-test-method"> + <fail unless="test.class">Must select one file in the IDE or set test.class</fail> + <fail unless="test.method">Must select some method in the IDE or set test.method</fail> + <j2seproject3:test-debug excludes="" includes="${javac.includes}" testClass="${test.class}" testMethod="${test.method}" testincludes="${test.class}" testmethods="${test.method}"/> + </target> + <target depends="init,compile-test" if="netbeans.home+have.tests" name="-debug-start-debugger-test"> + <j2seproject1:nbjpdastart classpath="${debug.test.classpath}" name="${test.class}"/> + </target> + <target depends="init,compile-test-single,-debug-start-debugger-test,-debug-start-debuggee-test" name="debug-test"/> + <target depends="init,compile-test-single,-debug-start-debugger-test,-debug-start-debuggee-test-method" name="debug-test-method"/> + <target depends="init,-pre-debug-fix,compile-test-single" if="netbeans.home" name="-do-debug-fix-test"> + <j2seproject1:nbjpdareload dir="${build.test.classes.dir}"/> + </target> + <target depends="init,-pre-debug-fix,-do-debug-fix-test" if="netbeans.home" name="debug-fix-test"/> + <!-- + ========================= + APPLET EXECUTION SECTION + ========================= + --> + <target depends="init,compile-single" name="run-applet"> + <fail unless="applet.url">Must select one file in the IDE or set applet.url</fail> + <j2seproject1:java classname="sun.applet.AppletViewer"> + 
<customize> + <arg value="${applet.url}"/> + </customize> + </j2seproject1:java> + </target> + <!-- + ========================= + APPLET DEBUGGING SECTION + ========================= + --> + <target depends="init,compile-single" if="netbeans.home" name="-debug-start-debuggee-applet"> + <fail unless="applet.url">Must select one file in the IDE or set applet.url</fail> + <j2seproject3:debug classname="sun.applet.AppletViewer"> + <customize> + <arg value="${applet.url}"/> + </customize> + </j2seproject3:debug> + </target> + <target depends="init,compile-single,-debug-start-debugger,-debug-start-debuggee-applet" if="netbeans.home" name="debug-applet"/> + <!-- + =============== + CLEANUP SECTION + =============== + --> + <target name="-deps-clean-init" unless="built-clean.properties"> + <property location="${build.dir}/built-clean.properties" name="built-clean.properties"/> + <delete file="${built-clean.properties}" quiet="true"/> + </target> + <target if="already.built.clean.${basedir}" name="-warn-already-built-clean"> + <echo level="warn" message="Cycle detected: TwitterDataAnalytics was already built"/> + </target> + <target depends="init,-deps-clean-init" name="deps-clean" unless="no.deps"> + <mkdir dir="${build.dir}"/> + <touch file="${built-clean.properties}" verbose="false"/> + <property file="${built-clean.properties}" prefix="already.built.clean."/> + <antcall target="-warn-already-built-clean"/> + <propertyfile file="${built-clean.properties}"> + <entry key="${basedir}" value=""/> + </propertyfile> + </target> + <target depends="init" name="-do-clean"> + <delete dir="${build.dir}"/> + <delete dir="${dist.dir}" followsymlinks="false" includeemptydirs="true"/> + </target> + <target name="-post-clean"> + <!-- Empty placeholder for easier customization. --> + <!-- You can override this target in the ../build.xml file. --> + </target> + <target depends="init,deps-clean,-do-clean,-post-clean" description="Clean build products." 
name="clean"/> + <target name="-check-call-dep"> + <property file="${call.built.properties}" prefix="already.built."/> + <condition property="should.call.dep"> + <and> + <not> + <isset property="already.built.${call.subproject}"/> + </not> + <available file="${call.script}"/> + </and> + </condition> + </target> + <target depends="-check-call-dep" if="should.call.dep" name="-maybe-call-dep"> + <ant antfile="${call.script}" inheritall="false" target="${call.target}"> + <propertyset> + <propertyref prefix="transfer."/> + <mapper from="transfer.*" to="*" type="glob"/> + </propertyset> + </ant> + </target> +</project> diff --git a/nbproject/genfiles.properties b/nbproject/genfiles.properties new file mode 100644 index 0000000..b42a5d3 --- /dev/null +++ b/nbproject/genfiles.properties @@ -0,0 +1,8 @@ +build.xml.data.CRC32=72787bde +build.xml.script.CRC32=57d18e43 +build.xml.stylesheet.CRC32=8064a381@1.68.1.46 +# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml. +# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you. +nbproject/build-impl.xml.data.CRC32=72787bde +nbproject/build-impl.xml.script.CRC32=4304d30d +nbproject/build-impl.xml.stylesheet.CRC32=5a01deb7@1.68.1.46 diff --git a/nbproject/project.properties b/nbproject/project.properties new file mode 100644 index 0000000..e32b494 --- /dev/null +++ b/nbproject/project.properties @@ -0,0 +1,135 @@ +annotation.processing.enabled=true
+annotation.processing.enabled.in.editor=false
+annotation.processing.processors.list=
+annotation.processing.run.all.processors=true
+annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
+application.title=TwitterDataAnalytics
+application.vendor=skumar34
+build.classes.dir=${build.dir}/classes
+build.classes.excludes=**/*.java,**/*.form
+# This directory is removed when the project is cleaned:
+build.dir=build
+build.generated.dir=${build.dir}/generated
+build.generated.sources.dir=${build.dir}/generated-sources
+# Only compile against the classpath explicitly listed here:
+build.sysclasspath=ignore
+build.test.classes.dir=${build.dir}/test/classes
+build.test.results.dir=${build.dir}/test/results
+# Uncomment to specify the preferred debugger connection transport:
+#debug.transport=dt_socket
+debug.classpath=\
+ ${run.classpath}
+debug.test.classpath=\
+ ${run.test.classpath}
+# This directory is removed when the project is cleaned:
+dist.dir=dist
+dist.jar=${dist.dir}/TwitterDataAnalytics.jar
+dist.javadoc.dir=${dist.dir}/javadoc
+endorsed.classpath=
+excludes=
+file.reference.collections-generic-4.01.jar=lib/collections-generic-4.01.jar
+file.reference.colt-1.2.0.jar=lib/colt-1.2.0.jar
+file.reference.commons-codec-1.7.jar=lib/commons-codec-1.7.jar
+file.reference.commons-httpclient-3.1_1.jar=lib/commons-httpclient-3.1_1.jar
+file.reference.commons-lang-2.6.jar=lib/commons-lang-2.6.jar
+file.reference.commons-logging-1.1.1.jar=lib/commons-logging-1.1.1.jar
+file.reference.concurrent-1.3.4.jar=lib/concurrent-1.3.4.jar
+file.reference.gson-2.2.4.jar=lib/gson-2.2.4.jar
+file.reference.httpclient-4.2.1.jar=lib/httpclient-4.2.1.jar
+file.reference.httpcore-4.2.1.jar=lib/httpcore-4.2.1.jar
+file.reference.j3d-core-1.3.1.jar=lib/j3d-core-1.3.1.jar
+file.reference.jfig-1.5.2.jar=lib/jfig-1.5.2.jar
+file.reference.json.jar=lib/json.jar
+file.reference.jung-3d-2.0.1.jar=lib/jung-3d-2.0.1.jar
+file.reference.jung-3d-demos-2.0.1.jar=lib/jung-3d-demos-2.0.1.jar
+file.reference.jung-algorithms-2.0.1.jar=lib/jung-algorithms-2.0.1.jar
+file.reference.jung-api-2.0.1.jar=lib/jung-api-2.0.1.jar
+file.reference.jung-graph-impl-2.0.1.jar=lib/jung-graph-impl-2.0.1.jar
+file.reference.jung-io-2.0.1.jar=lib/jung-io-2.0.1.jar
+file.reference.jung-jai-2.0.1.jar=lib/jung-jai-2.0.1.jar
+file.reference.jung-jai-samples-2.0.1.jar=lib/jung-jai-samples-2.0.1.jar
+file.reference.jung-samples-2.0.1.jar=lib/jung-samples-2.0.1.jar
+file.reference.jung-visualization-2.0.1.jar=lib/jung-visualization-2.0.1.jar
+file.reference.log4j-1.2.15.jar=lib/log4j-1.2.15.jar
+file.reference.mallet-deps.jar=lib/mallet-deps.jar
+file.reference.mallet.jar=lib/mallet.jar
+file.reference.signpost-commonshttp4-1.2.1.2.jar=lib/signpost-commonshttp4-1.2.1.2.jar
+file.reference.signpost-core-1.2.1.2.jar=lib/signpost-core-1.2.1.2.jar
+file.reference.stax-api-1.0.1.jar=lib/stax-api-1.0.1.jar
+file.reference.TwitterDataAnalytics-src=src
+file.reference.vecmath-1.3.1.jar=lib/vecmath-1.3.1.jar
+file.reference.wstx-asl-3.2.6.jar=lib/wstx-asl-3.2.6.jar
+includes=**
+jar.compress=false
+javac.classpath=\
+ ${file.reference.collections-generic-4.01.jar}:\
+ ${file.reference.colt-1.2.0.jar}:\
+ ${file.reference.commons-codec-1.7.jar}:\
+ ${file.reference.commons-httpclient-3.1_1.jar}:\
+ ${file.reference.commons-lang-2.6.jar}:\
+ ${file.reference.commons-logging-1.1.1.jar}:\
+ ${file.reference.concurrent-1.3.4.jar}:\
+ ${file.reference.gson-2.2.4.jar}:\
+ ${file.reference.httpclient-4.2.1.jar}:\
+ ${file.reference.httpcore-4.2.1.jar}:\
+ ${file.reference.j3d-core-1.3.1.jar}:\
+ ${file.reference.jfig-1.5.2.jar}:\
+ ${file.reference.json.jar}:\
+ ${file.reference.jung-3d-2.0.1.jar}:\
+ ${file.reference.jung-3d-demos-2.0.1.jar}:\
+ ${file.reference.jung-algorithms-2.0.1.jar}:\
+ ${file.reference.jung-api-2.0.1.jar}:\
+ ${file.reference.jung-graph-impl-2.0.1.jar}:\
+ ${file.reference.jung-io-2.0.1.jar}:\
+ ${file.reference.jung-jai-2.0.1.jar}:\
+ ${file.reference.jung-jai-samples-2.0.1.jar}:\
+ ${file.reference.jung-samples-2.0.1.jar}:\
+ ${file.reference.jung-visualization-2.0.1.jar}:\
+ ${file.reference.log4j-1.2.15.jar}:\
+ ${file.reference.mallet-deps.jar}:\
+ ${file.reference.mallet.jar}:\
+ ${file.reference.signpost-commonshttp4-1.2.1.2.jar}:\
+ ${file.reference.signpost-core-1.2.1.2.jar}:\
+ ${file.reference.stax-api-1.0.1.jar}:\
+ ${file.reference.vecmath-1.3.1.jar}:\
+ ${file.reference.wstx-asl-3.2.6.jar}
+# Space-separated list of extra javac options
+javac.compilerargs=
+javac.deprecation=false
+javac.processorpath=\
+ ${javac.classpath}
+javac.source=1.7
+javac.target=1.7
+javac.test.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}
+javac.test.processorpath=\
+ ${javac.test.classpath}
+javadoc.additionalparam=
+javadoc.author=false
+javadoc.encoding=${source.encoding}
+javadoc.noindex=false
+javadoc.nonavbar=false
+javadoc.notree=false
+javadoc.private=false
+javadoc.splitindex=true
+javadoc.use=true
+javadoc.version=false
+javadoc.windowtitle=
+main.class=
+manifest.file=manifest.mf
+meta.inf.dir=${src.dir}/META-INF
+mkdist.disabled=false
+platform.active=default_platform
+run.classpath=\
+ ${javac.classpath}:\
+ ${build.classes.dir}
+# Space-separated list of JVM arguments used when running the project.
+# You may also define separate properties like run-sys-prop.name=value instead of -Dname=value.
+# To set system properties for unit tests define test-sys-prop.name=value:
+run.jvmargs=
+run.test.classpath=\
+ ${javac.test.classpath}:\
+ ${build.test.classes.dir}
+source.encoding=UTF-8
+src.dir=${file.reference.TwitterDataAnalytics-src}
diff --git a/nbproject/project.xml b/nbproject/project.xml new file mode 100644 index 0000000..c85b6f7 --- /dev/null +++ b/nbproject/project.xml @@ -0,0 +1,16 @@ +<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://www.netbeans.org/ns/project/1">
+ <type>org.netbeans.modules.java.j2seproject</type>
+ <configuration>
+ <data xmlns="http://www.netbeans.org/ns/j2se-project/3">
+ <name>TwitterDataAnalytics</name>
+ <source-roots>
+ <root id="src.dir"/>
+ </source-roots>
+ <test-roots/>
+ </data>
+ <libraries xmlns="http://www.netbeans.org/ns/ant-project-libraries/1">
+ <definitions>.\lib\nblibraries.properties</definitions>
+ </libraries>
+ </configuration>
+</project>
diff --git a/src/Chapter2/Location/LocationTranslationExample.java b/src/Chapter2/Location/LocationTranslationExample.java new file mode 100644 index 0000000..69178dc --- /dev/null +++ b/src/Chapter2/Location/LocationTranslationExample.java @@ -0,0 +1,124 @@ +/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University + * @author shamanth + */ +package Chapter2.Location; + +import Chapter2.support.Location; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.net.URLEncoder; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.json.JSONArray; +import org.json.JSONException; + +public class LocationTranslationExample +{ + + /** + * Translates a location string to coordinates using the database or Nominatim Service + * @param loc + * @return + */ + public Location TranslateLoc(String loc) + { + if(loc!=null&&!loc.isEmpty()) + { + String encodedLoc=""; + try { + //Step 1: Encode the location name + encodedLoc = URLEncoder.encode(loc, "UTF-8"); + } catch (UnsupportedEncodingException ex) { + Logger.getLogger(LocationTranslationExample.class.getName()).log(Level.SEVERE, null, ex); + } + //Step 2: Create a get request to MapQuest API with the name of the location + String url= "http://open.mapquestapi.com/nominatim/v1/search?q="+encodedLoc+"&format=json"; + String page = ReadHTML(url); + if(page!=null) + { + try{ + JSONArray results = new JSONArray(page); + if(results.length()>0) + { + //Step 3: Read and extract the coordinates of the location as a JSONObject + Location loca = new Location(results.getJSONObject(0).getDouble("lat"),results.getJSONObject(0).getDouble("lon")); + return loca; + } + }catch(JSONException ex) + { + 
Logger.getLogger(LocationTranslationExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + } + return null; + } + + /** + * Extracts the html content of a URL + * @param url + * @return html page + */ + public String ReadHTML(String url) + { + URLConnection conn = null; + URL theURL = null; + try + { + theURL = new URL(url); + } + catch ( MalformedURLException e) + { + System.out.println("Bad URL: " + theURL); + return null; + } + String page = ""; + try + { + conn = theURL.openConnection(); + HttpURLConnection huc = (HttpURLConnection) conn; + conn.setConnectTimeout(2000); + huc.setRequestProperty("User-Agent", "Mozilla/4.5"); + //Set your email address in the request so MapQuest knows how to reach you in the event of problems + huc.setRequestProperty("Email", "twitterdataanalytics@gmail.com"); + if(huc.getResponseCode()>=400&&huc.getResponseCode()<=404) + { + return null; + } + conn.connect(); + BufferedReader bRead = new BufferedReader(new InputStreamReader((InputStream) conn.getContent())); + String temp=null; + while( (temp= bRead.readLine())!=null) + { + page = page+"\n"+temp; + } + bRead.close(); + } + catch (IOException e) { + //System.out.print("ReadHTML IO Error:" + e.getMessage()+" \n"); + return null; + } + return page; + } + + public static void main(String[] args) + { + LocationTranslationExample lte = new LocationTranslationExample(); + if(args!=null) + { + if(args.length>0) + { + for(int i=0;i<args.length;i++) + { + System.out.println(lte.TranslateLoc(args[i]).toString()); + } + } + } + } +} diff --git a/src/Chapter2/openauthentication/OAuthExample.java b/src/Chapter2/openauthentication/OAuthExample.java new file mode 100644 index 0000000..9b2ec7a --- /dev/null +++ b/src/Chapter2/openauthentication/OAuthExample.java @@ -0,0 +1,79 @@ +/* TweetTracker. 
Copyright (c) Arizona Board of Regents on behalf of Arizona State University + * @author shamanth + */ +package Chapter2.openauthentication; + +import Chapter2.support.OAuthTokenSecret; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import oauth.signpost.OAuth; +import oauth.signpost.OAuthConsumer; +import oauth.signpost.OAuthProvider; +import oauth.signpost.basic.DefaultOAuthProvider; +import oauth.signpost.commonshttp.CommonsHttpOAuthConsumer; +import oauth.signpost.exception.OAuthCommunicationException; +import oauth.signpost.exception.OAuthExpectationFailedException; +import oauth.signpost.exception.OAuthMessageSignerException; +import oauth.signpost.exception.OAuthNotAuthorizedException; +import utils.OAuthUtils; + +public class OAuthExample +{ + public OAuthTokenSecret GetUserAccessKeySecret() + { + try { + //consumer key for Twitter Data Analytics application + if(OAuthUtils.CONSUMER_KEY.isEmpty()) + { + System.out.println("Register an application and copy the consumer key into the configuration file."); + return null; + } + if(OAuthUtils.CONSUMER_SECRET.isEmpty()) + { + System.out.println("Register an application and copy the consumer secret into the configuration file."); + return null; + } + OAuthConsumer consumer = new CommonsHttpOAuthConsumer(OAuthUtils.CONSUMER_KEY,OAuthUtils.CONSUMER_SECRET); + OAuthProvider provider = new DefaultOAuthProvider(OAuthUtils.REQUEST_TOKEN_URL, OAuthUtils.ACCESS_TOKEN_URL, OAuthUtils.AUTHORIZE_URL); + String authUrl = provider.retrieveRequestToken(consumer, OAuth.OUT_OF_BAND); + System.out.println("Now visit:\n" + authUrl + "\n and grant this app authorization"); + System.out.println("Enter the PIN code and hit ENTER when you're done:"); + BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); + String pin = br.readLine(); + System.out.println("Fetching access token from Twitter"); + provider.retrieveAccessToken(consumer,pin); + String accesstoken = 
consumer.getToken(); + String accesssecret = consumer.getTokenSecret(); + OAuthTokenSecret tokensecret = new OAuthTokenSecret(accesstoken,accesssecret); + return tokensecret; + } catch (OAuthNotAuthorizedException ex) { + ex.printStackTrace(); + } catch (OAuthMessageSignerException ex) { + ex.printStackTrace(); + } catch (OAuthExpectationFailedException ex) { + ex.printStackTrace(); + } catch (OAuthCommunicationException ex) { + ex.printStackTrace(); + } catch(IOException ex) + { + ex.printStackTrace(); + } + return null; + } + + public static OAuthTokenSecret DEBUGUserAccessSecret() + { + String accesstoken = "1262619914-tcCPB1SyXy3BMuui9OAhprcPmqg3z2csSjDSCNY"; + String accesssecret = "cXXO0qFLBjLXGtE97pnf5Vv1RZGxZ2FZ97wCYiaVU"; + OAuthTokenSecret tokensecret = new OAuthTokenSecret(accesstoken,accesssecret); + return tokensecret; + } + + public static void main(String[] args) + { + OAuthExample aue = new OAuthExample(); + OAuthTokenSecret tokensecret = aue.GetUserAccessKeySecret(); + System.out.println(tokensecret.toString()); + } +} diff --git a/src/Chapter2/restapi/RESTApiExample.java b/src/Chapter2/restapi/RESTApiExample.java new file mode 100644 index 0000000..9ceb88b --- /dev/null +++ b/src/Chapter2/restapi/RESTApiExample.java @@ -0,0 +1,676 @@ +/* TweetTracker. 
Copyright (c) Arizona Board of Regents on behalf of Arizona State University + * @author shamanth + */ +package Chapter2.restapi; + +import Chapter2.support.APIType; +import Chapter2.support.OAuthTokenSecret; +import Chapter2.openauthentication.OAuthExample; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.ArrayList; +import java.util.logging.Level; +import java.util.logging.Logger; +import oauth.signpost.OAuthConsumer; +import oauth.signpost.basic.DefaultOAuthConsumer; +import oauth.signpost.exception.OAuthCommunicationException; +import oauth.signpost.exception.OAuthExpectationFailedException; +import oauth.signpost.exception.OAuthMessageSignerException; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +public class RESTApiExample +{ + //file handlers to store the collected user information + BufferedWriter OutFileWriter; + OAuthTokenSecret OAuthTokens; + /** + * name of the file containing a list of users + */ + final String DEF_FILENAME = "users.txt"; + final String DEF_OUTFILENAME = "restapiresults.json"; + ArrayList<String> Usernames = new ArrayList<String>(); + OAuthConsumer Consumer; + + /** + * Creates a OAuthConsumer with the current consumer & user access tokens and secrets + * @return consumer + */ + public OAuthConsumer GetConsumer() + { + OAuthConsumer consumer = new DefaultOAuthConsumer(utils.OAuthUtils.CONSUMER_KEY,utils.OAuthUtils.CONSUMER_SECRET); + consumer.setTokenWithSecret(OAuthTokens.getAccessToken(),OAuthTokens.getAccessSecret()); + return consumer; + } + + /** + * Reads the file and loads the users in the file to be crawled + * @param filename + */ + public void ReadUsers(String filename) + { + 
BufferedReader br = null; + try { + br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8")); + String temp = ""; + while((temp = br.readLine())!=null) + { + if(!temp.isEmpty()) + { + Usernames.add(temp); + } + } + } catch (IOException ex) { + ex.printStackTrace(); + } + finally{ + try { + br.close(); + } catch (IOException ex) { + ex.printStackTrace(); + } + } + } + + /** + * Load the User Access Token, and the User Access Secret + */ + public void LoadTwitterToken() + { + //Un-comment before release +// OAuthExample oae = new OAuthExample(); +// OAuthTokens = oae.GetUserAccessKeySecret(); + //Remove before release + OAuthTokens = OAuthExample.DEBUGUserAccessSecret(); + } + + public static void main(String[] args) + { + RESTApiExample rae = new RESTApiExample(); + rae.LoadTwitterToken(); + rae.Consumer = rae.GetConsumer(); +// System.out.println(rae.GetStatuses("twtanalyticsbk")); + System.out.println(rae.GetRateLimitStatus()); +// int apicode = InfoType.PROFILE_INFO; +// String infilename = rae.DEF_FILENAME; +// String outfilename = rae.DEF_OUTFILENAME; +// if(args!=null) +// { +// if(args.length>2) +// { +// apicode = Integer.parseInt(args[2]); +// outfilename = args[1]; +// infilename = args[0]; +// } +// if(args.length>1) +// { +// outfilename = args[1]; +// infilename = args[0]; +// } +// else +// if(args.length>0) +// { +// infilename = args[0]; +// } +// } +// rae.InitializeWriters(outfilename); +// rae.ReadUsers(infilename); +// if(apicode!=InfoType.PROFILE_INFO&&apicode!=InfoType.FOLLOWER_INFO&&apicode!=InfoType.FRIEND_INFO&&apicode!=InfoType.STATUSES_INFO) +// { +// System.out.println("Invalid API type: Use 0 for Profile, 1 for Followers, 2 for Friends, and 3 for Statuses"); +// System.exit(0); +// } +// if(rae.Usernames.size()>0) +// { +// //TO-DO: Print the possible API types and get user selection to crawl the users. 
+// rae.LoadTwitterToken(); +// for(String user:rae.Usernames) +// { +// if(apicode==InfoType.PROFILE_INFO) +// { +// JSONObject jobj = rae.GetProfile(user); +// if(jobj!=null&&jobj.length()==0) +// { +// rae.WriteToFile(user, jobj.toString()); +// } +// } +// else +// if(apicode==InfoType.FRIEND_INFO) +// { +// JSONArray statusarr = rae.GetFriends(user); +// if(statusarr.length()>0) +// { +// rae.WriteToFile(user, statusarr.toString()); +// } +// } +// else +// if(apicode == InfoType.FOLLOWER_INFO) +// { +// JSONArray statusarr = rae.GetFollowers(user); +// if(statusarr.length()>0) +// { +// rae.WriteToFile(user, statusarr.toString()); +// } +// } +// else +// if(apicode == InfoType.STATUSES_INFO) +// { +// JSONArray statusarr = rae.GetStatuses(user); +// if(statusarr.length()>0) +// { +// rae.GetStatuses(user); +// } +// } +// } +// } +//// now you can close the files as all the threads have finished +// rae.CleanupAfterFinish(); + } + + /** + * Retrieves the rate limit status of the application + * @return + */ + public JSONObject GetRateLimitStatus() + { + try{ + URL url = new URL("https://api.twitter.com/1.1/application/rate_limit_status.json"); + HttpURLConnection huc = (HttpURLConnection) url.openConnection(); + huc.setReadTimeout(5000); + Consumer.sign(huc); + huc.connect(); + BufferedReader bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getContent())); + StringBuffer page = new StringBuffer(); + String temp= ""; + while((temp = bRead.readLine())!=null) + { + page.append(temp); + } + bRead.close(); + return (new JSONObject(page.toString())); + } catch (JSONException ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } catch (OAuthCommunicationException ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } catch (OAuthMessageSignerException ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } catch (OAuthExpectationFailedException 
ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + }catch(IOException ex) + { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } + return null; + } + + /** + * Initialize the file writer + * @param path of the file + * @param outFilename name of the file + */ + public void InitializeWriters(String outFilename) { + try { + File fl = new File(outFilename); + if(!fl.exists()) + { + fl.createNewFile(); + } + /** + * Use UTF-8 encoding when saving files to avoid + * losing Unicode characters in the data + */ + OutFileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilename,true),"UTF-8")); + } catch (IOException ex) { + ex.printStackTrace(); + } + } + + /** + * Close the opened filewriter to save the data + */ + public void CleanupAfterFinish() + { + try { + OutFileWriter.close(); + } catch (IOException ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + + /** + * Writes the retrieved data to the output file + * @param data containing the retrived information in JSON + * @param user name of the user currently being written + */ + public void WriteToFile(String user, String data) + { + try + { + OutFileWriter.write(data); + OutFileWriter.newLine(); + } catch (IOException ex) { + ex.printStackTrace(); + } + } + + /** + * Retrives the profile information of the user + * @param username of the user whose profile needs to be retrieved + * @return the profile information as a JSONObject + */ + public JSONObject GetProfile(String username) + { + BufferedReader bRead = null; + JSONObject profile = null; + try { + System.out.println("Processing profile of "+username); + boolean flag = true; + URL url = new URL("https://api.twitter.com/1.1/users/show.json?screen_name="+username); + HttpURLConnection huc = (HttpURLConnection) url.openConnection(); + huc.setReadTimeout(5000); + // Step 2: Sign the request using the OAuth Secret + 
Consumer.sign(huc); + huc.connect(); + if(huc.getResponseCode()==404||huc.getResponseCode()==401) + { + System.out.println(huc.getResponseMessage()); + } + else + if(huc.getResponseCode()==500||huc.getResponseCode()==502||huc.getResponseCode()==503) + { + try { + huc.disconnect(); + System.out.println(huc.getResponseMessage()); + Thread.sleep(3000); + } catch (InterruptedException ex) { + ex.printStackTrace(); + } + } + else + // Step 3: If the requests have been exhausted, then wait until the quota is renewed + if(huc.getResponseCode()==429) + { + try { + huc.disconnect(); + Thread.sleep(this.GetWaitTime("/users/show/:id")); + flag = false; + } catch (InterruptedException ex) { + ex.printStackTrace(); + } + } + if(!flag) + { + //recreate the connection because something went wrong the first time. + huc.connect(); + } + StringBuilder content=new StringBuilder(); + if(flag) + { + bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getContent())); + String temp= ""; + while((temp = bRead.readLine())!=null) + { + content.append(temp); + } + } + huc.disconnect(); + try { + profile = new JSONObject(content.toString()); + } catch (JSONException ex) { + ex.printStackTrace(); + } + } catch (OAuthCommunicationException ex) { + ex.printStackTrace(); + } catch (OAuthMessageSignerException ex) { + ex.printStackTrace(); + } catch (OAuthExpectationFailedException ex) { + ex.printStackTrace(); + } catch (IOException ex) { + ex.printStackTrace(); + } + return profile; + } + + /** + * Retrieves the followers of a user + * @param username the name of the user whose followers need to be retrieved + * @return a list of user objects corresponding to the followers of the user + */ + public JSONArray GetFollowers(String username) + { + BufferedReader bRead = null; + JSONArray followers = new JSONArray(); + try { + System.out.println(" followers user = "+username); + long cursor = -1; + while(true) + { + if(cursor==0) + { + break; + } + // Step 1: Create the APi request 
using the supplied username + URL url = new URL("https://api.twitter.com/1.1/followers/list.json?screen_name="+username+"&cursor=" + cursor); + HttpURLConnection huc = (HttpURLConnection) url.openConnection(); + huc.setReadTimeout(5000); + // Step 2: Sign the request using the OAuth Secret + Consumer.sign(huc); + huc.connect(); + if(huc.getResponseCode()==400||huc.getResponseCode()==404) + { + System.out.println(huc.getResponseMessage()); + break; + } + else + if(huc.getResponseCode()==500||huc.getResponseCode()==502||huc.getResponseCode()==503||huc.getResponseCode()==504) + { + try{ + System.out.println(huc.getResponseMessage()); + huc.disconnect(); + Thread.sleep(3000); + continue; + } catch (InterruptedException ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + else + // Step 3: If the requests have been exhausted, then wait until the quota is renewed + if(huc.getResponseCode()==429) + { + try { + huc.disconnect(); + Thread.sleep(this.GetWaitTime("/followers/list")); + continue; + } catch (InterruptedException ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + // Step 4: Retrieve the followers list from Twitter + bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getContent())); + StringBuilder content = new StringBuilder(); + String temp = ""; + while((temp = bRead.readLine())!=null) + { + content.append(temp); + } + try { + JSONObject jobj = new JSONObject(content.toString()); + // Step 5: Retrieve the token for the next request + cursor = jobj.getLong("next_cursor"); + JSONArray idlist = jobj.getJSONArray("users"); + if(idlist.length()==0) + { + break; + } + for(int i=0;i<idlist.length();i++) + { + followers.put(idlist.getJSONObject(i)); + } + } catch (JSONException ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + } catch (OAuthCommunicationException ex) { + ex.printStackTrace(); + } catch 
(OAuthMessageSignerException ex) { + ex.printStackTrace(); + } catch (OAuthExpectationFailedException ex) { + ex.printStackTrace(); + } catch (IOException ex) { + ex.printStackTrace(); + } + return followers; + } + + /** + * Retrieved the status messages of a user + * @param username the name of the user whose status messages need to be retrieved + * @return a list of status messages + */ + public JSONArray GetStatuses(String username) + { + BufferedReader bRead = null; + //Get the maximum number of tweets possible in a single page 200 + int tweetcount = 200; + //Include include_rts because it is counted towards the limit anyway. + boolean include_rts = true; + JSONArray statuses = new JSONArray(); + try { + System.out.println("Processing status messages of "+username); + long maxid = 0; + while(true) + { + URL url = null; + if(maxid==0) + { + url = new URL("https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=" + username+"&include_rts="+include_rts+"&count="+tweetcount); + } + else + { + //use max_id to get the tweets in the next page. Use max_id-1 to avoid getting redundant tweets. 
+ url = new URL("https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=" + username+"&include_rts="+include_rts+"&count="+tweetcount+"&max_id="+(maxid-1)); + } + HttpURLConnection huc = (HttpURLConnection) url.openConnection(); + huc.setReadTimeout(5000); + Consumer.sign(huc); + huc.connect(); + if(huc.getResponseCode()==400||huc.getResponseCode()==404) + { + System.out.println(huc.getResponseCode()); + break; + } + else + if(huc.getResponseCode()==500||huc.getResponseCode()==502||huc.getResponseCode()==503) + { + try {System.out.println(huc.getResponseCode()); + Thread.sleep(3000); + } catch (InterruptedException ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + else + // Step 3: If the requests have been exhausted, then wait until the quota is renewed + if(huc.getResponseCode()==429) + { + try { + huc.disconnect(); + Thread.sleep(this.GetWaitTime("/statuses/user_timeline")); + continue; + } catch (InterruptedException ex) { + ex.printStackTrace(); + } + } + bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getInputStream())); + StringBuilder content = new StringBuilder(); + String temp = ""; + while((temp = bRead.readLine())!=null) + { + content.append(temp); + } + try { + JSONArray statusarr = new JSONArray(content.toString()); + if(statusarr.length()==0) + { + break; + } + for(int i=0;i<statusarr.length();i++) + { + JSONObject jobj = statusarr.getJSONObject(i); + statuses.put(jobj); + //Get the max_id to get the next batch of tweets + if(!jobj.isNull("id")) + { + maxid = jobj.getLong("id"); + } + } + } catch (JSONException ex) { + ex.printStackTrace(); + } + } + System.out.println(statuses.length()); + } catch (OAuthCommunicationException ex) { + ex.printStackTrace(); + } catch (OAuthMessageSignerException ex) { + ex.printStackTrace(); + } catch (OAuthExpectationFailedException ex) { + ex.printStackTrace(); + } catch (IOException ex) { + ex.printStackTrace(); + } + return statuses; + } 
+ + /** + * Retrieves the friends of a user + * @param username the name of the user whose friends need to be fetched + * @return a list of user objects who are friends of the user + */ + public JSONArray GetFriends(String username) + { + BufferedReader bRead = null; + JSONArray friends = new JSONArray(); + try { + System.out.println("Processing friends of "+username); + long cursor = -1; + while(true) + { + if(cursor==0) + { + break; + } + // Step 1: Create the APi request using the supplied username + URL url = new URL("https://api.twitter.com/1.1/friends/list.json?screen_name="+username+"&cursor="+cursor); + HttpURLConnection huc = (HttpURLConnection) url.openConnection(); + huc.setReadTimeout(5000); + //Step 2: Sign the request using the OAuth Secret + Consumer.sign(huc); + huc.connect(); + if(huc.getResponseCode()==400||huc.getResponseCode()==401) + { + System.out.println(huc.getResponseMessage()); + break; + } + else + if(huc.getResponseCode()==500||huc.getResponseCode()==502||huc.getResponseCode()==503) + { + try { + System.out.println(huc.getResponseMessage()); + Thread.sleep(3000); + continue; + } catch (InterruptedException ex) { + ex.printStackTrace(); + } + } + else + // Step 3: If the requests have been exhausted, then wait until the quota is renewed + if(huc.getResponseCode()==429) + { + try { + huc.disconnect(); + Thread.sleep(this.GetWaitTime("/friends/list")); + continue; + } catch (InterruptedException ex) { + ex.printStackTrace(); + } + } + // Step 4: Retrieve the friends list from Twitter + bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getContent())); + StringBuilder content = new StringBuilder(); + String temp = ""; + while((temp = bRead.readLine())!=null) + { + content.append(temp); + } + try { + JSONObject jobj = new JSONObject(content.toString()); + // Step 5: Retrieve the token for the next request + cursor = jobj.getLong("next_cursor"); + JSONArray userlist = jobj.getJSONArray("users"); + if(userlist.length()==0) + { + 
break; + } + for(int i=0;i<userlist.length();i++) + { + friends.put(userlist.get(i)); + } + } catch (JSONException ex) { + ex.printStackTrace(); + } + huc.disconnect(); + } + } catch (OAuthCommunicationException ex) { + ex.printStackTrace(); + } catch (OAuthMessageSignerException ex) { + ex.printStackTrace(); + } catch (OAuthExpectationFailedException ex) { + ex.printStackTrace(); + } catch (IOException ex) { + ex.printStackTrace(); + } + return friends; + } + + /** + * Retrieves the wait time if the API Rate Limit has been hit + * @param api the name of the API currently being used + * @return the number of milliseconds to wait before initiating a new request + */ + public long GetWaitTime(String api) + { + JSONObject jobj = this.GetRateLimitStatus(); + if(jobj!=null) + { + try { + if(!jobj.isNull("resources")) + { + JSONObject resourcesobj = jobj.getJSONObject("resources"); + JSONObject apilimit = null; + if(api.equals(APIType.USER_TIMELINE)) + { + JSONObject statusobj = resourcesobj.getJSONObject("statuses"); + apilimit = statusobj.getJSONObject(api); + } + else + if(api.equals(APIType.FOLLOWERS)) + { + JSONObject followersobj = resourcesobj.getJSONObject("followers"); + apilimit = followersobj.getJSONObject(api); + } + else + if(api.equals(APIType.FRIENDS)) + { + JSONObject friendsobj = resourcesobj.getJSONObject("friends"); + apilimit = friendsobj.getJSONObject(api); + } + else + if(api.equals(APIType.USER_PROFILE)) + { + JSONObject userobj = resourcesobj.getJSONObject("users"); + apilimit = userobj.getJSONObject(api); + } + int numremhits = apilimit.getInt("remaining"); + if(numremhits<=1) + { + long resettime = apilimit.getInt("reset"); + resettime = resettime*1000; //convert to milliseconds + return resettime; + } + } + } catch (JSONException ex) { + ex.printStackTrace(); + } + } + return 0; + } +} diff --git a/src/Chapter2/restapi/RESTSearchExample.java b/src/Chapter2/restapi/RESTSearchExample.java new file mode 100644 index 0000000..510661c --- /dev/null 
+++ b/src/Chapter2/restapi/RESTSearchExample.java @@ -0,0 +1,311 @@ +/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University + * @author shamanth + */ +package Chapter2.restapi; + +import Chapter2.support.OAuthTokenSecret; +import Chapter2.openauthentication.OAuthExample; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.net.HttpURLConnection; +import java.net.URL; +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.logging.Level; +import java.util.logging.Logger; +import oauth.signpost.OAuthConsumer; +import oauth.signpost.basic.DefaultOAuthConsumer; +import oauth.signpost.exception.OAuthCommunicationException; +import oauth.signpost.exception.OAuthExpectationFailedException; +import oauth.signpost.exception.OAuthMessageSignerException; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONTokener; + +public class RESTSearchExample +{ + BufferedWriter OutFileWriter; + OAuthTokenSecret OAuthTokens; + OAuthConsumer Consumer; + String query = "#protest"; + String DEF_FILENAME = "searchresults.json"; + + /** + * Creates a OAuthConsumer with the current consumer & user access tokens and secrets + * @return consumer + */ + public OAuthConsumer GetConsumer() + { + OAuthConsumer consumer = new DefaultOAuthConsumer(utils.OAuthUtils.CONSUMER_KEY,utils.OAuthUtils.CONSUMER_SECRET); + consumer.setTokenWithSecret(OAuthTokens.getAccessToken(), OAuthTokens.getAccessSecret()); + return consumer; + } + + /** + * Load the User Access Token, and the User Access Secret + */ + public void LoadTwitterToken() + { + //Un-comment before release +// OAuthExample oae = new OAuthExample(); +// OAuthTokens = oae.GetUserAccessKeySecret(); + //Remove before release + 
OAuthTokens = OAuthExample.DEBUGUserAccessSecret(); + } + + /** + * Fetches tweets matching a query + * @param query for which tweets need to be fetched + * @return an array of status objects + */ + public JSONArray GetSearchResults(String query) + { + try{ + //construct the request url + String URL_PARAM_SEPERATOR = "&"; + StringBuilder url = new StringBuilder(); + url.append("https://api.twitter.com/1.1/search/tweets.json?q="); + //query needs to be encoded + url.append(URLEncoder.encode(query, "UTF-8")); + url.append(URL_PARAM_SEPERATOR); + url.append("count=100"); + URL navurl = new URL(url.toString()); + HttpURLConnection huc = (HttpURLConnection) navurl.openConnection(); + huc.setReadTimeout(5000); + Consumer.sign(huc); + huc.connect(); + if(huc.getResponseCode()==400||huc.getResponseCode()==404||huc.getResponseCode()==429) + { + System.out.println(huc.getResponseMessage()); + try { + huc.disconnect(); + Thread.sleep(this.GetWaitTime("/friends/list")); + } catch (InterruptedException ex) { + ex.printStackTrace(); + } + } + if(huc.getResponseCode()==500||huc.getResponseCode()==502||huc.getResponseCode()==503) + { + System.out.println(huc.getResponseMessage()); + try { + Thread.sleep(2000); + } catch (InterruptedException ex) { + Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + BufferedReader bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getInputStream())); + String temp; + StringBuilder page = new StringBuilder(); + while( (temp = bRead.readLine())!=null) + { + page.append(temp); + } + JSONTokener jsonTokener = new JSONTokener(page.toString()); + try { + JSONObject json = new JSONObject(jsonTokener); + JSONArray results = json.getJSONArray("statuses"); + return results; + } catch (JSONException ex) { + Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex); + } + } catch (OAuthCommunicationException ex) { + 
Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex); + } catch (OAuthMessageSignerException ex) { + Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex); + } catch (OAuthExpectationFailedException ex) { + Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex); + }catch(IOException ex) + { + ex.printStackTrace(); + } + return null; + } + + /** + * Retrieves the rate limit status of the application + * @return + */ + public JSONObject GetRateLimitStatus() + { + try{ + URL url = new URL("https://api.twitter.com/1.1/application/rate_limit_status.json"); + HttpURLConnection huc = (HttpURLConnection) url.openConnection(); + huc.setReadTimeout(5000); + OAuthConsumer consumer = new DefaultOAuthConsumer(utils.OAuthUtils.CONSUMER_KEY,utils.OAuthUtils.CONSUMER_SECRET); + consumer.setTokenWithSecret(OAuthTokens.getAccessToken(), OAuthTokens.getAccessSecret()); + consumer.sign(huc); + huc.connect(); + BufferedReader bRead = new BufferedReader(new InputStreamReader((InputStream) huc.getContent())); + StringBuffer page = new StringBuffer(); + String temp= ""; + while((temp = bRead.readLine())!=null) + { + page.append(temp); + } + bRead.close(); + return (new JSONObject(page.toString())); + } catch (JSONException ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } catch (OAuthCommunicationException ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } catch (OAuthMessageSignerException ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } catch (OAuthExpectationFailedException ex) { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + }catch(IOException ex) + { + Logger.getLogger(RESTApiExample.class.getName()).log(Level.SEVERE, null, ex); + } + return null; + } + + /** + * Initialize the file writer + * @param path of the file + * @param outFilename name of the 
file + */ + public void InitializeWriters(String outFilename) { + try { + File fl = new File(outFilename); + if(!fl.exists()) + { + fl.createNewFile(); + } + /** + * Use UTF-8 encoding when saving files to avoid + * losing Unicode characters in the data + */ + OutFileWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilename,true),"UTF-8")); + } catch (IOException ex) { + ex.printStackTrace(); + } + } + + /** + * Close the opened filewriter to save the data + */ + public void CleanupAfterFinish() + { + try { + OutFileWriter.close(); + } catch (IOException ex) { + Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + + /** + * Writes the retrieved data to the output file + * @param data containing the retrived information in JSON + * @param user name of the user currently being written + */ + public void WriteToFile(JSONArray searchResults) + { + try + { + for(int i=0;i<searchResults.length();i++) + { + try { + OutFileWriter.write(searchResults.getJSONObject(i).toString()); + OutFileWriter.newLine(); + } catch (JSONException ex) { + Logger.getLogger(RESTSearchExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + } catch (IOException ex) { + ex.printStackTrace(); + } + } + + /** + * Retrieves the wait time if the API Rate Limit has been hit + * @param api the name of the API currently being used + * @return the number of milliseconds to wait before initiating a new request + */ + public long GetWaitTime(String api) + { + JSONObject jobj = this.GetRateLimitStatus(); + if(jobj!=null) + { + try { + if(!jobj.isNull("resources")) + { + JSONObject resourcesobj = jobj.getJSONObject("resources"); + JSONObject statusobj = resourcesobj.getJSONObject("statuses"); + JSONObject apilimit = statusobj.getJSONObject(api); + int numremhits = apilimit.getInt("remaining"); + if(numremhits<=1) + { + long resettime = apilimit.getInt("reset"); + resettime = resettime*1000; //convert to milliseconds + return resettime; 
+ } + } + } catch (JSONException ex) { + ex.printStackTrace(); + } + } + return 0; + } + + /** + * Creates an OR search query from the supplied terms + * @param queryTerms + * @return a String formatted as term1 OR term2 + */ + public String CreateORQuery(ArrayList<String> queryTerms) + { + String OR_Operator = " OR "; + StringBuffer querystr = new StringBuffer(); + int count = 1; + for(String term:queryTerms) + { + if(count==1) + { + querystr.append(term); + } + else + { + querystr.append(OR_Operator).append(term); + } + } + return querystr.toString(); + } + + public static void main(String[] args) + { + RESTSearchExample rse = new RESTSearchExample(); + ArrayList<String> queryterms = new ArrayList<String>(); + String outfilename = rse.DEF_FILENAME; + if(args!=null) + { + if(args.length>0) + { + for(int i=0;i<args.length;i++) + { + queryterms.add(args[i]); + } + } + else + { + queryterms.add(rse.query); + } + } + rse.LoadTwitterToken(); + rse.Consumer = rse.GetConsumer(); + System.out.println(rse.GetRateLimitStatus()); + rse.InitializeWriters(outfilename); + JSONArray results = rse.GetSearchResults(rse.CreateORQuery(queryterms)); + if(results!=null) + { + rse.WriteToFile(results); + } + rse.CleanupAfterFinish(); + } +} diff --git a/src/Chapter2/streamingapi/StreamingApiExample.java b/src/Chapter2/streamingapi/StreamingApiExample.java new file mode 100644 index 0000000..8abfff4 --- /dev/null +++ b/src/Chapter2/streamingapi/StreamingApiExample.java @@ -0,0 +1,372 @@ +/* TweetTracker. 
Copyright (c) Arizona Board of Regents on behalf of Arizona State University + * @author shamanth + */ +package Chapter2.streamingapi; + +import Chapter2.support.OAuthTokenSecret; +import Chapter2.openauthentication.OAuthExample; +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import oauth.signpost.OAuthConsumer; +import oauth.signpost.commonshttp.CommonsHttpOAuthConsumer; +import oauth.signpost.exception.OAuthCommunicationException; +import oauth.signpost.exception.OAuthExpectationFailedException; +import oauth.signpost.exception.OAuthMessageSignerException; +import org.apache.commons.httpclient.HttpStatus; +import org.apache.http.HttpResponse; +import org.apache.http.client.HttpClient; +import org.apache.http.HttpEntity; +import org.apache.http.NameValuePair; +import org.apache.http.client.entity.UrlEncodedFormEntity; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.impl.client.DefaultHttpClient; +import org.apache.http.message.BasicNameValuePair; +import org.apache.http.params.CoreConnectionPNames; +import org.json.JSONException; +import org.json.JSONObject; +import org.json.JSONTokener; +import utils.OAuthUtils; + +public class StreamingApiExample +{ + OAuthTokenSecret OAuthToken; + final int RECORDS_TO_PROCESS = 1000; + final int MAX_GEOBOXES = 25; + final int MAX_KEYWORDS = 400; + final int MAX_USERS = 5000; + HashSet<String> Keywords; + HashSet<String> Geoboxes; + HashSet<String> Userids; + final String CONFIG_FILE_PATH = "streaming/streaming.config"; + final String DEF_OUTPATH = "streaming/"; + + /** + * Loads the Twitter 
access token and secret for a user + */ + public void LoadTwitterToken() + { +// OAuthExample oae = new OAuthExample(); +// OAuthToken = oae.GetUserAccessKeySecret(); + OAuthToken = OAuthExample.DEBUGUserAccessSecret(); + } + + /** + * Creates a connection to the Streaming Filter API + * @param baseUrl the URL for Twitter Filter API + * @param outFilePath Location to place the exported file + */ + public void CreateStreamingConnection(String baseUrl, String outFilePath) + { + HttpClient httpClient = new DefaultHttpClient(); + httpClient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, new Integer(90000)); + //Step 1: Initialize OAuth Consumer + OAuthConsumer consumer = new CommonsHttpOAuthConsumer(OAuthUtils.CONSUMER_KEY,OAuthUtils.CONSUMER_SECRET); + consumer.setTokenWithSecret(OAuthToken.getAccessToken(),OAuthToken.getAccessSecret()); + //Step 2: Create a new HTTP POST request and set parameters + HttpPost httppost = new HttpPost(baseUrl); + try { + httppost.setEntity(new UrlEncodedFormEntity(CreateRequestBody(), "UTF-8")); + } catch (UnsupportedEncodingException ex) { + ex.printStackTrace(); + } + try { + //Step 3: Sign the request + consumer.sign(httppost); + } catch (OAuthMessageSignerException ex) { + ex.printStackTrace(); + } catch (OAuthExpectationFailedException ex) { + ex.printStackTrace(); + } catch (OAuthCommunicationException ex) { + ex.printStackTrace(); + } + HttpResponse response; + InputStream is = null; + try { + //Step 4: Connect to the API + response = httpClient.execute(httppost); + if (response.getStatusLine().getStatusCode()!= HttpStatus.SC_OK) + { + throw new IOException("Got status " +response.getStatusLine().getStatusCode()); + } + else + { + System.out.println(OAuthToken.getAccessToken()+ ": Processing from " + baseUrl); + HttpEntity entity = response.getEntity(); + try { + is = entity.getContent(); + } catch (IOException ex) { + ex.printStackTrace(); + } catch (IllegalStateException ex) { + ex.printStackTrace(); + } + 
//Step 5: Process the incoming Tweet Stream + this.ProcessTwitterStream(is, outFilePath); + } + } catch (IOException ex) { + ex.printStackTrace(); + }finally { + // Abort the method, otherwise releaseConnection() will + // attempt to finish reading the never-ending response. + // These methods do not throw exceptions. + if(is!=null) + { + try { + is.close(); + } catch (IOException ex) { + ex.printStackTrace(); + } + } + } + } + + /** + * Processes a stream of tweets and writes them to a file one tweet per line. Each tweet here is represented by a JSON document. + * @param is input stream already connected to the streaming API + * @param outFilePath file to put the collected tweets in + * @throws InterruptedException + * @throws IOException + */ + public void ProcessTwitterStream(InputStream is, String outFilePath) + { + BufferedWriter bwrite = null; + try { + JSONTokener jsonTokener = new JSONTokener(new InputStreamReader(is, "UTF-8")); + ArrayList<JSONObject> rawtweets = new ArrayList<JSONObject>(); + int nooftweetsuploaded = 0; + while (true) { + try { + JSONObject temp = new JSONObject(jsonTokener); + rawtweets.add(temp); +// System.out.println(temp); + if (rawtweets.size() >= RECORDS_TO_PROCESS) + { + Calendar cal = Calendar.getInstance(); + String filename = outFilePath + "tweets_" + cal.getTimeInMillis() + ".json"; + bwrite = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), "UTF-8")); + nooftweetsuploaded += RECORDS_TO_PROCESS; + //Write the collected tweets to a file + for (JSONObject jobj : rawtweets) { + bwrite.write(jobj.toString()); + bwrite.newLine(); + } + System.out.println("Written "+nooftweetsuploaded+" records so far"); + bwrite.close(); + rawtweets.clear(); + } + } catch (JSONException ex) { + ex.printStackTrace(); + } + } + } catch (IOException ex) { + ex.printStackTrace(); + } + } + + public static void main(String[] args) + { + StreamingApiExample sae = new StreamingApiExample(); + sae.LoadTwitterToken(); + //load 
parameters from a TSV file + String filename = sae.CONFIG_FILE_PATH; + String outfilepath = sae.DEF_OUTPATH; + if(args!=null) + { + if(args.length>0) + { + filename = args[0]; + } + if(args.length>1) + { + File fl = new File(args[1]); + if(fl.exists()&&fl.isDirectory()) + { + outfilepath = args[1]; + } + } + } + sae.ReadParameters(filename); + sae.CreateStreamingConnection("https://stream.twitter.com/1.1/statuses/filter.json", outfilepath); + } + + /** + * Reads the file and loads the parameters to be crawled. Expects that the parameters are tab separated values and the + * @param filename + */ + public void ReadParameters(String filename) + { + BufferedReader br = null; + try { + br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8")); + String temp = ""; + int count = 1; + if(Userids==null) + { + Userids = new HashSet<String>(); + } + if(Geoboxes==null) + { + Geoboxes = new HashSet<String>(); + } + if(Keywords==null) + { + Keywords = new HashSet<String>(); + } + while((temp = br.readLine())!=null) + { + if(!temp.isEmpty()) + { + if(count==1) + { + String[] keywords = temp.split("\t"); + HashSet<String> temptags = new HashSet<String>(); + for(String word:keywords) + { + if(!temptags.contains(word)) + { + temptags.add(word); + } + } + FilterKeywords(temptags); + } + else + if(count==2) + { + String[] geoboxes = temp.split("\t"); + HashSet<String> tempboxes = new HashSet<String>(); + for(String box:geoboxes) + { + if(!tempboxes.contains(box)) + { + tempboxes.add(box); + } + } + FilterGeoboxes(tempboxes); + } + else + if(count==3) + { + String[] userids = temp.split("\t"); + HashSet<String> tempids = new HashSet<String>(); + for(String id:userids) + { + if(!tempids.contains(id)) + { + tempids.add(id); + } + } + FilterUserids(tempids); + } + count++; + } + } + } catch (IOException ex) { + ex.printStackTrace(); + } + finally{ + try { + br.close(); + } catch (IOException ex) { + ex.printStackTrace(); + } + } + } + + private void 
FilterUserids(HashSet<String> userids) + { + if(userids!=null) + { + int maxsize = MAX_USERS; + if(userids.size()<maxsize) + { + maxsize = userids.size(); + } + for(String id:userids) + { + Userids.add(id); + } + } + } + + private void FilterGeoboxes(HashSet<String> geoboxes) + { + if(geoboxes!=null) + { + int maxsize = MAX_GEOBOXES; + if(geoboxes.size()<maxsize) + { + maxsize = geoboxes.size(); + } + for(String box:geoboxes) + { + Geoboxes.add(box); + } + } + } + /** + * Keep only the maximum permitted number of parameters for a connection. Ignoring the rest. + * This can be extended to create multiple sets to be crawled by different threads. + */ + private void FilterKeywords(HashSet<String> hashtags) + { + if(hashtags!=null) + { + int maxsize = MAX_KEYWORDS; + if(hashtags.size()<maxsize) + { + maxsize = hashtags.size(); + } + for(String tag:hashtags) + { + Keywords.add(tag); + } + } + + } + + private List<NameValuePair> CreateRequestBody() + { + List<NameValuePair> params = new ArrayList<NameValuePair>(); + if(Userids != null&&Userids.size()>0) + { + params.add(CreateNameValuePair("follow", Userids)); + System.out.println("userids = "+Userids); + } + if (Geoboxes != null&&Geoboxes.size()>0) { + params.add(CreateNameValuePair("locations", Geoboxes)); + System.out.println("locations = "+Geoboxes); + + } + if (Keywords != null&&Keywords.size()>0) { + params.add(CreateNameValuePair("track", Keywords)); + System.out.println("keywords = "+Keywords); + } + return params; + } + + private NameValuePair CreateNameValuePair(String name, Collection<String> items) + { + StringBuilder sb = new StringBuilder(); + boolean needComma = false; + for (String item : items) { + if (needComma) { + sb.append(','); + } + needComma = true; + sb.append(item); + } + return new BasicNameValuePair(name, sb.toString()); + } +} diff --git a/src/Chapter2/support/APIType.java b/src/Chapter2/support/APIType.java new file mode 100644 index 0000000..94449f8 --- /dev/null +++ 
/**
 * Resource path constants, one per kind of API request.
 * NOTE(review): the values look like Twitter REST API resource paths that are
 * appended to a base URL elsewhere - confirm against the callers.
 *
 * The fields are {@code final} so that no code can reassign a shared constant.
 */
public class APIType
{
    /** Path of the user timeline (statuses) resource. */
    public static final String USER_TIMELINE = "/statuses/user_timeline";
    /** Path of the followers list resource. */
    public static final String FOLLOWERS = "/followers/list";
    /** Path of the friends list resource. */
    public static final String FRIENDS = "/friends/list";
    /** Path of the user profile resource. */
    public static final String USER_PROFILE = "/users/show";
}
/**
 * Holder for a user's OAuth access token and the matching access secret.
 */
public class OAuthTokenSecret
{
    String UserAccessToken;
    String UserAccessSecret;

    /**
     * Creates a pair by passing both values through the setters.
     *
     * @param token the user's access token
     * @param secret the user's access secret
     */
    public OAuthTokenSecret(String token,String secret)
    {
        setAccessToken(token);
        setAccessSecret(secret);
    }

    /** @return the stored access token */
    public String getAccessToken() {
        return this.UserAccessToken;
    }

    /** @param AccessToken the access token to store */
    public void setAccessToken(String AccessToken) {
        UserAccessToken = AccessToken;
    }

    /** @return the stored access secret */
    public String getAccessSecret() {
        return this.UserAccessSecret;
    }

    /** @param AccessSecret the access secret to store */
    public void setAccessSecret(String AccessSecret) {
        UserAccessSecret = AccessSecret;
    }

    /**
     * Renders both values, read through the getters.
     * Note that the output contains the secret in clear text.
     */
    @Override
    public String toString()
    {
        return String.format("Access Token: %s Access Secret: %s", getAccessToken(), getAccessSecret());
    }
}
toString().hashCode(); + } +} diff --git a/src/Chapter4/GraphElements/UserNode.java b/src/Chapter4/GraphElements/UserNode.java new file mode 100644 index 0000000..fba4419 --- /dev/null +++ b/src/Chapter4/GraphElements/UserNode.java @@ -0,0 +1,34 @@ +package GraphElements; + + + +public class UserNode { + private String username; + + public UserNode(String username){ + this.username = username; + } + + public String getUsername() { + return username; + } + + public void setUsername(String username) { + this.username = username; + } + + public boolean equals(Object un){ + if(un instanceof UserNode){ + return username.equals(((UserNode)un).username); + } + return false; + } + + public String toString(){ + return username; + } + + public int hashCode(){ + return username.hashCode(); + } +} diff --git a/src/Chapter4/centrality/examples/BetweennessCentralityExample.java b/src/Chapter4/centrality/examples/BetweennessCentralityExample.java new file mode 100644 index 0000000..ab9f7e6 --- /dev/null +++ b/src/Chapter4/centrality/examples/BetweennessCentralityExample.java @@ -0,0 +1,31 @@ +package centrality.examples; + +import Chapter4.util.TweetFileToGraph; +import java.io.File; +import GraphElements.RetweetEdge; +import GraphElements.UserNode; +import edu.uci.ics.jung.algorithms.importance.BetweennessCentrality; +import edu.uci.ics.jung.graph.DirectedGraph; + +public class BetweennessCentralityExample { + public static void main(String[] args){ + + File tweetFile; + + if(args.length > 0){ + tweetFile = new File(args[0]); + } + else{ + tweetFile = new File("synthetic_retweet_network.json"); + } + + DirectedGraph<UserNode, RetweetEdge> retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile); + + //calculate the betweenness centrality + BetweennessCentrality<UserNode, RetweetEdge> betweenness = new BetweennessCentrality<UserNode, RetweetEdge>(retweetGraph); + + betweenness.evaluate(); + betweenness.printRankings(true, true); + + } +} diff --git 
a/src/Chapter4/centrality/examples/EigenvectorCentralityExample.java b/src/Chapter4/centrality/examples/EigenvectorCentralityExample.java new file mode 100644 index 0000000..172dd16 --- /dev/null +++ b/src/Chapter4/centrality/examples/EigenvectorCentralityExample.java @@ -0,0 +1,36 @@ +package centrality.examples; + +import Chapter4.util.TweetFileToGraph; +import java.io.File; +import GraphElements.RetweetEdge; +import GraphElements.UserNode; +import edu.uci.ics.jung.algorithms.scoring.EigenvectorCentrality; +import edu.uci.ics.jung.graph.DirectedGraph; + +public class EigenvectorCentralityExample { + public static void main(String[] args){ + + File tweetFile; + + if(args.length > 0){ + tweetFile = new File(args[0]); + } + else{ + tweetFile = new File("synthetic_retweet_network.json"); + } + + DirectedGraph<UserNode, RetweetEdge> retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile); + +// EigenVectorScorer scorer = new EigenVectorScorer(retweetGraph); +// for(UserNode node : retweetGraph.getVertices()){ +// System.out.println(node + " - " + scorer.getVertexScore(node)); +// } + + EigenvectorCentrality<UserNode, RetweetEdge> eig = new EigenvectorCentrality<UserNode, RetweetEdge>(retweetGraph); + eig.evaluate(); + + for(UserNode node : retweetGraph.getVertices()){ + System.out.println(node + " - " + eig.getVertexScore(node)); + } + } +} diff --git a/src/Chapter4/centrality/examples/InDegreeCentralityExample.java b/src/Chapter4/centrality/examples/InDegreeCentralityExample.java new file mode 100644 index 0000000..6a027ac --- /dev/null +++ b/src/Chapter4/centrality/examples/InDegreeCentralityExample.java @@ -0,0 +1,30 @@ +package Chapter4.centrality.examples; + +import Chapter4.util.TweetFileToGraph; +import java.io.File; +import GraphElements.RetweetEdge; +import GraphElements.UserNode; +import edu.uci.ics.jung.graph.DirectedGraph; + +public class InDegreeCentralityExample { + + public static void main(String[] args){ + + File tweetFile; + + if(args.length > 
0){ + tweetFile = new File(args[0]); + } + else{ + tweetFile = new File("synthetic_retweet_network.json"); + } + + DirectedGraph<UserNode, RetweetEdge> retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile); + + //calculate the betweenness centrality + for(UserNode node : retweetGraph.getVertices()){ + System.out.println(node + " - " + retweetGraph.getInEdges(node).size()); + } + + } +} diff --git a/src/Chapter4/centrality/examples/PageRankCentralityExample.java b/src/Chapter4/centrality/examples/PageRankCentralityExample.java new file mode 100644 index 0000000..dd44efd --- /dev/null +++ b/src/Chapter4/centrality/examples/PageRankCentralityExample.java @@ -0,0 +1,39 @@ +package Chapter4.centrality.examples; + +import Chapter4.util.TweetFileToGraph; +import java.io.File; +import GraphElements.RetweetEdge; +import GraphElements.UserNode; +import edu.uci.ics.jung.algorithms.scoring.PageRank; +import edu.uci.ics.jung.graph.DirectedGraph; + +public class PageRankCentralityExample { + public static void main(String[] args){ + + File tweetFile; + + if(args.length > 0){ + tweetFile = new File(args[0]); + } + else{ + tweetFile = new File("synthetic_retweet_network.json"); + } + + DirectedGraph<UserNode, RetweetEdge> retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile); + + + PageRank<UserNode, RetweetEdge> pageRank = new PageRank<UserNode, RetweetEdge>(retweetGraph, .5); + pageRank.evaluate(); + + for(UserNode node : retweetGraph.getVertices()){ + System.out.println(node + " - " + pageRank.getVertexScore(node)); + } + +// EigenvectorCentrality<UserNode, RetweetEdge> eig = new EigenvectorCentrality<UserNode, RetweetEdge>(retweetGraph); +// eig.evaluate(); +// +// for(UserNode node : retweetGraph.getVertices()){ +// System.out.println(node + " - " + eig.getVertexScore(node)); +// } + } +} diff --git a/src/Chapter4/classification/bayes/Classification.java b/src/Chapter4/classification/bayes/Classification.java new file mode 100644 index 0000000..ea9aba7 --- 
/**
 * Result of classifying a text: the chosen label and the confidence in it.
 */
public class Classification {
    private String label;
    private double confidence;

    /**
     * @param label the label assigned to the text
     * @param confidence the confidence value attached to that label
     */
    public Classification(String label, double confidence){
        this.confidence = confidence;
        this.label = label;
    }

    /** @return the confidence value attached to the label */
    public double getConfidence() {
        return this.confidence;
    }

    /** @return the assigned label */
    public String getLabel() {
        return this.label;
    }

    /** @return the pair rendered as "(label, confidence)" */
    public String toString(){
        StringBuilder text = new StringBuilder("(");
        text.append(label);
        text.append(", ");
        text.append(confidence);
        text.append(")");
        return text.toString();
    }
}
args[0] : "owsemoticons.json"; + + ArrayList<String> allTexts = new ArrayList<String>(); + + try { + //read the file, and train each document + JsonStreamParser parser = new JsonStreamParser(new FileReader(filename)); + JsonObject elem; + while (parser.hasNext()) { + elem = parser.next().getAsJsonObject(); + allTexts.add(elem.get("text").getAsString()); + } + } catch (IOException e) { + e.printStackTrace(); + } + + //do 5-fold cross validation 3 times + Map<Integer, ArrayList<String>> buckets; + int bucketIdx; + NaiveBayesSentimentClassifier nbsc; + for(int i = 0; i < 3; i++){ + + //randomly split the texts into 5 buckets + buckets = new HashMap<Integer, ArrayList<String>>(); + //initialize the 5 buckets + for(int j = 0; j < 5; j++) buckets.put(j, new ArrayList<String>()); + for(String text : allTexts){ + bucketIdx = (int) (Math.random()*5); + buckets.get(bucketIdx).add(text); + } + + for(int j = 0; j < 5; j++){ + //use all but j as the training, use j as the test. + nbsc = new NaiveBayesSentimentClassifier(); + for(int k = 0; k < 5; k++){ + if(k != j){ + nbsc.trainInstances(buckets.get(k)); + } + } + //test with bucket j + + } + } + + } +} diff --git a/src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java b/src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java new file mode 100644 index 0000000..923416c --- /dev/null +++ b/src/Chapter4/classification/bayes/NaiveBayesSentimentClassifier.java @@ -0,0 +1,264 @@ +package Chapter4.classification.bayes; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.StringTokenizer; + +/** + * This class performs both the training and classification steps of a Naive Bayes Classifier. 
+ * + */ +public class NaiveBayesSentimentClassifier { + //the possible sentiment labels + private static final String[] SENTIMENT_LABELS = {"happy", "sad"}; + //the tokens to look for in labeling the sentiment. + private static final String[] HAPPY_SMILEYS = {":)", ";)", ":D", ":-)", ":o)", ":-D"}; + private static final String[] SAD_SMILEYS = {":(", ":-(", ":'(", ":'-(", "D:"}; + //store these as a set for faster retrieval + private static final Set<String> HAPPY_SMILEY_SET = new HashSet<String>(Arrays.asList(HAPPY_SMILEYS)); + private static final Set<String> SAD_SMILEY_SET = new HashSet<String>(Arrays.asList(SAD_SMILEYS)); + + //counter for the number of times each word has been associated with each sentiment. + private Map<String, Integer[]> sentOccurs; + //counter for the number of times we've seen each sentiment. + private Integer[] sentCount; + + public NaiveBayesSentimentClassifier(){ + //initialize the counters + sentOccurs = new HashMap<String, Integer[]>(); + sentCount = new Integer[SENTIMENT_LABELS.length]; + for(int i = 0; i < SENTIMENT_LABELS.length; i++){ + sentCount[i] = 0; + } + } + + /** + * Tokenize a string. Turns string into list of words based on whitespace, then + * removes stopwords, punctuation, and reduces the word to its stem. + * @param text + * The piece of text + * @return + * Each individual word. + */ + private List<String> getTokens(String text){ + StringTokenizer tokens = new StringTokenizer(text); + ArrayList<String> words = new ArrayList<String>(); + + String tmp; + StringBuilder sb; + while(tokens.hasMoreTokens()){ + sb = new StringBuilder(); + tmp = tokens.nextToken(); + tmp = tmp.toLowerCase(); + + for(char ch : tmp.toCharArray()){ + if(Character.isLetter(ch)){ + sb.append(ch); + } + } + tmp = sb.toString(); + if(tmp.length() > 0 && !StopwordsList.stopwordsSet.contains(tmp)){ + words.add(sb.toString()); + } + } + + return words; + } + + /** + * Checks if tweet has a "label" (emoticon). 
If so, stores the words in + * the prior. + * @param tweetText + * The text of the document to check. + */ + public void trainInstance(String tweetText){ + //see if the tweet is labeled (i.e. has a smiley) + int tweetLabel = extractLabel(tweetText); + List<String> tokens = getTokens(tweetText); + if(tweetLabel != -1){ + //add these words to the classifier + updateClassifier(tokens, tweetLabel); + } + } + + public String printWordOccurs(int sentIndex, int topN){ + StringBuilder sb = new StringBuilder(); + + WordCountPair wpcset[] = new WordCountPair[sentOccurs.keySet().size()]; + + String s; + int t = 0; + Iterator<String> sIter = sentOccurs.keySet().iterator(); +// int totalCount = 0; +// while(sIter.hasNext()){ +// s = sIter.next(); +// totalCount += sentOccurs.get(s)[sentIndex]; +// } + + sIter = sentOccurs.keySet().iterator(); + while(sIter.hasNext()){ + s = sIter.next(); +// wpcset[t++] = new WordCountPair(s, sentOccurs.get(s)[sentIndex] * 1.0 / totalCount); + wpcset[t++] = new WordCountPair(s, Math.sqrt(sentOccurs.get(s)[sentIndex] * 1.0 )); + } + + Arrays.sort(wpcset); + + double frac; + for(int i = 0; (i < topN || topN <= 0) && i < wpcset.length; i++){ + s = wpcset[i].getWord(); + frac = wpcset[i].getCount(); + + sb.append(s); + sb.append(":"); + sb.append(frac); + sb.append("\n"); + } + + return sb.toString(); + } + + public void trainInstances(List<String> tweetTexts){ + for(String text : tweetTexts){ + trainInstance(text); + } + } + + /** + * Classify a tweet as happy or sad. This ignores the emoticon for demonstration purposes. + * @param tweetText + * The text of the tweet + * @return + * A Classification object that returns the sentiment of the tweet. + */ + public Classification classify(String tweetText){ + //stores the probability of each sentiment being the tweets true sentiment. 
+ double[] labelProbs = new double[SENTIMENT_LABELS.length]; + //tokenize the string + List<String> tokens = getTokens(tweetText); + int maxLabelIdx = 0; + for(int i = 0; i < labelProbs.length; i++){ + //calculate the probability that the tweet has that sentiment. + labelProbs[i] = calcLabelProb(tokens, i); + System.out.println(i + " -> " + labelProbs[i] ); + //keep track of the label probability + maxLabelIdx = labelProbs[i] > labelProbs[maxLabelIdx] ? i : maxLabelIdx; + } + //calc the confidence + double conf = labelProbs[maxLabelIdx]; + labelProbs[maxLabelIdx] = 0; + conf -= sumVector(labelProbs); + + return new Classification(SENTIMENT_LABELS[maxLabelIdx], conf); + } + + private int extractLabel(String tweetText){ + StringTokenizer tokens = new StringTokenizer(tweetText); + while(tokens.hasMoreTokens()){ + String token = tokens.nextToken(); + if(HAPPY_SMILEY_SET.contains(token)){ + return 0; + } + else if(SAD_SMILEY_SET.contains(token)){ + return 1; + } + } + return -1; + } + + /** + * This updates the classifier's probabilites for each word + * with the new piece of text. + * @param tokens + * The tokens in the tweet. + * @param sentIndex + * The sentiment label. + */ + private void updateClassifier(List<String> tokens, int sentIndex){ + for(String token : tokens){ + if(sentOccurs.containsKey(token)){ + sentOccurs.get(token)[sentIndex] ++ ; + } + else{ + //make a new array and put it + Integer[] newArray = {0, 0}; + newArray[sentIndex] ++; + sentOccurs.put(token, newArray); + } + } + //update the overall document count + sentCount[sentIndex]++; + } + + /** + * The probability of the tweet having a given label. + * @param tokens + * The tokens in the tweet. + * @param sentIndex + * The probability we are testing. + * @return + * The probability the tweet has the class label indicated by "sentIndex". 
+ */ + private double calcLabelProb(List<String> tokens, int sentIndex){ + + //calculate the class probabilities + double[] pClass = new double[SENTIMENT_LABELS.length]; + int cSum = sumVector(sentCount); + int totalWordCount = 0; + + for(int i = 0; i < sentCount.length; i++){ + pClass[i] = sentCount[i] * 1.0 / cSum; + } + + for(String word : sentOccurs.keySet()){ + Integer[] wordCt = sentOccurs.get(word); + totalWordCount = sumVector(wordCt); + } + + + double p = 1.0; + boolean foundOne = false; + for(String token : tokens){ + if(sentOccurs.containsKey(token)){ + foundOne = true; + Integer[] probs = sentOccurs.get(token); + double pWordGivenClass = probs[sentIndex] / (double)(sumVector(probs)); + double pWord = sumVector(probs) / totalWordCount; + p *= pWordGivenClass * pClass[sentIndex] / pWord; + } + } + return foundOne ? p : 0.0; + } + + /** + * Helper function to sum the values in a 1D array. + * @param vector + * The 1D array to sum. + * @return + * The sum. + */ + private double sumVector(double[] vector){ + double sum = 0.0; + for(double d : vector) sum += d; + return sum; + } + + /** + * Helper function to sum the values in a 1D array. + * @param vector + * The 1D array to sum. + * @return + * The sum. 
+ */ + private int sumVector(Integer[] vector){ + int sum = 0; + for(int d : vector) sum += d; + return sum; + } +} diff --git a/src/Chapter4/classification/bayes/StopwordsList.java b/src/Chapter4/classification/bayes/StopwordsList.java new file mode 100644 index 0000000..06edd5a --- /dev/null +++ b/src/Chapter4/classification/bayes/StopwordsList.java @@ -0,0 +1,10 @@ +package Chapter4.classification.bayes; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +public class StopwordsList { + private static final String[] stopwords = {"a", "about", "above", "after", "again", "against", "all", "am", "an", "and", "any", "are", "as", "at", "be", "because", "been", "before", "being", "below", "between", "both", "but", "by", "can", "did", "do", "does", "doing", "don", "down", "during", "each", "few", "for", "from", "further", "get", "had", "has", "have", "having", "he", "her", "here", "hers", "herself", "him", "himself", "his", "how", "i", "if", "im", "i'm", "in", "into", "is", "it", "its", "itself", "just", "me", "more", "most", "my", "myself", "no", "nor", "not", "now", "of", "off", "on", "once", "only", "or", "other", "our", "ours", "ourselves", "out", "over", "own", "rt", "s", "same", "she", "should", "so", "some", "such", "t", "than", "that", "the", "their", "theirs", "them", "themselves", "then", "there", "these", "they", "this", "those", "through", "to", "too", "under", "until", "up", "us", "very", "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom", "why", "will", "with", "you", "your", "yours", "yourself", "yourselves"}; + public static final Set<String> stopwordsSet = new HashSet<String>(Arrays.asList(stopwords)); +} diff --git a/src/Chapter4/classification/bayes/TestNBC.java b/src/Chapter4/classification/bayes/TestNBC.java new file mode 100644 index 0000000..7e0e743 --- /dev/null +++ b/src/Chapter4/classification/bayes/TestNBC.java @@ -0,0 +1,49 @@ +package Chapter4.classification.bayes; + +import 
java.io.FileReader; +import java.io.IOException; + +import com.google.gson.JsonObject; +import com.google.gson.JsonStreamParser; + +public class TestNBC { + public static void main(String[] args){ + + String filename = args.length >= 1 ? args[0] : "owsemoticons.json"; + + //initialize the sentiment classifier + NaiveBayesSentimentClassifier nbsc = new NaiveBayesSentimentClassifier(); + + try { + //read the file, and train each document + JsonStreamParser parser = new JsonStreamParser(new FileReader(filename)); + JsonObject elem; + String text; + while (parser.hasNext()) { + elem = parser.next().getAsJsonObject(); + text = elem.get("text").getAsString(); + nbsc.trainInstance(text); + } + + //print out the positive and negative dictionary + System.out.println("=== Positive Dictionary ==="); + System.out.println(nbsc.printWordOccurs(0, 25)); + System.out.println("=== Negative Dictionary ==="); + System.out.println(nbsc.printWordOccurs(1, 25)); + + //now go through and classify each line as positive or negative +// parser = new JsonStreamParser(new FileReader(filename)); +// while (parser.hasNext()) { +// elem = parser.next().getAsJsonObject(); +// text = elem.get("text").getAsString(); +// Classification c = nbsc.classify(text); +// System.out.println(c + " -> " + text); +// } + System.out.println(nbsc.classify("I love new york")); + + } catch (IOException e) { + e.printStackTrace(); + } + + } +} diff --git a/src/Chapter4/classification/bayes/WordCountPair.java b/src/Chapter4/classification/bayes/WordCountPair.java new file mode 100644 index 0000000..b96be92 --- /dev/null +++ b/src/Chapter4/classification/bayes/WordCountPair.java @@ -0,0 +1,34 @@ +package Chapter4.classification.bayes; + +public class WordCountPair implements Comparable<WordCountPair>{ + + + private String word; + private double count; + + public WordCountPair(String word, double count){ + this.word = word; + this.count = count; + } + + public int compareTo(WordCountPair arg0) { + return arg0.count - 
count < 0 ? -1 : 1; + } + + public String getWord() { + return word; + } + + public void setWord(String word) { + this.word = word; + } + + public double getCount() { + return count; + } + + public void setCount(int count) { + this.count = count; + } + +} diff --git a/src/Chapter4/graph/visualization/SimpleGraphViewer.java b/src/Chapter4/graph/visualization/SimpleGraphViewer.java new file mode 100644 index 0000000..7cb46e4 --- /dev/null +++ b/src/Chapter4/graph/visualization/SimpleGraphViewer.java @@ -0,0 +1,86 @@ +package chapter4.graph.visualization; + +import Chapter4.util.TweetFileToGraph; +import java.awt.Dimension; +import java.awt.Shape; +import java.awt.geom.Ellipse2D; +import java.io.File; + +import javax.swing.JFrame; + +import org.apache.commons.collections15.Transformer; +import GraphElements.RetweetEdge; +import GraphElements.UserNode; +import edu.uci.ics.jung.algorithms.layout.KKLayout; +import edu.uci.ics.jung.algorithms.layout.Layout; +import edu.uci.ics.jung.algorithms.scoring.EigenvectorCentrality; +import edu.uci.ics.jung.graph.DirectedGraph; +import edu.uci.ics.jung.visualization.BasicVisualizationServer; + +public class SimpleGraphViewer { + public static void main(String[] args){ + + File tweetFile; + + if(args.length > 0){ + tweetFile = new File(args[0]); + } + else{ + tweetFile = new File("synthetic_retweet_network.json"); + } + + DirectedGraph<UserNode, RetweetEdge> retweetGraph = TweetFileToGraph.getRetweetNetwork(tweetFile); + + /* + * Converts a node to its string representation + */ + Transformer<UserNode, String> stringer = new Transformer<UserNode, String>(){ + public String transform(UserNode n){ + return n.toString(); + } + }; + + /* + * Calculate the centrality + */ + //calculate the betweenness centrality +// final InDegreeScorer<UserNode> centralityScore = new InDegreeScorer<UserNode>(retweetGraph); +// final BetweennessCentrality<UserNode, RetweetEdge> centralityScore = new BetweennessCentrality<UserNode, 
RetweetEdge>(retweetGraph); +// final PageRank<UserNode, RetweetEdge> centralityScore = new PageRank<UserNode, RetweetEdge>(retweetGraph, 0.85); + final EigenvectorCentrality<UserNode, RetweetEdge> centralityScore = new EigenvectorCentrality<UserNode, RetweetEdge>(retweetGraph); + centralityScore.evaluate(); + + double centralityMax = 0.0f; + for(UserNode node : retweetGraph.getVertices()){ + centralityMax = Math.max(centralityMax, centralityScore.getVertexScore(node)); + } + final double centralityMaxFinal = centralityMax; + + /* + * Sizes a node by some centrality measure + */ + Transformer<UserNode, Shape> shaper = new Transformer<UserNode, Shape>(){ + public Shape transform(UserNode n){ + System.out.println("User: " + n.getUsername() + " Cent: " + centralityScore.getVertexScore(n) + " Max: " + centralityMaxFinal); + double radius = 50 * (centralityScore.getVertexScore(n)) / centralityMaxFinal; + radius = Math.max(radius, 5.0f); + float fRadius = (float) radius; + return new Ellipse2D.Float(-fRadius/2, -fRadius/2, fRadius, fRadius); + } + }; + + Layout<UserNode, RetweetEdge> layout = new KKLayout<UserNode, RetweetEdge>(retweetGraph); + layout.setSize(new Dimension(500, 500)); + + BasicVisualizationServer<UserNode, RetweetEdge> vv = new BasicVisualizationServer<UserNode, RetweetEdge>(layout); + vv.setPreferredSize(new Dimension(550, 550)); + vv.getRenderContext().setVertexLabelTransformer(stringer); + vv.getRenderContext().setVertexShapeTransformer(shaper); + + JFrame jframe = new JFrame("Simple Graph View"); + jframe.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + jframe.getContentPane().add(vv); + jframe.pack(); + jframe.setVisible(true); + } +} diff --git a/src/Chapter4/tweetlda/LDA.java b/src/Chapter4/tweetlda/LDA.java new file mode 100644 index 0000000..ca7f9a3 --- /dev/null +++ b/src/Chapter4/tweetlda/LDA.java @@ -0,0 +1,89 @@ +package tweetlda; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import 
java.util.ArrayList; +import java.util.LinkedList; +import java.util.TreeSet; +import java.util.regex.Pattern; + +import org.json.JSONObject; + +import cc.mallet.pipe.CharSequence2TokenSequence; +import cc.mallet.pipe.CharSequenceLowercase; +import cc.mallet.pipe.Pipe; +import cc.mallet.pipe.SerialPipes; +import cc.mallet.pipe.TokenSequence2FeatureSequence; +import cc.mallet.pipe.TokenSequenceRemoveStopwords; +import cc.mallet.pipe.iterator.StringArrayIterator; +import cc.mallet.topics.ParallelTopicModel; +import cc.mallet.types.Alphabet; +import cc.mallet.types.IDSorter; +import cc.mallet.types.InstanceList; + +public class LDA { + + private static final String STOP_WORDS = "stopwords.txt"; + private static final int ITERATIONS = 100; + private static final int THREADS = 4; + private static final int NUM_TOPICS = 25; + private static final int NOM_WORDS_TO_ANALYZE = 25; + + public static void main(String[] args) throws Exception { + ArrayList<Pipe> pipeList = new ArrayList<Pipe>(); + File stopwords = new File(STOP_WORDS); + + String inputFileName = args.length >= 1 ? 
args[0] : "testows.json"; + + File inputFile = new File(inputFileName); + + // Lowercase, tokenize, remove stopwords, stem, and convert to features + pipeList.add((Pipe) new CharSequenceLowercase()); + pipeList.add((Pipe) new CharSequence2TokenSequence(Pattern.compile("\\p{L}[\\p{L}\\p{P}]+\\p{L}"))); + pipeList.add((Pipe) new TokenSequenceRemoveStopwords(stopwords, "UTF-8", false, false, false)); + pipeList.add((Pipe) new PorterStemmer()); + pipeList.add((Pipe) new TokenSequence2FeatureSequence()); + + InstanceList instances = new InstanceList(new SerialPipes(pipeList)); + + BufferedReader fileReader = new BufferedReader(new FileReader(inputFile)); + LinkedList<String> textList = new LinkedList<String>(); + String line; + while((line = fileReader.readLine()) != null){ + JSONObject elem = new JSONObject(line); + if(elem.has("text")){ + textList.add(elem.getString("text")); + } + } + + instances.addThruPipe(new StringArrayIterator(textList.toArray(new String[textList.size()]))); + + ParallelTopicModel model = new ParallelTopicModel(NUM_TOPICS); + model.addInstances(instances); + model.setNumThreads(THREADS); + model.setNumIterations(ITERATIONS); + model.estimate(); + + // The data alphabet maps word IDs to strings + Alphabet dataAlphabet = instances.getDataAlphabet(); + + int topicIdx=0; + StringBuilder sb; + for (TreeSet<IDSorter> set : model.getSortedWords()) { + sb = new StringBuilder().append(topicIdx); + sb.append(" - "); + int j = 0; + double sum = 0.0; + for (IDSorter s : set) { + sum += s.getWeight(); + } + for (IDSorter s : set) { + sb.append(dataAlphabet.lookupObject(s.getID())).append(":").append(s.getWeight() / sum).append(", "); + if (++j >= NOM_WORDS_TO_ANALYZE) break; + } + System.out.println(sb.append("\n").toString()); + topicIdx++; + } + } +} diff --git a/src/Chapter4/tweetlda/PorterStemmer.java b/src/Chapter4/tweetlda/PorterStemmer.java new file mode 100644 index 0000000..1a7149e --- /dev/null +++ b/src/Chapter4/tweetlda/PorterStemmer.java @@ -0,0 
+1,33 @@ +package tweetlda; + +import cc.mallet.pipe.Pipe; +import cc.mallet.types.Instance; +import cc.mallet.types.TokenSequence; + +public class PorterStemmer extends Pipe { + + private static final long serialVersionUID = 154100332101873830L; + + public Instance pipe(Instance carrier){ + TokenSequence ts = (TokenSequence) carrier.getData(); + String word; + Stemmer s; + + for(int i = 0; i < ts.size(); i++){ + word = ts.get(i).getText(); + //stem the word + s = new Stemmer(); + for(char ch : word.toCharArray()){ + if(Character.isLetter(ch)){ + s.add(ch); + } + } + s.stem(); + ts.get(i).setText(s.toString()); + } + carrier.setData(ts); + + return carrier; + } + +} diff --git a/src/Chapter4/tweetlda/Stemmer.java b/src/Chapter4/tweetlda/Stemmer.java new file mode 100644 index 0000000..f06dfc6 --- /dev/null +++ b/src/Chapter4/tweetlda/Stemmer.java @@ -0,0 +1,428 @@ +package tweetlda; + + +/* + + Porter stemmer in Java. The original paper is in + + Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14, + no. 3, pp 130-137, + + See also http://www.tartarus.org/~martin/PorterStemmer + + History: + + Release 1 + + Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below. + The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1] + is then out outside the bounds of b. + + Release 2 + + Similarly, + + Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below. + 'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and + b[j] is then outside the bounds of b. + + Release 3 + + Considerably revised 4/9/00 in the light of many helpful suggestions + from Brian Goetz of Quiotix Corporation (brian@quiotix.com). + + Release 4 + +*/ + +import java.io.*; + +/** + * Stemmer, implementing the Porter Stemming Algorithm + * + * The Stemmer class transforms a word into its root form. The input + * word can be provided a character at time (by calling add()), or at once + * by calling one of the various stem(something) methods. 
+ */ + +class Stemmer +{ private char[] b; + private int i, /* offset into b */ + i_end, /* offset to end of stemmed word */ + j, k; + private static final int INC = 50; + /* unit of size whereby b is increased */ + public Stemmer() + { b = new char[INC]; + i = 0; + i_end = 0; + } + + /** + * Add a character to the word being stemmed. When you are finished + * adding characters, you can call stem(void) to stem the word. + */ + + public void add(char ch) + { if (i == b.length) + { char[] new_b = new char[i+INC]; + for (int c = 0; c < i; c++) new_b[c] = b[c]; + b = new_b; + } + b[i++] = ch; + } + + + /** Adds wLen characters to the word being stemmed contained in a portion + * of a char[] array. This is like repeated calls of add(char ch), but + * faster. + */ + + public void add(char[] w, int wLen) + { if (i+wLen >= b.length) + { char[] new_b = new char[i+wLen+INC]; + for (int c = 0; c < i; c++) new_b[c] = b[c]; + b = new_b; + } + for (int c = 0; c < wLen; c++) b[i++] = w[c]; + } + + /** + * After a word has been stemmed, it can be retrieved by toString(), + * or a reference to the internal buffer can be retrieved by getResultBuffer + * and getResultLength (which is generally more efficient.) + */ + public String toString() { return new String(b,0,i_end); } + + /** + * Returns the length of the word resulting from the stemming process. + */ + public int getResultLength() { return i_end; } + + /** + * Returns a reference to a character buffer containing the results of + * the stemming process. You also need to consult getResultLength() + * to determine the length of the result. + */ + public char[] getResultBuffer() { return b; } + + /* cons(i) is true <=> b[i] is a consonant. */ + + private final boolean cons(int i) + { switch (b[i]) + { case 'a': case 'e': case 'i': case 'o': case 'u': return false; + case 'y': return (i==0) ? true : !cons(i-1); + default: return true; + } + } + + /* m() measures the number of consonant sequences between 0 and j. 
if c is + a consonant sequence and v a vowel sequence, and <..> indicates arbitrary + presence, + + <c><v> gives 0 + <c>vc<v> gives 1 + <c>vcvc<v> gives 2 + <c>vcvcvc<v> gives 3 + .... + */ + + private final int m() + { int n = 0; + int i = 0; + while(true) + { if (i > j) return n; + if (! cons(i)) break; i++; + } + i++; + while(true) + { while(true) + { if (i > j) return n; + if (cons(i)) break; + i++; + } + i++; + n++; + while(true) + { if (i > j) return n; + if (! cons(i)) break; + i++; + } + i++; + } + } + + /* vowelinstem() is true <=> 0,...j contains a vowel */ + + private final boolean vowelinstem() + { int i; for (i = 0; i <= j; i++) if (! cons(i)) return true; + return false; + } + + /* doublec(j) is true <=> j,(j-1) contain a double consonant. */ + + private final boolean doublec(int j) + { if (j < 1) return false; + if (b[j] != b[j-1]) return false; + return cons(j); + } + + /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant + and also if the second c is not w,x or y. this is used when trying to + restore an e at the end of a short word. e.g. + + cav(e), lov(e), hop(e), crim(e), but + snow, box, tray. + + */ + + private final boolean cvc(int i) + { if (i < 2 || !cons(i) || cons(i-1) || !cons(i-2)) return false; + { int ch = b[i]; + if (ch == 'w' || ch == 'x' || ch == 'y') return false; + } + return true; + } + + private final boolean ends(String s) + { int l = s.length(); + int o = k-l+1; + if (o < 0) return false; + for (int i = 0; i < l; i++) if (b[o+i] != s.charAt(i)) return false; + j = k-l; + return true; + } + + /* setto(s) sets (j+1),...k to the characters in the string s, readjusting + k. */ + + private final void setto(String s) + { int l = s.length(); + int o = j+1; + for (int i = 0; i < l; i++) b[o+i] = s.charAt(i); + k = j+l; + } + + /* r(s) is used further down. */ + + private final void r(String s) { if (m() > 0) setto(s); } + + /* step1() gets rid of plurals and -ed or -ing. e.g. 
+ + caresses -> caress + ponies -> poni + ties -> ti + caress -> caress + cats -> cat + + feed -> feed + agreed -> agree + disabled -> disable + + matting -> mat + mating -> mate + meeting -> meet + milling -> mill + messing -> mess + + meetings -> meet + + */ + + private final void step1() + { if (b[k] == 's') + { if (ends("sses")) k -= 2; else + if (ends("ies")) setto("i"); else + if (b[k-1] != 's') k--; + } + if (ends("eed")) { if (m() > 0) k--; } else + if ((ends("ed") || ends("ing")) && vowelinstem()) + { k = j; + if (ends("at")) setto("ate"); else + if (ends("bl")) setto("ble"); else + if (ends("iz")) setto("ize"); else + if (doublec(k)) + { k--; + { int ch = b[k]; + if (ch == 'l' || ch == 's' || ch == 'z') k++; + } + } + else if (m() == 1 && cvc(k)) setto("e"); + } + } + + /* step2() turns terminal y to i when there is another vowel in the stem. */ + + private final void step2() { if (ends("y") && vowelinstem()) b[k] = 'i'; } + + /* step3() maps double suffices to single ones. so -ization ( = -ize plus + -ation) maps to -ize etc. note that the string before the suffix must give + m() > 0. 
*/ + + private final void step3() { if (k == 0) return; /* For Bug 1 */ switch (b[k-1]) + { + case 'a': if (ends("ational")) { r("ate"); break; } + if (ends("tional")) { r("tion"); break; } + break; + case 'c': if (ends("enci")) { r("ence"); break; } + if (ends("anci")) { r("ance"); break; } + break; + case 'e': if (ends("izer")) { r("ize"); break; } + break; + case 'l': if (ends("bli")) { r("ble"); break; } + if (ends("alli")) { r("al"); break; } + if (ends("entli")) { r("ent"); break; } + if (ends("eli")) { r("e"); break; } + if (ends("ousli")) { r("ous"); break; } + break; + case 'o': if (ends("ization")) { r("ize"); break; } + if (ends("ation")) { r("ate"); break; } + if (ends("ator")) { r("ate"); break; } + break; + case 's': if (ends("alism")) { r("al"); break; } + if (ends("iveness")) { r("ive"); break; } + if (ends("fulness")) { r("ful"); break; } + if (ends("ousness")) { r("ous"); break; } + break; + case 't': if (ends("aliti")) { r("al"); break; } + if (ends("iviti")) { r("ive"); break; } + if (ends("biliti")) { r("ble"); break; } + break; + case 'g': if (ends("logi")) { r("log"); break; } + } } + + /* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */ + + private final void step4() { switch (b[k]) + { + case 'e': if (ends("icate")) { r("ic"); break; } + if (ends("ative")) { r(""); break; } + if (ends("alize")) { r("al"); break; } + break; + case 'i': if (ends("iciti")) { r("ic"); break; } + break; + case 'l': if (ends("ical")) { r("ic"); break; } + if (ends("ful")) { r(""); break; } + break; + case 's': if (ends("ness")) { r(""); break; } + break; + } } + + /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. 
*/ + + private final void step5() + { if (k == 0) return; /* for Bug 1 */ switch (b[k-1]) + { case 'a': if (ends("al")) break; return; + case 'c': if (ends("ance")) break; + if (ends("ence")) break; return; + case 'e': if (ends("er")) break; return; + case 'i': if (ends("ic")) break; return; + case 'l': if (ends("able")) break; + if (ends("ible")) break; return; + case 'n': if (ends("ant")) break; + if (ends("ement")) break; + if (ends("ment")) break; + /* element etc. not stripped before the m */ + if (ends("ent")) break; return; + case 'o': if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) break; + /* j >= 0 fixes Bug 2 */ + if (ends("ou")) break; return; + /* takes care of -ous */ + case 's': if (ends("ism")) break; return; + case 't': if (ends("ate")) break; + if (ends("iti")) break; return; + case 'u': if (ends("ous")) break; return; + case 'v': if (ends("ive")) break; return; + case 'z': if (ends("ize")) break; return; + default: return; + } + if (m() > 1) k = j; + } + + /* step6() removes a final -e if m() > 1. */ + + private final void step6() + { j = k; + if (b[k] == 'e') + { int a = m(); + if (a > 1 || a == 1 && !cvc(k-1)) k--; + } + if (b[k] == 'l' && doublec(k) && m() > 1) k--; + } + + /** Stem the word placed into the Stemmer buffer through calls to add(). + * Returns true if the stemming process resulted in a word different + * from the input. You can retrieve the result with + * getResultLength()/getResultBuffer() or toString(). + */ + public void stem() + { k = i - 1; + if (k > 1) { step1(); step2(); step3(); step4(); step5(); step6(); } + i_end = k+1; i = 0; + } + + /** Test program for demonstrating the Stemmer. It reads text from a + * a list of files, stems each word, and writes the result to standard + * output. Note that the word stemmed is expected to be in lower case: + * forcing lower case must be done outside the Stemmer class. + * Usage: Stemmer file-name file-name ... 
+ */ + public static void main(String[] args) + { + char[] w = new char[501]; + Stemmer s = new Stemmer(); + for (int i = 0; i < args.length; i++) + try + { + FileInputStream in = new FileInputStream(args[i]); + + try + { while(true) + + { int ch = in.read(); + if (Character.isLetter((char) ch)) + { + int j = 0; + while(true) + { ch = Character.toLowerCase((char) ch); + w[j] = (char) ch; + if (j < 500) j++; + ch = in.read(); + if (!Character.isLetter((char) ch)) + { + /* to test add(char ch) */ + for (int c = 0; c < j; c++) s.add(w[c]); + + /* or, to test add(char[] w, int j) */ + /* s.add(w, j); */ + + s.stem(); + { String u; + + /* and now, to test toString() : */ + u = s.toString(); + + /* to test getResultBuffer(), getResultLength() : */ + /* u = new String(s.getResultBuffer(), 0, s.getResultLength()); */ + + System.out.print(u); + } + break; + } + } + } + if (ch < 0) break; + System.out.print((char)ch); + } + } + catch (IOException e) + { System.out.println("error reading " + args[i]); + break; + } + } + catch (FileNotFoundException e) + { System.out.println("file " + args[i] + " not found"); + break; + } + } +} diff --git a/src/Chapter4/util/BetweennessScorer.java b/src/Chapter4/util/BetweennessScorer.java new file mode 100644 index 0000000..0926d34 --- /dev/null +++ b/src/Chapter4/util/BetweennessScorer.java @@ -0,0 +1,25 @@ +package util; + +import GraphElements.RetweetEdge; +import GraphElements.UserNode; +import edu.uci.ics.jung.algorithms.scoring.VertexScorer; +import edu.uci.ics.jung.algorithms.shortestpath.DijkstraShortestPath; +import edu.uci.ics.jung.graph.Graph; +import edu.uci.ics.jung.graph.Hypergraph; + +public class BetweennessScorer implements VertexScorer<UserNode, Double>{ + + public BetweennessScorer(Hypergraph<UserNode, RetweetEdge> graph){ + /* + * Step 1: Calculate the shortest path between each pair of nodes. 
+ */ + DijkstraShortestPath<UserNode, RetweetEdge> paths = new DijkstraShortestPath<UserNode, RetweetEdge>((Graph<UserNode, RetweetEdge>) graph); +// paths.getDistance(source, target); + } + + public Double getVertexScore(UserNode arg0) { + // TODO Auto-generated method stub + return null; + } + +} diff --git a/src/Chapter4/util/EigenVectorScorer.java b/src/Chapter4/util/EigenVectorScorer.java new file mode 100644 index 0000000..da0c1a8 --- /dev/null +++ b/src/Chapter4/util/EigenVectorScorer.java @@ -0,0 +1,64 @@ +package Chapter4.util; + +import GraphElements.RetweetEdge; +import GraphElements.UserNode; +import cern.colt.matrix.DoubleMatrix2D; +import cern.colt.matrix.impl.SparseDoubleMatrix2D; +import cern.colt.matrix.linalg.EigenvalueDecomposition; +import edu.uci.ics.jung.algorithms.scoring.VertexScorer; +import edu.uci.ics.jung.graph.Hypergraph; + +/** + * This is a Jung Node Scorer that computes the Eigenvector Centrality for each node. + */ +public class EigenVectorScorer implements VertexScorer<UserNode, Double> { + + private UserNode[] users; + private DoubleMatrix2D eigenVectors; + private int dominantEigenvectorIdx; + + public EigenVectorScorer(Hypergraph<UserNode, RetweetEdge> graph){ + users = new UserNode[graph.getVertexCount()]; + graph.getVertices().toArray(users); + + /* Step 1: Create the adjacency matrix. + * + * An adjacency matrix is a matrix with N users and N columns, + * where N is the number of nodes in the network. + * An entry in the matrix is 1 when node i connects to node j, + * and 0 otherwise. + */ + SparseDoubleMatrix2D matrix = new SparseDoubleMatrix2D(users.length, users.length); + for(int i = 0; i < users.length; i++){ + for(int j = 0; j < users.length; j++){ + matrix.setQuick(i, j, graph.containsEdge(new RetweetEdge(users[i], users[j])) ? 1 : 0); + } + } + + /* Step 2: Find the principle eigenvector. 
+ * For more information on eigen-decomposition please see + * http://mathworld.wolfram.com/EigenDecomposition.html + */ + EigenvalueDecomposition eig = new EigenvalueDecomposition(matrix); + DoubleMatrix2D eigenVals = eig.getD(); + eigenVectors = eig.getV(); + + dominantEigenvectorIdx = 0; + for(int i = 1; i < eigenVals.columns(); i++){ + if(eigenVals.getQuick(dominantEigenvectorIdx, dominantEigenvectorIdx) < + eigenVals.getQuick(i, i)){ + dominantEigenvectorIdx = i; + } + } + } + + public Double getVertexScore(UserNode arg0) { + for(int i = 0; i < users.length; i++){ + if(users[i].equals(arg0)){ + return Math.abs(eigenVectors.getQuick(i, dominantEigenvectorIdx)); + } + } + return null; + } + +} diff --git a/src/Chapter4/util/InDegreeScorer.java b/src/Chapter4/util/InDegreeScorer.java new file mode 100644 index 0000000..014adc6 --- /dev/null +++ b/src/Chapter4/util/InDegreeScorer.java @@ -0,0 +1,30 @@ +package Chapter4.util; + +import edu.uci.ics.jung.algorithms.scoring.VertexScorer; +import edu.uci.ics.jung.graph.Hypergraph; + +/** + * This is a Jung Node Scorer that computes the + * In-Degree Centrality of nodes. + */ +public class InDegreeScorer<T> implements VertexScorer<T, Double>{ + + //The graph representation in JUNG. + private Hypergraph<T, ?> graph; + + /** + * Initialize the graph scorer. + * @param graph + * The graph we wish to score. + */ + public InDegreeScorer(Hypergraph<T, ?> graph){ + this.graph = graph; + } + + /** + * @return The In-Degree Centrality of the vertex. + */ + public Double getVertexScore(T node) { + return (double) graph.getInEdges(node).size(); + } +}
\ No newline at end of file diff --git a/src/Chapter4/util/TweetFileProcessor.java b/src/Chapter4/util/TweetFileProcessor.java new file mode 100644 index 0000000..9b6b99c --- /dev/null +++ b/src/Chapter4/util/TweetFileProcessor.java @@ -0,0 +1,76 @@ +package Chapter4.util; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.UnsupportedEncodingException; +import java.util.Iterator; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.json.JSONException; +import org.json.JSONObject; + +public class TweetFileProcessor implements Iterator<JSONObject>{ + + protected BufferedReader fileBuffer; + protected boolean endOfFile; + protected String nextLine; + + public TweetFileProcessor(File f){ + + endOfFile = false; + + InputStreamReader isr; + BufferedReader br = null; + try { + isr = new InputStreamReader(new FileInputStream(f), "UTF-8"); + br = new BufferedReader(isr); + nextLine = br.readLine(); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + endOfFile = true; + } catch (FileNotFoundException e) { + e.printStackTrace(); + endOfFile = true; + } catch (IOException e) { + e.printStackTrace(); + endOfFile = true; + } + finally{ + fileBuffer = br; + } + } + + @Override + public boolean hasNext() { + return !endOfFile; + } + + @Override + public JSONObject next() { + JSONObject obj = null; + try { + obj = new JSONObject(nextLine); + } catch (JSONException ex) { + Logger.getLogger(TweetFileProcessor.class.getName()).log(Level.SEVERE, null, ex); + } + try { + nextLine = fileBuffer.readLine(); + if(nextLine == null){ + endOfFile = true; + } + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return obj; + } + + @Override + public void remove() throws UnsupportedOperationException{ + throw new UnsupportedOperationException(); + } +} diff 
--git a/src/Chapter4/util/TweetFileToGraph.java b/src/Chapter4/util/TweetFileToGraph.java new file mode 100644 index 0000000..6cf2e3a --- /dev/null +++ b/src/Chapter4/util/TweetFileToGraph.java @@ -0,0 +1,77 @@ +package Chapter4.util; + +import java.io.File; + +import GraphElements.RetweetEdge; +import GraphElements.UserNode; + +import edu.uci.ics.jung.graph.DirectedGraph; +import edu.uci.ics.jung.graph.DirectedSparseGraph; +import edu.uci.ics.jung.graph.util.EdgeType; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.json.JSONException; +import org.json.JSONObject; + +/** + * Some basic functionality to convert files collected + * in Chapter 2 to JUNG graphs. + */ +public class TweetFileToGraph { + + public static DirectedGraph<UserNode, RetweetEdge> getRetweetNetwork(File tweetFile){ + + JSONObject tmp; + + TweetFileProcessor tfp = new TweetFileProcessor(tweetFile); + DirectedSparseGraph<UserNode, RetweetEdge> dsg = new DirectedSparseGraph<UserNode, RetweetEdge>(); + + while (tfp.hasNext()){ + tmp = tfp.next(); + if(tmp==null) + { + continue; + } + //get the author + String user=null; + try { + user = tmp.getJSONObject("user").getString("screen_name"); + } catch (JSONException ex) { + Logger.getLogger(TweetFileToGraph.class.getName()).log(Level.SEVERE, null, ex); + } + if(user==null) + { + continue; + } + //get the retweeted user + try{ + JSONObject retweet = tmp.getJSONObject("retweeted_status"); + String retweeted_user = retweet.getJSONObject("user").getString("screen_name"); + + //make an edge or increment the weight if it exists. 
+ UserNode toUser = new UserNode(retweeted_user); + UserNode fromUser = new UserNode(user); + + dsg.addVertex(toUser); + dsg.addVertex(fromUser); + + RetweetEdge edge = new RetweetEdge(toUser, fromUser); + + if(dsg.containsEdge(edge)){ + dsg.findEdge(fromUser, toUser).incrementRTCount(); + } + else{ + dsg.addEdge(edge, fromUser, toUser); + } + dsg.addEdge(edge, fromUser, toUser, EdgeType.DIRECTED); + } + catch(JSONException ex){ + //the tweet is not a retweet. this is not a problem. + } + + + } + + return dsg; + } +} diff --git a/src/Chapter5/network/CreateD3Network.java b/src/Chapter5/network/CreateD3Network.java new file mode 100644 index 0000000..d4c25af --- /dev/null +++ b/src/Chapter5/network/CreateD3Network.java @@ -0,0 +1,716 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ + +package Chapter5.network; + + +import Chapter5.support.HashTagDS; +import Chapter5.support.NetworkNode; +import Chapter5.support.NodeIDComparator; +import Chapter5.support.NodeSizeComparator; +import Chapter5.support.ToNodeInfo; +import Chapter5.support.Tweet; +import java.io.BufferedReader; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import utils.TextUtils; + +/** + * + * @author shamanth + */ +public class CreateD3Network +{ + static final String DEF_INFILENAME = "ows.json"; + private String RTPATTERN = "rt @[_a-zA-Z0-9]+"; + private final int DEFAULT_NODE_SIZE = 0; +// private final int NODE_COUNT_LIMIT = 1; + //private final String[] node_color_scheme = new 
String[]{"#FFFFD9","#EDF8B1","#C7E9B4","#7FCDBB","#41B6C4","#1D91C0","#225EA8","#253494","#081D58"}; + //private final String[] node_color_scheme = new String[]{"#A6BDDB","#74A9CF","#3690C0","#0570B0","#045A8D","#023858"}; + + /** + * Extracts the users who have been retweeted using the RTPATTERN + * @param text + * @return + */ + public ArrayList<String> GetRTUsers(String text) + { + Pattern p = Pattern.compile(RTPATTERN, Pattern.CASE_INSENSITIVE); + Matcher m = p.matcher(text); + ArrayList<String> rtusers = new ArrayList<String>(); + while(m.find()) + { + String nuser = text.substring(m.start(),m.end()); + nuser = nuser.replaceAll("rt @|RT @", ""); +// nuser = nuser.replaceAll("RT @", ""); + rtusers.add(nuser.toLowerCase()); + } + return rtusers; + } + + /** + * Identifies the category to which the tweet belongs. Each category is defined by a group of words/hashtags + * @param tweet + * @param usercategories + * @return + */ + public int GetCategory(String tweet, HashTagDS[] usercategories) + { + HashMap<Integer,Integer> categoryvotes = new HashMap<Integer,Integer>(); + tweet = tweet.toLowerCase(); + int i=0; + for(HashTagDS cat:usercategories) + { + + for(String s :cat.tags) + { + if(tweet.indexOf(s)!=-1) + { + if(categoryvotes.containsKey(i)) + { + categoryvotes.put(i, categoryvotes.get(i)+1); + } + else + { + categoryvotes.put(i, 1); + } + } + } + i++; + } + Set<Integer> keyset = categoryvotes.keySet(); + int maxvote = 0; + //by default the tweet will be in the first category + int maxcategoryindex = 0; + for(int key:keyset) + { + if(categoryvotes.get(key)>maxvote) + { + maxvote = categoryvotes.get(key); + maxcategoryindex = key; + } + } + return maxcategoryindex; + } + + /** + * Converts the input jsonobject containing category descriptions to an array for processing. 
+ * @param hashtagcoll JSONObject containing the list of hashtags, color, and the topic information + * @return An array of hashtags + */ + public HashTagDS[] ConvertJSONArrayToArray(JSONObject hashtagcoll) + { + HashTagDS[] hashtags = new HashTagDS[hashtagcoll.length()]; + int j=0; + try{ + if(hashtagcoll!=null) + { + Iterator keyit = hashtagcoll.keys(); + while(keyit.hasNext()) + { + HashTagDS ht = new HashTagDS(); + JSONObject tags = (JSONObject) hashtagcoll.get((String)keyit.next()); + ht.groupname = keyit.toString(); + ht.color = tags.getString("color"); + JSONArray tagjson = tags.getJSONArray("hts"); + ht.tags = new String[tagjson.length()]; + for(int i=0;i<tagjson.length();i++) + { + ht.tags[i] = tagjson.getString(i); + } + hashtags[j++] = ht; + } + } + }catch(JSONException ex) + { + ex.printStackTrace(); + } + return hashtags; + } + + /** + * Identifies the category of a node based on the content of his tweets(each tweet can be assigned a category based on it's text). A simple majority is sufficient to make this decision. + * @param tnfs + * @param hashtagarray + * @return + */ + public int GetMajorityTopicColor(NetworkNode tnfs,HashTagDS[] hashtagarray) + { + HashMap<Integer,Integer> catcount = new HashMap<Integer,Integer>(); + //if the node has no tolinks then look at the node that it retweeted to decide the color of the node + for(String tweet:tnfs.data) + { + int id = this.GetCategory(tweet, hashtagarray); + if(catcount.containsKey(id)) + { + catcount.put(id, catcount.get(id)+1); + } + else + catcount.put(id, 1); + } + Set<Integer> keys = catcount.keySet(); + int maxcatID = -1; + int maxcount = 0; + for(int k:keys) + { + if(maxcatID==-1) + { + maxcatID = k; + maxcount = catcount.get(k); + } + else + { + if(maxcount<catcount.get(k)) + { + maxcount = catcount.get(k); + maxcatID = k; + } + } + } + return maxcatID; + } + + /** + * Takes as input a JSON file and reads through the file sequentially to process and create a retweet network from the tweets. 
+ * @param inFilename + * @param numNodeClasses + * @param hashtags category info containing hashtags + * @param num_nodes number of seed nodes to be included in the network + * @return a JSONObject consisting of nodes and links of the network + */ + public JSONObject ConvertTweetsToDiffusionPath(String inFilename,int numNodeClasses, + JSONObject hashtags, int num_nodes) + { + HashMap<String,NetworkNode> userconnections = new HashMap<String,NetworkNode>(); +// HashMap<String,Integer> tweet_class_codes = new HashMap<String,Integer>(); +// int tweet_class_counter = 1; + HashTagDS[] hashtagarray = ConvertJSONArrayToArray(hashtags); + BufferedReader br = null; + try{ + br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); + String temp = ""; + while((temp = br.readLine())!=null) + { + JSONObject tweetobj; + try { + tweetobj = new JSONObject(temp); + } catch (JSONException ex) { + ex.printStackTrace(); + continue; + } + //Extract the tweet first + Tweet t = new Tweet(); + String text=""; + try { + text = TextUtils.GetCleanText(tweetobj.getString("text")).toLowerCase(); + } catch (JSONException ex) { + ex.printStackTrace(); + continue; + } + //Check that the tweet matches at least one of the topics + boolean groupmatch = false; + for(HashTagDS ht:hashtagarray) + { + String[] tags = ht.tags; + for(String tg:tags) + { + if(text.contains(tg)) + { + groupmatch = true; + break; + } + } + if(groupmatch) + { + break; + } + } + if(!groupmatch) + { + continue; + } + // + ArrayList<String> fromusers = new ArrayList<String>(); + if(!tweetobj.isNull("retweeted_status")) + { + JSONObject rtstatus; + try { + rtstatus = tweetobj.getJSONObject("retweeted_status"); + if(rtstatus.isNull("user")) + { + JSONObject rtuserobj = rtstatus.getJSONObject("user"); + try{ + fromusers.add(rtuserobj.get("screen_name").toString()); + }catch(JSONException ex) + { + ex.printStackTrace(); + } + } + } catch (JSONException ex) { + 
Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex); + } + } + else + { + //use the tweet text to retrieve the pattern "RT @username:" + fromusers = GetRTUsers(text); + } + if(fromusers.isEmpty()) + { + continue; + } + + //identify the class values to be applied to all the nodes and + //edges. +// String prunedtext = TextUtils.RemoveTwitterElements(text); +// Integer class_code = tweet_class_codes.get(prunedtext); +// if(class_code==null) +// { +// class_code = tweet_class_counter; +// tweet_class_codes.put(prunedtext, class_code); //set the unique id for this tweet +// tweet_class_counter++; +// } + t.text = TextUtils.RemoveRTElements(text); + if(!tweetobj.isNull("user")) + { + JSONObject userobj; + try { + userobj = tweetobj.getJSONObject("user"); + t.user = userobj.getString("screen_name").toLowerCase(); + } catch (JSONException ex) { + Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex); + } + } +// try { +// t.pubdate = String.valueOf(tweetobj.get("timestamp")); +// } catch (JSONException ex) { +// Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex); +// } + t.catColor = hashtagarray[t.catID].color; + //update the size of the from fromuser + int cur_level = 0; + for(int i=fromusers.size()-1;i>=0;i--) + { + String touser = ""; + if(i==0) + {//if this is the last user in the retweet sequence then use the user of the tweet as the next link + touser = t.user; + } + else + { //if there are still fromuser in the retweet chain then use them as the next link + touser = fromusers.get(i-1); + } + //don't add any selflinks + if(fromusers.get(i).equals(touser)) + { + continue; + } + NetworkNode fromuser = null; + if(userconnections.containsKey(fromusers.get(i))) + { + //from node already exists simply add this new connection to it + fromuser = userconnections.get(fromusers.get(i)); + } + else + { + //the from user was not found. 
add the node + fromuser = new NetworkNode(); + // fromuser.id = nodeid++; + fromuser.username = fromusers.get(i); + fromuser.tonodes = new ArrayList<ToNodeInfo>(); + fromuser.class_codes = new ArrayList<Integer>(); + fromuser.size = DEFAULT_NODE_SIZE; + fromuser.level = cur_level; + fromuser.data = new ArrayList<String>(); + fromuser.data.add(t.text); + //fromuser.category = ; + } +// if(!fromuser.class_codes.contains(class_code)) +// { +// //add the marker to from node if it does not have it already +// fromuser.class_codes.add(class_code); +// } + //if to node is not in the list then create it + NetworkNode tonode = null; + if(!userconnections.containsKey(touser)) + { + tonode = new NetworkNode(); + // System.out.println(touser+" "+nodeid); + // tonode.id= nodeid++; + tonode.username = touser; + tonode.tonodes= new ArrayList<ToNodeInfo>(); + tonode.class_codes = new ArrayList<Integer>(); + tonode.catID = t.catID; + tonode.catColor = t.catColor; + tonode.size = DEFAULT_NODE_SIZE; + tonode.data= new ArrayList<String>(); + tonode.data.add(t.text); + tonode.level = cur_level+1; + //add the classcode to the node if it doesn't already exist +// if(!tonode.class_codes.contains(class_code)) +// { +// tonode.class_codes.add(class_code); +// } + //add the touser info + userconnections.put(touser, tonode); + } + else + { + tonode = userconnections.get(touser); + tonode.data.add(t.text); + if(tonode.level<cur_level+1) + { + tonode.level = cur_level; + } + //add the classcode to the node if it doesn't already exist +// if(!tonode.class_codes.contains(class_code)) +// { +// tonode.class_codes.add(class_code); +// } + } + ToNodeInfo inf = new ToNodeInfo(); + inf.tonodeid = tonode.id; + inf.text = t.text; +// inf.date = t.pubdate; +// inf.class_code = class_code; + inf.tousername = touser; + inf.catID = t.catID; + inf.catColor = t.catColor; + fromuser.tonodes.add(inf); + //update from node size + fromuser.size++; + //add back updated fromuser + 
userconnections.put(fromusers.get(i), fromuser); + //update the level for next iteration + cur_level++; + } + } + }catch(IOException ex) + { + ex.printStackTrace(); + } + Set<String> keys = userconnections.keySet(); + ArrayList<NetworkNode> returnnodes = new ArrayList<NetworkNode>(); + //its +1 because nodes with size 0 are not going to be used to calculate the class + int min = DEFAULT_NODE_SIZE+1; + int max = DEFAULT_NODE_SIZE+1; + for(String k:keys) + { + NetworkNode n = userconnections.get(k); + int maxcat = GetMajorityTopicColor(n,hashtagarray); + n.catID = maxcat; + n.catColor = hashtagarray[maxcat].color; + userconnections.put(k, n); + // +// if(n.size==0) +// {//mark the node as a zero node +// n.class_codes.add(-1); +// } +// else +// { + if(n.size>max) + { + max = n.size; + } + if(n.size<min) + { + min = n.size; + } +// } + returnnodes.add(n); + } + //create node groups to assign unique colors to nodes in different Categories based upon the number of connections + ArrayList<NetworkNode> nodes = ComputeGroupsSqrt(returnnodes, max, min, numNodeClasses); + Collections.sort(nodes,Collections.reverseOrder(new NodeSizeComparator())); + //select how many nodes to show. 
+ int nodes_to_visit = 0; + if(nodes.size()>=num_nodes) + { + nodes_to_visit = num_nodes; + } + else + { + nodes_to_visit = nodes.size(); + } + + HashMap<String,NetworkNode> prunednodes = new HashMap<String,NetworkNode>(); + HashMap<String,Integer> nodeidlist = new HashMap<String,Integer>(); + int nodeid = 0; //node nodeid counter + for(int k=0;k<nodes_to_visit;k++) + { + NetworkNode nd = nodes.get(k); +// System.out.println("visiting node "+nd.username); + nd.level = 0; + HashMap<String,NetworkNode> rtnodes = GetNextHopConnections(userconnections,nd,new HashMap<String,NetworkNode>()); + Set<String> names = rtnodes.keySet(); + for(String n:names) + { + if(!prunednodes.containsKey(n)) + { + NetworkNode newnode = rtnodes.get(n); + if(newnode.size>0) + { + prunednodes.put(n, newnode); + nodeidlist.put(n, nodeid++); + } + } + } + } + + /** We now have all the nodes of the network. compute their ids sequentially + * and assign them to the respective nodes. Simultaneously compact the nodes + * of the network to remove all nodes which have not been retweeted and are + * of size 0 + */ + + Set<String> allnodes = prunednodes.keySet(); +// System.out.println(prunednodes.size()); + ArrayList<NetworkNode> finalnodes = new ArrayList<NetworkNode>(); +// HashMap<Integer,ArrayList<Integer>> conninfo = new HashMap<Integer,ArrayList<Integer>>(); + for(String n:allnodes) + { + NetworkNode nd = prunednodes.get(n); + nd.id = nodeidlist.get(nd.username); + ArrayList<Integer> connids = new ArrayList<Integer>(); +// ArrayList<ToNodeInfo> compact_To_nodes = new ArrayList<ToNodeInfo>(); + int counter = 0; + for(ToNodeInfo tnf: nd.tonodes) + { + //user has never been retweeted. 
the chain terminates here, so remove it + if(nodeidlist.containsKey(tnf.tousername)) + { + tnf.tonodeid = nodeidlist.get(tnf.tousername); + connids.add(tnf.tonodeid); + nd.tonodes.set(counter, tnf); + counter++; + } + } + finalnodes.add(nd); + //store the connections to compute the clusterids later +// if(!conninfo.containsKey(nd.id)) +// { +// conninfo.put(nd.id, connids); +// } + } + //generate the clusterids +// ArrayList<Integer>[] clusterids = (ArrayList<Integer>[])new ArrayList[allnodes.size()]; +// Set<Integer> idkeys = conninfo.keySet(); +// for(int id:idkeys) +// { +// for(int x:conninfo.get(id)) +// { +// if(clusterids[x]==null) +// { +// ArrayList<Integer> toclusterid = new ArrayList<Integer>(); +// toclusterid.add(id); +// clusterids[x] = toclusterid; +// } +// else +// { +// ArrayList<Integer> toclusterid = clusterids[x]; +// if(!toclusterid.contains(id)) +// { +// toclusterid.add(id); +// clusterids[x] = toclusterid; +// } +// } +// } +// } + //now create the final node list with the clusterids +// for(String n:allnodes) +// { +// NetworkNode nd = prunednodes.get(n); +// ArrayList<Integer> cids = clusterids[nd.id]; +// if(cids!=null) +// { +// int size = cids.size(); +// nd.clusterID = new int[size+1]; +// int counter=0; +// nd.clusterID[counter++] = nd.id; +// for(int c:cids) +// { +// nd.clusterID[counter++] = c; +// } +// } + //System.out.println(nd.class_codes.toString()); +// finalnodes.add(nd); +// } + Collections.sort(finalnodes,new NodeIDComparator()); + System.out.println(finalnodes.size()); + for(NetworkNode node:finalnodes) + { + System.out.println(node.id+" "+node.username+" "+node.level+" "+node.size+" "+node.catColor+node.data.get(0)); + } + return GetD3Structure(finalnodes); + } + + /** + * Creates a D3 representation of the nodes, consisting of two JSONArray a set of nodes and a set of links between the nodes + * @param finalnodes + * @return + */ + public JSONObject GetD3Structure(ArrayList<NetworkNode> finalnodes) + { + JSONObject 
alltweets = new JSONObject(); + try { + JSONArray nodes = new JSONArray(); + JSONArray links = new JSONArray(); + for (NetworkNode node : finalnodes) + { + try { + //create adjacencies + JSONArray nodedata = new JSONArray(); + for (ToNodeInfo tnf : node.tonodes) { + JSONObject jsadj = new JSONObject(); + jsadj.put("source", node.id); + jsadj.put("target", tnf.tonodeid); + //weight of the edge + jsadj.put("value", 1); + //class code is a unique id corresponding to the text + jsadj.put("data", tnf.class_code); + links.put(jsadj); + //create a data object for the node + JSONObject jsdata = new JSONObject(); + jsdata.put("tonodeid", tnf.tonodeid); + jsdata.put("nodefrom", node.username); + jsdata.put("nodeto", tnf.tousername); + jsdata.put("tweet", tnf.text); +// jsdata.put("pubtime", tnf.date); + //class code for tweet to be used to filter +// jsdata.put("classcode", tnf.class_code); + nodedata.put(jsdata); + } + //add node + JSONObject nd = new JSONObject(); + nd.put("name", node.username); + nd.put("group", node.group); + nd.put("id", node.id); + nd.put("size", node.size); + nd.put("catColor", node.catColor); + nd.put("catID", node.catID); + nd.put("data", nodedata); + nd.put("level", node.level); + //clusterids for the node +// JSONArray cids = new JSONArray(); +// if (node.clusterID != null) { +// for (int code : node.clusterID) { +// cids.put(code); +// } +// } else { +// cids.put(node.id); +// } +// nd.put("clusterids", cids); + //classcodes for the node +// JSONArray codes = new JSONArray(); +// for (int c : node.class_codes) { +// codes.put(c); +// } +// nd.put("classcodes", codes); + nodes.put(nd); + } catch (JSONException ex) { + ex.printStackTrace(); + } + } + alltweets.put("nodes", nodes); + alltweets.put("links", links); + } catch (JSONException ex) { + Logger.getLogger(CreateD3Network.class.getName()).log(Level.SEVERE, null, ex); + } + return alltweets; + } + + /** + * Recursively traverses the list of nodes to identify all nodes reachable from a 
starting node. + * @param userconnections A map containing the usernames as keys and the node information as value + * @param cur_node Node currently being processed. + * @param newnodes A list of nodes which can be reached from the current node + * @return A map of the usernames and the node information for all nodes reachable + */ + public HashMap<String,NetworkNode> GetNextHopConnections(HashMap<String,NetworkNode> userconnections,NetworkNode cur_node,HashMap<String,NetworkNode> newnodes) + { + cur_node.level = cur_node.level+1; + newnodes.put(cur_node.username,cur_node); + for(int i=0;i<cur_node.tonodes.size();i++) + { + ToNodeInfo tnf = cur_node.tonodes.get(i); + if(newnodes.containsKey(tnf.tousername)) + { + continue; + } + + HashMap<String,NetworkNode> rtnodes = GetNextHopConnections(userconnections, userconnections.get(tnf.tousername),newnodes); + newnodes = rtnodes; + } + return newnodes; + } + + /** + * Divides a list of nodes into groups using the square root binning + * technique. If a node has size x and there are y groups in total. Then the + * group of the node is computed as ceil((sqrt(x)/sqrt(max))*y), where max is + * the size of the largest node. 
+ * @param nodes A list of nodes + * @param max The maximum size of a node + * @param min The minimum size of a node + * @param noofclasses Number of classes into which the nodes must be classified + * @return A list of nodes along with their class + */ + public ArrayList<NetworkNode> ComputeGroupsSqrt(ArrayList<NetworkNode> nodes, int max, int min, int noofclasses) + { + ArrayList<NetworkNode> finalnodes = new ArrayList<NetworkNode>(); + for(int i=0;i<nodes.size();i++) + { + NetworkNode node = nodes.get(i); + int color_index = 0; + if(node.size>0) + { + color_index = (int) Math.ceil(((double)Math.sqrt(node.size)/Math.sqrt(max))*noofclasses)-1; +// node.size = color_index*6; + } + node.group = color_index; + finalnodes.add(node); + } + return finalnodes; + } + + + //DEBUG use only + public static void main(String[] args) + { + try { + CreateD3Network cdn = new CreateD3Network(); + JSONObject jobj = new JSONObject(); + JSONObject obj = new JSONObject(); + obj.put("color", "#800000"); + JSONArray ja = new JSONArray(); + ja.put("zuccotti"); + obj.put("hts", ja); + jobj.put("Group 1", obj); + obj = new JSONObject(); + obj.put("color", "#0FFF00"); + ja = new JSONArray(); + ja.put("#nypd"); + obj.put("hts", ja); + jobj.put("Group 2", obj); + String filename = "D:\\Twitter Data Analytics\\Data\\testows.json"; + JSONObject nodes = cdn.ConvertTweetsToDiffusionPath(filename,7, jobj,5); + } catch (JSONException ex) { + ex.printStackTrace(); + } + } +} diff --git a/src/Chapter5/network/ExtractUserTagNetwork.java b/src/Chapter5/network/ExtractUserTagNetwork.java new file mode 100644 index 0000000..43ae680 --- /dev/null +++ b/src/Chapter5/network/ExtractUserTagNetwork.java @@ -0,0 +1,173 @@ +/* TweetTracker. 
Copyright (c) Arizona Board of Regents on behalf of Arizona State University + * @author shamanth + */ +package Chapter5.network; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.HashMap; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +public class ExtractUserTagNetwork +{ + + static final String DEF_INFILENAME = "ows.json"; + + /** + * Extracts a map of all the hashtags a user has used in his tweets resulting in a bipartite network. The frequency of each tag is also returned in the form of a map. + * @param inFilename File containing a list of tweets as JSON objects + * @return A map containing the users as keys and a map containing the hashtags they use along with their frequency. 
+ */ + public HashMap<String,HashMap<String,Integer>> ExtractUserHashtagNetwork(String inFilename) + { + HashMap<String,HashMap<String,Integer>> usertagmap = new HashMap<String,HashMap<String,Integer>>(); + BufferedReader br = null; + try{ + br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); + String temp = ""; + while((temp = br.readLine())!=null) + { + try{ + JSONObject tweetobj = new JSONObject(temp); + String text; + String username; + HashMap<String,Integer> tags = new HashMap<String,Integer>(); + if(!tweetobj.isNull("entities")) + { + JSONObject entities = tweetobj.getJSONObject("entities"); + JSONArray hashtags; + try { + hashtags = entities.getJSONArray("hashtags"); + for(int i=0;i<hashtags.length();i++) + { + JSONObject tag = hashtags.getJSONObject(i); + String tg = tag.getString("text").toLowerCase(); + if(!tags.containsKey(tg)) + { + tags.put(tg,1); + } + else + { + tags.put(tg, tags.get(tg)+1); + } + } + }catch(JSONException ex) + { + ex.printStackTrace(); + } + } + else + if(!tweetobj.isNull("text")) + { + text = tweetobj.getString("text"); + tags = ExtractHashTags(text); + } + if(!tweetobj.isNull("user")) + { + JSONObject userobj = tweetobj.getJSONObject("user"); + username = "@"+userobj.getString("screen_name").toLowerCase(); + if(usertagmap.containsKey(username)) + { + HashMap<String,Integer> usertags = usertagmap.get(username); + Set<String> keys = tags.keySet(); + for(String k:keys) + { + if(usertags.containsKey(k)) + { + usertags.put(k, usertags.get(k)+tags.get(k)); + } + else + { + usertags.put(k, tags.get(k)); + } + } + usertagmap.put(username, usertags); + } + else + { + usertagmap.put(username, tags); + } + } + }catch(JSONException ex) + { + ex.printStackTrace(); + } + } + }catch(IOException ex) + { + ex.printStackTrace(); + }finally{ + try { + br.close(); + } catch (IOException ex) { + Logger.getLogger(ExtractUserTagNetwork.class.getName()).log(Level.SEVERE, null, ex); + } + } + return usertagmap; + } + 
+ /** + * Extracts all the hashtags mentioned in a tweet and creates a map with the frequency of their occurrence. + * @param text + * @return A map containing the hashtags as keys and their frequency as value + */ + public HashMap<String,Integer> ExtractHashTags(String text) + { + Pattern p = Pattern.compile("#[a-zA-Z0-9]+"); + Matcher m = p.matcher(text); + HashMap<String,Integer> tags = new HashMap<String,Integer>(); + while(m.find()) + { + String tag = text.substring(m.start(),m.end()).toLowerCase(); + if(!tags.containsKey(tag)) + { + tags.put(tag,1); + } + else + { + tags.put(tag, tags.get(tag)+1); + } + } + return tags; + } + + public static void main(String[] args) + { + ExtractUserTagNetwork eutn = new ExtractUserTagNetwork(); + + String infilename = DEF_INFILENAME; + if(args!=null) + { + if(args.length>=1&&!args[0].isEmpty()) + { + File fl = new File(args[0]); + if(fl.exists()) + { + infilename = args[0]; + } + } + } + HashMap<String, HashMap<String,Integer>> usertagmap = eutn.ExtractUserHashtagNetwork(infilename); + Set<String> keys = usertagmap.keySet(); + for(String key:keys) + { + System.out.println(key); + HashMap<String,Integer> tags = usertagmap.get(key); + Set<String> tagkeys = tags.keySet(); + for(String tag:tagkeys) + { + System.out.println(tag+","+tags.get(tag)); + } + } + } +} diff --git a/src/Chapter5/support/DateInfo.java b/src/Chapter5/support/DateInfo.java new file mode 100644 index 0000000..9a32d4c --- /dev/null +++ b/src/Chapter5/support/DateInfo.java @@ -0,0 +1,30 @@ +/* TweetTracker. 
Copyright (c) Arizona Board of Regents on behalf of Arizona State University + * @author shamanth + */ +package Chapter5.support; + +import java.util.Date; +import java.util.HashMap; + +public class DateInfo implements Comparable +{ + public Date d; + public HashMap<String,Integer> catcounts = new HashMap<String,Integer>(); + + public int compareTo(Object o) { + DateInfo temp = (DateInfo) o; + if(temp.d.after(this.d)) + { + return 1; + } + else + if(temp.d.before(this.d)) + { + return -1; + } + else + { + return 0; + } + } +} diff --git a/src/Chapter5/support/HashTagDS.java b/src/Chapter5/support/HashTagDS.java new file mode 100644 index 0000000..b338b6d --- /dev/null +++ b/src/Chapter5/support/HashTagDS.java @@ -0,0 +1,18 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ + +package Chapter5.support; + +/** + * + * @author shamanth + */ +public class HashTagDS +{ + public String groupname; + public String[] tags; + public String color; + +} diff --git a/src/Chapter5/support/NetworkNode.java b/src/Chapter5/support/NetworkNode.java new file mode 100644 index 0000000..4f662e8 --- /dev/null +++ b/src/Chapter5/support/NetworkNode.java @@ -0,0 +1,49 @@ +package Chapter5.support; + + +import java.util.ArrayList; + +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. 
+ */ + +/** + * + * @author shamanth + */ +public class NetworkNode +{ + public int id; + public String username; + public int size; + public String catColor; + public int group; +// public int[] clusterID; + public int catID; +// public double lat; +// public double lng; + public ArrayList<String> data; + public int level; + public ArrayList<Integer> class_codes; + public ArrayList<ToNodeInfo> tonodes; + + public NetworkNode Copy() + { + NetworkNode tempnode = new NetworkNode(); + tempnode.catColor = this.catColor; + tempnode.id = this.id; + tempnode.username= this.username; + tempnode.size = this.size; + tempnode.group = this.group; +// tempnode.clusterID = this.clusterID; + tempnode.catID = this.catID; +// tempnode.lat = this.lat; +// tempnode.lng = this.lng; + tempnode.data = this.data; +// tempnode.level = this.level; + tempnode.class_codes = this.class_codes; + tempnode.tonodes = this.tonodes; + return tempnode; + } +} diff --git a/src/Chapter5/support/NodeIDComparator.java b/src/Chapter5/support/NodeIDComparator.java new file mode 100644 index 0000000..0b41ae7 --- /dev/null +++ b/src/Chapter5/support/NodeIDComparator.java @@ -0,0 +1,32 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ + +package Chapter5.support; + +import java.util.Comparator; + +/** + * + * @author shamanth + */ +public class NodeIDComparator implements Comparator +{ + + public int compare(Object o1, Object o2) { + int id1 = ((NetworkNode) o1).id; + int id2 = ((NetworkNode) o2).id; + if(id1>id2) + { + return 1; + } + else + if(id1<id2) + return -1; + else + return 0; + } + + +} diff --git a/src/Chapter5/support/NodeSizeComparator.java b/src/Chapter5/support/NodeSizeComparator.java new file mode 100644 index 0000000..23ecb4e --- /dev/null +++ b/src/Chapter5/support/NodeSizeComparator.java @@ -0,0 +1,29 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. 
+ */ + +package Chapter5.support; +import java.util.Comparator; + +/** + * + * @author shamanth + */ +public class NodeSizeComparator implements Comparator +{ + public int compare(Object o1, Object o2) + { + int size1 = ((NetworkNode) o1).size; + int size2 = ((NetworkNode) o2).size; + if(size1>size2) + { + return 1; + } + if(size1<size2) + return -1; + else + return 0; + } + +} diff --git a/src/Chapter5/support/ToNodeInfo.java b/src/Chapter5/support/ToNodeInfo.java new file mode 100644 index 0000000..725a10a --- /dev/null +++ b/src/Chapter5/support/ToNodeInfo.java @@ -0,0 +1,23 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ + +package Chapter5.support; + +/** + * + * @author shamanth + */ +public class ToNodeInfo +{ + public int tonodeid; + public String text; + public String tousername; + public String date; + public int class_code; + public int catID; + public String catColor; + //this is the default direction invert option. If the library adds nodes to the adjacency then that should be set to true in the client side +// public boolean direction = false; +} diff --git a/src/Chapter5/support/Tweet.java b/src/Chapter5/support/Tweet.java new file mode 100644 index 0000000..be53166 --- /dev/null +++ b/src/Chapter5/support/Tweet.java @@ -0,0 +1,21 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ + +package Chapter5.support; + +/** + * + * @author shamanth + */ +public class Tweet { + public String text; + public long id; + public double lat; + public double lng; + public String pubdate; + public String user; + public int catID; + public String catColor; +} diff --git a/src/Chapter5/text/EventSummaryExtractor.java b/src/Chapter5/text/EventSummaryExtractor.java new file mode 100644 index 0000000..e76f42e --- /dev/null +++ b/src/Chapter5/text/EventSummaryExtractor.java @@ -0,0 +1,269 @@ +/* TweetTracker. 
Copyright (c) Arizona Board of Regents on behalf of Arizona State University + * @author shamanth + */ +package Chapter5.text; + +import Chapter5.support.DateInfo; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +public class EventSummaryExtractor +{ + + final String DEF_INFILENAME = "ows.json"; + HashMap<String,ArrayList<String>> CATEGORIES = new HashMap<String,ArrayList<String>>(); + SimpleDateFormat twittersdm = new SimpleDateFormat("EEE MMM dd HH:mm:ss Z yyyy"); + SimpleDateFormat dayhoursdm = new SimpleDateFormat("yyyy-MM-dd:HH"); +// SimpleDateFormat daysdm = new SimpleDateFormat("MM/dd/yyyy"); + SimpleDateFormat hoursdm = new SimpleDateFormat("HH"); + + /** + * + */ + public void InitializeCategories() + { + ArrayList<String> people = new ArrayList<String>(); + people.add("protesters"); + people.add("people"); + CATEGORIES.put("People",people); + ArrayList<String> police = new ArrayList<String>(); + police.add("police"); + police.add("cops"); + police.add("nypd"); + police.add("raid"); + CATEGORIES.put("Police",police); + ArrayList<String> media = new ArrayList<String>(); + media.add("press"); + media.add("news"); + media.add("media"); + CATEGORIES.put("Media",media); + ArrayList<String> city = new ArrayList<String>(); + city.add("nyc"); + city.add("zucotti"); + city.add("park"); + CATEGORIES.put("Location",city); + ArrayList<String> judiciary = new ArrayList<String>(); + judiciary.add("judge"); + judiciary.add("eviction"); + judiciary.add("order"); + judiciary.add("court"); + 
CATEGORIES.put("Judiciary", judiciary); + } + + /** + * + * @param filename + * @return + */ + public JSONObject ExtractCategoryTrends(String filename) + { + JSONObject result = new JSONObject(); + try { + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF-8")); + String temp = ""; + Set<String> catkeys = CATEGORIES.keySet(); + HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>(); + while((temp = br.readLine())!=null) + { + Date d = new Date(); + try { + JSONObject jobj = new JSONObject(temp); + //Published time + if(!jobj.isNull("created_at")) + { + String time = ""; + try { + time = jobj.getString("created_at"); + } catch (JSONException ex) { + Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); + } + if(time.isEmpty()) + { + continue; + } + else + { + try { + d = twittersdm.parse(time); + } catch (ParseException ex) { + continue; + } + } + } + else + if(!jobj.isNull("timestamp")) + { + long time = new Date().getTime(); + try{ + time = jobj.getLong("timestamp"); + }catch(JSONException ex) + { + ex.printStackTrace(); + } + d = new Date(); + d.setTime(time); + } + String datestr = dayhoursdm.format(d); + String text = jobj.getString("text").toLowerCase(); +// System.out.println(text); + for(String key:catkeys) + { + ArrayList<String> words = CATEGORIES.get(key); + for(String word:words) + { + if(text.contains(word)) + { + HashMap<String,Integer> categorycount = new HashMap<String,Integer>(); + if(datecount.containsKey(datestr)) + { + categorycount = datecount.get(datestr); + } + if(categorycount.containsKey(key)) + { + categorycount.put(key, categorycount.get(key)+1); + } + else + { + categorycount.put(key, 1); + } + //update the categorycount for the specific date + datecount.put(datestr, categorycount); + break; + } + } + } + } catch (JSONException ex) { + Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); + 
} + } + //sort the dates + Set<String> datekeys = datecount.keySet(); + ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>(); + for(String date:datekeys) + { + Date d = null; + try { + d = dayhoursdm.parse(date); + } catch (ParseException ex) { + Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); + } + if(d!=null) + { + DateInfo info = new DateInfo(); + info.d = d; + info.catcounts = datecount.get(date); + dinfos.add(info); + } + } + Collections.sort(dinfos, Collections.reverseOrder()); + try { + result.put("axisxstep", dinfos.size()-1); + } catch (JSONException ex) { + Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); + } + try { + result.put("axisystep", CATEGORIES.size()-1); + } catch (JSONException ex) { + Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); + } + JSONArray xcoordinates = new JSONArray(); + JSONArray ycoordinates = new JSONArray(); + //now add the data and the axis labels + JSONArray axisxlabels = new JSONArray(); + JSONArray axisylabels = new JSONArray(); + JSONArray data = new JSONArray(); + for(String key:catkeys) + { + axisylabels.put(key); + } + //counters to mark the indices of the values added to data field. 
i is the x coordinate and j is the y coordinate + int i=0,j=0; + + for(DateInfo date:dinfos) + { + String strdate = hoursdm.format(date.d); + axisxlabels.put(strdate); + HashMap<String,Integer> catcounts = date.catcounts; + for(String key:catkeys) + { + xcoordinates.put(j); + ycoordinates.put(i++); + if(catcounts.containsKey(key)) + { + data.put(catcounts.get(key)); + } + else + { + data.put(0); + } + } + //reset the x coordinate as we move to the next y item + i=0; + j++; + } + try { + result.put("xcoordinates", xcoordinates); + } catch (JSONException ex) { + Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); + } + try { + result.put("ycoordinates", ycoordinates); + } catch (JSONException ex) { + Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); + } + try { + result.put("axisxlabels", axisxlabels); + } catch (JSONException ex) { + Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); + } + try { + result.put("axisylabels", axisylabels); + } catch (JSONException ex) { + Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); + } + try { + result.put("data", data); + } catch (JSONException ex) { + Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); + } + br.close(); + } catch (IOException ex) { + Logger.getLogger(EventSummaryExtractor.class.getName()).log(Level.SEVERE, null, ex); + } + return result; + } + + public static void main(String[] args) + { + EventSummaryExtractor ese = new EventSummaryExtractor(); + String infilename = ese.DEF_INFILENAME; + if(args!=null) + { + if(args.length>=1&&!args[0].isEmpty()) + { + File fl = new File(args[0]); + if(fl.exists()) + { + infilename = args[0]; + } + } + } + ese.InitializeCategories(); + System.out.println(ese.ExtractCategoryTrends(infilename).toString()); + } +} diff --git a/src/Chapter5/text/ExtractTopKeywords.java b/src/Chapter5/text/ExtractTopKeywords.java 
new file mode 100644 index 0000000..8ab412a --- /dev/null +++ b/src/Chapter5/text/ExtractTopKeywords.java @@ -0,0 +1,151 @@ +/* TweetTracker. Copyright (c) Arizona Board of Regents on behalf of Arizona State University + * @author shamanth + */ +package Chapter5.text; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import utils.Tags; +import utils.TextUtils; + +public class ExtractTopKeywords +{ + + static final String DEF_INFILENAME = "ows.json"; + static final int DEF_K = 60; + + /** + * Extracts the most frequently occurring keywords from the tweets by processing them sequentially. Stopwords are ignored. + * @param inFilename File containing a list of tweets as JSON objects + * @param K Count of the top keywords to return + * @param ignoreHashtags If true, hashtags are not considered while counting the most frequent keywords + * @param ignoreUsernames If true, usernames are not considered while counting the most frequent keywords + * @param tu TextUtils object which handles the stopwords + * @return a JSONArray containing an array of JSONObjects. 
Each object contains two elements "text" and "size" referring to the word and it's frequency + */ + public JSONArray GetTopKeywords(String inFilename, int K, boolean ignoreHashtags, boolean ignoreUsernames, TextUtils tu) + { + HashMap<String, Integer> words = new HashMap<String,Integer>(); + BufferedReader br = null; + try{ + br = new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); + String temp = ""; + while((temp = br.readLine())!=null) + { + try{ + JSONObject tweetobj = new JSONObject(temp); + if(!tweetobj.isNull("text")) + { + String text = tweetobj.getString("text"); + //System.out.println(text); + text = text.toLowerCase().replaceAll("\\s+", " "); + /** Step 1: Tokenize tweets into individual words. and count their frequency in the corpus + * Remove stop words and special characters. Ignore user names and hashtags if the user chooses to. + */ + HashMap<String,Integer> tokens = tu.TokenizeText(text,ignoreHashtags,ignoreUsernames); + Set<String> keys = tokens.keySet(); + for(String key:keys) + { + if(words.containsKey(key)) + { + words.put(key, words.get(key)+tokens.get(key)); + } + else + { + words.put(key, tokens.get(key)); + } + } + } + }catch(JSONException ex) + { + ex.printStackTrace(); + } + } + }catch(IOException ex) + { + ex.printStackTrace(); + }finally{ + try { + br.close(); + } catch (IOException ex) { + Logger.getLogger(ExtractTopKeywords.class.getName()).log(Level.SEVERE, null, ex); + } + } + Set<String> keys = words.keySet(); + ArrayList<Tags> tags = new ArrayList<Tags>(); + for(String key:keys) + { + Tags tag = new Tags(); + tag.setKey(key); + tag.setValue(words.get(key)); + tags.add(tag); + } + // Step 2: Sort the words in descending order of frequency + Collections.sort(tags, Collections.reverseOrder()); + JSONArray cloudwords = new JSONArray(); + int numwords = K; + if(tags.size()<numwords) + { + numwords = tags.size(); + } + for(int i=0;i<numwords;i++) + { + JSONObject wordfreq = new JSONObject(); + Tags tag = 
tags.get(i); + try{ + wordfreq.put("text", tag.getKey()); + wordfreq.put("size",tag.getValue()); + cloudwords.put(wordfreq); + }catch(JSONException ex) + { + ex.printStackTrace(); + } + } + return cloudwords; + } + + public static void main(String[] args) + { + ExtractTopKeywords etk = new ExtractTopKeywords(); + + //Initialize the TextUtils class which handles all the processing of text. + TextUtils tu = new TextUtils(); + tu.LoadStopWords("C:/tweettracker/stopwords.txt"); + String infilename = DEF_INFILENAME; + int K = DEF_K; + if(args!=null) + { + if(args.length>=1&&!args[0].isEmpty()) + { + File fl = new File(args[0]); + if(fl.exists()) + { + infilename = args[0]; + } + } + if(args.length>=2&&!args[1].isEmpty()) + { + try{ + K = Integer.parseInt(args[1]); + }catch(NumberFormatException ex) + { + ex.printStackTrace(); + } + } + } + System.out.println(etk.GetTopKeywords(infilename, K, false,true,tu)); + } + +} diff --git a/src/Chapter5/trends/ControlChartExample.java b/src/Chapter5/trends/ControlChartExample.java new file mode 100644 index 0000000..2df814f --- /dev/null +++ b/src/Chapter5/trends/ControlChartExample.java @@ -0,0 +1,144 @@ +/* TweetTracker. 
Copyright (c) Arizona Board of Regents on behalf of Arizona State University + * @author shamanth + */ +package Chapter5.trends; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +public class ControlChartExample +{ + static final String DEF_INFILENAME = "ows.json"; + static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH:mm"); + + public JSONArray GenerateDataTrend(String inFilename) + { + BufferedReader br = null; + JSONArray result = new JSONArray(); + HashMap<String,Integer> datecount = new HashMap<String,Integer>(); + try{ + br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); + String temp = ""; + while((temp = br.readLine())!=null) + { + try { + JSONObject jobj = new JSONObject(temp); + long timestamp = jobj.getLong("timestamp"); + Date d = new Date(timestamp); + String strdate = SDM.format(d); + if(datecount.containsKey(strdate)) + { + datecount.put(strdate, datecount.get(strdate)+1); + } + else + { + datecount.put(strdate, 1); + } + } catch (JSONException ex) { + Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>(); + Set<String> keys = datecount.keySet(); + for(String key:keys) + { + DateInfo dinfo = new DateInfo(); + try { + dinfo.d = SDM.parse(key); + } catch (ParseException ex) { + ex.printStackTrace(); + continue; + } + dinfo.count = datecount.get(key); + dinfos.add(dinfo); + } + double mean = this.GetMean(dinfos); + double stddev = 
this.GetStandardDev(dinfos, mean); + Collections.sort(dinfos); + //Normalize the trend by subtracting the mean and dividing by standard deviation to get a distribution with 0 mean and a standard deviation of 1 + for(DateInfo dinfo:dinfos) + { + try{ + JSONObject jobj = new JSONObject(); + jobj.put("date", SDM.format(dinfo.d)); + jobj.put("count", (dinfo.count-mean)/stddev); + jobj.put("mean", 0); + jobj.put("stdev+3", 3); + jobj.put("stdev-3", -3); + result.put(jobj); + }catch(JSONException ex) + { + ex.printStackTrace(); + } + } + }catch(IOException ex) + { + ex.printStackTrace(); + }finally{ + try { + br.close(); + } catch (IOException ex) { + Logger.getLogger(ControlChartExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + return result; + } + + public double GetStandardDev(ArrayList<DateInfo> dateinfos,double mean) + { + double intsum = 0; + int numperiods = dateinfos.size(); + for(DateInfo dinfo:dateinfos) + { + intsum+=Math.pow((dinfo.count - mean),2); + } +// System.out.println(Math.sqrt((double)intsum/timePeriodCounts.size())); + return Math.sqrt((double)intsum/numperiods); + } + + public double GetMean(ArrayList<DateInfo> dateinfos) + { + int numperiods = dateinfos.size(); + int sum = 0; + for(DateInfo dinfo:dateinfos) + { + sum +=dinfo.count; + } +// System.out.println((double)sum/numPeriods); + return ((double)sum/numperiods); + } + + public static void main(String[] args) + { + ControlChartExample cce = new ControlChartExample(); + String infilename = DEF_INFILENAME; + if(args!=null) + { + if(args.length>=1&&!args[0].isEmpty()) + { + File fl = new File(args[0]); + if(fl.exists()) + { + infilename = args[0]; + } + } + } + System.out.println(cce.GenerateDataTrend(infilename)); + } + +} diff --git a/src/Chapter5/trends/DateInfo.java b/src/Chapter5/trends/DateInfo.java new file mode 100644 index 0000000..209f4a3 --- /dev/null +++ b/src/Chapter5/trends/DateInfo.java @@ -0,0 +1,29 @@ +/* TweetTracker. 
/**
 * A time bucket together with the number of items counted in it.
 * Instances sort chronologically by {@link #d}, earliest first.
 */
public class DateInfo implements Comparable<DateInfo>
{
    /** Start of the time bucket; must be set before the instance is compared. */
    public Date d;
    /** Number of items counted for the bucket. */
    public int count;

    /**
     * Orders two buckets by date.
     *
     * @param other the bucket to compare against
     * @return -1, 0 or 1 when this bucket is earlier than, equal to or later than {@code other}
     */
    @Override
    public int compareTo(DateInfo other) {
        // signum keeps the exact -1/0/1 contract of the previous hand-written comparison.
        return Integer.signum(this.d.compareTo(other.d));
    }
}
jobj.getLong("timestamp"); + Date d = new Date(timestamp); + String strdate = SDM.format(d); + if(datecount.containsKey(strdate)) + { + datecount.put(strdate, datecount.get(strdate)+1); + } + else + { + datecount.put(strdate, 1); + } + } catch (JSONException ex) { + Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex); + } + } + /** DateInfo consists of a date string and the corresponding count. + * It also implements a Comparator for sorting by date + */ + ArrayList<DateInfo> dinfos = new ArrayList<DateInfo>(); + Set<String> keys = datecount.keySet(); + for(String key:keys) + { + DateInfo dinfo = new DateInfo(); + try { + dinfo.d = SDM.parse(key); + } catch (ParseException ex) { + ex.printStackTrace(); + continue; + } + dinfo.count = datecount.get(key); + dinfos.add(dinfo); + } + Collections.sort(dinfos); + // Format and return the date string and the corresponding count + for(DateInfo dinfo:dinfos) + { + try{ + JSONObject jobj = new JSONObject(); + jobj.put("date", SDM.format(dinfo.d)); + jobj.put("count", dinfo.count); + result.put(jobj); + }catch(JSONException ex) + { + ex.printStackTrace(); + } + } + }catch(IOException ex) + { + ex.printStackTrace(); + }finally{ + try { + br.close(); + } catch (IOException ex) { + Logger.getLogger(ExtractDatasetTrend.class.getName()).log(Level.SEVERE, null, ex); + } + } + return result; + } + + public static void main(String[] args) + { + ExtractDatasetTrend edt = new ExtractDatasetTrend(); + + String infilename = DEF_INFILENAME; + if(args!=null) + { + if(args.length>=1&&!args[0].isEmpty()) + { + File fl = new File(args[0]); + if(fl.exists()) + { + infilename = args[0]; + } + } + } + System.out.println(edt.GenerateDataTrend(infilename)); + } + +} diff --git a/src/Chapter5/trends/SparkLineExample.java b/src/Chapter5/trends/SparkLineExample.java new file mode 100644 index 0000000..4a0164b --- /dev/null +++ b/src/Chapter5/trends/SparkLineExample.java @@ -0,0 +1,163 @@ +/* TweetTracker. 
Copyright (c) Arizona Board of Regents on behalf of Arizona State University + * @author shamanth + */ +package Chapter5.trends; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +public class SparkLineExample +{ + static final String DEF_INFILENAME = "ows.json"; + static final SimpleDateFormat SDM = new SimpleDateFormat("dd MMM yyyy HH"); + + public JSONObject GenerateDataTrend(String inFilename, ArrayList<String> keywords) + { + BufferedReader br = null; + JSONObject result = new JSONObject(); + HashMap<String,HashMap<String,Integer>> datecount = new HashMap<String,HashMap<String,Integer>>(); + try{ + br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); + String temp = ""; + while((temp = br.readLine())!=null) + { + try { + JSONObject jobj = new JSONObject(temp); + String text = jobj.getString("text").toLowerCase(); + long timestamp = jobj.getLong("timestamp"); + Date d = new Date(timestamp); + String strdate = SDM.format(d); + for(String word:keywords) + { + if(text.contains(word)) + { + HashMap<String,Integer> wordcount = new HashMap<String,Integer>(); + if(datecount.containsKey(strdate)) + { + wordcount = datecount.get(strdate); + } + if(wordcount.containsKey(word)) + { + wordcount.put(word, wordcount.get(word)+1); + } + else + { + wordcount.put(word, 1); + } + //update the wordcount for the specific date + datecount.put(strdate, wordcount); + } + } + } catch (JSONException ex) { + Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + 
//sort the dates + ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>(); + Set<String> keys = datecount.keySet(); + for(String key:keys) + { + TCDateInfo dinfo = new TCDateInfo(); + try { + dinfo.d = SDM.parse(key); + } catch (ParseException ex) { + ex.printStackTrace(); + continue; + } + dinfo.wordcount = datecount.get(key); + dinfos.add(dinfo); + } + Collections.sort(dinfos); + JSONArray[] tseriesvals = new JSONArray[keywords.size()]; + for(int i=0;i<tseriesvals.length;i++) + { + tseriesvals[i] = new JSONArray(); + } + //prepare the output + for(TCDateInfo date:dinfos) + { + HashMap<String,Integer> wordcount = date.wordcount; + int counter=0; + for(String word:keywords) + { + if(wordcount.containsKey(word)) + { + tseriesvals[counter].put(wordcount.get(word)); + } + else + { + tseriesvals[counter].put(0); + } + counter++; + } + } + int counter=0; + for(String word:keywords) + { + try { + result.put(word, tseriesvals[counter]); + } catch (JSONException ex) { + Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex); + } + counter++; + } + }catch(IOException ex) + { + ex.printStackTrace(); + }finally{ + try { + br.close(); + } catch (IOException ex) { + Logger.getLogger(SparkLineExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + return result; + } + + public static void main(String[] args) + { + SparkLineExample sle = new SparkLineExample(); + ArrayList<String> words = new ArrayList<String>(); + String infilename = DEF_INFILENAME; + if(args!=null) + { + if(args.length>=1&&!args[0].isEmpty()) + { + File fl = new File(args[0]); + if(fl.exists()) + { + infilename = args[0]; + } + } + for(int i=1;i<args.length;i++) + { + if(args[i]!=null&&!args[i].isEmpty()) + { + words.add(args[i]); + } + } + } + if(words.isEmpty()) + { + words.add("#nypd"); + words.add("#ows"); + } + System.out.println(sle.GenerateDataTrend(infilename,words)); + } + +} diff --git a/src/Chapter5/trends/TCDateInfo.java b/src/Chapter5/trends/TCDateInfo.java new 
/**
 * A time bucket together with per-keyword counts for that bucket.
 * Instances sort chronologically by {@link #d}, earliest first.
 */
public class TCDateInfo implements Comparable<TCDateInfo>
{
    /** Start of the time bucket; must be set before the instance is compared. */
    public Date d;
    /** Keyword mapped to the number of matching items in this bucket. */
    public HashMap<String,Integer> wordcount = new HashMap<String,Integer>();

    /**
     * Orders two buckets by date.
     *
     * @param other the bucket to compare against
     * @return -1, 0 or 1 when this bucket is earlier than, equal to or later than {@code other}
     */
    @Override
    public int compareTo(TCDateInfo other) {
        // signum keeps the exact -1/0/1 contract of the previous hand-written comparison.
        return Integer.signum(this.d.compareTo(other.d));
    }

}
HashMap<String,HashMap<String,Integer>>(); + try{ + br= new BufferedReader(new InputStreamReader(new FileInputStream(inFilename),"UTF-8")); + String temp = ""; + while((temp = br.readLine())!=null) + { + try { + JSONObject jobj = new JSONObject(temp); + String text = jobj.getString("text").toLowerCase(); + long timestamp = jobj.getLong("timestamp"); + Date d = new Date(timestamp); + String strdate = SDM.format(d); + for(String word:keywords) + { + if(text.contains(word)) + { + HashMap<String,Integer> wordcount = new HashMap<String,Integer>(); + if(datecount.containsKey(strdate)) + { + wordcount = datecount.get(strdate); + } + if(wordcount.containsKey(word)) + { + wordcount.put(word, wordcount.get(word)+1); + } + else + { + wordcount.put(word, 1); + } + //update the wordcount for the specific date + datecount.put(strdate, wordcount); + } + } + } catch (JSONException ex) { + Logger.getLogger(TrendComparisonExample.class.getName()).log(Level.SEVERE, null, ex); + } + } + //sort the dates + ArrayList<TCDateInfo> dinfos = new ArrayList<TCDateInfo>(); + Set<String> keys = datecount.keySet(); + for(String key:keys) + { + TCDateInfo dinfo = new TCDateInfo(); + try { + dinfo.d = SDM.parse(key); + } catch (ParseException ex) { + ex.printStackTrace(); + continue; + } + dinfo.wordcount = datecount.get(key); + dinfos.add(dinfo); + } + Collections.sort(dinfos); + //prepare the output + for(TCDateInfo date:dinfos) + { + JSONObject item = new JSONObject(); + String strdate = SDM.format(date.d); + try{ + item.put("date",strdate); + HashMap<String,Integer> wordcount = date.wordcount; + for(String word:keywords) + { + if(wordcount.containsKey(word)) + { + item.put(word, wordcount.get(word)); + } + else + { + item.put(word, 0); + } + } + result.put(item); + }catch(JSONException ex) + { + ex.printStackTrace(); + } + } + }catch(IOException ex) + { + ex.printStackTrace(); + }finally{ + try { + br.close(); + } catch (IOException ex) { + 
/**
 * Holds the Twitter OAuth endpoints and the application identity used to sign requests.
 *
 * NOTE(review): a consumer key and secret are embedded in source here. They look like
 * sample values meant to be replaced; confirm they are not live credentials and
 * prefer loading them from configuration.
 */
public class OAuthUtils
{
    // OAuth endpoints
    public static final String REQUEST_TOKEN_URL = "https://twitter.com/oauth/request_token";
    public static final String AUTHORIZE_URL = "https://twitter.com/oauth/authorize";
    public static final String ACCESS_TOKEN_URL = "https://twitter.com/oauth/access_token";

    //Please replace the Consumer key and secret to the one representing your application.
    public static final String CONSUMER_KEY = "L8CRRCUoRl3xcZ9bdrfUw";
    public static final String CONSUMER_SECRET = "PPCTObQGbGm1gkNvdJiTPKhoTksG787RTBwardkbM";

    /**
     * Placeholder for reading the application identity from a configuration file
     * (the original note suggests a JFIG file); currently does nothing.
     */
    public void ReadApplicationIdentity()
    {
    }
}
/**
 * Text helpers for tweets: stop-word loading, tokenizing, and detection or removal
 * of retweet markers, URLs, hashtags and user names.
 */
public class TextUtils
{
    // Punctuation stripped from every token. A character class is used because the former
    // alternation contained an empty branch that matched before ';' and ',' were tried.
    private static final Pattern TOKEN_PUNCTUATION = Pattern.compile("[\"'.;,]");
    private static final Pattern RETWEET_PREFIX = Pattern.compile("^rt @[a-z_0-9]+");
    private static final Pattern URL = Pattern.compile("https?://[a-zA-Z0-9\\./]+");
    private static final Pattern HASHTAG = Pattern.compile("#[a-zA-Z0-9]+");

    //holds a list of stop words to be removed when generating word clouds etc.
    HashSet<String> STOPWORDS = new HashSet<String>();

    String SEPARATOR = " ";

    /**
     * Loads stop words from a file into {@link #STOPWORDS}. Each line may hold several
     * comma-separated words; words are trimmed and lower-cased. A null or empty
     * file name is ignored, and I/O failures are logged rather than thrown.
     *
     * @param filename path of the stop-word file
     */
    public void LoadStopWords(String filename)
    {
        if(filename==null||filename.isEmpty())
        {
            return;
        }
        BufferedReader bread = null;
        try {
            bread = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF8"));
            String line;
            while ((line = bread.readLine()) != null) {
                for (String word : line.split(",")) {
                    // Trim so "the, and" yields "and" rather than " and", which could never match a token.
                    word = word.trim().toLowerCase();
                    if (!word.isEmpty()) {
                        STOPWORDS.add(word);
                    }
                }
            }
        } catch (IOException ex) {
            // Also covers FileNotFoundException and UnsupportedEncodingException.
            Logger.getLogger(TextUtils.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            // bread is null when the file could not be opened.
            if (bread != null) {
                try {
                    bread.close();
                } catch (IOException ex) {
                    Logger.getLogger(TextUtils.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }
    }

    /**
     * Splits text on {@link #SEPARATOR} and counts the remaining tokens. Quotes, periods,
     * semicolons and commas are removed from each token. Tokens are dropped when they are
     * two characters or shorter, are stop words, or start with "&amp;" or "http".
     *
     * @param text the text to tokenize
     * @param ignoreHashtags when true, tokens starting with '#' are dropped
     * @param ignoreUsernames when true, tokens starting with '@' are dropped
     * @return a map from each kept token to its number of occurrences in {@code text}
     */
    public HashMap<String,Integer> TokenizeText(String text, boolean ignoreHashtags, boolean ignoreUsernames)
    {
        HashMap<String,Integer> words = new HashMap<String,Integer>();
        for(String rawtoken:text.split(SEPARATOR))
        {
            String token = TOKEN_PUNCTUATION.matcher(rawtoken).replaceAll("");
            if(token.length()<=2||STOPWORDS.contains(token)||token.startsWith("&")||token.startsWith("http"))
            {
                continue;
            }
            if(ignoreHashtags&&token.startsWith("#"))
            {
                continue;
            }
            if(ignoreUsernames&&token.startsWith("@"))
            {
                continue;
            }
            Integer seen = words.get(token);
            words.put(token, seen==null ? 1 : seen+1);
        }
        return words;
    }

    /**
     * Checks whether the text starts with the retweet pattern "rt @user".
     * Expects the tweet text to be in lowercase.
     *
     * @param text lower-cased tweet text
     * @return true when the text begins with a retweet marker
     */
    public static boolean IsTweetRT(String text)
    {
        return RETWEET_PREFIX.matcher(text).find();
    }

    /**
     * Checks whether the text contains an http or https hyperlink.
     *
     * @param text the text to inspect
     * @return true when at least one URL is found
     */
    public static boolean ContainsURL(String text)
    {
        return URL.matcher(text).find();
    }

    /**
     * Extracts the distinct hashtags from the text, lower-cased, in order of first appearance.
     *
     * @param text the text to inspect
     * @return the list of hashtags, each including the leading '#'
     */
    public static ArrayList<String> GetHashTags(String text)
    {
        ArrayList<String> tags = new ArrayList<String>();
        Matcher mat = HASHTAG.matcher(text);
        while(mat.find())
        {
            String tag = mat.group().toLowerCase();
            if(!tags.contains(tag))
            {
                tags.add(tag);
            }
        }
        return tags;
    }

    /**
     * Removes quotes and backslashes from the text, replaces LF and CR with a space,
     * and trims the result.
     *
     * @param text the text to clean
     * @return the cleaned text
     */
    public static String GetCleanText(String text)
    {
        // NOTE(review): the third alternative was unreadable in the source (a bare quote that
        // broke the string literal); restored as the HTML entity &quot; - confirm against upstream.
        text = text.replaceAll("'|\"|&quot;", "");
        text = text.replaceAll("\\\\", "");
        text = text.replaceAll("\r\n|\n|\r", " ");
        return text.trim();
    }

    /**
     * Removes every "rt @user" / "RT @user" marker and all colons, leaving the original text.
     *
     * @param tweet the tweet text
     * @return the trimmed text without retweet markers
     */
    public static String RemoveRTElements(String tweet)
    {
        String text = tweet.replaceAll("rt @[a-z_A-Z0-9]+", " ");
        text = text.replaceAll("RT @[a-z_A-Z0-9]+", " ");
        text = text.replaceAll(":","");
        return text.trim();
    }

    /**
     * Removes all hashtags, URLs, user names and the punctuation {@code : ? . ; < > ( )}
     * from the tweet text.
     *
     * @param tweet the tweet text
     * @return the text with Twitter-specific elements removed (not trimmed)
     */
    public static String RemoveTwitterElements(String tweet)
    {
        String temptweet = tweet.replaceAll("#[a-zA-Z_0-9]+", "");
        temptweet = URL.matcher(temptweet).replaceAll("");
        temptweet = temptweet.replaceAll("@[a-zA-Z_0-9]+", "");
        temptweet = temptweet.replaceAll("[:?\\.;<>()]", "");
        return temptweet;
    }

}
+-118.79,32.49,-115.23,34.67
+15127356 20627637
\ No newline at end of file |