diff options
author | Maurice Laveaux <m.laveaux@student.tue.nl> | 2014-05-22 16:33:24 +0200 |
---|---|---|
committer | Maurice Laveaux <m.laveaux@student.tue.nl> | 2014-05-22 16:33:24 +0200 |
commit | 4f32eedd2bd49837cc297acce399c108e8b558a7 (patch) | |
tree | e80ceb3fdf38db9552b52bd7f0c6b209c6c4bc28 /src/Chapter4/tweetlda/PorterStemmer.java | |
parent | 1da00321db0aa8c412c3ff1dff5734962ee49240 (diff) | |
download | TwitterDataAnalytics-4f32eedd2bd49837cc297acce399c108e8b558a7.tar.gz |
Removed unused source files
* Removed the shitty examples.
Diffstat (limited to 'src/Chapter4/tweetlda/PorterStemmer.java')
-rw-r--r-- | src/Chapter4/tweetlda/PorterStemmer.java | 33 |
1 file changed, 0 insertions, 33 deletions
```diff
diff --git a/src/Chapter4/tweetlda/PorterStemmer.java b/src/Chapter4/tweetlda/PorterStemmer.java
deleted file mode 100644
index 1a7149e..0000000
--- a/src/Chapter4/tweetlda/PorterStemmer.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package tweetlda;
-
-import cc.mallet.pipe.Pipe;
-import cc.mallet.types.Instance;
-import cc.mallet.types.TokenSequence;
-
-public class PorterStemmer extends Pipe {
-
-    private static final long serialVersionUID = 154100332101873830L;
-
-    public Instance pipe(Instance carrier){
-        TokenSequence ts = (TokenSequence) carrier.getData();
-        String word;
-        Stemmer s;
-
-        for(int i = 0; i < ts.size(); i++){
-            word = ts.get(i).getText();
-            //stem the word
-            s = new Stemmer();
-            for(char ch : word.toCharArray()){
-                if(Character.isLetter(ch)){
-                    s.add(ch);
-                }
-            }
-            s.stem();
-            ts.get(i).setText(s.toString());
-        }
-        carrier.setData(ts);
-
-        return carrier;
-    }
-
-}
```