summaryrefslogtreecommitdiff
path: root/src/Chapter4/tweetlda/PorterStemmer.java
diff options
context:
space:
mode:
author: Maurice Laveaux <m.laveaux@student.tue.nl> 2014-05-22 16:33:24 +0200
committer: Maurice Laveaux <m.laveaux@student.tue.nl> 2014-05-22 16:33:24 +0200
commit: 4f32eedd2bd49837cc297acce399c108e8b558a7 (patch)
tree: e80ceb3fdf38db9552b52bd7f0c6b209c6c4bc28 /src/Chapter4/tweetlda/PorterStemmer.java
parent: 1da00321db0aa8c412c3ff1dff5734962ee49240 (diff)
download: TwitterDataAnalytics-4f32eedd2bd49837cc297acce399c108e8b558a7.tar.gz
Removed unused source files
* Removed the shitty examples.
Diffstat (limited to 'src/Chapter4/tweetlda/PorterStemmer.java')
-rw-r--r-- src/Chapter4/tweetlda/PorterStemmer.java 33
1 files changed, 0 insertions, 33 deletions
diff --git a/src/Chapter4/tweetlda/PorterStemmer.java b/src/Chapter4/tweetlda/PorterStemmer.java
deleted file mode 100644
index 1a7149e..0000000
--- a/src/Chapter4/tweetlda/PorterStemmer.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package tweetlda;
-
-import cc.mallet.pipe.Pipe;
-import cc.mallet.types.Instance;
-import cc.mallet.types.TokenSequence;
-
-/** MALLET pipe that replaces each token's text with the result of running its letters through Stemmer. */
-public class PorterStemmer extends Pipe {
-
-    private static final long serialVersionUID = 154100332101873830L;
-
-    /** Stems the text of every token in the carrier's TokenSequence in place and returns the same carrier. */
-    public Instance pipe(Instance carrier){
-        TokenSequence tokens = (TokenSequence) carrier.getData();
-        for (int idx = 0; idx < tokens.size(); idx++) {
-            tokens.get(idx).setText(stemLetters(tokens.get(idx).getText()));
-        }
-        carrier.setData(tokens);
-        return carrier;
-    }
-
-    /** Feeds only the Character.isLetter characters of text to a fresh Stemmer (declared elsewhere) and returns its toString(). */
-    private static String stemLetters(String text) {
-        Stemmer stemmer = new Stemmer();
-        for (int k = 0; k < text.length(); k++) {
-            if (Character.isLetter(text.charAt(k))) {
-                stemmer.add(text.charAt(k));
-            }
-        }
-        stemmer.stem();
-        return stemmer.toString();
-    }
-}