summaryrefslogtreecommitdiff
path: root/src/Chapter4/tweetlda/PorterStemmer.java
diff options
context:
space:
mode:
Diffstat (limited to 'src/Chapter4/tweetlda/PorterStemmer.java')
-rw-r--r--src/Chapter4/tweetlda/PorterStemmer.java33
1 files changed, 0 insertions, 33 deletions
diff --git a/src/Chapter4/tweetlda/PorterStemmer.java b/src/Chapter4/tweetlda/PorterStemmer.java
deleted file mode 100644
index 1a7149e..0000000
--- a/src/Chapter4/tweetlda/PorterStemmer.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package tweetlda;
-
-import cc.mallet.pipe.Pipe;
-import cc.mallet.types.Instance;
-import cc.mallet.types.TokenSequence;
-
-public class PorterStemmer extends Pipe {
-
- private static final long serialVersionUID = 154100332101873830L;
-
- public Instance pipe(Instance carrier){
- TokenSequence ts = (TokenSequence) carrier.getData();
- String word;
- Stemmer s;
-
- for(int i = 0; i < ts.size(); i++){
- word = ts.get(i).getText();
- //stem the word
- s = new Stemmer();
- for(char ch : word.toCharArray()){
- if(Character.isLetter(ch)){
- s.add(ch);
- }
- }
- s.stem();
- ts.get(i).setText(s.toString());
- }
- carrier.setData(ts);
-
- return carrier;
- }
-
-}