blob: 1a7149e1fe9f33cb7d270112cb4bc136407cc8a6 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
package tweetlda;
import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.TokenSequence;
/**
 * Pipe stage that replaces the text of every token in an instance's
 * {@link TokenSequence} with its stemmed form.
 *
 * <p>Each token is reduced to its letter characters (as decided by
 * {@link Character#isLetter(char)}); the remaining characters are fed to a
 * fresh {@code Stemmer}, and the stemmer's string form becomes the new token
 * text. The {@code Stemmer} class is declared outside this file; presumably it
 * implements the Porter algorithm, as this class's name suggests.
 */
public class PorterStemmer extends Pipe {
    private static final long serialVersionUID = 154100332101873830L;

    /**
     * Stems every token of the given instance in place.
     *
     * @param carrier instance whose data is cast to a {@link TokenSequence};
     *                a different data type causes a {@link ClassCastException}
     * @return the same {@code carrier}, with its token texts rewritten and its
     *         data set back to the same token sequence
     */
    public Instance pipe(Instance carrier) {
        TokenSequence tokens = (TokenSequence) carrier.getData();

        for (int index = 0; index < tokens.size(); index++) {
            String stemmed = stemLettersOf(tokens.get(index).getText());
            tokens.get(index).setText(stemmed);
        }

        carrier.setData(tokens);
        return carrier;
    }

    /**
     * Feeds only the letter characters of {@code text} to a new stemmer and
     * returns the stemmer's resulting string.
     */
    private static String stemLettersOf(String text) {
        Stemmer stemmer = new Stemmer();
        for (int pos = 0; pos < text.length(); pos++) {
            char current = text.charAt(pos);
            // Digits, punctuation and other non-letters never reach the stemmer.
            if (Character.isLetter(current)) {
                stemmer.add(current);
            }
        }
        stemmer.stem();
        return stemmer.toString();
    }
}
|