Rev 1310 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 1310 | Rev 1350 | ||
---|---|---|---|
Line 1... | Line 1... | ||
1 | 1 | ||
2 | package jomm.utils; |
2 | package jomm.utils; |
3 | 3 | ||
- | 4 | import org.apache.lucene.analysis.Token; |
|
4 | import org.apache.lucene.analysis.TokenFilter; |
5 | import org.apache.lucene.analysis.TokenFilter; |
5 | import org.apache.lucene.analysis.TokenStream; |
6 | import org.apache.lucene.analysis.TokenStream; |
6 | import org.apache.lucene.analysis.Token; |
- | |
7 | 7 | ||
8 | import java.io.*; |
8 | import java.io.IOException; |
- | 9 | import java.text.Normalizer; |
|
9 | 10 | ||
10 | 11 | ||
11 | /** |
12 | /** |
12 | * Normalizes token text to lower case. |
13 | * Normalizes token text to lower case. |
13 | * |
14 | * |
Line 28... | Line 29... | ||
28 | return null; |
29 | return null; |
29 | String value = t.termText(); |
30 | String value = t.termText(); |
30 | return new Token(clean(value),t.startOffset(),t.endOffset()); |
31 | return new Token(clean(value),t.startOffset(),t.endOffset()); |
31 | } |
32 | } |
32 | 33 | ||
- | 34 | public static String stripAccents(String s) |
|
- | 35 | { |
|
- | 36 | s = Normalizer.normalize(s, Normalizer.Form.NFD); |
|
- | 37 | s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}]", ""); |
|
- | 38 | return s; |
|
- | 39 | } |
|
33 | public static String clean(String value) |
40 | public static String clean(String value) |
34 | { |
41 | { |
35 | StringBuilder finalTerm = new StringBuilder(); |
42 | StringBuilder finalTerm = new StringBuilder(); |
36 | for (int j=0 ; j < value.length() ; j++) |
43 | for (int j=0 ; j < value.length() ; j++) |
37 | { |
44 | { |