1,11 → 1,12 |
|
package jomm.utils; |
|
import org.apache.lucene.analysis.Token; |
import org.apache.lucene.analysis.TokenFilter; |
import org.apache.lucene.analysis.TokenStream; |
import org.apache.lucene.analysis.Token; |
|
import java.io.*; |
import java.io.IOException; |
import java.text.Normalizer; |
|
|
/** |
30,6 → 31,12 |
return new Token(clean(value),t.startOffset(),t.endOffset()); |
} |
|
public static String stripAccents(String s) |
{ |
s = Normalizer.normalize(s, Normalizer.Form.NFD); |
s = s.replaceAll("[\\p{InCombiningDiacriticalMarks}]", ""); |
return s; |
} |
public static String clean(String value) |
{ |
StringBuilder finalTerm = new StringBuilder(); |