6175 |
19 Mar 21 |
nicklas |
1 |
package net.sf.basedb.varsearch.analyze; |
6175 |
19 Mar 21 |
nicklas |
2 |
|
6175 |
19 Mar 21 |
nicklas |
3 |
import java.util.regex.Pattern; |
6175 |
19 Mar 21 |
nicklas |
4 |
|
6175 |
19 Mar 21 |
nicklas |
5 |
import org.apache.lucene.analysis.Analyzer; |
6175 |
19 Mar 21 |
nicklas |
6 |
import org.apache.lucene.analysis.LowerCaseFilter; |
6175 |
19 Mar 21 |
nicklas |
7 |
import org.apache.lucene.analysis.Tokenizer; |
6175 |
19 Mar 21 |
nicklas |
8 |
import org.apache.lucene.analysis.pattern.PatternReplaceFilter; |
6175 |
19 Mar 21 |
nicklas |
9 |
|
6175 |
19 Mar 21 |
nicklas |
10 |
/** |
6175 |
19 Mar 21 |
nicklas |
Analyzer implementation that uses the EffectTokenizer and LowerCaseFilter. |
6175 |
19 Mar 21 |
nicklas |
Since a lot of keywords ends with '_variant' we also implement a filter |
6175 |
19 Mar 21 |
nicklas |
that removes that suffix. |
6175 |
19 Mar 21 |
nicklas |
14 |
|
6175 |
19 Mar 21 |
nicklas |
@author nicklas |
6175 |
19 Mar 21 |
nicklas |
16 |
*/ |
6175 |
19 Mar 21 |
nicklas |
17 |
public class EffectAnalyzer |
6175 |
19 Mar 21 |
nicklas |
18 |
extends Analyzer |
6175 |
19 Mar 21 |
nicklas |
19 |
{ |
6175 |
19 Mar 21 |
nicklas |
20 |
|
6175 |
19 Mar 21 |
nicklas |
21 |
public EffectAnalyzer() |
6175 |
19 Mar 21 |
nicklas |
22 |
{} |
6175 |
19 Mar 21 |
nicklas |
23 |
|
6175 |
19 Mar 21 |
nicklas |
24 |
@Override |
6175 |
19 Mar 21 |
nicklas |
25 |
protected TokenStreamComponents createComponents(String field) |
6175 |
19 Mar 21 |
nicklas |
26 |
{ |
6175 |
19 Mar 21 |
nicklas |
27 |
Tokenizer t = new EffectTokenizer(); |
6175 |
19 Mar 21 |
nicklas |
28 |
LowerCaseFilter lc = new LowerCaseFilter(t); |
6175 |
19 Mar 21 |
nicklas |
29 |
PatternReplaceFilter pr = new PatternReplaceFilter(lc, Pattern.compile("_variant$"), "", false); |
6175 |
19 Mar 21 |
nicklas |
30 |
return new Analyzer.TokenStreamComponents(t, pr); |
6175 |
19 Mar 21 |
nicklas |
31 |
} |
6175 |
19 Mar 21 |
nicklas |
32 |
|
6175 |
19 Mar 21 |
nicklas |
33 |
} |