6123 |
09 Feb 21 |
nicklas |
1 |
package net.sf.basedb.varsearch.analyze; |
6123 |
09 Feb 21 |
nicklas |
2 |
|
6123 |
09 Feb 21 |
nicklas |
3 |
import org.apache.lucene.analysis.util.CharTokenizer; |
6123 |
09 Feb 21 |
nicklas |
4 |
|
6123 |
09 Feb 21 |
nicklas |
5 |
/** |
6123 |
09 Feb 21 |
nicklas |
Tokenizer implementation specialized for HGVS.p values. |
6123 |
09 Feb 21 |
nicklas |
It will remove the prefix 'p.' and keep the rest of the |
6123 |
09 Feb 21 |
nicklas |
value. The implementation is list-aware and will also |
6123 |
09 Feb 21 |
nicklas |
split on comma and space. Values that start with '*' |
6123 |
09 Feb 21 |
nicklas |
also require special attantion since '*' is not allowed |
6123 |
09 Feb 21 |
nicklas |
(unless escaped) as the first character in a query. |
6123 |
09 Feb 21 |
nicklas |
12 |
|
6123 |
09 Feb 21 |
nicklas |
@author nicklas |
6123 |
09 Feb 21 |
nicklas |
14 |
*/ |
6123 |
09 Feb 21 |
nicklas |
15 |
public class HgvsProtTokenizer |
6123 |
09 Feb 21 |
nicklas |
16 |
extends CharTokenizer |
6123 |
09 Feb 21 |
nicklas |
17 |
{ |
6123 |
09 Feb 21 |
nicklas |
18 |
|
6123 |
09 Feb 21 |
nicklas |
19 |
public HgvsProtTokenizer() |
6123 |
09 Feb 21 |
nicklas |
20 |
{} |
6123 |
09 Feb 21 |
nicklas |
21 |
|
6123 |
09 Feb 21 |
nicklas |
22 |
@Override |
6123 |
09 Feb 21 |
nicklas |
23 |
protected boolean isTokenChar(int c) |
6123 |
09 Feb 21 |
nicklas |
24 |
{ |
6123 |
09 Feb 21 |
nicklas |
25 |
return c != 'p' && c != '.' && c != ',' && c != ' ' && c != '*'; |
6123 |
09 Feb 21 |
nicklas |
26 |
} |
6123 |
09 Feb 21 |
nicklas |
27 |
|
6123 |
09 Feb 21 |
nicklas |
28 |
} |