6133 |
16 Feb 21 |
nicklas |
1 |
package net.sf.basedb.varsearch.query; |
6133 |
16 Feb 21 |
nicklas |
2 |
|
6133 |
16 Feb 21 |
nicklas |
3 |
import java.util.regex.Matcher; |
6133 |
16 Feb 21 |
nicklas |
4 |
import java.util.regex.Pattern; |
6133 |
16 Feb 21 |
nicklas |
5 |
|
6133 |
16 Feb 21 |
nicklas |
6 |
/** |
6133 |
16 Feb 21 |
nicklas |
Some query fields can automatically be prefixed with the correct |
6133 |
16 Feb 21 |
nicklas |
field name. This should make it easier to type queries without having |
6133 |
16 Feb 21 |
nicklas |
to know or type field names. Examples: |
6133 |
16 Feb 21 |
nicklas |
10 |
|
6133 |
16 Feb 21 |
nicklas |
rsNNNN -> rsid:rsNNNN (where NNNN is numeric) |
6133 |
16 Feb 21 |
nicklas |
COSVnnnn -> cosmic:COSVnnnn (where nnnn is numeric) |
6133 |
16 Feb 21 |
nicklas |
p.XXXX -> p:XXXX (where XXXX is non-whitespace) |
6133 |
16 Feb 21 |
nicklas |
ANNNB -> p:ANNNB (where A and B are [A-Z*=] and NNN is numeric) |
6133 |
16 Feb 21 |
nicklas |
c.XXXX -> c:XXXX (where XXXX is non-whitespace) |
6133 |
16 Feb 21 |
nicklas |
NNNNA>B -> c:NNNNA>B (where NNNN is numeric or -+ and A,B are [ACGT]) |
7304 |
28 Aug 23 |
nicklas |
chrN -> chrom:chrN (where N is number or X or Y or M) |
6133 |
16 Feb 21 |
nicklas |
NNNN -> pos:NNNN (where NNNN is numeric) |
6143 |
22 Feb 21 |
nicklas |
SNV|Insertion|Deletion|Complex -> type:SNV|Insertion|Deletion|Complex |
6171 |
18 Mar 21 |
nicklas |
N/N -> gt:N/N (where N is 0 or 1) |
6171 |
18 Mar 21 |
nicklas |
21 |
|
6171 |
18 Mar 21 |
nicklas |
Finally, every '/' that is not already preceded by '\' is escaped by inserting a '\' in front of it. |
6133 |
16 Feb 21 |
nicklas |
23 |
*/ |
6133 |
16 Feb 21 |
nicklas |
24 |
public final class AutoPrefixer
{
  /** The single shared instance; the constructor is private. */
  public static final AutoPrefixer INSTANCE = new AutoPrefixer();

  // "rsNNNN" at a word boundary and not directly preceded by ':'
  private final Pattern rsid;
  // "COSVnnnn" at a word boundary and not directly preceded by ':'
  private final Pattern cosmic;
  // "p.XXXX" where XXXX is a run of non-whitespace; the "p." itself is dropped
  private final Pattern hgvsp;
  // Bare protein change such as "V600E": [A-Z*=], digits, [A-Z*=],
  // delimited by whitespace, parentheses or the start/end of the query
  private final Pattern hgvsp2;
  // "c.XXXX" where XXXX is a run of non-whitespace; the "c." itself is dropped
  private final Pattern hgvsc;
  // Bare coding change such as "1799T>A": optional '*', digits/'+'/'-',
  // bases, '>', bases; delimited by whitespace, parentheses or start/end
  private final Pattern hgvsc2;
  // "chr" followed by one or two of [0-9XYM]; must be followed by
  // whitespace, ')' or the end of the query
  private final Pattern chr;
  // A stand-alone number delimited by whitespace, parentheses or start/end
  private final Pattern pos;
  // One of the variant type keywords, followed by whitespace, ')' or end
  private final Pattern type;
  // Genotype "N/N" where each N is 0 or 1
  private final Pattern gt;
  // A '/' that is not already preceded by a backslash
  private final Pattern escape;

  /**
    Compiles all patterns once. Most patterns use the negative
    lookbehind (?&lt;!:) so that a term that has already been
    prefixed with a field name is not prefixed a second time.
  */
  private AutoPrefixer()
  {
    rsid = Pattern.compile("(?<!:)\\b(rs\\d+)");
    cosmic = Pattern.compile("(?<!:)\\b(COSV\\d+)");
    hgvsp = Pattern.compile("(?<!:)\\bp\\.(\\S+)");
    hgvsp2 = Pattern.compile("(?<=\\s|\\(|^)([A-Z*=]\\d+[A-Z*=])(?=\\s|\\)|$)");
    hgvsc = Pattern.compile("(?<!:)\\bc\\.(\\S+)");
    hgvsc2 = Pattern.compile("(?<=\\s|\\(|^)(\\*?[0-9+-]+[ACGT]+\\>[ACGT]+)(?=\\s|\\)|$)");
    // The lookahead accepts ')' just like 'pos' and 'type' do, so that the
    // last chromosome in a grouped query such as "(chr1 OR chrX)" is prefixed
    chr = Pattern.compile("(?<!:)\\b(chr[0-9XYM]{1,2})(?=\\s|\\)|$)");
    pos = Pattern.compile("(?<=\\s|\\(|^)(\\d+)(?=\\s|\\)|$)");
    type = Pattern.compile("(?<!:)\\b(SNV|Insertion|Deletion|Complex)(?=\\s|\\)|$)");
    gt = Pattern.compile("(?<!:)\\b([0-1]\\/[0-1])");
    escape = Pattern.compile("(?<!\\\\)(\\/)");
  }

  /**
    Adds field-name prefixes to the recognized terms in a query string
    and escapes forward slashes. The replacements are applied one
    after the other in a fixed order, each one working on the result
    of the previous one. The "p." and "c." forms are rewritten before
    the bare forms, and the resulting ':' in front of the value keeps
    the bare patterns from matching the same term again.

    @param query The query string as typed by the user, must not be null
    @return The query with prefixes added and every unescaped '/' escaped
  */
  public String autoPrefix(String query)
  {
    // Identifiers: rsNNNN -> rsid:rsNNNN, COSVnnnn -> cosmic:COSVnnnn
    Matcher m = rsid.matcher(query);
    query = m.replaceAll("rsid:$1");

    m = cosmic.matcher(query);
    query = m.replaceAll("cosmic:$1");

    // Protein changes: p.XXXX -> p:XXXX, then bare ANNNB -> p:ANNNB
    m = hgvsp.matcher(query);
    query = m.replaceAll("p:$1");

    m = hgvsp2.matcher(query);
    query = m.replaceAll("p:$1");

    // Coding changes: c.XXXX -> c:XXXX, then bare NNNNA>B -> c:NNNNA>B
    m = hgvsc.matcher(query);
    query = m.replaceAll("c:$1");

    m = hgvsc2.matcher(query);
    query = m.replaceAll("c:$1");

    // Location: chrN -> chrom:chrN, stand-alone number -> pos:NNNN
    m = chr.matcher(query);
    query = m.replaceAll("chrom:$1");

    m = pos.matcher(query);
    query = m.replaceAll("pos:$1");

    // Variant type keywords and genotypes
    m = type.matcher(query);
    query = m.replaceAll("type:$1");

    m = gt.matcher(query);
    query = m.replaceAll("gt:$1");

    // Must run last: the genotype replacement above leaves a '/' in the
    // query. The replacement string "\\\\$1" is a literal backslash
    // followed by the matched '/'.
    m = escape.matcher(query);
    query = m.replaceAll("\\\\$1");

    return query;
  }

}