6123 |
09 Feb 21 |
nicklas |
1 |
package net.sf.basedb.varsearch.analyze; |
6123 |
09 Feb 21 |
nicklas |
2 |
|
6123 |
09 Feb 21 |
nicklas |
3 |
import java.util.ArrayList; |
6123 |
09 Feb 21 |
nicklas |
4 |
import java.util.List; |
6123 |
09 Feb 21 |
nicklas |
5 |
import java.util.regex.Matcher; |
6123 |
09 Feb 21 |
nicklas |
6 |
import java.util.regex.Pattern; |
6123 |
09 Feb 21 |
nicklas |
7 |
|
6123 |
09 Feb 21 |
nicklas |
8 |
|
6123 |
09 Feb 21 |
nicklas |
9 |
/**
	Translate amino-acid notation from 3-letter codes to
	single-letter codes.

	The translator keeps no per-call state; use the shared
	{@link #INSTANCE}.

	@author nicklas
*/
public class AminoAcidTranslator
{

	/**
		The single shared instance. The constructor is private so
		this is the only way to get hold of a translator.
	*/
	public static final AminoAcidTranslator INSTANCE = new AminoAcidTranslator();

	/**
		Translation table; entries are applied in list order
		by {@link #translate(String)}.
	*/
	private final List<AA> aa;

	/**
		Matches a single-letter code followed by one or more digits
		and then the same code again, for example <code>E262E</code>
		or <code>*330*</code>. Group 1 is the code plus the digits.
	*/
	private final Pattern synonymous;

	private AminoAcidTranslator()
	{
		// The character class includes '*' since 'Ter' is translated to '*'.
		// Without it a retained stop codon (*330*) is never rewritten to '*330='.
		this.synonymous = Pattern.compile("(([A-Z*])\\d+)\\2");
		this.aa = new ArrayList<>(23); // One slot per entry added below
		aa.add(new AA("A", "Ala")); // Alanine GCA, GCC, GCG, GCT
		aa.add(new AA("B", "Asx")); // Asparagine or Aspartic acid AAC, AAT, GAC, GAT
		aa.add(new AA("C", "Cys")); // Cysteine TGC, TGT
		aa.add(new AA("D", "Asp")); // Aspartic acid GAC, GAT
		aa.add(new AA("E", "Glu")); // Glutamic acid GAA, GAG
		aa.add(new AA("F", "Phe")); // Phenylalanine TTC, TTT
		aa.add(new AA("G", "Gly")); // Glycine GGA, GGC, GGG, GGT
		aa.add(new AA("H", "His")); // Histidine CAC, CAT
		aa.add(new AA("I", "Ile")); // Isoleucine ATA, ATC, ATT
		aa.add(new AA("K", "Lys")); // Lysine AAA, AAG
		aa.add(new AA("L", "Leu")); // Leucine CTA, CTC, CTG, CTT, TTA, TTG
		aa.add(new AA("M", "Met")); // Methionine ATG
		aa.add(new AA("N", "Asn")); // Asparagine AAC, AAT
		aa.add(new AA("P", "Pro")); // Proline CCA, CCC, CCG, CCT
		aa.add(new AA("Q", "Gln")); // Glutamine CAA, CAG
		aa.add(new AA("R", "Arg")); // Arginine AGA, AGG, CGA, CGC, CGG, CGT
		aa.add(new AA("S", "Ser")); // Serine AGC, AGT, TCA, TCC, TCG, TCT
		aa.add(new AA("T", "Thr")); // Threonine ACA, ACC, ACG, ACT
		aa.add(new AA("V", "Val")); // Valine GTA, GTC, GTG, GTT
		aa.add(new AA("W", "Trp")); // Tryptophan TGG
		aa.add(new AA("Y", "Tyr")); // Tyrosine TAC, TAT
		aa.add(new AA("Z", "Glx")); // Glutamine or Glutamic acid CAA, CAG, GAA, GAG
		aa.add(new AA("*", "Ter")); // Stop codon
	}

	/**
		Translate the input that should contain 3-letter amino
		acid codes to a value with 1-letter codes. Also, for
		synonymous variants (the same code before and after the
		position, including the stop codon), the last code is
		replaced with =.
		Example:

		p.His602Tyr --> p.H602Y
		p.Glu262Glu --> p.E262=
		p.Ter330Ter --> p.*330=

		The 3-letter codes are matched case-sensitively as plain
		substrings. Text that doesn't match any code is copied
		to the result unchanged.

		@param in The value to translate
		@return The translated value, or null if the input is
			null or an empty string
	*/
	public String translate(String in)
	{
		if (in == null || in.length() == 0) return null;
		String out = in;
		// Step 1: Replace every 3-letter code with the 1-letter code
		for (AA a : aa)
		{
			out = out.replace(a.three, a.one);
		}
		// Step 2: Collapse <code><digits><same code> to <code><digits>=
		Matcher m = synonymous.matcher(out);
		out = m.replaceAll("$1=");
		return out;
	}

	/**
		Represents a single amino-acid as a pair of
		1-letter and 3-letter codes.
	*/
	static class AA
	{
		/** The 1-letter code. */
		final String one;
		/** The 3-letter code. */
		final String three;

		/**
			@param one The 1-letter code
			@param three The 3-letter code
		*/
		AA(String one, String three)
		{
			this.one = one;
			this.three = three;
		}
	}

}