4520 |
10 Sep 13 |
fredrik |
1 |
/* |
4520 |
10 Sep 13 |
fredrik |
MSNumpress.java |
4520 |
10 Sep 13 |
fredrik |
johan.teleman@immun.lth.se |
4520 |
10 Sep 13 |
fredrik |
4 |
|
4520 |
10 Sep 13 |
fredrik |
Copyright 2013 Johan Teleman |
4520 |
10 Sep 13 |
fredrik |
6 |
|
4520 |
10 Sep 13 |
fredrik |
Licensed under the Apache License, Version 2.0 (the "License"); |
4520 |
10 Sep 13 |
fredrik |
you may not use this file except in compliance with the License. |
4520 |
10 Sep 13 |
fredrik |
You may obtain a copy of the License at |
4520 |
10 Sep 13 |
fredrik |
10 |
|
4520 |
10 Sep 13 |
fredrik |
http://www.apache.org/licenses/LICENSE-2.0 |
4520 |
10 Sep 13 |
fredrik |
12 |
|
4520 |
10 Sep 13 |
fredrik |
Unless required by applicable law or agreed to in writing, software |
4520 |
10 Sep 13 |
fredrik |
distributed under the License is distributed on an "AS IS" BASIS, |
4520 |
10 Sep 13 |
fredrik |
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
4520 |
10 Sep 13 |
fredrik |
See the License for the specific language governing permissions and |
4520 |
10 Sep 13 |
fredrik |
limitations under the License. |
4520 |
10 Sep 13 |
fredrik |
18 |
*/ |
4520 |
10 Sep 13 |
fredrik |
19 |
package ms.numpress; |
4520 |
10 Sep 13 |
fredrik |
20 |
|
4520 |
10 Sep 13 |
fredrik |
21 |
public class MSNumpress { |
4520 |
10 Sep 13 |
fredrik |
22 |
|
4520 |
10 Sep 13 |
fredrik |
///PSI-MS obo accession numbers. |
4520 |
10 Sep 13 |
fredrik |
24 |
public static final String ACC_NUMPRESS_LINEAR = "MS:1002312"; |
4520 |
10 Sep 13 |
fredrik |
25 |
public static final String ACC_NUMPRESS_PIC = "MS:1002313"; |
4520 |
10 Sep 13 |
fredrik |
26 |
public static final String ACC_NUMPRESS_SLOF = "MS:1002314"; |
4520 |
10 Sep 13 |
fredrik |
27 |
|
4520 |
10 Sep 13 |
fredrik |
28 |
/** |
4520 |
10 Sep 13 |
fredrik |
* Convenience function for decoding binary data encoded by MSNumpress. If |
4520 |
10 Sep 13 |
fredrik |
* the passed cvAccession is one of |
4520 |
10 Sep 13 |
fredrik |
31 |
* |
4520 |
10 Sep 13 |
fredrik |
* ACC_NUMPRESS_LINEAR = "MS:1002312" |
4520 |
10 Sep 13 |
fredrik |
* ACC_NUMPRESS_PIC = "MS:1002313" |
4520 |
10 Sep 13 |
fredrik |
* ACC_NUMPRESS_SLOF = "MS:1002314" |
4520 |
10 Sep 13 |
fredrik |
35 |
* |
4520 |
10 Sep 13 |
fredrik |
* the corresponding decode function will be called. |
4520 |
10 Sep 13 |
fredrik |
37 |
* |
4520 |
10 Sep 13 |
fredrik |
* @cvAccession The PSI-MS obo CV accession of the encoded data. |
4520 |
10 Sep 13 |
fredrik |
* @data array of double to be encoded |
4520 |
10 Sep 13 |
fredrik |
* @dataSize number of doubles from data to encode |
4520 |
10 Sep 13 |
fredrik |
* @return The decoded doubles |
4520 |
10 Sep 13 |
fredrik |
42 |
*/ |
4520 |
10 Sep 13 |
fredrik |
43 |
public static double[] decode( |
4520 |
10 Sep 13 |
fredrik |
44 |
String cvAccession, |
4520 |
10 Sep 13 |
fredrik |
45 |
byte[] data, |
4520 |
10 Sep 13 |
fredrik |
46 |
int dataSize |
4520 |
10 Sep 13 |
fredrik |
47 |
) { |
4520 |
10 Sep 13 |
fredrik |
48 |
|
4520 |
10 Sep 13 |
fredrik |
49 |
if (cvAccession.equals(ACC_NUMPRESS_LINEAR)) { |
4520 |
10 Sep 13 |
fredrik |
50 |
double[] buffer = new double[dataSize * 2]; |
4520 |
10 Sep 13 |
fredrik |
51 |
int nbrOfDoubles = MSNumpress.decodeLinear(data, dataSize, buffer); |
4520 |
10 Sep 13 |
fredrik |
52 |
double[] result = new double[nbrOfDoubles]; |
4520 |
10 Sep 13 |
fredrik |
53 |
System.arraycopy(buffer, 0, result, 0, nbrOfDoubles); |
4520 |
10 Sep 13 |
fredrik |
54 |
return result; |
4520 |
10 Sep 13 |
fredrik |
55 |
|
4520 |
10 Sep 13 |
fredrik |
56 |
} else if (cvAccession.equals(ACC_NUMPRESS_SLOF)) { |
4520 |
10 Sep 13 |
fredrik |
57 |
double[] result = new double[dataSize / 2]; |
4520 |
10 Sep 13 |
fredrik |
58 |
MSNumpress.decodeSlof(data, dataSize, result); |
4520 |
10 Sep 13 |
fredrik |
59 |
return result; |
4520 |
10 Sep 13 |
fredrik |
60 |
|
4520 |
10 Sep 13 |
fredrik |
61 |
} else if (cvAccession.equals(ACC_NUMPRESS_PIC)) { |
4520 |
10 Sep 13 |
fredrik |
62 |
double[] buffer = new double[dataSize * 2]; |
4520 |
10 Sep 13 |
fredrik |
63 |
int nbrOfDoubles = MSNumpress.decodePic(data, dataSize, buffer); |
4520 |
10 Sep 13 |
fredrik |
64 |
double[] result = new double[nbrOfDoubles]; |
4520 |
10 Sep 13 |
fredrik |
65 |
System.arraycopy(buffer, 0, result, 0, nbrOfDoubles); |
4520 |
10 Sep 13 |
fredrik |
66 |
return result; |
4520 |
10 Sep 13 |
fredrik |
67 |
|
4520 |
10 Sep 13 |
fredrik |
68 |
} |
4520 |
10 Sep 13 |
fredrik |
69 |
|
4520 |
10 Sep 13 |
fredrik |
70 |
throw new IllegalArgumentException("'"+cvAccession+"' is not a numpress compression term"); |
4520 |
10 Sep 13 |
fredrik |
71 |
} |
4520 |
10 Sep 13 |
fredrik |
72 |
|
4520 |
10 Sep 13 |
fredrik |
73 |
/** |
4520 |
10 Sep 13 |
fredrik |
* This encoding works on a 4 byte integer, by truncating initial zeros or ones. |
4520 |
10 Sep 13 |
fredrik |
* If the initial (most significant) half byte is 0x0 or 0xf, the number of such |
4520 |
10 Sep 13 |
fredrik |
* halfbytes starting from the most significant is stored in a halfbyte. This initial |
4520 |
10 Sep 13 |
fredrik |
* count is then followed by the rest of the ints halfbytes, in little-endian order. |
4520 |
10 Sep 13 |
fredrik |
* A count halfbyte c of |
4520 |
10 Sep 13 |
fredrik |
79 |
* |
4520 |
10 Sep 13 |
fredrik |
* 0 <= c <= 8 is interpreted as an initial c 0x0 halfbytes |
4520 |
10 Sep 13 |
fredrik |
* 9 <= c <= 15 is interpreted as an initial (c-8) 0xf halfbytes |
4520 |
10 Sep 13 |
fredrik |
82 |
* |
4520 |
10 Sep 13 |
fredrik |
* Ex: |
4520 |
10 Sep 13 |
fredrik |
* int c rest |
4520 |
10 Sep 13 |
fredrik |
* 0 => 0x8 |
4520 |
10 Sep 13 |
fredrik |
* -1 => 0xf 0xf |
4520 |
10 Sep 13 |
fredrik |
* 23 => 0x6 0x7 0x1 |
4520 |
10 Sep 13 |
fredrik |
88 |
* |
4520 |
10 Sep 13 |
fredrik |
* @x the int to be encoded |
4520 |
10 Sep 13 |
fredrik |
* @res the byte array were halfbytes are stored |
4520 |
10 Sep 13 |
fredrik |
* @resOffset position in res were halfbytes are written |
4520 |
10 Sep 13 |
fredrik |
* @return the number of resulting halfbytes |
4520 |
10 Sep 13 |
fredrik |
93 |
*/ |
4520 |
10 Sep 13 |
fredrik |
94 |
protected static int encodeInt( |
4520 |
10 Sep 13 |
fredrik |
95 |
long x, |
4520 |
10 Sep 13 |
fredrik |
96 |
byte[] res, |
4520 |
10 Sep 13 |
fredrik |
97 |
int resOffset |
4520 |
10 Sep 13 |
fredrik |
98 |
) { |
4520 |
10 Sep 13 |
fredrik |
99 |
byte i, l; |
4520 |
10 Sep 13 |
fredrik |
100 |
long m; |
4520 |
10 Sep 13 |
fredrik |
101 |
long mask = 0xf0000000; |
4520 |
10 Sep 13 |
fredrik |
102 |
long init = x & mask; |
4520 |
10 Sep 13 |
fredrik |
103 |
|
4520 |
10 Sep 13 |
fredrik |
104 |
if (init == 0) { |
4520 |
10 Sep 13 |
fredrik |
105 |
l = 8; |
4520 |
10 Sep 13 |
fredrik |
106 |
for (i=0; i<8; i++) { |
4520 |
10 Sep 13 |
fredrik |
107 |
m = mask >> (4*i); |
4520 |
10 Sep 13 |
fredrik |
108 |
if ((x & m) != 0) { |
4520 |
10 Sep 13 |
fredrik |
109 |
l = i; |
4520 |
10 Sep 13 |
fredrik |
110 |
break; |
4520 |
10 Sep 13 |
fredrik |
111 |
} |
4520 |
10 Sep 13 |
fredrik |
112 |
} |
4520 |
10 Sep 13 |
fredrik |
113 |
res[resOffset] = l; |
4520 |
10 Sep 13 |
fredrik |
114 |
for (i=l; i<8; i++) |
4520 |
10 Sep 13 |
fredrik |
115 |
res[resOffset+1+i-l] = (byte)(0xf & (x >> (4*(i-l)))); |
4520 |
10 Sep 13 |
fredrik |
116 |
|
4520 |
10 Sep 13 |
fredrik |
117 |
return 1+8-l; |
4520 |
10 Sep 13 |
fredrik |
118 |
|
4520 |
10 Sep 13 |
fredrik |
119 |
} else if (init == mask) { |
4520 |
10 Sep 13 |
fredrik |
120 |
l = 7; |
4520 |
10 Sep 13 |
fredrik |
121 |
for (i=0; i<8; i++) { |
4520 |
10 Sep 13 |
fredrik |
122 |
m = mask >> (4*i); |
4520 |
10 Sep 13 |
fredrik |
123 |
if ((x & m) != m) { |
4520 |
10 Sep 13 |
fredrik |
124 |
l = i; |
4520 |
10 Sep 13 |
fredrik |
125 |
break; |
4520 |
10 Sep 13 |
fredrik |
126 |
} |
4520 |
10 Sep 13 |
fredrik |
127 |
} |
4520 |
10 Sep 13 |
fredrik |
128 |
res[resOffset] = (byte)(l | 8); |
4520 |
10 Sep 13 |
fredrik |
129 |
for (i=l; i<8; i++) |
4520 |
10 Sep 13 |
fredrik |
130 |
res[resOffset+1+i-l] = (byte)(0xf & (x >> (4*(i-l)))); |
4520 |
10 Sep 13 |
fredrik |
131 |
|
4520 |
10 Sep 13 |
fredrik |
132 |
return 1+8-l; |
4520 |
10 Sep 13 |
fredrik |
133 |
|
4520 |
10 Sep 13 |
fredrik |
134 |
} else { |
4520 |
10 Sep 13 |
fredrik |
135 |
res[resOffset] = 0; |
4520 |
10 Sep 13 |
fredrik |
136 |
for (i=0; i<8; i++) |
4520 |
10 Sep 13 |
fredrik |
137 |
res[resOffset+1+i] = (byte)(0xf & (x >> (4*i))); |
4520 |
10 Sep 13 |
fredrik |
138 |
|
4520 |
10 Sep 13 |
fredrik |
139 |
return 9; |
4520 |
10 Sep 13 |
fredrik |
140 |
|
4520 |
10 Sep 13 |
fredrik |
141 |
} |
4520 |
10 Sep 13 |
fredrik |
142 |
} |
4520 |
10 Sep 13 |
fredrik |
143 |
|
4520 |
10 Sep 13 |
fredrik |
144 |
|
4520 |
10 Sep 13 |
fredrik |
145 |
|
4520 |
10 Sep 13 |
fredrik |
146 |
public static void encodeFixedPoint( |
4520 |
10 Sep 13 |
fredrik |
147 |
double fixedPoint, |
4520 |
10 Sep 13 |
fredrik |
148 |
byte[] result |
4520 |
10 Sep 13 |
fredrik |
149 |
) { |
4520 |
10 Sep 13 |
fredrik |
150 |
long fp = Double.doubleToLongBits(fixedPoint); |
4520 |
10 Sep 13 |
fredrik |
151 |
for (int i=0; i<8; i++) { |
4520 |
10 Sep 13 |
fredrik |
152 |
result[7-i] = (byte)((fp >> (8*i)) & 0xff); |
4520 |
10 Sep 13 |
fredrik |
153 |
} |
4520 |
10 Sep 13 |
fredrik |
154 |
} |
4520 |
10 Sep 13 |
fredrik |
155 |
|
4520 |
10 Sep 13 |
fredrik |
156 |
|
4520 |
10 Sep 13 |
fredrik |
157 |
|
4520 |
10 Sep 13 |
fredrik |
158 |
public static double decodeFixedPoint( |
4520 |
10 Sep 13 |
fredrik |
159 |
byte[] data |
4520 |
10 Sep 13 |
fredrik |
160 |
) { |
4520 |
10 Sep 13 |
fredrik |
161 |
long fp = 0; |
4520 |
10 Sep 13 |
fredrik |
162 |
for (int i=0; i<8; i++) { |
4520 |
10 Sep 13 |
fredrik |
163 |
fp = fp | ((0xFFl & data[7-i]) << (8*i)); |
4520 |
10 Sep 13 |
fredrik |
164 |
} |
4520 |
10 Sep 13 |
fredrik |
165 |
return Double.longBitsToDouble(fp); |
4520 |
10 Sep 13 |
fredrik |
166 |
} |
4520 |
10 Sep 13 |
fredrik |
167 |
|
4520 |
10 Sep 13 |
fredrik |
168 |
|
4520 |
10 Sep 13 |
fredrik |
169 |
|
4520 |
10 Sep 13 |
fredrik |
170 |
|
4520 |
10 Sep 13 |
fredrik |
171 |
|
4520 |
10 Sep 13 |
fredrik |
172 |
///////////////////////////////////////////////////////////////////////////////// |
4520 |
10 Sep 13 |
fredrik |
173 |
|
4520 |
10 Sep 13 |
fredrik |
174 |
|
4520 |
10 Sep 13 |
fredrik |
175 |
public static double optimalLinearFixedPoint( |
4520 |
10 Sep 13 |
fredrik |
176 |
double[] data, |
4520 |
10 Sep 13 |
fredrik |
177 |
int dataSize |
4520 |
10 Sep 13 |
fredrik |
178 |
) { |
4520 |
10 Sep 13 |
fredrik |
179 |
if (dataSize == 0) return 0; |
4520 |
10 Sep 13 |
fredrik |
180 |
if (dataSize == 1) return Math.floor(0xFFFFFFFFl / data[0]); |
4520 |
10 Sep 13 |
fredrik |
181 |
double maxDouble = Math.max(data[0], data[1]); |
4520 |
10 Sep 13 |
fredrik |
182 |
|
4520 |
10 Sep 13 |
fredrik |
183 |
for (int i=2; i<dataSize; i++) { |
4520 |
10 Sep 13 |
fredrik |
184 |
double extrapol = data[i-1] + (data[i-1] - data[i-2]); |
4520 |
10 Sep 13 |
fredrik |
185 |
double diff = data[i] - extrapol; |
4520 |
10 Sep 13 |
fredrik |
186 |
maxDouble = Math.max(maxDouble, Math.ceil(Math.abs(diff)+1)); |
4520 |
10 Sep 13 |
fredrik |
187 |
} |
4520 |
10 Sep 13 |
fredrik |
188 |
|
4520 |
10 Sep 13 |
fredrik |
189 |
return Math.floor(0x7FFFFFFFl / maxDouble); |
4520 |
10 Sep 13 |
fredrik |
190 |
} |
4520 |
10 Sep 13 |
fredrik |
191 |
|
4520 |
10 Sep 13 |
fredrik |
192 |
|
4520 |
10 Sep 13 |
fredrik |
193 |
/** |
4520 |
10 Sep 13 |
fredrik |
* Encodes the doubles in data by first using a |
4520 |
10 Sep 13 |
fredrik |
* - lossy conversion to a 4 byte 5 decimal fixed point repressentation |
4520 |
10 Sep 13 |
fredrik |
* - storing the residuals from a linear prediction after first to values |
4520 |
10 Sep 13 |
fredrik |
* - encoding by encodeInt (see above) |
4520 |
10 Sep 13 |
fredrik |
198 |
* |
4520 |
10 Sep 13 |
fredrik |
* The resulting binary is maximally dataSize * 5 bytes, but much less if the |
4520 |
10 Sep 13 |
fredrik |
* data is reasonably smooth on the first order. |
4520 |
10 Sep 13 |
fredrik |
201 |
* |
4520 |
10 Sep 13 |
fredrik |
* This encoding is suitable for typical m/z or retention time binary arrays. |
4520 |
10 Sep 13 |
fredrik |
* For masses above 100 m/z the encoding is accurate to at least 0.1 ppm. |
4520 |
10 Sep 13 |
fredrik |
204 |
* |
4520 |
10 Sep 13 |
fredrik |
* @data array of double to be encoded |
4520 |
10 Sep 13 |
fredrik |
* @dataSize number of doubles from data to encode |
4520 |
10 Sep 13 |
fredrik |
* @result array were resulting bytes should be stored |
4520 |
10 Sep 13 |
fredrik |
* @fixedPoint the scaling factor used for getting the fixed point repr. |
4520 |
10 Sep 13 |
fredrik |
* This is stored in the binary and automatically extracted |
4520 |
10 Sep 13 |
fredrik |
* on decoding. |
4520 |
10 Sep 13 |
fredrik |
* @return the number of encoded bytes |
4520 |
10 Sep 13 |
fredrik |
212 |
*/ |
4520 |
10 Sep 13 |
fredrik |
213 |
public static int encodeLinear( |
4520 |
10 Sep 13 |
fredrik |
214 |
double[] data, |
4520 |
10 Sep 13 |
fredrik |
215 |
int dataSize, |
4520 |
10 Sep 13 |
fredrik |
216 |
byte[] result, |
4520 |
10 Sep 13 |
fredrik |
217 |
double fixedPoint |
4520 |
10 Sep 13 |
fredrik |
218 |
) { |
4520 |
10 Sep 13 |
fredrik |
219 |
long[] ints = new long[3]; |
4520 |
10 Sep 13 |
fredrik |
220 |
int i; |
4520 |
10 Sep 13 |
fredrik |
221 |
int ri = 16; |
4520 |
10 Sep 13 |
fredrik |
222 |
byte halfBytes[] = new byte[10]; |
4520 |
10 Sep 13 |
fredrik |
223 |
int halfByteCount = 0; |
4520 |
10 Sep 13 |
fredrik |
224 |
int hbi; |
4520 |
10 Sep 13 |
fredrik |
225 |
long extrapol; |
4520 |
10 Sep 13 |
fredrik |
226 |
long diff; |
4520 |
10 Sep 13 |
fredrik |
227 |
|
4520 |
10 Sep 13 |
fredrik |
228 |
encodeFixedPoint(fixedPoint, result); |
4520 |
10 Sep 13 |
fredrik |
229 |
|
4520 |
10 Sep 13 |
fredrik |
230 |
if (dataSize == 0) return 8; |
4520 |
10 Sep 13 |
fredrik |
231 |
|
4520 |
10 Sep 13 |
fredrik |
232 |
ints[1] = (long)(data[0] * fixedPoint + 0.5); |
4520 |
10 Sep 13 |
fredrik |
233 |
for (i=0; i<4; i++) { |
4520 |
10 Sep 13 |
fredrik |
234 |
result[8+i] = (byte)((ints[1] >> (i*8)) & 0xff); |
4520 |
10 Sep 13 |
fredrik |
235 |
} |
4520 |
10 Sep 13 |
fredrik |
236 |
|
4520 |
10 Sep 13 |
fredrik |
237 |
if (dataSize == 1) return 12; |
4520 |
10 Sep 13 |
fredrik |
238 |
|
4520 |
10 Sep 13 |
fredrik |
239 |
ints[2] = (long)(data[1] * fixedPoint + 0.5); |
4520 |
10 Sep 13 |
fredrik |
240 |
for (i=0; i<4; i++) { |
4520 |
10 Sep 13 |
fredrik |
241 |
result[12+i] = (byte)((ints[2] >> (i*8)) & 0xff); |
4520 |
10 Sep 13 |
fredrik |
242 |
} |
4520 |
10 Sep 13 |
fredrik |
243 |
|
4520 |
10 Sep 13 |
fredrik |
244 |
halfByteCount = 0; |
4520 |
10 Sep 13 |
fredrik |
245 |
ri = 16; |
4520 |
10 Sep 13 |
fredrik |
246 |
|
4520 |
10 Sep 13 |
fredrik |
247 |
for (i=2; i<dataSize; i++) { |
4520 |
10 Sep 13 |
fredrik |
248 |
ints[0] = ints[1]; |
4520 |
10 Sep 13 |
fredrik |
249 |
ints[1] = ints[2]; |
4520 |
10 Sep 13 |
fredrik |
250 |
ints[2] = (long)(data[i] * fixedPoint + 0.5); |
4520 |
10 Sep 13 |
fredrik |
251 |
extrapol = ints[1] + (ints[1] - ints[0]); |
4520 |
10 Sep 13 |
fredrik |
252 |
diff = ints[2] - extrapol; |
4520 |
10 Sep 13 |
fredrik |
253 |
halfByteCount += encodeInt(diff, halfBytes, halfByteCount); |
4520 |
10 Sep 13 |
fredrik |
254 |
|
4520 |
10 Sep 13 |
fredrik |
255 |
for (hbi=1; hbi < halfByteCount; hbi+=2) |
4520 |
10 Sep 13 |
fredrik |
256 |
result[ri++] = (byte)((halfBytes[hbi-1] << 4) | (halfBytes[hbi] & 0xf)); |
4520 |
10 Sep 13 |
fredrik |
257 |
|
4520 |
10 Sep 13 |
fredrik |
258 |
if (halfByteCount % 2 != 0) { |
4520 |
10 Sep 13 |
fredrik |
259 |
halfBytes[0] = halfBytes[halfByteCount-1]; |
4520 |
10 Sep 13 |
fredrik |
260 |
halfByteCount = 1; |
4520 |
10 Sep 13 |
fredrik |
261 |
} else |
4520 |
10 Sep 13 |
fredrik |
262 |
halfByteCount = 0; |
4520 |
10 Sep 13 |
fredrik |
263 |
|
4520 |
10 Sep 13 |
fredrik |
264 |
} |
4520 |
10 Sep 13 |
fredrik |
265 |
if (halfByteCount == 1) |
4520 |
10 Sep 13 |
fredrik |
266 |
result[ri++] = (byte)(halfBytes[0] << 4); |
4520 |
10 Sep 13 |
fredrik |
267 |
|
4520 |
10 Sep 13 |
fredrik |
268 |
return ri; |
4520 |
10 Sep 13 |
fredrik |
269 |
} |
4520 |
10 Sep 13 |
fredrik |
270 |
|
4520 |
10 Sep 13 |
fredrik |
271 |
|
4520 |
10 Sep 13 |
fredrik |
272 |
|
4520 |
10 Sep 13 |
fredrik |
273 |
/** |
4520 |
10 Sep 13 |
fredrik |
* Decodes data encoded by encodeLinear. Note that the compression |
4520 |
10 Sep 13 |
fredrik |
* discard any information < 1e-5, so data is only guaranteed |
4520 |
10 Sep 13 |
fredrik |
* to be within +- 5e-6 of the original value. |
4520 |
10 Sep 13 |
fredrik |
277 |
* |
4520 |
10 Sep 13 |
fredrik |
* Further, values > ~42000 will also be truncated because of the |
4520 |
10 Sep 13 |
fredrik |
* fixed point representation, so this scheme is stronly discouraged |
4520 |
10 Sep 13 |
fredrik |
* if values above might be above this size. |
4520 |
10 Sep 13 |
fredrik |
281 |
* |
4520 |
10 Sep 13 |
fredrik |
* result vector guaranteedly shorter than twice the data length (in nbr of values) |
4520 |
10 Sep 13 |
fredrik |
* returns the number of doubles read |
4520 |
10 Sep 13 |
fredrik |
284 |
* |
4520 |
10 Sep 13 |
fredrik |
* @data array of bytes to be decoded |
4520 |
10 Sep 13 |
fredrik |
* @dataSize number of bytes from data to decode |
4520 |
10 Sep 13 |
fredrik |
* @result array were resulting doubles should be stored |
4520 |
10 Sep 13 |
fredrik |
* @return the number of decoded doubles, or -1 if dataSize < 4 or 4 < dataSize < 8 |
4520 |
10 Sep 13 |
fredrik |
289 |
*/ |
4520 |
10 Sep 13 |
fredrik |
290 |
public static int decodeLinear( |
4520 |
10 Sep 13 |
fredrik |
291 |
byte[] data, |
4520 |
10 Sep 13 |
fredrik |
292 |
int dataSize, |
4520 |
10 Sep 13 |
fredrik |
293 |
double[] result |
4520 |
10 Sep 13 |
fredrik |
294 |
) { |
4520 |
10 Sep 13 |
fredrik |
295 |
int ri = 2; |
4520 |
10 Sep 13 |
fredrik |
296 |
long[] ints = new long[3]; |
4520 |
10 Sep 13 |
fredrik |
297 |
long extrapol; |
4520 |
10 Sep 13 |
fredrik |
298 |
long y; |
4520 |
10 Sep 13 |
fredrik |
299 |
IntDecoder dec = new IntDecoder(data, 16); |
4520 |
10 Sep 13 |
fredrik |
300 |
|
4520 |
10 Sep 13 |
fredrik |
301 |
if (dataSize < 8) return -1; |
4520 |
10 Sep 13 |
fredrik |
302 |
double fixedPoint = decodeFixedPoint(data); |
4520 |
10 Sep 13 |
fredrik |
303 |
if (dataSize < 12) return -1; |
4520 |
10 Sep 13 |
fredrik |
304 |
|
4520 |
10 Sep 13 |
fredrik |
305 |
ints[1] = 0; |
4520 |
10 Sep 13 |
fredrik |
306 |
for (int i=0; i<4; i++) { |
4520 |
10 Sep 13 |
fredrik |
307 |
ints[1] = ints[1] | ( (0xFFl & data[8+i]) << (i*8)); |
4520 |
10 Sep 13 |
fredrik |
308 |
} |
4520 |
10 Sep 13 |
fredrik |
309 |
result[0] = ints[1] / fixedPoint; |
4520 |
10 Sep 13 |
fredrik |
310 |
|
4520 |
10 Sep 13 |
fredrik |
311 |
if (dataSize == 12) return 1; |
4520 |
10 Sep 13 |
fredrik |
312 |
if (dataSize < 16) return -1; |
4520 |
10 Sep 13 |
fredrik |
313 |
|
4520 |
10 Sep 13 |
fredrik |
314 |
ints[2] = 0; |
4520 |
10 Sep 13 |
fredrik |
315 |
for (int i=0; i<4; i++) { |
4520 |
10 Sep 13 |
fredrik |
316 |
ints[2] = ints[2] | ( (0xFFl & data[12+i]) << (i*8)); |
4520 |
10 Sep 13 |
fredrik |
317 |
} |
4520 |
10 Sep 13 |
fredrik |
318 |
result[1] = ints[2] / fixedPoint; |
4520 |
10 Sep 13 |
fredrik |
319 |
|
4520 |
10 Sep 13 |
fredrik |
320 |
while (dec.pos < dataSize) { |
4520 |
10 Sep 13 |
fredrik |
321 |
if (dec.pos == (dataSize - 1) && dec.half) |
4520 |
10 Sep 13 |
fredrik |
322 |
if ((data[dec.pos] & 0xf) != 0x8) |
4520 |
10 Sep 13 |
fredrik |
323 |
break; |
4520 |
10 Sep 13 |
fredrik |
324 |
|
4520 |
10 Sep 13 |
fredrik |
325 |
ints[0] = ints[1]; |
4520 |
10 Sep 13 |
fredrik |
326 |
ints[1] = ints[2]; |
4520 |
10 Sep 13 |
fredrik |
327 |
ints[2] = dec.next(); |
4520 |
10 Sep 13 |
fredrik |
328 |
|
4520 |
10 Sep 13 |
fredrik |
329 |
extrapol = ints[1] + (ints[1] - ints[0]); |
4520 |
10 Sep 13 |
fredrik |
330 |
y = extrapol + ints[2]; |
4520 |
10 Sep 13 |
fredrik |
331 |
result[ri++] = y / fixedPoint; |
4520 |
10 Sep 13 |
fredrik |
332 |
ints[2] = y; |
4520 |
10 Sep 13 |
fredrik |
333 |
} |
4520 |
10 Sep 13 |
fredrik |
334 |
|
4520 |
10 Sep 13 |
fredrik |
335 |
return ri; |
4520 |
10 Sep 13 |
fredrik |
336 |
} |
4520 |
10 Sep 13 |
fredrik |
337 |
|
4520 |
10 Sep 13 |
fredrik |
338 |
|
4520 |
10 Sep 13 |
fredrik |
339 |
|
4520 |
10 Sep 13 |
fredrik |
340 |
///////////////////////////////////////////////////////////////////////////////// |
4520 |
10 Sep 13 |
fredrik |
341 |
|
4520 |
10 Sep 13 |
fredrik |
342 |
/** |
4520 |
10 Sep 13 |
fredrik |
* Encodes ion counts by simply rounding to the nearest 4 byte integer, |
4520 |
10 Sep 13 |
fredrik |
* and compressing each integer with encodeInt. |
4520 |
10 Sep 13 |
fredrik |
345 |
* |
4520 |
10 Sep 13 |
fredrik |
* The handleable range is therefore 0 -> 4294967294. |
4520 |
10 Sep 13 |
fredrik |
* The resulting binary is maximally dataSize * 5 bytes, but much less if the |
4520 |
10 Sep 13 |
fredrik |
* data is close to 0 on average. |
4520 |
10 Sep 13 |
fredrik |
349 |
* |
4520 |
10 Sep 13 |
fredrik |
* @data array of doubles to be encoded |
4520 |
10 Sep 13 |
fredrik |
* @dataSize number of doubles from data to encode |
4520 |
10 Sep 13 |
fredrik |
* @result array were resulting bytes should be stored |
4520 |
10 Sep 13 |
fredrik |
* @return the number of encoded bytes |
4520 |
10 Sep 13 |
fredrik |
354 |
*/ |
4520 |
10 Sep 13 |
fredrik |
355 |
public static int encodePic( |
4520 |
10 Sep 13 |
fredrik |
356 |
double[] data, |
4520 |
10 Sep 13 |
fredrik |
357 |
int dataSize, |
4520 |
10 Sep 13 |
fredrik |
358 |
byte[] result |
4520 |
10 Sep 13 |
fredrik |
359 |
) { |
4520 |
10 Sep 13 |
fredrik |
360 |
long count; |
4520 |
10 Sep 13 |
fredrik |
361 |
int ri = 0; |
4520 |
10 Sep 13 |
fredrik |
362 |
int hbi = 0; |
4520 |
10 Sep 13 |
fredrik |
363 |
byte halfBytes[] = new byte[10]; |
4520 |
10 Sep 13 |
fredrik |
364 |
int halfByteCount = 0; |
4520 |
10 Sep 13 |
fredrik |
365 |
|
4520 |
10 Sep 13 |
fredrik |
//printf("Encoding %d doubles\n", (int)dataSize); |
4520 |
10 Sep 13 |
fredrik |
367 |
|
4520 |
10 Sep 13 |
fredrik |
368 |
for (int i=0; i<dataSize; i++) { |
4520 |
10 Sep 13 |
fredrik |
369 |
count = (long)(data[i] + 0.5); |
4520 |
10 Sep 13 |
fredrik |
370 |
halfByteCount += encodeInt(count, halfBytes, halfByteCount); |
4520 |
10 Sep 13 |
fredrik |
371 |
|
4520 |
10 Sep 13 |
fredrik |
372 |
for (hbi=1; hbi < halfByteCount; hbi+=2) |
4520 |
10 Sep 13 |
fredrik |
373 |
result[ri++] = (byte)((halfBytes[hbi-1] << 4) | (halfBytes[hbi] & 0xf)); |
4520 |
10 Sep 13 |
fredrik |
374 |
|
4520 |
10 Sep 13 |
fredrik |
375 |
if (halfByteCount % 2 != 0) { |
4520 |
10 Sep 13 |
fredrik |
376 |
halfBytes[0] = halfBytes[halfByteCount-1]; |
4520 |
10 Sep 13 |
fredrik |
377 |
halfByteCount = 1; |
4520 |
10 Sep 13 |
fredrik |
378 |
} else |
4520 |
10 Sep 13 |
fredrik |
379 |
halfByteCount = 0; |
4520 |
10 Sep 13 |
fredrik |
380 |
|
4520 |
10 Sep 13 |
fredrik |
381 |
} |
4520 |
10 Sep 13 |
fredrik |
382 |
if (halfByteCount == 1) |
4520 |
10 Sep 13 |
fredrik |
383 |
result[ri++] = (byte)(halfBytes[0] << 4); |
4520 |
10 Sep 13 |
fredrik |
384 |
|
4520 |
10 Sep 13 |
fredrik |
385 |
return ri; |
4520 |
10 Sep 13 |
fredrik |
386 |
} |
4520 |
10 Sep 13 |
fredrik |
387 |
|
4520 |
10 Sep 13 |
fredrik |
388 |
|
4520 |
10 Sep 13 |
fredrik |
389 |
/** |
4520 |
10 Sep 13 |
fredrik |
* Decodes data encoded by encodePic |
4520 |
10 Sep 13 |
fredrik |
391 |
* |
4520 |
10 Sep 13 |
fredrik |
* result vector guaranteedly shorter than twice the data length (in nbr of values) |
4520 |
10 Sep 13 |
fredrik |
393 |
* |
4520 |
10 Sep 13 |
fredrik |
* @data array of bytes to be decoded (need memorycont. repr.) |
4520 |
10 Sep 13 |
fredrik |
* @dataSize number of bytes from data to decode |
4520 |
10 Sep 13 |
fredrik |
* @result array were resulting doubles should be stored |
4520 |
10 Sep 13 |
fredrik |
* @return the number of decoded doubles |
4520 |
10 Sep 13 |
fredrik |
398 |
*/ |
4520 |
10 Sep 13 |
fredrik |
399 |
public static int decodePic( |
4520 |
10 Sep 13 |
fredrik |
400 |
byte[] data, |
4520 |
10 Sep 13 |
fredrik |
401 |
int dataSize, |
4520 |
10 Sep 13 |
fredrik |
402 |
double[] result |
4520 |
10 Sep 13 |
fredrik |
403 |
) { |
4520 |
10 Sep 13 |
fredrik |
404 |
int ri = 0; |
4520 |
10 Sep 13 |
fredrik |
405 |
long count; |
4520 |
10 Sep 13 |
fredrik |
406 |
IntDecoder dec = new IntDecoder(data, 0); |
4520 |
10 Sep 13 |
fredrik |
407 |
|
4520 |
10 Sep 13 |
fredrik |
408 |
while (dec.pos < dataSize) { |
4520 |
10 Sep 13 |
fredrik |
409 |
if (dec.pos == (dataSize - 1) && dec.half) |
4520 |
10 Sep 13 |
fredrik |
410 |
if ((data[dec.pos] & 0xf) != 0x8) |
4520 |
10 Sep 13 |
fredrik |
411 |
break; |
4520 |
10 Sep 13 |
fredrik |
412 |
|
4520 |
10 Sep 13 |
fredrik |
413 |
count = dec.next(); |
4520 |
10 Sep 13 |
fredrik |
414 |
result[ri++] = count; |
4520 |
10 Sep 13 |
fredrik |
415 |
} |
4520 |
10 Sep 13 |
fredrik |
416 |
return ri; |
4520 |
10 Sep 13 |
fredrik |
417 |
} |
4520 |
10 Sep 13 |
fredrik |
418 |
|
4520 |
10 Sep 13 |
fredrik |
419 |
|
4520 |
10 Sep 13 |
fredrik |
420 |
|
4520 |
10 Sep 13 |
fredrik |
421 |
|
4520 |
10 Sep 13 |
fredrik |
422 |
///////////////////////////////////////////////////////////////////////////////// |
4520 |
10 Sep 13 |
fredrik |
423 |
|
4520 |
10 Sep 13 |
fredrik |
424 |
public static double optimalSlofFixedPoint( |
4520 |
10 Sep 13 |
fredrik |
425 |
double[] data, |
4520 |
10 Sep 13 |
fredrik |
426 |
int dataSize |
4520 |
10 Sep 13 |
fredrik |
427 |
) { |
4520 |
10 Sep 13 |
fredrik |
428 |
if (dataSize == 0) return 0; |
4520 |
10 Sep 13 |
fredrik |
429 |
|
4520 |
10 Sep 13 |
fredrik |
430 |
double maxDouble = 1; |
4520 |
10 Sep 13 |
fredrik |
431 |
double x; |
4520 |
10 Sep 13 |
fredrik |
432 |
double fp; |
4520 |
10 Sep 13 |
fredrik |
433 |
|
4520 |
10 Sep 13 |
fredrik |
434 |
for (int i=0; i<dataSize; i++) { |
4520 |
10 Sep 13 |
fredrik |
435 |
x = Math.log(data[i]+1); |
4520 |
10 Sep 13 |
fredrik |
436 |
maxDouble = Math.max(maxDouble, x); |
4520 |
10 Sep 13 |
fredrik |
437 |
} |
4520 |
10 Sep 13 |
fredrik |
438 |
|
4520 |
10 Sep 13 |
fredrik |
439 |
fp = Math.floor(0xFFFF / maxDouble); |
4520 |
10 Sep 13 |
fredrik |
440 |
|
4520 |
10 Sep 13 |
fredrik |
441 |
return fp; |
4520 |
10 Sep 13 |
fredrik |
442 |
} |
4520 |
10 Sep 13 |
fredrik |
443 |
|
4520 |
10 Sep 13 |
fredrik |
444 |
/** |
4520 |
10 Sep 13 |
fredrik |
* Encodes ion counts by taking the natural logarithm, and storing a |
4520 |
10 Sep 13 |
fredrik |
* fixed point representation of this. This is calculated as |
4520 |
10 Sep 13 |
fredrik |
447 |
* |
4520 |
10 Sep 13 |
fredrik |
* unsigned short fp = log(d) * fixedPoint + 0.5 |
4520 |
10 Sep 13 |
fredrik |
449 |
* |
4520 |
10 Sep 13 |
fredrik |
* @data array of doubles to be encoded |
4520 |
10 Sep 13 |
fredrik |
* @dataSize number of doubles from data to encode |
4520 |
10 Sep 13 |
fredrik |
* @result array were resulting bytes should be stored |
4520 |
10 Sep 13 |
fredrik |
* @fixedPoint the scaling factor used for getting the fixed point repr. |
4520 |
10 Sep 13 |
fredrik |
* This is stored in the binary and automatically extracted |
4520 |
10 Sep 13 |
fredrik |
* on decoding. |
4520 |
10 Sep 13 |
fredrik |
* @return the number of encoded bytes |
4520 |
10 Sep 13 |
fredrik |
457 |
*/ |
4520 |
10 Sep 13 |
fredrik |
458 |
public static int encodeSlof( |
4520 |
10 Sep 13 |
fredrik |
459 |
double[] data, |
4520 |
10 Sep 13 |
fredrik |
460 |
int dataSize, |
4520 |
10 Sep 13 |
fredrik |
461 |
byte[] result, |
4520 |
10 Sep 13 |
fredrik |
462 |
double fixedPoint |
4520 |
10 Sep 13 |
fredrik |
463 |
) { |
4520 |
10 Sep 13 |
fredrik |
464 |
int x; |
4520 |
10 Sep 13 |
fredrik |
465 |
int ri = 8; |
4520 |
10 Sep 13 |
fredrik |
466 |
|
4520 |
10 Sep 13 |
fredrik |
467 |
encodeFixedPoint(fixedPoint, result); |
4520 |
10 Sep 13 |
fredrik |
468 |
|
4520 |
10 Sep 13 |
fredrik |
469 |
for (int i=0; i<dataSize; i++) { |
4520 |
10 Sep 13 |
fredrik |
470 |
x = (int)(Math.log(data[i]+1) * fixedPoint + 0.5); |
4520 |
10 Sep 13 |
fredrik |
471 |
|
4520 |
10 Sep 13 |
fredrik |
472 |
result[ri++] = (byte)(0xff & x); |
4520 |
10 Sep 13 |
fredrik |
473 |
result[ri++] = (byte)(x >> 8); |
4520 |
10 Sep 13 |
fredrik |
474 |
} |
4520 |
10 Sep 13 |
fredrik |
475 |
return ri; |
4520 |
10 Sep 13 |
fredrik |
476 |
} |
4520 |
10 Sep 13 |
fredrik |
477 |
|
4520 |
10 Sep 13 |
fredrik |
478 |
|
4520 |
10 Sep 13 |
fredrik |
479 |
/** |
4520 |
10 Sep 13 |
fredrik |
* Decodes data encoded by encodeSlof |
4520 |
10 Sep 13 |
fredrik |
481 |
* |
4520 |
10 Sep 13 |
fredrik |
* result vector length is twice the data length |
4520 |
10 Sep 13 |
fredrik |
* returns the number of doubles read |
4520 |
10 Sep 13 |
fredrik |
484 |
* |
4520 |
10 Sep 13 |
fredrik |
* @data array of bytes to be decoded (need memorycont. repr.) |
4520 |
10 Sep 13 |
fredrik |
* @dataSize number of bytes from data to decode |
4520 |
10 Sep 13 |
fredrik |
* @result array were resulting doubles should be stored |
4520 |
10 Sep 13 |
fredrik |
* @return the number of decoded doubles |
4520 |
10 Sep 13 |
fredrik |
489 |
*/ |
4520 |
10 Sep 13 |
fredrik |
490 |
public static int decodeSlof( |
4520 |
10 Sep 13 |
fredrik |
491 |
byte[] data, |
4520 |
10 Sep 13 |
fredrik |
492 |
int dataSize, |
4520 |
10 Sep 13 |
fredrik |
493 |
double[] result |
4520 |
10 Sep 13 |
fredrik |
494 |
) { |
4520 |
10 Sep 13 |
fredrik |
495 |
int x; |
4520 |
10 Sep 13 |
fredrik |
496 |
int ri = 0; |
4520 |
10 Sep 13 |
fredrik |
497 |
|
4520 |
10 Sep 13 |
fredrik |
498 |
if (dataSize < 8) return -1; |
4520 |
10 Sep 13 |
fredrik |
499 |
double fixedPoint = decodeFixedPoint(data); |
4520 |
10 Sep 13 |
fredrik |
500 |
|
4520 |
10 Sep 13 |
fredrik |
501 |
for (int i=8; i<dataSize; i+=2) { |
4520 |
10 Sep 13 |
fredrik |
502 |
x = (0xff & data[i]) | ((0xff & data[i+1]) << 8); |
4520 |
10 Sep 13 |
fredrik |
503 |
result[ri++] = Math.exp(((double)(0xffff & x)) / fixedPoint) - 1; |
4520 |
10 Sep 13 |
fredrik |
504 |
} |
4520 |
10 Sep 13 |
fredrik |
505 |
return ri; |
4520 |
10 Sep 13 |
fredrik |
506 |
} |
4520 |
10 Sep 13 |
fredrik |
507 |
} |