7624 |
07 Mar 19 |
nicklas |
1 |
/* |
7624 |
07 Mar 19 |
nicklas |
$Id: TestNumberFormat.java 5776 2011-09-30 11:28:05Z nicklas $ |
7624 |
07 Mar 19 |
nicklas |
3 |
|
7624 |
07 Mar 19 |
nicklas |
BioArray Software Environment (BASE) - http://base.thep.lu.se/ |
7624 |
07 Mar 19 |
nicklas |
Copyright (C) 2007 Johan Enell, Nicklas Nordborg |
7624 |
07 Mar 19 |
nicklas |
6 |
|
7624 |
07 Mar 19 |
nicklas |
This file is part of BASE. |
7624 |
07 Mar 19 |
nicklas |
8 |
|
7624 |
07 Mar 19 |
nicklas |
BASE is free software; you can redistribute it and/or |
7624 |
07 Mar 19 |
nicklas |
modify it under the terms of the GNU General Public License |
7624 |
07 Mar 19 |
nicklas |
as published by the Free Software Foundation; either version 3 |
7624 |
07 Mar 19 |
nicklas |
of the License, or (at your option) any later version. |
7624 |
07 Mar 19 |
nicklas |
13 |
|
7624 |
07 Mar 19 |
nicklas |
BASE is distributed in the hope that it will be useful, |
7624 |
07 Mar 19 |
nicklas |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
7624 |
07 Mar 19 |
nicklas |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
7624 |
07 Mar 19 |
nicklas |
GNU General Public License for more details. |
7624 |
07 Mar 19 |
nicklas |
18 |
|
7624 |
07 Mar 19 |
nicklas |
You should have received a copy of the GNU General Public License |
7624 |
07 Mar 19 |
nicklas |
along with BASE. If not, see <http://www.gnu.org/licenses/>. |
7624 |
07 Mar 19 |
nicklas |
21 |
*/ |
7624 |
07 Mar 19 |
nicklas |
22 |
import java.io.IOException; |
7624 |
07 Mar 19 |
nicklas |
23 |
import java.io.InputStream; |
7624 |
07 Mar 19 |
nicklas |
24 |
|
7624 |
07 Mar 19 |
nicklas |
25 |
import net.sf.basedb.util.FileUtil; |
7624 |
07 Mar 19 |
nicklas |
26 |
import net.sf.basedb.util.charset.CharsetDetector; |
7629 |
11 Mar 19 |
nicklas |
27 |
import net.sf.basedb.util.charset.CharsetUtil; |
7624 |
07 Mar 19 |
nicklas |
28 |
import net.sf.basedb.util.charset.SimpleStringDetector; |
7624 |
07 Mar 19 |
nicklas |
29 |
import net.sf.basedb.util.charset.StringDetector; |
7624 |
07 Mar 19 |
nicklas |
30 |
|
7624 |
07 Mar 19 |
nicklas |
31 |
public class TestCharsetDetector |
7624 |
07 Mar 19 |
nicklas |
32 |
{ |
7624 |
07 Mar 19 |
nicklas |
33 |
|
7624 |
07 Mar 19 |
nicklas |
34 |
static boolean ok = true; |
7624 |
07 Mar 19 |
nicklas |
35 |
public static void main(String[] args) |
7624 |
07 Mar 19 |
nicklas |
36 |
{ |
7624 |
07 Mar 19 |
nicklas |
37 |
TestUtil.checkArgs(args); |
7624 |
07 Mar 19 |
nicklas |
38 |
TestUtil.begin(); |
7624 |
07 Mar 19 |
nicklas |
39 |
ok = test_all(); |
7624 |
07 Mar 19 |
nicklas |
40 |
TestUtil.stop(); |
7624 |
07 Mar 19 |
nicklas |
41 |
} |
7624 |
07 Mar 19 |
nicklas |
42 |
|
7624 |
07 Mar 19 |
nicklas |
43 |
static boolean test_all() |
7624 |
07 Mar 19 |
nicklas |
44 |
{ |
7624 |
07 Mar 19 |
nicklas |
45 |
write("++Testing CharsetDetector"); |
7624 |
07 Mar 19 |
nicklas |
46 |
|
7624 |
07 Mar 19 |
nicklas |
47 |
test_charset("data/charset-tester-utf-8.txt", "UTF-8", null, null, false); |
7624 |
07 Mar 19 |
nicklas |
48 |
test_charset("data/charset-tester-utf-8.txt", "UTF-8", "Namn", "Ålder", false); |
7624 |
07 Mar 19 |
nicklas |
49 |
test_charset("data/charset-tester-utf-8.txt", "ISO-8859-1", null, null, false); // No failure but we get incorrect text for Å and ö |
7624 |
07 Mar 19 |
nicklas |
50 |
test_charset("data/charset-tester-utf-8.txt", "ISO-8859-1", "Namn", "Ålder", true); // Fails since we can't find Ålder (Å is parsed as Ã…) |
7629 |
11 Mar 19 |
nicklas |
51 |
|
7624 |
07 Mar 19 |
nicklas |
52 |
test_charset("data/charset-tester-iso-8859-1.txt", "ISO-8859-1", null, null, false); |
7624 |
07 Mar 19 |
nicklas |
53 |
test_charset("data/charset-tester-iso-8859-1.txt", "ISO-8859-1", "Namn", "Ålder", false); |
7624 |
07 Mar 19 |
nicklas |
54 |
test_charset("data/charset-tester-iso-8859-1.txt", "UTF-8", "Namn", "Ålder", true); // Fails since file is invalid UTF-8 |
7624 |
07 Mar 19 |
nicklas |
55 |
|
7624 |
07 Mar 19 |
nicklas |
56 |
test_charset("data/charset-tester-iso-8859-1.txt", "ISO-8859-7", null, null, false); // No failure but we get incorrect text for Å and ö |
7624 |
07 Mar 19 |
nicklas |
57 |
test_charset("data/charset-tester-iso-8859-1.txt", "ISO-8859-7", "Namn", "Ålder", true); // Fails since Å is Ε (greek) |
7629 |
11 Mar 19 |
nicklas |
58 |
|
7629 |
11 Mar 19 |
nicklas |
// Both files should work with the UTF-8 fallback charset |
7629 |
11 Mar 19 |
nicklas |
60 |
test_charset("data/charset-tester-utf-8.txt", "X-UTF-8_with_ISO-8859-1_fallback", "Namn", "Ålder", false); |
7629 |
11 Mar 19 |
nicklas |
61 |
test_charset("data/charset-tester-iso-8859-1.txt", "X-UTF-8_with_ISO-8859-1_fallback", "Namn", "Ålder", false); |
7624 |
07 Mar 19 |
nicklas |
62 |
|
7624 |
07 Mar 19 |
nicklas |
63 |
write("++Testing CharsetDetector "+(ok ? "OK" : "Failed")+"\n"); |
7624 |
07 Mar 19 |
nicklas |
64 |
return ok; |
7624 |
07 Mar 19 |
nicklas |
65 |
} |
7624 |
07 Mar 19 |
nicklas |
66 |
|
7624 |
07 Mar 19 |
nicklas |
67 |
static void write(String message) |
7624 |
07 Mar 19 |
nicklas |
68 |
{ |
7624 |
07 Mar 19 |
nicklas |
69 |
System.out.println(message); |
7624 |
07 Mar 19 |
nicklas |
70 |
} |
7624 |
07 Mar 19 |
nicklas |
71 |
|
7624 |
07 Mar 19 |
nicklas |
72 |
static void test_charset(String file, String charset, String lookFor, String match, boolean shouldFail) |
7624 |
07 Mar 19 |
nicklas |
73 |
{ |
7624 |
07 Mar 19 |
nicklas |
74 |
InputStream in = null; |
7624 |
07 Mar 19 |
nicklas |
75 |
try |
7624 |
07 Mar 19 |
nicklas |
76 |
{ |
7624 |
07 Mar 19 |
nicklas |
77 |
StringDetector lineChecker = null; |
7624 |
07 Mar 19 |
nicklas |
78 |
if (lookFor != null && match != null) |
7624 |
07 Mar 19 |
nicklas |
79 |
{ |
7624 |
07 Mar 19 |
nicklas |
80 |
lineChecker = new SimpleStringDetector(lookFor, match); |
7624 |
07 Mar 19 |
nicklas |
81 |
} |
7629 |
11 Mar 19 |
nicklas |
82 |
CharsetDetector detector = new CharsetDetector(CharsetUtil.getCharset(charset), lineChecker); |
7624 |
07 Mar 19 |
nicklas |
83 |
|
7624 |
07 Mar 19 |
nicklas |
84 |
in = FileUtil.getInputStream(new java.io.File(file)); |
7624 |
07 Mar 19 |
nicklas |
85 |
boolean fileIsOk = detector.testIt(in); |
7624 |
07 Mar 19 |
nicklas |
86 |
if (!fileIsOk) |
7624 |
07 Mar 19 |
nicklas |
87 |
{ |
7624 |
07 Mar 19 |
nicklas |
88 |
throw detector.getParsingFailure(); |
7624 |
07 Mar 19 |
nicklas |
89 |
} |
7624 |
07 Mar 19 |
nicklas |
90 |
|
7624 |
07 Mar 19 |
nicklas |
91 |
if (shouldFail) |
7624 |
07 Mar 19 |
nicklas |
92 |
{ |
7624 |
07 Mar 19 |
nicklas |
93 |
throw new Exception("Charset (" + charset + ") detection should fail for file: " + file); |
7624 |
07 Mar 19 |
nicklas |
94 |
} |
7624 |
07 Mar 19 |
nicklas |
95 |
|
7624 |
07 Mar 19 |
nicklas |
96 |
write("--Charset (" + charset + ") detection for file '" + file + "' OK"); |
7624 |
07 Mar 19 |
nicklas |
97 |
} |
7624 |
07 Mar 19 |
nicklas |
98 |
catch (IOException ex) |
7624 |
07 Mar 19 |
nicklas |
99 |
{ |
7624 |
07 Mar 19 |
nicklas |
100 |
if (!shouldFail) |
7624 |
07 Mar 19 |
nicklas |
101 |
{ |
7624 |
07 Mar 19 |
nicklas |
102 |
write("--Charset (" + charset + ") detection for file '" + file + "' FAILED"); |
7624 |
07 Mar 19 |
nicklas |
103 |
ex.printStackTrace(); |
7624 |
07 Mar 19 |
nicklas |
104 |
ok = false; |
7624 |
07 Mar 19 |
nicklas |
105 |
} |
7624 |
07 Mar 19 |
nicklas |
106 |
else |
7624 |
07 Mar 19 |
nicklas |
107 |
{ |
7624 |
07 Mar 19 |
nicklas |
108 |
write("--Charset (" + charset + ") detection for file '" + file + "' failed as expected: " + ex.getMessage()); |
7624 |
07 Mar 19 |
nicklas |
109 |
} |
7624 |
07 Mar 19 |
nicklas |
110 |
} |
7624 |
07 Mar 19 |
nicklas |
111 |
catch (Throwable t) |
7624 |
07 Mar 19 |
nicklas |
112 |
{ |
7624 |
07 Mar 19 |
nicklas |
113 |
write("--Charset (" + charset + ") detection for file '" + file + "' FAILED"); |
7624 |
07 Mar 19 |
nicklas |
114 |
t.printStackTrace(); |
7624 |
07 Mar 19 |
nicklas |
115 |
ok = false; |
7624 |
07 Mar 19 |
nicklas |
116 |
} |
7624 |
07 Mar 19 |
nicklas |
117 |
finally |
7624 |
07 Mar 19 |
nicklas |
118 |
{ |
7624 |
07 Mar 19 |
nicklas |
119 |
FileUtil.close(in); |
7624 |
07 Mar 19 |
nicklas |
120 |
} |
7624 |
07 Mar 19 |
nicklas |
121 |
} |
7624 |
07 Mar 19 |
nicklas |
122 |
|
7624 |
07 Mar 19 |
nicklas |
123 |
} |
7624 |
07 Mar 19 |
nicklas |
124 |
|