3883 |
27 Apr 16 |
nicklas |
1 |
package net.sf.basedb.inca; |
3883 |
27 Apr 16 |
nicklas |
2 |
|
3883 |
27 Apr 16 |
nicklas |
3 |
import java.io.BufferedReader; |
3883 |
27 Apr 16 |
nicklas |
4 |
import java.io.IOException; |
3883 |
27 Apr 16 |
nicklas |
5 |
import java.util.HashSet; |
3883 |
27 Apr 16 |
nicklas |
6 |
import java.util.Set; |
3883 |
27 Apr 16 |
nicklas |
7 |
|
3883 |
27 Apr 16 |
nicklas |
8 |
/** |
3883 |
27 Apr 16 |
nicklas |
Parses the SCANB file containing personal numbers |
3883 |
27 Apr 16 |
nicklas |
(and some other information that we don't need). |
3883 |
27 Apr 16 |
nicklas |
The file is generated by the "INCA export" wizard |
3883 |
27 Apr 16 |
nicklas |
in Reggie. |
3883 |
27 Apr 16 |
nicklas |
13 |
|
3883 |
27 Apr 16 |
nicklas |
The file is a tab-separated text file. The first row |
3883 |
27 Apr 16 |
nicklas |
must contain column names. The only required column is the |
3883 |
27 Apr 16 |
nicklas |
"PersonalNo" column. |
3883 |
27 Apr 16 |
nicklas |
17 |
|
3883 |
27 Apr 16 |
nicklas |
@author nicklas |
3883 |
27 Apr 16 |
nicklas |
@since 1.0 |
3883 |
27 Apr 16 |
nicklas |
20 |
*/ |
3883 |
27 Apr 16 |
nicklas |
21 |
public class ScanBParser |
3883 |
27 Apr 16 |
nicklas |
22 |
{ |
3883 |
27 Apr 16 |
nicklas |
23 |
|
3883 |
27 Apr 16 |
nicklas |
24 |
private final Set<String> personalNumbers; |
3890 |
28 Apr 16 |
nicklas |
25 |
private int parsedLines; |
3883 |
27 Apr 16 |
nicklas |
26 |
|
3883 |
27 Apr 16 |
nicklas |
27 |
/** |
3883 |
27 Apr 16 |
nicklas |
Create a new parser. Use the {@link #parse(BufferedReader, String)} |
3883 |
27 Apr 16 |
nicklas |
method to start parsing. |
3883 |
27 Apr 16 |
nicklas |
30 |
*/ |
3883 |
27 Apr 16 |
nicklas |
31 |
public ScanBParser() |
3883 |
27 Apr 16 |
nicklas |
32 |
{ |
3883 |
27 Apr 16 |
nicklas |
33 |
this.personalNumbers = new HashSet<>(); |
3883 |
27 Apr 16 |
nicklas |
34 |
} |
3883 |
27 Apr 16 |
nicklas |
35 |
|
3883 |
27 Apr 16 |
nicklas |
36 |
/** |
3883 |
27 Apr 16 |
nicklas |
Parse the given ScanB file and store the personal numbers |
3883 |
27 Apr 16 |
nicklas |
found in it. |
3883 |
27 Apr 16 |
nicklas |
39 |
*/ |
3883 |
27 Apr 16 |
nicklas |
40 |
public void parse(BufferedReader reader, String filename) |
3883 |
27 Apr 16 |
nicklas |
41 |
{ |
3883 |
27 Apr 16 |
nicklas |
42 |
int personalNoIndex = -1; |
3883 |
27 Apr 16 |
nicklas |
43 |
int lineNo = 1; |
3883 |
27 Apr 16 |
nicklas |
44 |
try |
3883 |
27 Apr 16 |
nicklas |
45 |
{ |
3883 |
27 Apr 16 |
nicklas |
// The first line must be a header line |
3883 |
27 Apr 16 |
nicklas |
// Split on {tab} and search for 'PersonalNo' |
3883 |
27 Apr 16 |
nicklas |
48 |
String header = reader.readLine(); |
3883 |
27 Apr 16 |
nicklas |
49 |
if (header != null) |
3883 |
27 Apr 16 |
nicklas |
50 |
{ |
3883 |
27 Apr 16 |
nicklas |
51 |
String[] columns = header.split("\\t"); |
3883 |
27 Apr 16 |
nicklas |
52 |
for (int i = 0; i < columns.length; i++) |
3883 |
27 Apr 16 |
nicklas |
53 |
{ |
3883 |
27 Apr 16 |
nicklas |
54 |
if ("PersonalNo".equals(columns[i])) |
3883 |
27 Apr 16 |
nicklas |
55 |
{ |
3883 |
27 Apr 16 |
nicklas |
56 |
personalNoIndex = i; |
3883 |
27 Apr 16 |
nicklas |
57 |
break; |
3883 |
27 Apr 16 |
nicklas |
58 |
} |
3883 |
27 Apr 16 |
nicklas |
59 |
} |
3883 |
27 Apr 16 |
nicklas |
60 |
} |
3883 |
27 Apr 16 |
nicklas |
61 |
|
3883 |
27 Apr 16 |
nicklas |
// If 'PersonalNo' is not found, throw exception |
3883 |
27 Apr 16 |
nicklas |
63 |
if (personalNoIndex < 0) |
3883 |
27 Apr 16 |
nicklas |
64 |
{ |
3883 |
27 Apr 16 |
nicklas |
65 |
throw new RuntimeException("Could not find column 'PersonalNo' in header"); |
3883 |
27 Apr 16 |
nicklas |
66 |
} |
3883 |
27 Apr 16 |
nicklas |
67 |
|
3883 |
27 Apr 16 |
nicklas |
// Parse the rest of the file |
3883 |
27 Apr 16 |
nicklas |
// Each line should contain a personal number |
3883 |
27 Apr 16 |
nicklas |
70 |
String line = null; |
3883 |
27 Apr 16 |
nicklas |
71 |
while((line = reader.readLine()) != null) |
3883 |
27 Apr 16 |
nicklas |
72 |
{ |
3883 |
27 Apr 16 |
nicklas |
73 |
lineNo++; |
3883 |
27 Apr 16 |
nicklas |
74 |
String[] columns = line.split("\\t"); |
3883 |
27 Apr 16 |
nicklas |
75 |
if (columns.length <= personalNoIndex) |
3883 |
27 Apr 16 |
nicklas |
76 |
{ |
3883 |
27 Apr 16 |
nicklas |
77 |
throw new RuntimeException("Too few columns: " + columns.length + |
3883 |
27 Apr 16 |
nicklas |
78 |
" (expected at least " + (personalNoIndex+1)+")"); |
3883 |
27 Apr 16 |
nicklas |
79 |
} |
3883 |
27 Apr 16 |
nicklas |
80 |
personalNumbers.add(columns[personalNoIndex]); |
3883 |
27 Apr 16 |
nicklas |
81 |
} |
3883 |
27 Apr 16 |
nicklas |
82 |
} |
3883 |
27 Apr 16 |
nicklas |
83 |
catch (IOException | RuntimeException ex) |
3883 |
27 Apr 16 |
nicklas |
84 |
{ |
3883 |
27 Apr 16 |
nicklas |
85 |
throw new ImportException(filename, lineNo, ex); |
3883 |
27 Apr 16 |
nicklas |
86 |
} |
3890 |
28 Apr 16 |
nicklas |
87 |
finally |
3890 |
28 Apr 16 |
nicklas |
88 |
{ |
3890 |
28 Apr 16 |
nicklas |
89 |
parsedLines = lineNo; |
3890 |
28 Apr 16 |
nicklas |
90 |
} |
3883 |
27 Apr 16 |
nicklas |
91 |
} |
3883 |
27 Apr 16 |
nicklas |
92 |
|
3883 |
27 Apr 16 |
nicklas |
93 |
/** |
3890 |
28 Apr 16 |
nicklas |
The number of lines parsed. |
3890 |
28 Apr 16 |
nicklas |
95 |
*/ |
3890 |
28 Apr 16 |
nicklas |
96 |
public int getParsedLines() |
3890 |
28 Apr 16 |
nicklas |
97 |
{ |
3890 |
28 Apr 16 |
nicklas |
98 |
return parsedLines; |
3890 |
28 Apr 16 |
nicklas |
99 |
} |
3890 |
28 Apr 16 |
nicklas |
100 |
|
3890 |
28 Apr 16 |
nicklas |
101 |
/** |
3890 |
28 Apr 16 |
nicklas |
The number of unique personal numbers in the files. |
3890 |
28 Apr 16 |
nicklas |
103 |
*/ |
3890 |
28 Apr 16 |
nicklas |
104 |
public int getPersonalNumbers() |
3890 |
28 Apr 16 |
nicklas |
105 |
{ |
3890 |
28 Apr 16 |
nicklas |
106 |
return personalNumbers.size(); |
3890 |
28 Apr 16 |
nicklas |
107 |
} |
3890 |
28 Apr 16 |
nicklas |
108 |
|
3890 |
28 Apr 16 |
nicklas |
109 |
/** |
3883 |
27 Apr 16 |
nicklas |
Check if the given personal number is accepted or not. |
3883 |
27 Apr 16 |
nicklas |
111 |
*/ |
3883 |
27 Apr 16 |
nicklas |
112 |
public boolean isAccpted(String personalNo) |
3883 |
27 Apr 16 |
nicklas |
113 |
{ |
3883 |
27 Apr 16 |
nicklas |
114 |
return personalNumbers.contains(personalNo); |
3883 |
27 Apr 16 |
nicklas |
115 |
} |
3883 |
27 Apr 16 |
nicklas |
116 |
|
3883 |
27 Apr 16 |
nicklas |
117 |
} |