line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MAB2::Record::Base; |
2
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
3
|
|
|
|
|
|
|
|
4
|
2
|
|
|
2
|
|
1755
|
use Encode::MAB2; |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
153
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 NAME |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
MAB2::Record::Base - Access an MAB2 record |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 SYNOPSIS |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
use MAB2::Record::Base; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Constructor |
15
|
|
|
|
|
|
|
my $mab2raw = "00296nM2.01200024 k001 1000016-1\c^002a19890418". |
16
|
|
|
|
|
|
|
"\c^004 20010812\c^028b1000016-1\c^029 HK00158537\c^030 aa1dc". |
17
|
|
|
|
|
|
|
"|m\c^036aIT\c^066 |\c^070 9002\c^070aHBZ\c^800 Accademia Na". |
18
|
|
|
|
|
|
|
"zionale di San Luca \c^810 Accademia di San Luca
|
19
|
|
|
|
|
|
|
"ccademia Nazionale di San Luca>\c^850aReale Accademia di San Lu". |
20
|
|
|
|
|
|
|
"ca \c^852a45335-3\c^\c]"; |
21
|
|
|
|
|
|
|
my $mab2 = MAB2::Record::Base->new($mab2raw); |
22
|
|
|
|
|
|
|
# $mab2 now blessed into MAB2::Record::gkd because it is a gkd record |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# various representations: |
25
|
|
|
|
|
|
|
print $mab2->id; # just the ID |
26
|
|
|
|
|
|
|
print $mab2->readable; # quite readable |
27
|
|
|
|
|
|
|
print $mab2->as_string; # the raw string we put into it |
28
|
|
|
|
|
|
|
print $mab2->dump; # only useful for debugging the module itself |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 DESCRIPTION |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
C is the common base class for all classes |
34
|
|
|
|
|
|
|
implementing MAB2 record types: |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
MAB2::Record::gkd |
37
|
|
|
|
|
|
|
MAB2::Record::lokal |
38
|
|
|
|
|
|
|
MAB2::Record::pnd |
39
|
|
|
|
|
|
|
MAB2::Record::swd |
40
|
|
|
|
|
|
|
MAB2::Record::titel |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
The constructor C takes a raw MAB2 record as argument and returns |
43
|
|
|
|
|
|
|
an object which is blessed into one of the five above listed classes. |
44
|
|
|
|
|
|
|
Some level of proficiency in dealing with MAB2 records is needed for |
45
|
|
|
|
|
|
|
the user of this module for further processing of the objects. It is |
46
|
|
|
|
|
|
|
recommended to use C to get acquainted with the raw format |
47
|
|
|
|
|
|
|
of the created objects. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
For illustration purpose, here is the Data::Dumper output of the full |
50
|
|
|
|
|
|
|
object into which the sample record from the SYNOPSIS section is |
51
|
|
|
|
|
|
|
transformed: |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
$VAR1 = bless( [ |
54
|
|
|
|
|
|
|
'...', |
55
|
|
|
|
|
|
|
undef, |
56
|
|
|
|
|
|
|
[ |
57
|
|
|
|
|
|
|
{ |
58
|
|
|
|
|
|
|
'nicht_benutzt' => [ |
59
|
|
|
|
|
|
|
' ' |
60
|
|
|
|
|
|
|
], |
61
|
|
|
|
|
|
|
'datenanfangsadresse' => [ |
62
|
|
|
|
|
|
|
'00024' |
63
|
|
|
|
|
|
|
], |
64
|
|
|
|
|
|
|
'satztyp' => [ |
65
|
|
|
|
|
|
|
'k', |
66
|
|
|
|
|
|
|
'Koerperschaftsnamensatz (MAB-GKD)' |
67
|
|
|
|
|
|
|
], |
68
|
|
|
|
|
|
|
'versionsangabe' => [ |
69
|
|
|
|
|
|
|
'M2.0' |
70
|
|
|
|
|
|
|
], |
71
|
|
|
|
|
|
|
'satzstatus' => [ |
72
|
|
|
|
|
|
|
'n', |
73
|
|
|
|
|
|
|
'neuer Datensatz' |
74
|
|
|
|
|
|
|
], |
75
|
|
|
|
|
|
|
'indikatorlaenge' => [ |
76
|
|
|
|
|
|
|
'1' |
77
|
|
|
|
|
|
|
], |
78
|
|
|
|
|
|
|
'satzlaenge' => [ |
79
|
|
|
|
|
|
|
'00296' |
80
|
|
|
|
|
|
|
], |
81
|
|
|
|
|
|
|
'teilfeldkennungslaenge' => [ |
82
|
|
|
|
|
|
|
'2' |
83
|
|
|
|
|
|
|
] |
84
|
|
|
|
|
|
|
}, |
85
|
|
|
|
|
|
|
[ |
86
|
|
|
|
|
|
|
[ |
87
|
|
|
|
|
|
|
'001', |
88
|
|
|
|
|
|
|
' ', |
89
|
|
|
|
|
|
|
'1000016-1', |
90
|
|
|
|
|
|
|
'identifikationsnummer des datensatzes' |
91
|
|
|
|
|
|
|
], |
92
|
|
|
|
|
|
|
[ |
93
|
|
|
|
|
|
|
'002', |
94
|
|
|
|
|
|
|
'a', |
95
|
|
|
|
|
|
|
'19890418', |
96
|
|
|
|
|
|
|
'datum der ersterfassung / fremddatenuebernahme' |
97
|
|
|
|
|
|
|
], |
98
|
|
|
|
|
|
|
[ |
99
|
|
|
|
|
|
|
'004', |
100
|
|
|
|
|
|
|
' ', |
101
|
|
|
|
|
|
|
'20010812', |
102
|
|
|
|
|
|
|
'erstellungsdatum des austauschsatzes' |
103
|
|
|
|
|
|
|
], |
104
|
|
|
|
|
|
|
... |
105
|
|
|
|
|
|
|
[ |
106
|
|
|
|
|
|
|
'810', |
107
|
|
|
|
|
|
|
' ', |
108
|
|
|
|
|
|
|
'Accademia di San Luca
|
109
|
|
|
|
|
|
|
Luca>', |
110
|
|
|
|
|
|
|
'1. verweisungsform zum namen der koerperschaft' |
111
|
|
|
|
|
|
|
], |
112
|
|
|
|
|
|
|
[ |
113
|
|
|
|
|
|
|
'850', |
114
|
|
|
|
|
|
|
'a', |
115
|
|
|
|
|
|
|
'Reale Accademia di San Luca ', |
116
|
|
|
|
|
|
|
'1. frueherer, zeitweiser oder spaeterer name der koerper |
117
|
|
|
|
|
|
|
schaft' |
118
|
|
|
|
|
|
|
], |
119
|
|
|
|
|
|
|
[ |
120
|
|
|
|
|
|
|
'852', |
121
|
|
|
|
|
|
|
'a', |
122
|
|
|
|
|
|
|
'45335-3', |
123
|
|
|
|
|
|
|
'identifikationsnummer des 1. frueheren, zeitweisen oder |
124
|
|
|
|
|
|
|
spaeteren namens' |
125
|
|
|
|
|
|
|
] |
126
|
|
|
|
|
|
|
] |
127
|
|
|
|
|
|
|
], |
128
|
|
|
|
|
|
|
'...' |
129
|
|
|
|
|
|
|
], 'MAB2::Record::gkd' ); |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
Please note that the object contains both the original string in its |
133
|
|
|
|
|
|
|
own byte oriented encoding and all fields in Unicode. The conversion |
134
|
|
|
|
|
|
|
is done by the C module. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
The normal way of accessing MAB2 records is through the use of either |
137
|
|
|
|
|
|
|
the C or C class. The C |
138
|
|
|
|
|
|
|
class binds an MAB2 file to an array and each record in the original |
139
|
|
|
|
|
|
|
MAB2 file to an array element starting with element 0. The |
140
|
|
|
|
|
|
|
C class binds to a hash with the MAB2 identifier as the |
141
|
|
|
|
|
|
|
key. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head1 Overloading |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
The tied objects have their stringifier overloaded to the |
146
|
|
|
|
|
|
|
C method so that |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
print $tie[1234]; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
always prints the record as the unaltered original input record. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 SEE ALSO |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
C, C, C |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=cut |
158
|
|
|
|
|
|
|
|
159
|
2
|
|
|
2
|
|
13
|
use constant RAW => 0; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
144
|
|
160
|
2
|
|
|
2
|
|
10
|
use constant INTERNALS => 1; # maybe nonsense: sometimes recno, |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
111
|
|
161
|
|
|
|
|
|
|
# sometimes id, whatever the *caller* |
162
|
|
|
|
|
|
|
# wants to have there |
163
|
2
|
|
|
2
|
|
22
|
use constant STRUCT => 2; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
110
|
|
164
|
2
|
|
|
2
|
|
11
|
use constant DUMPVALUE => 3; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
90
|
|
165
|
|
|
|
|
|
|
|
166
|
2
|
|
|
2
|
|
11
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
104
|
|
167
|
2
|
|
|
2
|
|
2269
|
use overload '""' => "as_string"; |
|
2
|
|
|
|
|
1637
|
|
|
2
|
|
|
|
|
18
|
|
168
|
|
|
|
|
|
|
|
169
|
2
|
|
|
2
|
|
2328
|
use Dumpvalue; |
|
2
|
|
|
|
|
10148
|
|
|
2
|
|
|
|
|
6000
|
|
170
|
|
|
|
|
|
|
our $DV = Dumpvalue->new(unctrl => "quote"); |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
our $DEBUG; |
173
|
|
|
|
|
|
|
$DEBUG = 1 unless defined $DEBUG; |
174
|
|
|
|
|
|
|
our $NAMESPACE = "MAB2::Record"; |
175
|
|
|
|
|
|
|
my $KDocs; |
176
|
|
|
|
|
|
|
my $RDocs; |
177
|
|
|
|
|
|
|
our(%type2pack) = qw( |
178
|
|
|
|
|
|
|
h titel |
179
|
|
|
|
|
|
|
y titel |
180
|
|
|
|
|
|
|
u titel |
181
|
|
|
|
|
|
|
v titel |
182
|
|
|
|
|
|
|
p pnd |
183
|
|
|
|
|
|
|
t pnd |
184
|
|
|
|
|
|
|
k gkd |
185
|
|
|
|
|
|
|
w gkd |
186
|
|
|
|
|
|
|
r swd |
187
|
|
|
|
|
|
|
s swd |
188
|
|
|
|
|
|
|
x swd |
189
|
|
|
|
|
|
|
l lokal |
190
|
|
|
|
|
|
|
e lokal |
191
|
|
|
|
|
|
|
z lokal |
192
|
|
|
|
|
|
|
); |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
{ |
195
|
|
|
|
|
|
|
local $/; |
196
|
|
|
|
|
|
|
my $strdocs = ; |
197
|
|
|
|
|
|
|
close DATA; |
198
|
|
|
|
|
|
|
($KDocs, $RDocs) = __PACKAGE__->parsedoc($strdocs); |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
{ |
202
|
|
|
|
|
|
|
my %seen; |
203
|
|
|
|
|
|
|
for my $pack (grep !$seen{$_}++, values %type2pack) { |
204
|
|
|
|
|
|
|
my $req = "MAB2/Record/$pack.pm"; |
205
|
|
|
|
|
|
|
require $req; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
sub parsedoc { |
210
|
12
|
|
|
12
|
0
|
22
|
my $self = shift; |
211
|
12
|
|
|
|
|
19
|
my $strdocs = shift; |
212
|
|
|
|
|
|
|
|
213
|
12
|
|
|
|
|
401
|
$strdocs =~ s/ ^ .*?\n ( ?=\d ) //sx; # remove header |
214
|
|
|
|
|
|
|
|
215
|
12
|
|
|
|
|
3063
|
my @docs = $strdocs =~ /\G(\d.*?\n)(?=\d|$)/sgc; # split into subdocuments |
216
|
12
|
|
|
|
|
54
|
my @kennungdocs; |
217
|
|
|
|
|
|
|
my @realrecdocs; |
218
|
12
|
|
|
|
|
27
|
for my $doc (@docs) { |
219
|
1162
|
|
|
|
|
8253
|
$doc =~ s/\s+\z//; |
220
|
1162
|
100
|
|
|
|
4134
|
if ($doc =~ /^\d\d?\s/) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
221
|
16
|
|
|
|
|
51
|
push @kennungdocs, [$doc]; |
222
|
|
|
|
|
|
|
} elsif ($doc =~ /^\d\d\d-/) { |
223
|
148
|
|
|
|
|
198
|
next; |
224
|
|
|
|
|
|
|
} elsif ($doc =~ /^\d--/) { |
225
|
28
|
|
|
|
|
42
|
next; |
226
|
|
|
|
|
|
|
} else { |
227
|
970
|
|
|
|
|
2173
|
push @realrecdocs, [$doc]; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
} |
230
|
12
|
|
|
|
|
28
|
my %seen = (); |
231
|
12
|
|
|
|
|
40
|
for my $k (0..$#kennungdocs) { |
232
|
16
|
|
|
|
|
25
|
my $kdoc = $kennungdocs[$k]; |
233
|
16
|
|
|
|
|
22
|
my $doc = $kdoc->[0]; |
234
|
16
|
|
|
|
|
77
|
my($line1,$kexplain) = $doc =~ /(^[^\n]+)(?:\n(.+))?/s; |
235
|
|
|
|
|
|
|
# print "line1: $line1\n"; |
236
|
16
|
|
|
|
|
269
|
my($start,$to,$name) = $line1 =~ m{ ^ (\d+) (.{8}) \s+ (.*) }x; # |
237
|
16
|
|
|
|
|
20
|
my $length = 0; |
238
|
16
|
100
|
|
|
|
52
|
if ($to =~ /-\s(\d+)/) { |
239
|
8
|
|
|
|
|
22
|
$length = $1 - $start; |
240
|
|
|
|
|
|
|
} |
241
|
16
|
|
|
|
|
17
|
$length++; # 0->1, 4->5 :-) |
242
|
16
|
|
|
|
|
25
|
$name = lc $name; |
243
|
16
|
|
|
|
|
32
|
$name =~ s/[^a-z0-9_]/_/g; |
244
|
16
|
50
|
|
|
|
61
|
die if $seen{$name}++; |
245
|
|
|
|
|
|
|
# print "start: $start\n"; |
246
|
|
|
|
|
|
|
# print "name: $name\n"; |
247
|
|
|
|
|
|
|
# print "kexplain: $kexplain\n" if defined $kexplain; |
248
|
16
|
|
|
|
|
18
|
my %kexplain; |
249
|
16
|
100
|
66
|
|
|
64
|
if ($kexplain && length $kexplain) { |
250
|
4
|
|
|
|
|
515
|
my @code = $kexplain =~ /\G\s+([a-z])\s=\s(.*?)(?=\n\s+[a-z]\s=\s|$)/sgc; |
251
|
4
|
|
|
|
|
32
|
%kexplain = @code; |
252
|
4
|
|
|
|
|
14
|
for my $e (keys %kexplain) { |
253
|
44
|
|
|
|
|
71
|
$kexplain{$e} =~ s/^\s+//; |
254
|
44
|
|
|
|
|
114
|
$kexplain{$e} =~ s/\s+$//; |
255
|
44
|
|
|
|
|
213
|
$kexplain{$e} =~ s/\s+/ /gs; |
256
|
|
|
|
|
|
|
# print "ex $e: $kexplain{$e}\n"; |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
} |
259
|
16
|
|
|
|
|
34
|
$kdoc->[1] = $start; |
260
|
16
|
|
|
|
|
41
|
$kdoc->[2] = $length; |
261
|
16
|
|
|
|
|
24
|
$kdoc->[3] = $name; # all uppercase hurts |
262
|
16
|
|
|
|
|
62
|
$kdoc->[4] = \%kexplain; |
263
|
|
|
|
|
|
|
} |
264
|
12
|
|
|
|
|
22
|
%seen = (); |
265
|
12
|
|
|
|
|
53
|
local $| = 1; |
266
|
12
|
|
|
|
|
26
|
for my $r (0..$#realrecdocs) { |
267
|
970
|
|
|
|
|
1211
|
my $rdoc = $realrecdocs[$r]; |
268
|
970
|
|
|
|
|
1062
|
my $doc = $rdoc->[0]; |
269
|
|
|
|
|
|
|
# print "========>\n", $doc, "\n<========"; |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
# very different from above, because "line1" can be more than one line |
272
|
970
|
|
|
|
|
6104
|
my($line1,$rexplain) = $doc =~ /^((?:[^\n]|\n(?!\n))+)(?:\n\n(.+))?/s; |
273
|
970
|
|
|
|
|
1673
|
$line1 =~ s/^\s+//; |
274
|
970
|
|
|
|
|
2376
|
$line1 =~ s/\s+$//; |
275
|
970
|
|
|
|
|
4914
|
$line1 =~ s/\s+/ /g; |
276
|
970
|
|
|
|
|
2637
|
$line1 =~ s/^(\d+)\s+//; |
277
|
970
|
|
|
|
|
1560
|
my($codenr) = $1; |
278
|
970
|
50
|
|
|
|
2867
|
die "seeing again $codenr???" if $seen{$codenr}++; |
279
|
970
|
100
|
|
|
|
1356
|
if ($rexplain) { |
280
|
|
|
|
|
|
|
# $rexplain =~ s/^\s+Indikator:\s+//g; |
281
|
628
|
|
|
|
|
2173
|
$rexplain =~ s/^\s+//g; |
282
|
|
|
|
|
|
|
} else { |
283
|
342
|
|
|
|
|
415
|
$rexplain = ""; |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
# print "self[$self]codenr[$codenr]rexplain[$rexplain]\n" if defined $rexplain; |
286
|
970
|
|
|
|
|
1773
|
$rdoc->[1] = $codenr; |
287
|
970
|
|
|
|
|
1163
|
$rdoc->[2] = undef; |
288
|
970
|
|
|
|
|
1502
|
$rdoc->[3] = $line1; |
289
|
970
|
|
|
|
|
2117
|
$rdoc->[4] = $rexplain; # XXX this needs to become more useful |
290
|
|
|
|
|
|
|
# than just plain text |
291
|
|
|
|
|
|
|
} |
292
|
12
|
|
|
|
|
26
|
my $end = $#realrecdocs; |
293
|
12
|
|
|
|
|
25
|
for my $r (0..$end) { |
294
|
970
|
100
|
100
|
|
|
3369
|
next unless $realrecdocs[$r][4] && $realrecdocs[$r][4] eq "..."; |
295
|
22
|
|
|
|
|
54
|
my $after_yadda = $realrecdocs[$r+1][1]; |
296
|
|
|
|
|
|
|
# print "Found >>...<< in $realrecdocs[$r][1], need to fill upto $after_yadda"; |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# Ich will vielleicht eine Zahl in diesem Text hochzaehlen |
299
|
22
|
100
|
|
|
|
96
|
if (my($foundnumber) = $realrecdocs[$r][3] =~ /(\d+)/) { |
300
|
18
|
|
|
|
|
25
|
my $step = 1; |
301
|
18
|
|
|
|
|
26
|
my $rr3 = $realrecdocs[$r][3]; |
302
|
18
|
100
|
|
|
|
97
|
if ($rr3 eq "ZUSAETZLICHE ANGABEN ZUR 2. VERWEISUNGSFORM") { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
303
|
2
|
|
|
|
|
3
|
$step = 2; |
304
|
|
|
|
|
|
|
} elsif ($rr3 eq "IDENTIFIKATIONSNUMMER DES 2. FRUEHEREN, ZEITWEISEN ODER SPAETEREN NAMENS DER KOERPERSCHAFT") { |
305
|
|
|
|
|
|
|
# gkd |
306
|
2
|
|
|
|
|
5
|
$step = 3; |
307
|
|
|
|
|
|
|
} elsif ($rr3 eq "ERLAEUTERUNGEN ZUR 2. SCHLAGWORTKETTE") { |
308
|
6
|
|
|
|
|
8
|
$step = 5; |
309
|
|
|
|
|
|
|
} elsif ($rr3 eq "KOERPERSCHAFT, BEI DER DIE 2. PERSON BESCHAEFTIGT IST") { |
310
|
2
|
|
|
|
|
4
|
$step = 4; |
311
|
|
|
|
|
|
|
} elsif ($rr3 eq "IDENTIFIKATIONSNUMMER DES KOERPERSCHAFTSNAMENSATZES DER 2. KOERPERSCHAFT") { |
312
|
2
|
|
|
|
|
5
|
$step = 2; |
313
|
|
|
|
|
|
|
} elsif ($rr3 eq "ZUSAETZE ZUM 2. PARALLELSACHTITEL") { |
314
|
2
|
|
|
|
|
3
|
$step = 4; |
315
|
|
|
|
|
|
|
} elsif ($rr3 eq "SACHTITEL DER 2. NE") { |
316
|
2
|
|
|
|
|
5
|
$step = 6; |
317
|
|
|
|
|
|
|
} |
318
|
18
|
|
|
|
|
36
|
my $before_yadda = $realrecdocs[$r][1]; |
319
|
18
|
|
|
|
|
31
|
for my $offset (1..$step) { |
320
|
|
|
|
|
|
|
# warn "offset[$offset]"; |
321
|
72
|
|
|
|
|
95
|
my $first = $before_yadda + $offset; |
322
|
72
|
|
|
|
|
83
|
my $blueprint = $first - $step; |
323
|
|
|
|
|
|
|
# warn "first[$first]blueprint[$blueprint]"; |
324
|
72
|
|
|
|
|
63
|
my $blueprintrec; |
325
|
72
|
|
|
|
|
86
|
for my $rr (@realrecdocs) { |
326
|
|
|
|
|
|
|
# warn "DEBUG: rr1[$rr->[1]]"; |
327
|
7792
|
100
|
|
|
|
13854
|
next unless $rr->[1] == $blueprint; |
328
|
66
|
|
|
|
|
74
|
$blueprintrec = $rr; |
329
|
66
|
|
|
|
|
77
|
last; |
330
|
|
|
|
|
|
|
} |
331
|
72
|
100
|
|
|
|
137
|
next unless $blueprintrec; |
332
|
66
|
50
|
|
|
|
173
|
die "Unexpected blueprintrec3[$blueprintrec->[3]]" |
333
|
|
|
|
|
|
|
unless $blueprintrec->[3] =~ /2/; |
334
|
66
|
|
|
|
|
88
|
my $sprintf = $blueprintrec->[3]; |
335
|
66
|
|
|
|
|
175
|
$sprintf =~ s/2/%d/; |
336
|
66
|
|
|
|
|
79
|
my $foundnumber = 2; |
337
|
66
|
|
|
|
|
153
|
for (my $nr = $first; $nr<$after_yadda; $nr+=$step) { |
338
|
694
|
|
|
|
|
3252
|
push @realrecdocs, [ |
339
|
|
|
|
|
|
|
">>>generated<<<", |
340
|
|
|
|
|
|
|
sprintf("%03d", $nr), |
341
|
|
|
|
|
|
|
undef, |
342
|
|
|
|
|
|
|
sprintf($sprintf,++$foundnumber), |
343
|
|
|
|
|
|
|
undef |
344
|
|
|
|
|
|
|
]; |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
} else { |
348
|
4
|
|
|
|
|
11
|
for my $i ($realrecdocs[$r][1]+1..$after_yadda-1) { # $after_yadda (sans -1) XXX |
349
|
16
|
|
|
|
|
77
|
push @realrecdocs, [ |
350
|
|
|
|
|
|
|
">>>same as $realrecdocs[$r][1]<<<", |
351
|
|
|
|
|
|
|
sprintf("%03d", $i), |
352
|
|
|
|
|
|
|
undef, |
353
|
|
|
|
|
|
|
$realrecdocs[$r][3], |
354
|
|
|
|
|
|
|
undef |
355
|
|
|
|
|
|
|
]; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
} |
359
|
|
|
|
|
|
|
# Now realrecdocs is unsorted, but we prefer it as a hash anyway |
360
|
12
|
|
|
|
|
21
|
my %realrecdocs; |
361
|
12
|
|
|
|
|
22
|
for my $rdoc (@realrecdocs) { |
362
|
1680
|
|
|
|
|
4055
|
$realrecdocs{$rdoc->[1]} = $rdoc; |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
|
365
|
12
|
|
|
|
|
350
|
return(\@kennungdocs,\%realrecdocs); |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
sub new { |
369
|
2
|
|
|
2
|
0
|
116
|
my($me,$raw,$key) = @_; |
370
|
2
|
|
33
|
|
|
18
|
my $self = bless [$raw,$key], ref $me || $me; |
371
|
2
|
50
|
|
|
|
8
|
if ( my $pack = $self->_class() ) { # was $struct->[0]{satztyp}[0] |
372
|
2
|
|
|
|
|
9
|
bless $self, "MAB2::Record::$pack"; |
373
|
|
|
|
|
|
|
} else { |
374
|
0
|
|
|
|
|
0
|
die "Couldn't determine class."; |
375
|
|
|
|
|
|
|
} |
376
|
2
|
|
|
|
|
7
|
$self; |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
sub as_string { |
380
|
1
|
|
|
1
|
0
|
12
|
my($self) = @_; |
381
|
1
|
|
|
|
|
77
|
$self->[RAW]; |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
sub readable { |
385
|
2
|
|
|
2
|
0
|
5
|
my($self) = @_; |
386
|
2
|
|
|
|
|
12
|
$self->_struct; |
387
|
2
|
|
|
|
|
4
|
my @m; |
388
|
2
|
|
|
|
|
5
|
my $base = $self->[STRUCT][0]; |
389
|
2
|
|
|
|
|
6
|
my $cont = $self->[STRUCT][1]; |
390
|
2
|
|
|
|
|
20
|
for my $k (sort keys %$base) { |
391
|
16
|
|
|
|
|
20
|
my $v; |
392
|
16
|
100
|
|
|
|
15
|
if (@{$base->{$k}}>1) { |
|
16
|
|
|
|
|
43
|
|
393
|
4
|
|
|
|
|
8
|
$v = sprintf "%s (%s)", @{$base->{$k}}; |
|
4
|
|
|
|
|
17
|
|
394
|
|
|
|
|
|
|
} else { |
395
|
12
|
|
|
|
|
24
|
$v = $base->{$k}[0]; |
396
|
|
|
|
|
|
|
} |
397
|
16
|
|
|
|
|
76
|
push @m, sprintf "%-25s: %s", $k, $v; |
398
|
|
|
|
|
|
|
} |
399
|
2
|
|
|
|
|
9
|
for my $sr (@$cont) { |
400
|
26
|
|
|
|
|
43
|
my $print = sprintf "%3s %1s %s [%s]", map { Dumpvalue::unctrl($_); } @$sr; |
|
104
|
|
|
|
|
728
|
|
401
|
26
|
|
|
|
|
299
|
if (0 && $print =~ /[^\040-\177]/) { |
402
|
|
|
|
|
|
|
$print .= sprintf("\n=%s\n=%s", |
403
|
|
|
|
|
|
|
Encode::encode("ascii",$sr->[2],Encode::FB_XMLCREF()), |
404
|
|
|
|
|
|
|
$sr->[2], |
405
|
|
|
|
|
|
|
); |
406
|
|
|
|
|
|
|
} |
407
|
26
|
|
|
|
|
59
|
push @m, $print; |
408
|
|
|
|
|
|
|
} |
409
|
2
|
|
|
|
|
50
|
join "\n", @m; |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
sub dump { |
413
|
0
|
|
|
0
|
0
|
0
|
my($self) = @_; |
414
|
0
|
|
|
|
|
0
|
require Data::Dumper; |
415
|
0
|
|
|
|
|
0
|
$Data::Dumper::Indent = 1; |
416
|
0
|
|
|
|
|
0
|
$self->_struct; |
417
|
0
|
|
|
|
|
0
|
my $x = Data::Dumper::Dumper($self); |
418
|
0
|
|
|
|
|
0
|
$x =~ s/\[\n\s+/[/g; |
419
|
0
|
|
|
|
|
0
|
$x =~ s/\n\s+\]/]/g; |
420
|
0
|
|
|
|
|
0
|
$x =~ s/',\n\s+'/', '/g; |
421
|
0
|
|
|
|
|
0
|
$x; |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
sub _class { |
425
|
2
|
|
|
2
|
|
3
|
my $self = shift; |
426
|
2
|
|
|
|
|
50
|
my $type = substr($self->[RAW],23,1); |
427
|
2
|
50
|
|
|
|
8
|
warn "ALERT: type[$type]" unless exists $type2pack{$type}; |
428
|
2
|
|
|
|
|
8
|
$type2pack{$type}; |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
sub id { |
432
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
433
|
0
|
|
|
|
|
0
|
my $id; |
434
|
0
|
|
|
|
|
0
|
if (0) { # 228 secunden fuer Datei 12 (Keywords) ohne debug |
435
|
|
|
|
|
|
|
# my $struct = $self->_struct; |
436
|
|
|
|
|
|
|
# $id = $struct->[1][0][0] eq "001" ? $struct->[1][0][2] : die; |
437
|
|
|
|
|
|
|
} else { # 67 secs fuer gleiche Arbeit, 852 secs fuer 01 |
438
|
0
|
|
|
|
|
0
|
my $raw = $self->as_string; |
439
|
0
|
|
|
|
|
0
|
($id) = substr($raw,28) =~ m/([^\c^\c]]+)/; |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
# warn "id[$id]"; |
442
|
|
|
|
|
|
|
# die Dumpvalue::unctrl("id1[$id1]id2[$id2]") unless $id1 eq $id2; |
443
|
0
|
|
|
|
|
0
|
$id; |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
sub _struct { |
447
|
2
|
|
|
2
|
|
4
|
my $self = shift; |
448
|
2
|
50
|
|
|
|
8
|
return $self->[STRUCT] if $self->[STRUCT]; |
449
|
2
|
|
|
|
|
4
|
my $struct; |
450
|
2
|
50
|
|
|
|
12
|
if ($DEBUG) { |
451
|
2
|
|
|
|
|
12
|
$self->[DUMPVALUE] = $DV->stringify($self->[RAW]); |
452
|
|
|
|
|
|
|
} |
453
|
2
|
|
|
|
|
172
|
my $derec = Encode::decode("MAB2",$self->[RAW]); |
454
|
2
|
|
|
|
|
125
|
pos $derec = 0; |
455
|
2
|
|
|
|
|
7
|
for my $k (@$KDocs) { |
456
|
16
|
|
|
|
|
35
|
my $re = "."x$k->[2]; |
457
|
16
|
50
|
|
|
|
292
|
$struct->[0]{$k->[3]}[0] = $1 if $derec =~ /\G($re)/gc; |
458
|
|
|
|
|
|
|
##########^ 0=base/kennungsdocs |
459
|
16
|
50
|
|
|
|
45
|
if ($DEBUG) { |
460
|
16
|
100
|
|
|
|
19
|
$struct->[0]{$k->[3]}[1] = $k->[4]{$1} if %{$k->[4]}; |
|
16
|
|
|
|
|
72
|
|
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
} |
463
|
2
|
50
|
|
|
|
9
|
warn "ALERT: Datenanfangsadresse nicht 24!" unless |
464
|
|
|
|
|
|
|
(my $daa = $struct->[0]{datenanfangsadresse}[0]) == 24; |
465
|
|
|
|
|
|
|
# ^^^^^^^^^ |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
# avoid using stringdata in numeric context, because it turns on IOK |
468
|
|
|
|
|
|
|
# or something and the next print prints "24" instead of "00024" |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
# strp = structpart of the record |
471
|
2
|
|
|
|
|
63
|
my(@strp) = $derec =~ / \G (\d\d\d) (.) ([^\c^]+) (?: \c] | \c^ )/xgc; # |
472
|
2
|
|
|
|
|
7
|
my(@str); |
473
|
2
|
|
|
|
|
7
|
while (@strp) { |
474
|
26
|
50
|
|
|
|
59
|
die "Invalid strp" unless @strp >=3; |
475
|
26
|
|
|
|
|
70
|
my $str = [ splice @strp, 0, 3 ]; |
476
|
26
|
50
|
|
|
|
67
|
if ($DEBUG) { |
477
|
|
|
|
|
|
|
# die Bezeichnung des Feldes im "real" Record. Da dort alles |
478
|
|
|
|
|
|
|
# Uppercase ist, muessen wir lc nehmen, sonst erschlaegt uns das |
479
|
26
|
50
|
|
|
|
106
|
$str->[3] = $str->[0] ? lc $self->segmentname($str->[0]) : "UNDEF"; |
480
|
|
|
|
|
|
|
} |
481
|
26
|
|
|
|
|
87
|
push @str, $str; |
482
|
|
|
|
|
|
|
} |
483
|
2
|
|
|
|
|
6
|
$struct->[1] = \@str; |
484
|
2
|
|
|
|
|
8
|
$self->[STRUCT] = $struct; |
485
|
|
|
|
|
|
|
} |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
sub segmentname { |
488
|
18
|
|
|
18
|
0
|
27
|
my $self = shift; |
489
|
18
|
|
|
|
|
24
|
my $rec = shift; |
490
|
18
|
|
|
|
|
119
|
$RDocs->{$rec}[3]; |
491
|
|
|
|
|
|
|
} |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
sub subrecords { |
494
|
0
|
|
|
0
|
0
|
|
my($self) = shift; |
495
|
0
|
|
|
|
|
|
$self->_struct; |
496
|
0
|
|
|
|
|
|
@{$self->[STRUCT][1]}; |
|
0
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
sub subrecords_ref { |
500
|
0
|
|
|
0
|
0
|
|
my($self) = shift; |
501
|
0
|
|
|
|
|
|
$self->_struct; |
502
|
0
|
|
|
|
|
|
$self->[STRUCT][1]; |
503
|
|
|
|
|
|
|
} |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
sub date_004 { |
506
|
0
|
|
|
0
|
0
|
|
my($self) = @_; |
507
|
0
|
|
|
|
|
|
my $sr = $self->subrecords_ref; |
508
|
0
|
|
|
|
|
|
for my $i (0..$#$sr) { |
509
|
0
|
0
|
|
|
|
|
next unless $sr->[$i][0] eq "004"; |
510
|
0
|
|
|
|
|
|
return $sr->[$i][2]; |
511
|
|
|
|
|
|
|
} |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
1; |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
# segm000.txt |
517
|
|
|
|
|
|
|
__DATA__ |