File Coverage

blib/lib/MAB2/Record/Base.pm

Criterion	Covered	Total	%
statement	174	202	86.1
branch	50	66	75.7
condition	6	9	66.6
subroutine	15	20	75.0
pod	0	10	0.0
total	245	307	79.8

line	stmt	bran	cond	sub	pod	time	code
1							package MAB2::Record::Base;
2							our $VERSION = '0.03';
3
4	2			2		1473	use Encode::MAB2;
	2					5
	2					116
5
6							=head1 NAME
7
8							MAB2::Record::Base - Access an MAB2 record
9
10							=head1 SYNOPSIS
11
12							use MAB2::Record::Base;
13
14							# Constructor
15							my $mab2raw = "00296nM2.01200024 k001 1000016-1\c^002a19890418".
16							"\c^004 20010812\c^028b1000016-1\c^029 HK00158537\c^030 aa1dc".
17							"\|m\c^036aIT\c^066 \|\c^070 9002\c^070aHBZ\c^800 Accademia Na".
18							"zionale di San Luca \c^810 Accademia di San Luca
19							"ccademia Nazionale di San Luca>\c^850aReale Accademia di San Lu".
20							"ca \c^852a45335-3\c^\c]";
21							my $mab2 = MAB2::Record::Base->new($mab2raw);
22							# $mab2 now blessed into MAB2::Record::gkd because it is a gkd record
23
24							# various representations:
25							print $mab2->id; # just the ID
26							print $mab2->readable; # quite readable
27							print $mab2->as_string; # the raw string we put into it
28							print $mab2->dump; # only useful for debugging the module itself
29
30
31							=head1 DESCRIPTION
32
33							C is the common base class for all classes
34							implementing MAB2 record types:
35
36							MAB2::Record::gkd
37							MAB2::Record::lokal
38							MAB2::Record::pnd
39							MAB2::Record::swd
40							MAB2::Record::titel
41
42							The constructor C takes a raw MAB2 record as argument and returns
43							an object which is blessed into one of the five above listed classes.
44							Some level of proficiency in dealing with MAB2 records is needed for
45							the user of this module for further processing of the objects. It is
46							recommended to use C to get acquainted with the raw format
47							of the created objects.
48
49							For illustration purpose, here is the Data::Dumper output of the full
50							object into which the sample record from the SYNOPSIS section is
51							transformed:
52
53							$VAR1 = bless( [
54							'...',
55							undef,
56							[
57							{
58							'nicht_benutzt' => [
59							' '
60							],
61							'datenanfangsadresse' => [
62							'00024'
63							],
64							'satztyp' => [
65							'k',
66							'Koerperschaftsnamensatz (MAB-GKD)'
67							],
68							'versionsangabe' => [
69							'M2.0'
70							],
71							'satzstatus' => [
72							'n',
73							'neuer Datensatz'
74							],
75							'indikatorlaenge' => [
76							'1'
77							],
78							'satzlaenge' => [
79							'00296'
80							],
81							'teilfeldkennungslaenge' => [
82							'2'
83							]
84							},
85							[
86							[
87							'001',
88							' ',
89							'1000016-1',
90							'identifikationsnummer des datensatzes'
91							],
92							[
93							'002',
94							'a',
95							'19890418',
96							'datum der ersterfassung / fremddatenuebernahme'
97							],
98							[
99							'004',
100							' ',
101							'20010812',
102							'erstellungsdatum des austauschsatzes'
103							],
104							...
105							[
106							'810',
107							' ',
108							'Accademia di San Luca
109							Luca>',
110							'1. verweisungsform zum namen der koerperschaft'
111							],
112							[
113							'850',
114							'a',
115							'Reale Accademia di San Luca ',
116							'1. frueherer, zeitweiser oder spaeterer name der koerper
117							schaft'
118							],
119							[
120							'852',
121							'a',
122							'45335-3',
123							'identifikationsnummer des 1. frueheren, zeitweisen oder
124							spaeteren namens'
125							]
126							]
127							],
128							'...'
129							], 'MAB2::Record::gkd' );
130
131
132							Please note that the object contains both the original string in its
133							own byte oriented encoding and all fields in Unicode. The conversion
134							is done by the C module.
135
136							The normal way of accessing MAB2 records is through the use of either
137							the C or C class. The C
138							class binds an MAB2 file to an array and each record in the original
139							MAB2 file to an array element starting with element 0. The
140							C class binds to a hash with the MAB2 identifier as the
141							key.
142
143							=head1 Overloading
144
145							The tied objects have their stringifier overloaded to the
146							C method so that
147
148							print $tie[1234];
149
150							always prints the record as the unaltered original input record.
151
152							=head1 SEE ALSO
153
154							C, C, C
155
156
157							=cut
158
159	2			2		10	use constant RAW => 0;
	2					5
	2					148
160	2			2		11	use constant INTERNALS => 1; # maybe nonsense: sometimes recno,
	2					6
	2					87
161							# sometimes id, whatever the caller
162							# wants to have there
163	2			2		10	use constant STRUCT => 2;
	2					4
	2					81
164	2			2		9	use constant DUMPVALUE => 3;
	2					3
	2					79
165
166	2			2		10	use strict;
	2					3
	2					63
167	2			2		1556	use overload '""' => "as_string";
	2					1202
	2					16
168
169	2			2		3008	use Dumpvalue;
	2					12830
	2					7142
170							our $DV = Dumpvalue->new(unctrl => "quote");
171
172							our $DEBUG;
173							$DEBUG = 1 unless defined $DEBUG;
174							our $NAMESPACE = "MAB2::Record";
175							my $KDocs;
176							my $RDocs;
177							our(%type2pack) = qw(
178							h titel
179							y titel
180							u titel
181							v titel
182							p pnd
183							t pnd
184							k gkd
185							w gkd
186							r swd
187							s swd
188							x swd
189							l lokal
190							e lokal
191							z lokal
192							);
193
194							{
195							local $/;
196							my $strdocs = ;
197							close DATA;
198							($KDocs, $RDocs) = __PACKAGE__->parsedoc($strdocs);
199							}
200
201							{
202							my %seen;
203							for my $pack (grep !$seen{$_}++, values %type2pack) {
204							my $req = "MAB2/Record/$pack.pm";
205							require $req;
206							}
207							}
208
209							sub parsedoc {
210	12			12	0	24	my $self = shift;
211	12					20	my $strdocs = shift;
212
213	12					348	$strdocs =~ s/ ^ .*?\n (?=\d ) //sx; # remove header
214
215	12					2524	my @docs = $strdocs =~ /\G(\d.*?\n)(?=\d\|$)/sgc; # split into subdocuments
216	12					52	my @kennungdocs;
217							my @realrecdocs;
218	12					27	for my $doc (@docs) {
219	1162					8001	$doc =~ s/\s+\z//;
220	1162	100				4373	if ($doc =~ /^\d\d?\s/) {
		100
		100
221	16					52	push @kennungdocs, [$doc];
222							} elsif ($doc =~ /^\d\d\d-/) {
223	148					220	next;
224							} elsif ($doc =~ /^\d--/) {
225	28					46	next;
226							} else {
227	970					2320	push @realrecdocs, [$doc];
228							}
229							}
230	12					25	my %seen = ();
231	12					36	for my $k (0..$#kennungdocs) {
232	16					25	my $kdoc = $kennungdocs[$k];
233	16					22	my $doc = $kdoc->[0];
234	16					60	my($line1,$kexplain) = $doc =~ /(^[^\n]+)(?:\n(.+))?/s;
235							# print "line1: $line1\n";
236	16					60	my($start,$to,$name) = $line1 =~ m{ ^ (\d+) (.{8}) \s+ (.*) }x; #
237	16					25	my $length = 0;
238	16	100				49	if ($to =~ /-\s(\d+)/) {
239	8					19	$length = $1 - $start;
240							}
241	16					19	$length++; # 0->1, 4->5 :-)
242	16					26	$name = lc $name;
243	16					30	$name =~ s/[^a-z0-9_]/_/g;
244	16	50				59	die if $seen{$name}++;
245							# print "start: $start\n";
246							# print "name: $name\n";
247							# print "kexplain: $kexplain\n" if defined $kexplain;
248	16					18	my %kexplain;
249	16	100	66			54	if ($kexplain && length $kexplain) {
250	4					443	my @code = $kexplain =~ /\G\s+([a-z])\s=\s(.*?)(?=\n\s+[a-z]\s=\s\|$)/sgc;
251	4					37	%kexplain = @code;
252	4					14	for my $e (keys %kexplain) {
253	44					82	$kexplain{$e} =~ s/^\s+//;
254	44					107	$kexplain{$e} =~ s/\s+$//;
255	44					215	$kexplain{$e} =~ s/\s+/ /gs;
256							# print "ex $e: $kexplain{$e}\n";
257							}
258							}
259	16					32	$kdoc->[1] = $start;
260	16					37	$kdoc->[2] = $length;
261	16					23	$kdoc->[3] = $name; # all uppercase hurts
262	16					45	$kdoc->[4] = \%kexplain;
263							}
264	12					25	%seen = ();
265	12					47	local $\| = 1;
266	12					29	for my $r (0..$#realrecdocs) {
267	970					1335	my $rdoc = $realrecdocs[$r];
268	970					1355	my $doc = $rdoc->[0];
269							# print "========>\n", $doc, "\n<========";
270
271							# very different from above, because "line1" can be more than one line
272	970					5932	my($line1,$rexplain) = $doc =~ /^((?:[^\n]\|\n(?!\n))+)(?:\n\n(.+))?/s;
273	970					1751	$line1 =~ s/^\s+//;
274	970					2433	$line1 =~ s/\s+$//;
275	970					4543	$line1 =~ s/\s+/ /g;
276	970					2496	$line1 =~ s/^(\d+)\s+//;
277	970					1714	my($codenr) = $1;
278	970	50				2710	die "seeing again $codenr???" if $seen{$codenr}++;
279	970	100				1505	if ($rexplain) {
280							# $rexplain =~ s/^\s+Indikator:\s+//g;
281	628					2112	$rexplain =~ s/^\s+//g;
282							} else {
283	342					461	$rexplain = "";
284							}
285							# print "self[$self]codenr[$codenr]rexplain[$rexplain]\n" if defined $rexplain;
286	970					1775	$rdoc->[1] = $codenr;
287	970					1312	$rdoc->[2] = undef;
288	970					1522	$rdoc->[3] = $line1;
289	970					1942	$rdoc->[4] = $rexplain; # XXX this needs to become more useful
290							# than just plain text
291							}
292	12					19	my $end = $#realrecdocs;
293	12					26	for my $r (0..$end) {
294	970	100	100			3445	next unless $realrecdocs[$r][4] && $realrecdocs[$r][4] eq "...";
295	22					49	my $after_yadda = $realrecdocs[$r+1][1];
296							# print "Found >>...<< in $realrecdocs[$r][1], need to fill upto $after_yadda";
297
298							# Ich will vielleicht eine Zahl in diesem Text hochzaehlen
299	22	100				102	if (my($foundnumber) = $realrecdocs[$r][3] =~ /(\d+)/) {
300	18					23	my $step = 1;
301	18					28	my $rr3 = $realrecdocs[$r][3];
302	18	100				92	if ($rr3 eq "ZUSAETZLICHE ANGABEN ZUR 2. VERWEISUNGSFORM") {
		100
		100
		100
		100
		100
		50
303	2					3	$step = 2;
304							} elsif ($rr3 eq "IDENTIFIKATIONSNUMMER DES 2. FRUEHEREN, ZEITWEISEN ODER SPAETEREN NAMENS DER KOERPERSCHAFT") {
305							# gkd
306	2					3	$step = 3;
307							} elsif ($rr3 eq "ERLAEUTERUNGEN ZUR 2. SCHLAGWORTKETTE") {
308	6					9	$step = 5;
309							} elsif ($rr3 eq "KOERPERSCHAFT, BEI DER DIE 2. PERSON BESCHAEFTIGT IST") {
310	2					4	$step = 4;
311							} elsif ($rr3 eq "IDENTIFIKATIONSNUMMER DES KOERPERSCHAFTSNAMENSATZES DER 2. KOERPERSCHAFT") {
312	2					5	$step = 2;
313							} elsif ($rr3 eq "ZUSAETZE ZUM 2. PARALLELSACHTITEL") {
314	2					4	$step = 4;
315							} elsif ($rr3 eq "SACHTITEL DER 2. NE") {
316	2					4	$step = 6;
317							}
318	18					34	my $before_yadda = $realrecdocs[$r][1];
319	18					38	for my $offset (1..$step) {
320							# warn "offset[$offset]";
321	72					101	my $first = $before_yadda + $offset;
322	72					87	my $blueprint = $first - $step;
323							# warn "first[$first]blueprint[$blueprint]";
324	72					74	my $blueprintrec;
325	72					99	for my $rr (@realrecdocs) {
326							# warn "DEBUG: rr1[$rr->[1]]";
327	7792	100				16040	next unless $rr->[1] == $blueprint;
328	66					78	$blueprintrec = $rr;
329	66					90	last;
330							}
331	72	100				142	next unless $blueprintrec;
332	66	50				180	die "Unexpected blueprintrec3[$blueprintrec->[3]]"
333							unless $blueprintrec->[3] =~ /2/;
334	66					94	my $sprintf = $blueprintrec->[3];
335	66					189	$sprintf =~ s/2/%d/;
336	66					93	my $foundnumber = 2;
337	66					160	for (my $nr = $first; $nr<$after_yadda; $nr+=$step) {
338	694					3504	push @realrecdocs, [
339							">>>generated<<<",
340							sprintf("%03d", $nr),
341							undef,
342							sprintf($sprintf,++$foundnumber),
343							undef
344							];
345							}
346							}
347							} else {
348	4					15	for my $i ($realrecdocs[$r][1]+1..$after_yadda-1) { # $after_yadda (sans -1) XXX
349	16					84	push @realrecdocs, [
350							">>>same as $realrecdocs[$r][1]<<<",
351							sprintf("%03d", $i),
352							undef,
353							$realrecdocs[$r][3],
354							undef
355							];
356							}
357							}
358							}
359							# Now realrecdocs is unsorted, but we prefer it as a hash anyway
360	12					21	my %realrecdocs;
361	12					25	for my $rdoc (@realrecdocs) {
362	1680					3055	$realrecdocs{$rdoc->[1]} = $rdoc;
363							}
364
365	12					258	return(\@kennungdocs,\%realrecdocs);
366							}
367
368							sub new {
369	2			2	0	4855	my($me,$raw,$key) = @_;
370	2		33			15	my $self = bless [$raw,$key], ref $me \|\| $me;
371	2	50				7	if ( my $pack = $self->_class() ) { # was $struct->[0]{satztyp}[0]
372	2					8	bless $self, "MAB2::Record::$pack";
373							} else {
374	0					0	die "Couldn't determine class.";
375							}
376	2					5	$self;
377							}
378
379							sub as_string {
380	1			1	0	9	my($self) = @_;
381	1					49	$self->[RAW];
382							}
383
384							sub readable {
385	2			2	0	5	my($self) = @_;
386	2					9	$self->_struct;
387	2					3	my @m;
388	2					3	my $base = $self->[STRUCT][0];
389	2					5	my $cont = $self->[STRUCT][1];
390	2					13	for my $k (sort keys %$base) {
391	16					18	my $v;
392	16	100				17	if (@{$base->{$k}}>1) {
	16					39
393	4					6	$v = sprintf "%s (%s)", @{$base->{$k}};
	4					13
394							} else {
395	12					18	$v = $base->{$k}[0];
396							}
397	16					60	push @m, sprintf "%-25s: %s", $k, $v;
398							}
399	2					5	for my $sr (@$cont) {
400	26					39	my $print = sprintf "%3s %1s %s [%s]", map { Dumpvalue::unctrl($_); } @$sr;
	104					594
401	26					244	if (0 && $print =~ /[^\040-\177]/) {
402							$print .= sprintf("\n=%s\n=%s",
403							Encode::encode("ascii",$sr->[2],Encode::FB_XMLCREF()),
404							$sr->[2],
405							);
406							}
407	26					43	push @m, $print;
408							}
409	2					21	join "\n", @m;
410							}
411
412							sub dump {
413	0			0	0	0	my($self) = @_;
414	0					0	require Data::Dumper;
415	0					0	$Data::Dumper::Indent = 1;
416	0					0	$self->_struct;
417	0					0	my $x = Data::Dumper::Dumper($self);
418	0					0	$x =~ s/\[\n\s+/[/g;
419	0					0	$x =~ s/\n\s+\]/]/g;
420	0					0	$x =~ s/',\n\s+'/', '/g;
421	0					0	$x;
422							}
423
424							sub _class {
425	2			2		3	my $self = shift;
426	2					43	my $type = substr($self->[RAW],23,1);
427	2	50				8	warn "ALERT: type[$type]" unless exists $type2pack{$type};
428	2					8	$type2pack{$type};
429							}
430
431							sub id {
432	0			0	0	0	my $self = shift;
433	0					0	my $id;
434	0					0	if (0) { # 228 secunden fuer Datei 12 (Keywords) ohne debug
435							# my $struct = $self->_struct;
436							# $id = $struct->[1][0][0] eq "001" ? $struct->[1][0][2] : die;
437							} else { # 67 secs fuer gleiche Arbeit, 852 secs fuer 01
438	0					0	my $raw = $self->as_string;
439	0					0	($id) = substr($raw,28) =~ m/([^\c^\c]]+)/;
440							}
441							# warn "id[$id]";
442							# die Dumpvalue::unctrl("id1[$id1]id2[$id2]") unless $id1 eq $id2;
443	0					0	$id;
444							}
445
446							sub _struct {
447	2			2		3	my $self = shift;
448	2	50				8	return $self->[STRUCT] if $self->[STRUCT];
449	2					2	my $struct;
450	2	50				19	if ($DEBUG) {
451	2					9	$self->[DUMPVALUE] = $DV->stringify($self->[RAW]);
452							}
453	2					138	my $derec = Encode::decode("MAB2",$self->[RAW]);
454	2					9550	pos $derec = 0;
455	2					7	for my $k (@$KDocs) {
456	16					30	my $re = "."x$k->[2];
457	16	50				201	$struct->[0]{$k->[3]}[0] = $1 if $derec =~ /\G($re)/gc;
458							##########^ 0=base/kennungsdocs
459	16	50				39	if ($DEBUG) {
460	16	100				65	$struct->[0]{$k->[3]}[1] = $k->[4]{$1} if %{$k->[4]};
	16					63
461							}
462							}
463							warn "ALERT: Datenanfangsadresse nicht 24!" unless
464	2	50				8	(my $daa = $struct->[0]{datenanfangsadresse}[0]) == 24;
465							# ^^^^^^^^^
466
467							# avoid using stringdata in numeric context, because it turns on IOK
468							# or something and the next print prints "24" instead of "00024"
469
470							# strp = structpart of the record
471	2					50	my(@strp) = $derec =~ / \G (\d\d\d) (.) ([^\c^]+) (?: \c] \| \c^ )/xgc; #
472	2					5	my(@str);
473	2					6	while (@strp) {
474	26	50				61	die "Invalid strp" unless @strp >=3;
475	26					64	my $str = [ splice @strp, 0, 3 ];
476	26	50				63	if ($DEBUG) {
477							# die Bezeichnung des Feldes im "real" Record. Da dort alles
478							# Uppercase ist, muessen wir lc nehmen, sonst erschlaegt uns das
479	26	50				88	$str->[3] = $str->[0] ? lc $self->segmentname($str->[0]) : "UNDEF";
480							}
481	26					68	push @str, $str;
482							}
483	2					4	$struct->[1] = \@str;
484	2					7	$self->[STRUCT] = $struct;
485							}
486
487							sub segmentname {
488	18			18	0	24	my $self = shift;
489	18					22	my $rec = shift;
490	18					90	$RDocs->{$rec}[3];
491							}
492
493							sub subrecords {
494	0			0	0		my($self) = shift;
495	0						$self->_struct;
496	0						@{$self->[STRUCT][1]};
	0
497							}
498
499							sub subrecords_ref {
500	0			0	0		my($self) = shift;
501	0						$self->_struct;
502	0						$self->[STRUCT][1];
503							}
504
505							sub date_004 {
506	0			0	0		my($self) = @_;
507	0						my $sr = $self->subrecords_ref;
508	0						for my $i (0..$#$sr) {
509	0	0					next unless $sr->[$i][0] eq "004";
510	0						return $sr->[$i][2];
511							}
512							}
513
514							1;
515
516							# segm000.txt
517							__DATA__