File Coverage

blib/lib/Catmandu/Fix/marc_spec.pm
Criterion Covered Total %
statement 31 31 100.0
branch 2 2 100.0
condition 9 10 90.0
subroutine 6 6 100.0
pod 0 1 0.0
total 48 50 96.0


line stmt bran cond sub pod time code
1             package Catmandu::Fix::marc_spec;
2              
3 2     2   633 use Catmandu::Sane;
  2         5  
  2         12  
4 2     2   963 use Catmandu::MARC;
  2         5  
  2         54  
5 2     2   14 use Moo;
  2         3  
  2         8  
6 2     2   1146 use Catmandu::Fix::Has;
  2         1547  
  2         13  
7              
8             with 'Catmandu::Fix::Base';
9              
10             our $VERSION = '1.20';
11              
12             has spec => ( fix_arg=> 1 );
13             has path => ( fix_arg=> 1 );
14             has split => ( fix_opt=> 1 );
15             has join => ( fix_opt=> 1 );
16             has value => ( fix_opt=> 1 );
17             has pluck => ( fix_opt=> 1 );
18             has invert => ( fix_opt=> 1 );
19             has nested_arrays => ( fix_opt=> 1 );
20              
21             sub emit {
22 40     40 0 42894 my ( $self, $fixer ) = @_;
23 40         137 my $path = $fixer->split_path( $self->path );
24 40         958 my $key = $path->[-1];
25 40         135 my $marc_obj = Catmandu::MARC->instance;
26              
27             # Precompile the marc_path to gain some speed
28 40         964 my $spec = $marc_obj->parse_marc_spec( $self->spec );
29 40         47834 my $marc = $fixer->capture($marc_obj);
30 40         2345 my $marc_spec = $fixer->capture($spec);
31 40 100 100     2604 my $marc_opt = $fixer->capture({
      100        
      100        
      50        
      100        
32             '-join' => $self->join // '' ,
33             '-split' => $self->split // 0 ,
34             '-pluck' => $self->pluck // 0 ,
35             '-nested_arrays' => $self->nested_arrays // 0 ,
36             '-invert' => $self->invert // 0 ,
37             '-value' => $self->value ,
38             '-force_array' => ($key =~ /^(\$.*|[0-9]+)$/) ? 1 : 0
39             });
40              
41 40         2615 my $var = $fixer->var;
42 40         256 my $result = $fixer->generate_var;
43 40         1317 my $current_value = $fixer->generate_var;
44              
45 40         1316 my $perl = "";
46 40         96 $perl .= $fixer->emit_declare_vars($current_value, "[]");
47 40         430 $perl .=<<EOF;
48             if (defined(my ${result} = ${marc}->marc_spec(
49             ${var},
50             ${marc_spec},
51             ${marc_opt})) ) {
52             ${result} = ref(${result}) ? ${result} : [${result}];
53             for ${current_value} (\@{${result}}) {
54             EOF
55              
56             $perl .= $fixer->emit_create_path(
57             $var,
58             $path,
59             sub {
60 40     40   7226 my $var2 = shift;
61 40         108 "${var2} = ${current_value}"
62             }
63 40         192 );
64              
65 40         474 $perl .=<<EOF;
66             }
67             }
68             EOF
69 40         137 $perl;
70             }
71              
72             1;
73              
74             __END__
75              
76             =encoding utf-8
77              
78             =head1 NAME
79              
80             Catmandu::Fix::marc_spec - reference MARC values via
81             L<MARCspec - A common MARC record path language|http://marcspec.github.io/MARCspec/>
82              
83             =head1 SYNOPSIS
84              
85             In a fix file e.g. 'my.fix':
86              
87             # Assign value of MARC leader to my.ldr.all
88             marc_spec('LDR', my.ldr.all)
89              
90             # Assign values of all subfields of field 245 as a joined string
91             marc_spec('245', my.title.all)
92              
93             # If field 245 exists, set string 'the title' as the value of my.title.default
94             marc_spec('245', my.title.default, value:'the title')
95              
96             # Assign values of all subfields of every field 650 to my.subjects.all
97             # as a joined string
98             marc_spec('650', my.subjects.all)
99              
100             # Same as above with joining characters '###'
101             marc_spec('650', my.subjects.all, join:'###')
102              
103             # Same as above but added as an element to the array my.append.subjects
104             marc_spec('650', my.append.subjects.$append, join:'###')
105              
106             # Every value of a subfield will be an array element
107             marc_spec('650', my.split.subjects, split:1)
108              
109             # Assign values of all subfields of all fields having indicator 1 = 1
110             # and indicator 2 = 0 to the my.fields.indicators10 array.
111             marc_spec('..._10', my.fields.indicators10.$append)
112              
113             # Assign first four characters of leader to my.firstcharpos.ldr
114             marc_spec('LDR/0-3', my.firstcharpos.ldr)
115              
116             # Assign last four characters of leader to my.lastcharpos.ldr
117             marc_spec('LDR/#-3', my.lastcharpos.ldr)
118              
119             # Assign value of subfield a of field 245 to my.title.proper
120             marc_spec('245$a', my.title.proper)
121              
122             # Assign first two characters of subfield a of field 245 to my.title.proper
123             marc_spec('245$a/0-1', my.title.charpos)
124              
125             # Assign all subfields of second field 650 to my.second.subject
126             marc_spec('650[1]', my.second.subject)
127              
128             # Assign values of all subfields of last field 650 to my.last.subject
129             marc_spec('650[#]', my.last.subject)
130              
131             # Assign an array of values of all subfields of the first two fields 650
132             # to my.two.split.subjects
133             marc_spec('650[0-1]', my.two.split.subjects, split:1)
134              
135             # Assign a joined string of values of all subfields of the last two fields 650
136             # to my.two.join.subjects
137             marc_spec('650[#-1]', my.two.join.subjects, join:'###')
138              
139              
140             # Assign value of first subfield a of all fields 020 to my.isbn.number
141             marc_spec('020$a[0]', my.isbn.number)
142              
143             # Assign value of first subfield q of first field 020 to my.isbn.qual.one
144             marc_spec('020[0]$q[0]', my.isbn.qual.one)
145              
146             # Assign values of subfield q and a in the order stated as an array
147             # to my.isbns.pluck.all
148             # without option 'pluck:1' the elments will be in 'natural' order
149             # see example below
150             marc_spec('020$q$a', my.isbns.pluck.all, split:1, pluck:1)
151              
152             # Assign value of last subfield q and second subfield a
153             # in 'natural' order of last field 020 as an array to my.isbn.qual.other
154             marc_spec('020[#]$q[#]$a[1]', my.isbn.qual.other, split:1)
155              
156             # Assign first five characters of value of last subfield q and last character
157             # of value of second subfield a in 'natural' order of all fields 020
158             # as an array to my.isbn.qual.substring.other
159             marc_spec('020$q[#]/0-4$a[1]/#', my.isbn.qual.substring.other, split:1)
160              
161             # Assign values of all subfields other than a of field 020
162             # to my.isbn.other.subfields
163             marc_spec('020$a', my.isbn.other.subfields, invert:1)
164              
165             # Assign value of subfield a of field 245 only, if subfield a of field 246
166             # with value 1 for indicator1 exists
167             marc_spec('245$a{246_1$a}', my.var.title)
168              
169             And then on command line:
170              
171             catmandu convert MARC to YAML --fix my.fix < perl_books.mrc
172              
173             See L<Catmandu Importers|http://librecat.org/Catmandu/#importers> and
174             L<Catmandu Fixes|http://librecat.org/Catmandu/#fixes> for a deeper
175             understanding of how L<Catmandu|http://librecat.org/> works.
176              
177             =head1 DESCRIPTION
178              
179             L<Catmandu::Fix::marc_spec|Catmandu::Fix::marc_spec> is a fix for the
180             famous L<Catmandu Framework|Catmandu>.
181              
182             For the most part it behaves like
183             L<Catmandu::Fix::marc_map|Catmandu::Fix::marc_map> , but has a more fine
184             grained method to reference MARC data content.
185              
186             See L<MARCspec - A common MARC record path language|http://marcspec.github.io/MARCspec/>
187             for documentation on the path syntax.
188              
189             =head1 METHODS
190              
191             =head2 marc_spec(MARCspec, JSON_PATH, OPT:VAL, OPT2:VAL,...)
192              
193             First parameter must be a string, following the syntax of
194             L<MARCspec - A common MARC record path language|http://marcspec.github.io/MARCspec/>.
195             Always use single quotes with this first parameter.
196              
197             Second parameter is a string describing the variable or the variable path
198             to assign referenced values to
199             (see L<Catmandu Paths|http://librecat.org/Catmandu/#paths>).
200              
201             You may use one of $first, $last, $prepend or $append to add
202             referenced data values to a specific position of an array
203             (see L<Catmandu Wildcards|http://librecat.org/Catmandu/#wildcards> and
204             mapping rules at L<https://github.com/LibreCat/Catmandu-MARC/wiki/Mapping-rules>).
205              
206             # INPUT
207             [245,1,0,"a","Cross-platform Perl /","c","Eric F. Johnson."]
208              
209             # CALL
210             marc_spec('245', my.title.$append)
211              
212             # OUTPUT
213             {
214             my {
215             title [
216             [0] "Cross-platform Perl /Eric F. Johnson."
217             ]
218             }
219              
220             }
221              
222             Third and every other parameters are optional and must
223             be in the form of key:value (see L</"OPTIONS"> for a deeper
224             understanding of options).
225              
226             =head1 OPTIONS
227              
228             =head2 split: 0|1
229              
230             If split is set to 1, every fixed field value or every subfield will be
231             an array element.
232              
233             # INPUT
234             [650," ",0,"a","Perl (Computer program language)"],
235             [650," ",0,"a","Web servers."]
236              
237             # CALL
238             marc_spec('650', my.subjects, split:1)
239              
240             # OUTPUT
241             {
242             my {
243             subjects [
244             [0] "Perl (Computer program language)",
245             [1] "Web servers."
246             ]
247             }
248             }
249              
250             See split mapping rules at L<https://github.com/LibreCat/Catmandu-MARC/wiki/Mapping-rules>.
251              
252              
253             =head2 nested_arrays: 0|1
254              
255             Using the nested_arrays
256             option the output will be an array of array of strings (one array item for
257             each matched field, one array of strings for each matched subfield).
258              
259             # INPUT
260             [650," ",0,"a","Perl (Computer program language)"],
261             [650," ",0,"a","Web servers."]
262              
263             # CALL
264             marc_spec('650', my.subjects, nested_arrays:1)
265              
266             # OUTPUT
267             {
268             my {
269             subjects [
270             [0] [
271             [0] "Perl (Computer program language)"
272             ]
273             [1] [
274             [0] "Web servers."
275             ]
276             ]
277             }
278             }
279              
280             See nested_arrays mapping rules at L<https://github.com/LibreCat/Catmandu-MARC/wiki/Mapping-rules>.
281              
282             =head2 join: Str
283              
284             If set, value of join will be used to join the referenced data content.
285             This will only have an effect if option split is undefined (not set or set to 0).
286              
287             # INPUT
288             [650," ",0,"a","Perl (Computer program language)"],
289             [650," ",0,"a","Web servers."]
290              
291             # CALL
292             marc_spec('650', my.subjects, join:'###')
293              
294             # OUTPUT
295             {
296             my {
297             subjects "Perl (Computer program language)###Web servers."
298             }
299             }
300              
301             =head2 pluck: 0|1
302              
303             This only has an effect on subfield values. By default subfields are
304             referenced in 'natural' order (first numbers 0 to 9 and then letters a to z).
305              
306             # INPUT
307             ["020"," ", " ","a","0491001304","q","black leather"]
308              
309             # CALL
310             marc_spec('020$q$a', my.isbn, split:1)
311              
312             # OUTPUT
313             {
314             my {
315             isbn [
316             [0] 0491001304,
317             [1] "black leather"
318             ]
319             }
320             }
321              
322              
323             If pluck is set to 1, values will be referenced by the order stated in the
324             MARCspec.
325              
326             # INPUT
327             ["020"," ", " ","a","0491001304","q","black leather"]
328              
329             # CALL
330             marc_spec('020$q$a', my.isbn, split:1, pluck:1)
331              
332             # OUTPUT
333             {
334             my {
335             isbn [
336             [0] "black leather",
337             [1] 0491001304
338             ]
339             }
340             }
341              
342             =head2 value: Str
343              
344             If set to a value, this value will be assigned to $var if MARCspec references
345             data content (if the field or subfield exists).
346              
347             In case two or more subfields are referenced, the value will be assigned to $var if
348             at least one of them exists:
349              
350             # INPUT
351             ["020"," ", " ","a","0491001304"]
352              
353             # CALL
354             marc_spec('020$a$q', my.isbn, value:'one subfield exists')
355              
356             # OUTPUT
357             {
358             my {
359             isbn "one subfield exists"
360             }
361             }
362              
363             =head2 invert: 0|1
364              
365             This has only an effect on subfields (values). If set to 1 it will invert the
366             last pattern for every subfield. E.g.
367              
368             # references all subfields but not subfield a and q
369             marc_spec('020$a$q', my.other.subfields, invert:1)
370              
371             # references all subfields but not subfield a and not the last repetition
372             # of subfield q
373             marc_spec('020$a$q[#]', my.other.subfields, invert:1)
374              
375             # references all but not the last two characters of first subfield a
376             marc_spec('020$a[0]/#-1', my.other.subfields, invert:1)
377              
378             Invert will not work with subspecs.
379              
380             =head1 INLINE
381              
382             This Fix can be used inline in a Perl script:
383              
384             use Catmandu::Fix::marc_spec as => 'marc_spec';
385              
386             my $data = { record => [...] };
387              
388             $data = marc_spec($data,'245$a','title');
389              
390             print $data->{title} , "\n";
391              
392             =head1 SEE ALSO
393              
394             L<Catmandu::Fix>
395             L<Catmandu::Fix::marc_map>
396              
397             =head1 AUTHOR
398              
399             Carsten Klee E<lt>klee@cpan.orgE<gt>
400              
401             =head1 CONTRIBUTORS
402              
403             =over
404              
405             =item * Johann Rolschewski, C<< <jorol at cpan> >>,
406              
407             =item * Patrick Hochstenbach, C<< <patrick.hochstenbach at ugent.be> >>,
408              
409             =item * Nicolas Steenlant, C<< <nicolas.steenlant at ugent.be> >>
410              
411             =back
412              
413             =head1 LICENSE AND COPYRIGHT
414              
415             This library is free software; you can redistribute it and/or modify
416             it under the same terms as Perl itself.
417              
418             =head1 SEE ALSO
419              
420             =over
421              
422             =item * L<MARCspec - A common MARC record path language|http://marcspec.github.io/MARCspec/>
423              
424             =item * L<Catmandu|http://librecat.org/>
425              
426             =item * L<Catmandu Importers|http://librecat.org/Catmandu/#importers>
427              
428             =item * L<Catmandu Importers|http://librecat.org/Catmandu/#importers>
429              
430             =item * L<Catmandu Fixes|http://librecat.org/Catmandu/#fixes>
431              
432             =item * L<Catmandu::Fix::marc_map|Catmandu::Fix::marc_map>
433              
434             =item * L<Catmandu Paths|http://librecat.org/Catmandu/#paths>
435              
436             =item * L<Catmandu Wildcards|http://librecat.org/Catmandu/#wildcards>
437              
438             =item * L<MARC::Spec|MARC::Spec>
439              
440             =item * L<Catmandu::Fix|Catmandu::Fix>
441              
442             =item * L<Catmandu::MARC|Catmandu::MARC>
443              
444             =back
445              
446             =cut