line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!perl |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package MARC::File::MARCMaker; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
MARC::File::MARCMaker -- Work with MARCMaker/MARCBreaker records. |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=cut |
10
|
|
|
|
|
|
|
|
11
|
2
|
|
|
2
|
|
119051
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
67
|
|
12
|
2
|
|
|
2
|
|
9
|
use integer; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
12
|
|
13
|
|
|
|
|
|
|
|
14
|
2
|
|
|
2
|
|
46
|
use vars qw( $VERSION $ERROR ); |
|
2
|
|
|
|
|
8
|
|
|
2
|
|
|
|
|
120
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
$VERSION = 0.05; |
17
|
|
|
|
|
|
|
|
18
|
2
|
|
|
2
|
|
10
|
use MARC::File; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
49
|
|
19
|
2
|
|
|
2
|
|
8
|
use vars qw( @ISA ); @ISA = qw( MARC::File ); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
90
|
|
20
|
|
|
|
|
|
|
|
21
|
2
|
|
|
2
|
|
10
|
use MARC::Record qw( LEADER_LEN ); |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
137
|
|
22
|
2
|
|
|
2
|
|
10
|
use constant SUBFIELD_INDICATOR => "\x24"; #dollar sign |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
479
|
|
23
|
2
|
|
|
2
|
|
13
|
use constant END_OF_FIELD => "\n\x3D"; #line break, equals sign |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
326
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 SYNOPSIS |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
use MARC::File::MARCMaker; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
my $file = MARC::File::MARCMaker->in( $filename ); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
while ( my $marc = $file->next() ) { |
34
|
|
|
|
|
|
|
# Do something |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
$file->close(); |
37
|
|
|
|
|
|
|
undef $file; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
#################################################### |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
use MARC::File::MARCMaker; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
## reading with MARC::Batch |
44
|
|
|
|
|
|
|
my $batch = MARC::Batch->new( 'MARCMaker', $filename ); |
45
|
|
|
|
|
|
|
my $record = $batch->next(); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
## or reading with MARC::File::MARCMaker explicitly |
48
|
|
|
|
|
|
|
my $file = MARC::File::MARCMaker->in( $filename ); |
49
|
|
|
|
|
|
|
my $record = $file->next(); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
## output a single MARC::Record object in MARCMaker format (formatted plain text) |
52
|
|
|
|
|
|
|
#print $record->as_marcmaker(); #goal syntax |
53
|
|
|
|
|
|
|
print MARC::File::MARCMaker->encode($record); #current syntax |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head1 DESCRIPTION |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
The MARC-File-MARCMaker distribution is an extension to the MARC-Record |
58
|
|
|
|
|
|
|
distribution for working with MARC21 data using the format used by the Library |
59
|
|
|
|
|
|
|
of Congress MARCMaker and MARCBreaker programs. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
More information may be obtained here: L<http://www.loc.gov/marc/makrbrkr.html> |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
You must have MARC::Record installed to use MARC::File::MARCMaker. In fact |
64
|
|
|
|
|
|
|
once you install the MARC-File-MARCMaker distribution you will most likely not |
65
|
|
|
|
|
|
|
use it directly, but will have an additional file format available to you |
66
|
|
|
|
|
|
|
when you use MARC::Batch. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
This module is based on code from the original MARC.pm module, as well as the |
69
|
|
|
|
|
|
|
MARC::Record distribution's MARC::File::USMARC and MARC::File::MicroLIF modules. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head2 DEVIATIONS FROM LC'S DOCUMENTATION |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
LC's MARCMaker/MARCBreaker programs require files to have DOS line endings. |
74
|
|
|
|
|
|
|
This module should be capable of reading any type of line ending. |
75
|
|
|
|
|
|
|
It converts existing endings to "\n", the endings of the platform. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Initial version may or may not work well with line breaks in the middle of a field. |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
MARCMaker version of the LDR (record size bytes) will not necessarily be dependable, and should not be relied upon. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 EXPORT |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
None. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head1 TODO |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Do limit tests in filling the buffer and getting chunks. Seems to work for first fill, but may fail on larger reads/multiple reads to fill the buffer. |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
Test special characters (those requiring escapes). Initial version may not fully support non-English characters. All MARC-8 may work, Unicode support is untested and unassured. |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Implement better character encoding and decoding, including Unicode support. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Work on character set internal subs for both input and output. Currently, the original subs from MARC.pm are being used essentially as-is. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Error checking for line breaks vs. new fields? Probably not possible, since line breaks are allowed within fields, so checking for missing equals sign is not really possible. |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
Account for multiple occurrences of =LDR in a single record, usually caused by the lack of a blank line between records, so that records get mushed together. Also check for multiple =001s. |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Determine why the constant SUBFIELD_INDICATOR can't be used in the split into subfields. |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Work on encode(). |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
Allow as_marcmaker() to be called with either MARC::Field or MARC::Record objects, returning the appropriate result. The desired behavior is that of the as_usmarc() methods in MARC::Record and MARC::Field. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Decode should mostly be working. Test for correctness. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Remove unnecessary code and documentation, remnants of the initial development of the module. Move internal subs to end of module? |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head1 VERSION HISTORY |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Version 0.05: First CPAN release, Oct. 30, 2005. |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
Version 0.04: Updated Oct. 22, 2005. Released Oct. 23, 2005. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
-Initial commit to CVS on SourceForge |
116
|
|
|
|
|
|
|
-Misc. cleanup. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
Version 0.03: Updated Aug. 2, 2005. Released Aug. 14, 2005. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
-Revised decode() to fix problem with dollar sign conversion from mnemonics to characters. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Version 0.02: Updated July 12-13, 2005. Released July 16, 2005. |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
-Preliminary version of encode() for fields and records |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Version 0.01: Initial version, Nov. 21, 2004-Mar. 7, 2005. Released Mar. 7, 2005. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
-Basic version, translates .mrk format file into MARC::Record objects. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=for internal |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
############################################################ |
133
|
|
|
|
|
|
|
This section is copied from MARC::File::MicroLIF. |
134
|
|
|
|
|
|
|
############################################################ |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
The buffer must be large enough to handle any valid record because |
137
|
|
|
|
|
|
|
we don't check for cases like a CR/LF pair or an end-of-record/CR/LF |
138
|
|
|
|
|
|
|
trio being only partially in the buffer. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
The max valid record is the max MARC record size (99999) plus one |
141
|
|
|
|
|
|
|
or two characters per tag (CR, LF, or CR/LF). It's hard to say |
142
|
|
|
|
|
|
|
what the max number of tags is, so here we use 6000. (6000 tags |
143
|
|
|
|
|
|
|
can be squeezed into a MARC record only if every tag has only one |
144
|
|
|
|
|
|
|
subfield containing a maximum of one character, or if data from |
145
|
|
|
|
|
|
|
multiple tags overlaps in the MARC record body. We're pretty safe.) |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |
148
|
|
|
|
|
|
|
|
149
|
2
|
|
|
2
|
|
12
|
use constant BUFFER_MIN => (99999 + 6000 * 2); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
14358
|
|
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head1 METHODS |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=cut |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
################################## |
156
|
|
|
|
|
|
|
### START OF MARCMAKER METHODS ### |
157
|
|
|
|
|
|
|
################################## |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 _next (merged from MicroLIF and USMARC) |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Called by MARC::File::next(). |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=cut |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub _next {
    # Hand the next raw MARCMaker record to MARC::File::next().
    #
    # The record text comes from _get_chunk(); every run of CR and/or LF
    # characters in it is then collapsed to a single "\n".
    #
    # Returns the record text, or undef when _get_chunk() has nothing left.
    my ($self) = @_;

    my $record_text = $self->_get_chunk();

    # Nothing to normalise: pass the undef straight through.
    return $record_text unless defined $record_text;

    $record_text =~ s/[\x0d\x0a]+/\n/g;
    return $record_text;
}    #_next
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=head2 decode( $string [, \&filter_func ] ) |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
(description based on MARC::File::USMARC::decode POD information) |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Constructor for handling data from a MARCMaker file. This function takes care |
183
|
|
|
|
|
|
|
of all the tag directory parsing & mangling. |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Any warnings or coercions can be checked in the C<warnings()> function. |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
The C<$filter_func> is an optional reference to a user-supplied function |
188
|
|
|
|
|
|
|
that determines on a tag-by-tag basis if you want the tag passed to it |
189
|
|
|
|
|
|
|
to be put into the MARC record. The function is passed the tag number |
190
|
|
|
|
|
|
|
and the raw tag data, and must return a boolean. The return of a true |
191
|
|
|
|
|
|
|
value tells MARC::File::MARCMaker::decode that the tag should get put into |
192
|
|
|
|
|
|
|
the resulting MARC record. |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
For example, if you only want title and subject tags in your MARC record, |
195
|
|
|
|
|
|
|
try this: |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
sub filter { |
198
|
|
|
|
|
|
|
my ($tagno,$tagdata) = @_; |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
return ($tagno == 245) || ($tagno >= 600 && $tagno <= 699); |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
my $marc = MARC::File::MARCMaker->decode( $string, \&filter ); |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
Why would you want to do such a thing? The big reason is that creating |
206
|
|
|
|
|
|
|
fields is processor-intensive, and if your program is doing read-only |
207
|
|
|
|
|
|
|
data analysis and needs to be as fast as possible, you can save time by |
208
|
|
|
|
|
|
|
not creating fields that you'll be ignoring anyway. |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
Another possible use is if you're only interested in printing certain |
211
|
|
|
|
|
|
|
tags from the record, then you can filter them when you read from disc |
212
|
|
|
|
|
|
|
and not have to delete unwanted tags yourself. |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=cut |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub decode {    #MARCMaker
    # Build a MARC::Record from the text of one MARCMaker record.
    #
    # Accepted call forms:
    #   $object->decode( $string [, \&filter_func ] )
    #   MARC::File::MARCMaker->decode( $string [, \&filter_func ] )
    #   MARC::File::MARCMaker::decode( $string [, \&filter_func ] )
    #
    # $filter_func, when given, is called as $filter_func->( $tagno, $tagdata )
    # for every field; the field is skipped unless it returns true.
    #
    # Returns the MARC::Record. Problems found while parsing are recorded on
    # the record through its _warn() method rather than thrown.
    my $self = shift;

    my $text;
    my $location;
    if ( ref($self) =~ /^MARC::File/ ) {
        # object method: the text is the next argument
        $location = 'in record ' . $self->{recnum};
        $text     = shift;
    }
    else {
        # class method (first argument is the class name) or plain function
        $location = 'in record 1';
        $text = ( defined $self && $self =~ /MARC::File/ ) ? shift : $self;
    }

    my $filter_func = shift;

    my $marc = MARC::Record->new();

    #report improperly passed $text (undefined $text)
    return $marc->_warn( "Unable to retrieve a record string $location" ) unless defined $text;

    # for ease, make the newlines match this platform
    # this has probably already been taken care of at least once, but just in case
    $text =~ s/[\x0d\x0a]+/\n/g;

    #use default charset until that function is revised
    my $charset = usmarc_default();

    # A bare "$" is the end-of-string anchor in a regex, which is why the
    # SUBFIELD_INDICATOR constant cannot be handed to split() as it stands.
    # Quoting it first lets the constant be used.
    my $subfield_sep = quotemeta(SUBFIELD_INDICATOR);

    #Split each record on the "\n=" into the @lines array
    my @lines  = split END_OF_FIELD, $text;
    my $leader = shift @lines;

    # Empty record text produces no lines at all; treat the leader as empty
    # so the checks below report it instead of operating on undef.
    $leader = '' unless defined $leader;

    unless ( $leader =~ /\A=LDR / ) {
        $marc->_warn( "First line must begin with =LDR" );
    }

    # Remove "=LDR" and its separator. The field parsing below expects two
    # separator characters after a tag (data at column 5), so accept a
    # two-blank separator here as well as a single blank.
    $leader =~ s/\A=LDR {1,2}//;
    $leader =~ s/[\n\r]//g;    #remove line endings
    $leader =~ s/\\/ /g;       # substitute " " for \

    #report error if result is not 24 bytes long
    unless ( length($leader) == LEADER_LEN ) {
        $marc->_warn( "Leader must be exactly 24 bytes long" );
    }

    #add leader to the record
    $marc->leader( substr( $leader, 0, LEADER_LEN ) );

    LINE: foreach my $line (@lines) {
        #Remove newlines from $line ; and also substitute " " for \
        $line =~ s/[\n\r]//g;
        $line =~ s/\\/ /g;

        #get the tag name (columns 0-2; the leading "=" went with the split)
        my $tagno = substr( $line, 0, 3 );

        # Check tag validity
        ( $tagno =~ /^[0-9A-Za-z]{3}$/ ) or $marc->_warn( "Invalid tag in $location: \"$tagno\"" );

        if ( ( $tagno =~ /^\d+$/ ) && ( $tagno < 10 ) ) {
            # Control field: everything from column 5 on is data.
            #translate characters for tag data
            my $tagdata = _maker2char( substr( $line, 5 ), $charset );

            if ($filter_func) {
                next LINE unless $filter_func->( $tagno, $tagdata );
            }

            #add field to record
            $marc->append_fields( MARC::Field->new( $tagno, $tagdata ) );
        }    #if $tagno < 10
        else {
            # Data field: indicators at columns 5 and 6, subfields from 7.
            my $ind1    = substr( $line, 5, 1 );
            my $ind2    = substr( $line, 6, 1 );
            my $tagdata = substr( $line, 7 );

            #report error if first character of tagdata is not a subfield indicator ($)
            $marc->_warn( "First character of subfield data must be a subfield indicator (dollar sign), $tagdata, $location for tag $tagno" ) unless ( $tagdata =~ /^\$/ );

            # The filter sees the raw (still mnemonic-encoded) subfield text.
            if ($filter_func) {
                next LINE unless $filter_func->( $tagno, $tagdata );
            }

            my @subfields_mnemonic = split( /$subfield_sep/, $tagdata );

            #convert characters from mnemonics to characters
            my @subfields = map { _maker2char( $_, $charset ) } @subfields_mnemonic;

            # The text before the first "$" should be empty; anything there
            # is stray data.
            my $empty = shift @subfields;
            $marc->_warn( "Subfield data appears before first subfield? $location in $tagno" ) if $empty;

            # Split the subfield data into subfield name and data pairs
            my @subfield_data;
            for my $subfield (@subfields) {
                if ( length($subfield) > 0 ) {
                    push( @subfield_data, substr( $subfield, 0, 1 ), substr( $subfield, 1 ) );
                }
                else {
                    $marc->_warn( "Entirely empty subfield found in tag $tagno" );
                }
            }    #for @subfields

            if ( !@subfield_data ) {
                $marc->_warn( "no subfield data found $location for tag $tagno" );
                next LINE;
            }

            my $field = MARC::Field->new( $tagno, $ind1, $ind2, @subfield_data );
            if ( $field->warnings() ) {
                $marc->_warn( $field->warnings() );
            }
            $marc->append_fields($field);
        }
    }    # looping through all the fields

    return $marc;

}    #decode MARCMaker
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=head2 update_leader() #from USMARC |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
This may be unnecessary code. Delete this section if that is the case. |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
If any changes get made to the MARC record, the first 5 bytes of the |
349
|
|
|
|
|
|
|
leader (the length) will be invalid. This function updates the |
350
|
|
|
|
|
|
|
leader with the correct length of the record as it would be if |
351
|
|
|
|
|
|
|
written out to a file. |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
sub update_leader {    #from USMARC
    # Recompute the record length and base address stored in the leader.
    #
    # NOTE(review): no =cut precedes this sub in the listing, so it appears
    # to sit inside the POD section above and never be compiled -- confirm
    # before relying on it. _build_tag_directory() and _set_leader_lengths()
    # are not defined in the visible part of this module.
    #
    # The empty prototype "()" was dropped: it declared "no arguments" on a
    # sub that reads its invocant from @_.
    my $self = shift;

    my ( undef, undef, $reclen, $baseaddress ) = $self->_build_tag_directory();

    $self->_set_leader_lengths( $reclen, $baseaddress );
}    #update_leader() from USMARC
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=head2 encode() #based on MARC::File::USMARC |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
Returns a string of characters suitable for writing out to a MARCMaker file, |
365
|
|
|
|
|
|
|
including the leader and all the fields (the MARCMaker text format has no directory). |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
Uses as_marcmaker() below to build each field. |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=cut |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
sub encode {    #MARCMaker, based on USMARC's encode()
    # Serialise a record as MARCMaker text.
    #
    # Callable as a function, encode($record), or as a class/object method,
    # MARC::File::MARCMaker->encode($record). $record must provide fields()
    # and leader(); each field is rendered by as_marcmaker().
    #
    # Returns "=LDR", a two-blank separator, the leader, a newline, one line
    # per field, and a closing blank line that separates records.
    my $marc = shift;

    # Method call: the first argument was the MARC::File::* invocant, so the
    # record is the next one.
    $marc = shift if ( ref($marc) || $marc ) =~ /^MARC::File/;

    #convert each field (after the leader) to MARCMaker format
    my $field_string = '';
    foreach my $field ( $marc->fields() ) {
        $field_string .= $field->MARC::File::MARCMaker::as_marcmaker();
    }    #foreach field in record

    # MARCMaker lines put two blanks between the tag (here "LDR") and the
    # data, matching the column layout that decode() reads fields with.
    return join( "", "=LDR  ", $marc->leader, "\n", $field_string, "\n" );

}    #encode from USMARC
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=head2 as_marcmaker() |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
Based on MARC::Field::as_usmarc(). |
389
|
|
|
|
|
|
|
Turns a MARC::Field into a MARCMaker formatted field string. |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=head2 TODO (as_marcmaker()) |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
-Change field encoding portion of as_marcmaker() to internal _as_marcmaker() |
394
|
|
|
|
|
|
|
-Implement as_marcmaker() as wrapper for MARC::Record object and MARC::Field object encoding into MARCMaker format. |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=cut |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
sub as_marcmaker {
    # Render one MARC::Field as a line of MARCMaker text.
    #
    # Intended to be called on a field object, as encode() does:
    #   $field->MARC::File::MARCMaker::as_marcmaker()
    #
    # Returns "=TAG", two blanks, then either the control-field data or the
    # two indicators followed by "$"-prefixed subfields, ending in "\n".
    # Blanks in control-field data and in indicators are written as "\".
    # Dies if the invocant is not a MARC::Field.
    #
    # The empty prototype "()" was dropped because the sub takes an invocant.
    # The former "Leader?" debugging branch was dropped too: it printed to
    # STDOUT and returned print's result as the field text.
    my $self = shift;

    die "Wanted a MARC::Field but got a ", ref($self) unless ref($self) eq "MARC::Field";

    my $charset = ustext_default();

    # Tags < 010 are pretty easy
    if ( $self->is_control_field ) {
        #convert characters to MARCMaker codes
        my $field_data = _char2maker( $self->data(), $charset );

        #swap blank spaces for backslash ( \ )
        $field_data =~ s/ /\\/g;

        # Two blanks after the tag: decode() reads control data at column 5.
        return sprintf "=%s  %s\n", $self->tag(), $field_data;
    }    #if control field

    # Data field: the subfields are stored as a flat code/data list.
    my @subdata = @{ $self->{_subfields} };
    my @subs;
    while (@subdata) {
        my $code = shift @subdata;
        my $data = shift @subdata;

        #convert characters to MARCMaker codes as each subfield goes by
        push( @subs, join( "", SUBFIELD_INDICATOR, $code, _char2maker( $data, $charset ) ) );
    }    # while

    my $ind1 = $self->indicator(1);
    my $ind2 = $self->indicator(2);

    #swap blank for backslash ( \ )
    $ind1 =~ s/ /\\/g;
    $ind2 =~ s/ /\\/g;

    # Two blanks after the tag: decode() reads the indicators at columns 5
    # and 6 and the subfields from column 7.
    return join( "", "=", $self->tag(), "  ", $ind1, $ind2, @subs, "\n" );
}    #as_marcmaker()
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
#################################### |
443
|
|
|
|
|
|
|
###### END USMARC subs ############# |
444
|
|
|
|
|
|
|
#################################### |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
######################################### |
450
|
|
|
|
|
|
|
### begin internal subs from MicroLIF ### |
451
|
|
|
|
|
|
|
######################################### |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
################################# |
454
|
|
|
|
|
|
|
# fill the buffer if we need to # |
455
|
|
|
|
|
|
|
################################# |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
sub _fill_buffer {    #done for MARCMaker?
    # Top up $self->{inputbuf} from $self->{fh} when it holds fewer than
    # BUFFER_MIN characters and the handle is not yet flagged as exhausted.
    #
    # After a read attempt the whole buffer has its line endings rewritten
    # to "\n" and runs of blank lines reduced to one record separator.
    #
    # Returns 1 on success (including "nothing needed doing") and undef on a
    # read error, in which case $MARC::File::ERROR is set.
    my ($self) = @_;
    my $status = 1;

    return $status if $self->{exhaustedfh};
    return $status unless length( $self->{inputbuf} ) < BUFFER_MIN;

    # append the next chunk of bytes to the buffer
    my $bytes_read = read( $self->{fh}, $self->{inputbuf}, BUFFER_MIN, length( $self->{inputbuf} ) );

    my $buffer = \$self->{inputbuf};

    # Only one line-ending style is rewritten per pass: the first of
    # CR/LF (DOS), LF (Unix), CR (Macintosh) that occurs in the buffer.
    my ($line_ending) = grep { ${$buffer} =~ /$_/ } ( "\x0d\x0a", "\x0a", "\x0d" );
    ${$buffer} =~ s/$line_ending/\n/g if defined $line_ending;

    #remove extra blank lines between records
    ${$buffer} =~ s/\n\s*\n+/\n\n/g;

    if ( !defined $bytes_read ) {
        # read() itself failed
        $status = undef;
        $MARC::File::ERROR = "error reading from file " . $self->{filename};
    }
    elsif ( $bytes_read < 1 ) {
        # end of file: do not try to read again
        $self->{exhaustedfh} = 1;
    }

    return $status;
}
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=for internal |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=head2 _get_chunk( ) #for MARCMaker |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
Gets the next chunk of data (which should be a single complete record). |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
All extra \r and \n are stripped and line endings are converted to those of the platform (\n). |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=cut |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
sub _get_chunk {    #done for MARCMaker?
    # Remove and return the next record's text from the input buffer.
    #
    # Records are separated by a blank line ("\n\n"). The separator itself
    # is consumed and not returned. When no separator remains, whatever is
    # left in the buffer is returned as the final chunk.
    #
    # Returns undef when _fill_buffer() fails or no record text is left.
    #
    # Two problems of the split-based version are avoided here:
    #  - a buffer that began with a separator produced an empty first field,
    #    so every remaining record was returned as one chunk and the handle
    #    was flagged as exhausted;
    #  - substr() was asked for an offset past the end of the buffer when
    #    the last record had no trailing separator.
    my $self = shift;

    my $chunk = undef;

    #read from the file and fill the input buffer
    if ( $self->_fill_buffer() && defined $self->{inputbuf} ) {

        # Blank lines ahead of a record carry no data; drop them so the
        # chunk always starts at the record's first line.
        $self->{inputbuf} =~ s/\A\n+//;

        if ( length( $self->{inputbuf} ) > 0 ) {
            my $separator_at = index( $self->{inputbuf}, "\n\n" );

            if ( $separator_at >= 0 ) {
                #retrieve the next record
                $chunk = substr( $self->{inputbuf}, 0, $separator_at );

                #remove the chunk and record separator from the input buffer
                $self->{inputbuf} = substr( $self->{inputbuf}, $separator_at + length("\n\n") );
            }
            else {
                # last record in the buffer: take everything that is left
                $chunk = $self->{inputbuf};
                $self->{inputbuf} = '';
            }
        }
    }    #if buffer can be filled and has characters

    return $chunk;
}    #_get_chunk()
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
=head2 _unget_chunk ( ) #done for MARCMaker? |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
$chunk is put at the beginning of the buffer followed |
528
|
|
|
|
|
|
|
by two line endings ("\n\n") as a record separator. |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
I don't know that this sub is necessary. |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=cut |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
sub _unget_chunk {
    # Push a record back onto the front of the input buffer.
    #
    # $chunk is followed by the record separator ("\n\n") so that the next
    # _get_chunk() call returns it on its own. Without the separator the
    # chunk would run into the record already at the front of the buffer.
    #
    # Does nothing when $chunk is undefined. Returns nothing.
    my $self  = shift;
    my $chunk = shift;

    return unless defined $chunk;

    # Drop trailing newlines so exactly one separator follows the chunk.
    $chunk =~ s/\n+\z//;

    my $rest = defined $self->{inputbuf} ? $self->{inputbuf} : '';
    $self->{inputbuf} = $chunk . "\n\n" . $rest;
    return;
}
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
####################################### |
543
|
|
|
|
|
|
|
### End internal subs from MicroLIF ### |
544
|
|
|
|
|
|
|
####################################### |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
####################################### |
547
|
|
|
|
|
|
|
### Character handling from MARC.pm ### |
548
|
|
|
|
|
|
|
####################################### |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=head2 _char2maker |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
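Pass in a string of characters from a MARC record and a character map (a hash reference giving the MARCMaker form of each character; as_marcmaker() passes ustext_default()). No default map is applied if the argument is omitted. |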
553
|
|
|
|
|
|
|
Returns string of characters encoded in MARCMaker format. |
554
|
|
|
|
|
|
|
(e.g. replaces '$' with {dollar}) |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=cut |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
sub _char2maker {    #deal with charmap default
    # Translate record text into MARCMaker notation.
    #
    # Arguments: the text to translate, and a hash reference that is looked
    # up once per character of that text. No default map is applied.
    #
    # Returns the looked-up values joined together, with every entity-style
    # code such as "&acute;" rewritten as "{acute}".
    my ( $marc_text, $charmap ) = @_;

    my $maker_string = '';
    for my $character ( split //, $marc_text ) {
        $maker_string .= ${$charmap}{$character};
    }

    # Rewrite "&name;" as "{name}", one occurrence per pass, until none is
    # left (names of 1 to 7 non-blank characters).
    1 while $maker_string =~ s/(&)([^ ]{1,7}?)(;)/{$2}/o;

    return $maker_string;
}    #_char2maker
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
###################### |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
=head2 Default charset |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
usmarc_default() -- Originally from MARC.pm. Offers default mnemonics for character encoding and decoding. |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
Used by _maker2char. |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
This perhaps should be an internal _usmarc_default(). |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=cut |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
sub usmarc_default {
    # Builds the default input character map: MARCMaker mnemonic (the text
    # found between curly braces) => the single character it stands for.
    # Returns a reference to a freshly built hash on every call.

    # Code points without a named mnemonic are keyed by their two-digit
    # uppercase hexadecimal value.
    my %inchar;
    for my $code (
        0x00 .. 0x1a, 0x1c, 0x7f .. 0x8c, 0x8f .. 0xa0, 0xaf, 0xbb,
        0xbe, 0xbf, 0xc7 .. 0xdf, 0xfc, 0xfd, 0xff
      )
    {
        $inchar{ sprintf( "%2.2X", int $code ) } = chr($code);
    }

    # Named mnemonics and the code point each one stands for.
    my %code_for = (
        'esc'      => 0x1b,    # escape
        'dollar'   => 0x24,    # dollar sign
        'curren'   => 0x24,    # dollar sign - alternate
        '24'       => 0x24,    # dollar sign - alternate
        'bsol'     => 0x5c,    # back slash (reverse solidus)
        'lcub'     => 0x7b,    # opening curly brace
        'rcub'     => 0x7d,    # closing curly brace - part 1
        'joiner'   => 0x8d,    # zero width joiner
        'nonjoin'  => 0x8e,    # zero width non-joiner
        'Lstrok'   => 0xa1,    # latin capital letter l with stroke
        'Ostrok'   => 0xa2,    # latin capital letter o with stroke
        'Dstrok'   => 0xa3,    # latin capital letter d with stroke
        'THORN'    => 0xa4,    # latin capital letter thorn (icelandic)
        'AElig'    => 0xa5,    # latin capital letter AE
        'OElig'    => 0xa6,    # latin capital letter OE
        'softsign' => 0xa7,    # modifier letter soft sign
        'middot'   => 0xa8,    # middle dot
        'flat'     => 0xa9,    # musical flat sign
        'reg'      => 0xaa,    # registered sign
        'plusmn'   => 0xab,    # plus-minus sign
        'Ohorn'    => 0xac,    # latin capital letter o with horn
        'Uhorn'    => 0xad,    # latin capital letter u with horn
        'mlrhring' => 0xae,    # modifier letter right half ring (alif)
        'mllhring' => 0xb0,    # modifier letter left half ring (ayn)
        'lstrok'   => 0xb1,    # latin small letter l with stroke
        'ostrok'   => 0xb2,    # latin small letter o with stroke
        'dstrok'   => 0xb3,    # latin small letter d with stroke
        'thorn'    => 0xb4,    # latin small letter thorn (icelandic)
        'aelig'    => 0xb5,    # latin small letter ae
        'oelig'    => 0xb6,    # latin small letter oe
        'hardsign' => 0xb7,    # modifier letter hard sign
        'inodot'   => 0xb8,    # latin small letter dotless i
        'pound'    => 0xb9,    # pound sign
        'eth'      => 0xba,    # latin small letter eth
        'ohorn'    => 0xbc,    # latin small letter o with horn
        'uhorn'    => 0xbd,    # latin small letter u with horn
        'deg'      => 0xc0,    # degree sign
        'scriptl'  => 0xc1,    # latin small letter script l
        'phono'    => 0xc2,    # sound recording copyright
        'copy'     => 0xc3,    # copyright sign
        'sharp'    => 0xc4,    # sharp
        'iquest'   => 0xc5,    # inverted question mark
        'iexcl'    => 0xc6,    # inverted exclamation mark
        'hooka'    => 0xe0,    # combining hook above
        'grave'    => 0xe1,    # combining grave
        'acute'    => 0xe2,    # combining acute
        'circ'     => 0xe3,    # combining circumflex
        'tilde'    => 0xe4,    # combining tilde
        'macr'     => 0xe5,    # combining macron
        'breve'    => 0xe6,    # combining breve
        'dot'      => 0xe7,    # combining dot above
        'diaer'    => 0xe8,    # combining diaeresis
        'uml'      => 0xe8,    # combining umlaut
        'caron'    => 0xe9,    # combining hacek
        'ring'     => 0xea,    # combining ring above
        'llig'     => 0xeb,    # combining ligature left half
        'rlig'     => 0xec,    # combining ligature right half
        'rcommaa'  => 0xed,    # combining comma above right
        'dblac'    => 0xee,    # combining double acute
        'candra'   => 0xef,    # combining candrabindu
        'cedil'    => 0xf0,    # combining cedilla
        'ogon'     => 0xf1,    # combining ogonek
        'dotb'     => 0xf2,    # combining dot below
        'dbldotb'  => 0xf3,    # combining double dot below
        'ringb'    => 0xf4,    # combining ring below
        'dblunder' => 0xf5,    # combining double underscore
        'under'    => 0xf6,    # combining underscore
        'commab'   => 0xf7,    # combining comma below
        'rcedil'   => 0xf8,    # combining right cedilla
        'breveb'   => 0xf9,    # combining breve below
        'ldbltil'  => 0xfa,    # combining double tilde left half
        'rdbltil'  => 0xfb,    # combining double tilde right half
        'commaa'   => 0xfe,    # combining comma above
    );
    while ( my ( $mnemonic, $code ) = each %code_for ) {
        $inchar{$mnemonic} = chr($code);
    }

    # With $MARC::DEBUG set, dump the finished table to standard output.
    if ($MARC::DEBUG) {
        for my $mnemonic ( sort keys %inchar ) {
            printf "%s = %x\n", $mnemonic, ord( $inchar{$mnemonic} );
        }
    }

    return \%inchar;
} #usmarc_default
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
################################################### |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=head2 ustext_default |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
ustext_default -- Originally from MARC.pm. Offers default mnemonics for character encoding and decoding. |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
Used by _char2maker. |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
This perhaps should be an internal _ustext_default(). |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=cut |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
sub ustext_default {
    # Builds the default output character map: single character => the text
    # that represents it in MARCMaker format. Returns a reference to a
    # freshly built hash on every call.

    # Characters written as a two-digit uppercase hex code in curly braces.
    my @hex_codes = (
        0x00 .. 0x1a, 0x1c, 0x7f .. 0x8c, 0x8f .. 0xa0, 0xaf, 0xbb,
        0xbe, 0xbf, 0xc7 .. 0xdf, 0xfc, 0xfd, 0xff
    );

    # Characters that are written out unchanged.
    my @plain_codes = ( 0x20 .. 0x23, 0x25 .. 0x7a, 0x7c, 0x7e );

    # Characters written as a named mnemonic. These are applied last, so the
    # back slash entry replaces the unchanged mapping set up for 0x5c.
    my @named = (
        [ 0x1b, '{esc}' ],         # escape
        [ 0x24, '{dollar}' ],      # dollar sign
        [ 0x5c, '{bsol}' ],        # back slash (reverse solidus)
        [ 0x7b, '{lcub}' ],        # opening curly brace
        [ 0x7d, '{rcub}' ],        # closing curly brace
        [ 0x8d, '{joiner}' ],      # zero width joiner
        [ 0x8e, '{nonjoin}' ],     # zero width non-joiner
        [ 0xa1, '{Lstrok}' ],      # latin capital letter l with stroke
        [ 0xa2, '{Ostrok}' ],      # latin capital letter o with stroke
        [ 0xa3, '{Dstrok}' ],      # latin capital letter d with stroke
        [ 0xa4, '{THORN}' ],       # latin capital letter thorn (icelandic)
        [ 0xa5, '{AElig}' ],       # latin capital letter AE
        [ 0xa6, '{OElig}' ],       # latin capital letter OE
        [ 0xa7, '{softsign}' ],    # modifier letter soft sign
        [ 0xa8, '{middot}' ],      # middle dot
        [ 0xa9, '{flat}' ],        # musical flat sign
        [ 0xaa, '{reg}' ],         # registered sign
        [ 0xab, '{plusmn}' ],      # plus-minus sign
        [ 0xac, '{Ohorn}' ],       # latin capital letter o with horn
        [ 0xad, '{Uhorn}' ],       # latin capital letter u with horn
        [ 0xae, '{mlrhring}' ],    # modifier letter right half ring (alif)
        [ 0xb0, '{mllhring}' ],    # modifier letter left half ring (ayn)
        [ 0xb1, '{lstrok}' ],      # latin small letter l with stroke
        [ 0xb2, '{ostrok}' ],      # latin small letter o with stroke
        [ 0xb3, '{dstrok}' ],      # latin small letter d with stroke
        [ 0xb4, '{thorn}' ],       # latin small letter thorn (icelandic)
        [ 0xb5, '{aelig}' ],       # latin small letter ae
        [ 0xb6, '{oelig}' ],       # latin small letter oe
        [ 0xb7, '{hardsign}' ],    # modifier letter hard sign
        [ 0xb8, '{inodot}' ],      # latin small letter dotless i
        [ 0xb9, '{pound}' ],       # pound sign
        [ 0xba, '{eth}' ],         # latin small letter eth
        [ 0xbc, '{ohorn}' ],       # latin small letter o with horn
        [ 0xbd, '{uhorn}' ],       # latin small letter u with horn
        [ 0xc0, '{deg}' ],         # degree sign
        [ 0xc1, '{scriptl}' ],     # latin small letter script l
        [ 0xc2, '{phono}' ],       # sound recording copyright
        [ 0xc3, '{copy}' ],        # copyright sign
        [ 0xc4, '{sharp}' ],       # sharp
        [ 0xc5, '{iquest}' ],      # inverted question mark
        [ 0xc6, '{iexcl}' ],       # inverted exclamation mark
        [ 0xe0, '{hooka}' ],       # combining hook above
        [ 0xe1, '{grave}' ],       # combining grave
        [ 0xe2, '{acute}' ],       # combining acute
        [ 0xe3, '{circ}' ],        # combining circumflex
        [ 0xe4, '{tilde}' ],       # combining tilde
        [ 0xe5, '{macr}' ],        # combining macron
        [ 0xe6, '{breve}' ],       # combining breve
        [ 0xe7, '{dot}' ],         # combining dot above
        [ 0xe8, '{uml}' ],         # combining diaeresis (umlaut)
        [ 0xe9, '{caron}' ],       # combining hacek
        [ 0xea, '{ring}' ],        # combining ring above
        [ 0xeb, '{llig}' ],        # combining ligature left half
        [ 0xec, '{rlig}' ],        # combining ligature right half
        [ 0xed, '{rcommaa}' ],     # combining comma above right
        [ 0xee, '{dblac}' ],       # combining double acute
        [ 0xef, '{candra}' ],      # combining candrabindu
        [ 0xf0, '{cedil}' ],       # combining cedilla
        [ 0xf1, '{ogon}' ],        # combining ogonek
        [ 0xf2, '{dotb}' ],        # combining dot below
        [ 0xf3, '{dbldotb}' ],     # combining double dot below
        [ 0xf4, '{ringb}' ],       # combining ring below
        [ 0xf5, '{dblunder}' ],    # combining double underscore
        [ 0xf6, '{under}' ],       # combining underscore
        [ 0xf7, '{commab}' ],      # combining comma below
        [ 0xf8, '{rcedil}' ],      # combining right cedilla
        [ 0xf9, '{breveb}' ],      # combining breve below
        [ 0xfa, '{ldbltil}' ],     # combining double tilde left half
        [ 0xfb, '{rdbltil}' ],     # combining double tilde right half
        [ 0xfe, '{commaa}' ],      # combining comma above
    );

    # Later entries in the list win, which gives the precedence described
    # above: hex codes, then unchanged characters, then named mnemonics.
    my %outchar = (
        ( map { ( chr($_), sprintf( "{%2.2X}", int $_ ) ) } @hex_codes ),
        ( map { ( chr($_), chr($_) ) } @plain_codes ),
        ( map { ( chr( $_->[0] ), $_->[1] ) } @named ),
    );

    # With $MARC::DEBUG set, dump the finished table to standard output.
    if ($MARC::DEBUG) {
        for my $char ( sort keys %outchar ) {
            printf "%x = %s\n", ord($char), $outchar{$char};
        }
    }

    return \%outchar;
} #ustext_default
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
#################################################################### |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
=head2 _maker2char
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
_maker2char() -- Translates MARCMaker encoded character into MARC-8 character. |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
=cut |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
sub _maker2char {
    # Translates MARCMaker mnemonics in a string into characters.
    #
    # Arguments:
    #   1. the MARCMaker-encoded string;
    #   2. a reference to a hash that maps each mnemonic (the text between
    #      the curly braces) to its replacement character.
    # Returns the translated string. A mnemonic of the form {name} that is
    # not in the map is rewritten as &name; so that _char2maker can turn it
    # back into {name}.
    my $marc_string = shift;
    my $charmap     = shift;

    # Translate every {mnemonic} in a single left-to-right pass. Replaced
    # text is never scanned again, so a brace produced by {lcub} or {rcub}
    # cannot pair up with neighbouring text and be mistaken for another
    # mnemonic: {lcub}text{rcub} comes out as {text}, not &text;.
    $marc_string =~ s/\{(\w{1,8}?)\}/
        exists( $charmap->{$1} ) ? $charmap->{$1} : '&' . $1 . ';'
    /ge;

    # closing curly brace - part 2: a character map may stand in the
    # placeholder &rcub; for the closing brace (and an {rcub} missing from
    # the map ends up as that placeholder too); turn it into the real
    # character now that all mnemonics have been handled.
    $marc_string =~ s/&rcub;/\x7d/g;

    return $marc_string;
}
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
################################ |
791
|
|
|
|
|
|
|
### END OF MARCMAKER METHODS ### |
792
|
|
|
|
|
|
|
################################ |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
1; |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
=head1 RELATED MODULES |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
L |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
L |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
=head1 SEE ALSO |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
L |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
See L<http://www.loc.gov/marc/makrbrkr.html> for more information about the
807
|
|
|
|
|
|
|
DOS-based MARCMaker and MARCBreaker programs. |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
The methods in this MARCMaker module are based upon MARC::File::USMARC.pm and MARC::File::MicroLIF.pm. |
811
|
|
|
|
|
|
|
Those are distributed with MARC::Record. |
812
|
|
|
|
|
|
|
The underlying code is based on the MARCMaker-related methods in MARC.pm. |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
=head1 LICENSE |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
This code may be distributed under the same terms as Perl itself. |
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
Please note that this module is not a product of or supported by the |
820
|
|
|
|
|
|
|
employers of the various contributors to the code. |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=head1 AUTHOR |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
Bryan Baldus |
825
|
|
|
|
|
|
|
eijabb@cpan.org |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
Copyright (c) 2004-2005. |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
=cut |