line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package File::BOM; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
File::BOM - Utilities for handling Byte Order Marks |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use File::BOM qw( :all ) |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head2 high-level functions |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# read a file with encoding from the BOM: |
14
|
|
|
|
|
|
|
open_bom(FH, $file) |
15
|
|
|
|
|
|
|
open_bom(FH, $file, ':utf8') # the same but with a default encoding |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# get encoding too |
18
|
|
|
|
|
|
|
$encoding = open_bom(FH, $file, ':utf8'); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# open a potentially unseekable file: |
21
|
|
|
|
|
|
|
($encoding, $spillage) = open_bom(FH, $file, ':utf8'); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# change encoding of an open handle according to BOM |
24
|
|
|
|
|
|
|
$encoding = defuse(*HANDLE); |
25
|
|
|
|
|
|
|
($encoding, $spillage) = defuse(*HANDLE); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Decode a string according to leading BOM: |
28
|
|
|
|
|
|
|
$unicode = decode_from_bom($string_with_bom); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Decode a string and get the encoding: |
31
|
|
|
|
|
|
|
($unicode, $encoding) = decode_from_bom($string_with_bom) |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head2 PerlIO::via interface |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Read the Right Thing from a unicode file with BOM: |
36
|
|
|
|
|
|
|
open(HANDLE, '<:via(File::BOM)', $filename) |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# Writing little-endian UTF-16 file with BOM: |
39
|
|
|
|
|
|
|
open(HANDLE, '>:encoding(UTF-16LE):via(File::BOM)', $filename) |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head2 lower-level functions |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# read BOM encoding from a filehandle: |
45
|
|
|
|
|
|
|
$encoding = get_encoding_from_filehandle(FH) |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# Get encoding even if FH is unseekable: |
48
|
|
|
|
|
|
|
($encoding, $spillage) = get_encoding_from_filehandle(FH); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# Get encoding from a known unseekable handle: |
51
|
|
|
|
|
|
|
($encdoing, $spillage) = get_encoding_from_stream(FH); |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# get encoding and BOM length from BOM at start of string: |
54
|
|
|
|
|
|
|
($encoding, $offset) = get_encoding_from_bom($string); |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head2 variables |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# print a BOM for a known encoding |
59
|
|
|
|
|
|
|
print FH $enc2bom{$encoding}; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# get an encoding from a known BOM |
62
|
|
|
|
|
|
|
$enc = $bom2enc{$bom} |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=head1 DESCRIPTION |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
This module provides functions for handling unicode byte order marks, which are |
67
|
|
|
|
|
|
|
to be found at the beginning of some files and streams. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
For details about what a byte order mark is, see |
70
|
|
|
|
|
|
|
L |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
The intention of File::BOM is for files with BOMs to be readable as seamlessly |
73
|
|
|
|
|
|
|
as possible, regardless of the encoding used. To that end, several different |
74
|
|
|
|
|
|
|
interfaces are available, as shown in the synopsis above. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=cut |
77
|
|
|
|
|
|
|
|
78
|
16
|
|
|
16
|
|
1838904
|
use strict; |
|
16
|
|
|
|
|
361
|
|
|
16
|
|
|
|
|
580
|
|
79
|
16
|
|
|
16
|
|
81
|
use warnings; |
|
16
|
|
|
|
|
32
|
|
|
16
|
|
|
|
|
637
|
|
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# We don't want any character semantics at all |
82
|
16
|
|
|
16
|
|
104
|
use bytes; |
|
16
|
|
|
|
|
17
|
|
|
16
|
|
|
|
|
100
|
|
83
|
|
|
|
|
|
|
|
84
|
16
|
|
|
16
|
|
475
|
use base qw( Exporter ); |
|
16
|
|
|
|
|
20
|
|
|
16
|
|
|
|
|
1533
|
|
85
|
|
|
|
|
|
|
|
86
|
16
|
|
|
16
|
|
9957
|
use Readonly; |
|
16
|
|
|
|
|
67349
|
|
|
16
|
|
|
|
|
903
|
|
87
|
|
|
|
|
|
|
|
88
|
16
|
|
|
16
|
|
125
|
use Carp qw( croak ); |
|
16
|
|
|
|
|
32
|
|
|
16
|
|
|
|
|
733
|
|
89
|
16
|
|
|
16
|
|
80
|
use Fcntl qw( :seek ); |
|
16
|
|
|
|
|
32
|
|
|
16
|
|
|
|
|
2413
|
|
90
|
16
|
|
|
16
|
|
127
|
use Encode qw( :DEFAULT :fallbacks is_utf8 ); |
|
16
|
|
|
|
|
32
|
|
|
16
|
|
|
|
|
2901
|
|
91
|
16
|
|
|
16
|
|
136
|
use Symbol qw( gensym qualify_to_ref ); |
|
16
|
|
|
|
|
53
|
|
|
16
|
|
|
|
|
36568
|
|
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
my @subs = qw( |
94
|
|
|
|
|
|
|
open_bom |
95
|
|
|
|
|
|
|
defuse |
96
|
|
|
|
|
|
|
decode_from_bom |
97
|
|
|
|
|
|
|
get_encoding_from_bom |
98
|
|
|
|
|
|
|
get_encoding_from_filehandle |
99
|
|
|
|
|
|
|
get_encoding_from_stream |
100
|
|
|
|
|
|
|
); |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
my @vars = qw( %bom2enc %enc2bom ); |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
our $VERSION = '0.17'; |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
our @EXPORT = (); |
107
|
|
|
|
|
|
|
our @EXPORT_OK = ( @subs, @vars ); |
108
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( |
109
|
|
|
|
|
|
|
all => \@EXPORT_OK, |
110
|
|
|
|
|
|
|
subs => \@subs, |
111
|
|
|
|
|
|
|
vars => \@vars |
112
|
|
|
|
|
|
|
); |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head1 EXPORTS |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Nothing by default. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head2 symbols |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=over 4 |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item * open_bom() |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item * defuse() |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item * decode_from_bom() |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=item * get_encoding_from_filehandle() |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=item * get_encoding_from_stream() |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=item * get_encoding_from_bom() |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item * %bom2enc |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item * %enc2bom |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=back |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head2 tags |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=over 4 |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=item * :all |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
All of the above |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=item * :subs |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
subroutines only |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=item * :vars |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
just %bom2enc and %enc2bom |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=back |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=cut |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head1 VARIABLES |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head2 %bom2enc |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Maps Byte Order marks to their encodings. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
The keys of this hash are strings which represent the BOMs, the values are their |
167
|
|
|
|
|
|
|
encodings, in a format which is understood by L |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
The encodings represented in this hash are: UTF-8, UTF-16BE, UTF-16LE, |
170
|
|
|
|
|
|
|
UTF-32BE and UTF-32LE |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=head2 %enc2bom |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
A reverse-lookup hash for bom2enc, with a few aliases used in L, namely utf8, iso-10646-1 and UCS-2. |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Note that UTF-16, UTF-32 and UCS-4 are not included in this hash. Mainly |
177
|
|
|
|
|
|
|
because Encode::encode automatically puts BOMs on output. See L |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=cut |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
our(%bom2enc, %enc2bom, $MAX_BOM_LENGTH, $bom_re); |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# length in bytes of the longest BOM |
184
|
|
|
|
|
|
|
$MAX_BOM_LENGTH = 4; |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
Readonly %bom2enc => ( |
187
|
|
|
|
|
|
|
map { encode($_, "\x{feff}") => $_ } qw( |
188
|
|
|
|
|
|
|
UTF-8 |
189
|
|
|
|
|
|
|
UTF-16BE |
190
|
|
|
|
|
|
|
UTF-16LE |
191
|
|
|
|
|
|
|
UTF-32BE |
192
|
|
|
|
|
|
|
UTF-32LE |
193
|
|
|
|
|
|
|
) |
194
|
|
|
|
|
|
|
); |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Readonly %enc2bom => ( |
197
|
|
|
|
|
|
|
reverse(%bom2enc), |
198
|
|
|
|
|
|
|
map { $_ => encode($_, "\x{feff}") } qw( |
199
|
|
|
|
|
|
|
UCS-2 |
200
|
|
|
|
|
|
|
iso-10646-1 |
201
|
|
|
|
|
|
|
utf8 |
202
|
|
|
|
|
|
|
) |
203
|
|
|
|
|
|
|
); |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
{ |
206
|
|
|
|
|
|
|
local $" = '|'; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
my @bombs = sort { length $b <=> length $a } keys %bom2enc; |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
Readonly $MAX_BOM_LENGTH => length $bombs[0]; |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
Readonly $bom_re => qr/^(@bombs)/o; |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=head1 FUNCTIONS |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head2 open_bom |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
$encoding = open_bom(HANDLE, $filename, $default_mode) |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
($encoding, $spill) = open_bom(HANDLE, $filename, $default_mode) |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
opens HANDLE for reading on $filename, setting the mode to the appropriate |
224
|
|
|
|
|
|
|
encoding for the BOM stored in the file. |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
On failure, a fatal error is raised, see the DIAGNOSTICS section for details on |
227
|
|
|
|
|
|
|
how to catch these. This is in order to allow the return value(s) to be used for |
228
|
|
|
|
|
|
|
other purposes. |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
If the file doesn't contain a BOM, $default_mode is used instead. Hence: |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
open_bom(FH, 'my_file.txt', ':utf8') |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
Opens my_file.txt for reading in an appropriate encoding found from the BOM in |
235
|
|
|
|
|
|
|
that file, or as a UTF-8 file if none is found. |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
In the absence of a $default_mode argument, the following 2 calls should be equivalent: |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
open_bom(FH, 'no_bom.txt'); |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
open(FH, '<', 'no_bom.txt'); |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
If an undefined value is passed as the handle, a symbol will be generated for it |
244
|
|
|
|
|
|
|
like open() does: |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# create filehandle on the fly |
247
|
|
|
|
|
|
|
$enc = open_bom(my $fh, $filename, ':utf8'); |
248
|
|
|
|
|
|
|
$line = <$fh>; |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
The filehandle will be cued up to read after the BOM. Unseekable files (e.g. |
251
|
|
|
|
|
|
|
fifos) will cause croaking, unless called in list context to catch spillage |
252
|
|
|
|
|
|
|
from the handle. Any spillage will be automatically decoded from the encoding, |
253
|
|
|
|
|
|
|
if found. |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
e.g. |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# croak if my_socket is unseekable |
258
|
|
|
|
|
|
|
open_bom(FH, 'my_socket'); |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# keep spillage if my_socket is unseekable |
261
|
|
|
|
|
|
|
($encoding, $spillage) = open_bom(FH, 'my_socket'); |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# discard any spillage from open_bom |
264
|
|
|
|
|
|
|
($encoding) = open_bom(FH, 'my_socket'); |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=cut |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
sub open_bom (*$;$) { |
269
|
120
|
|
|
120
|
1
|
8779578
|
my($fh, $filename, $mode) = @_; |
270
|
120
|
100
|
|
|
|
824
|
if (defined $fh) { |
271
|
72
|
|
|
|
|
336
|
$fh = qualify_to_ref($fh, caller); |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
else { |
274
|
48
|
|
|
|
|
1673
|
$fh = $_[0] = gensym(); |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
|
277
|
120
|
|
|
|
|
6263
|
my $enc; |
278
|
120
|
|
|
|
|
760
|
my $spill = ''; |
279
|
|
|
|
|
|
|
|
280
|
120
|
100
|
|
|
|
12039
|
open($fh, '<', $filename) |
281
|
|
|
|
|
|
|
or croak "Couldn't read '$filename': $!"; |
282
|
|
|
|
|
|
|
|
283
|
111
|
100
|
|
|
|
674
|
if (wantarray) { |
284
|
38
|
|
|
|
|
773
|
($enc, $spill) = get_encoding_from_filehandle($fh); |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
else { |
287
|
73
|
|
|
|
|
246
|
$enc = get_encoding_from_filehandle($fh); |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
111
|
100
|
|
|
|
314
|
if ($enc) { |
291
|
95
|
|
|
|
|
492
|
$mode = ":encoding($enc)"; |
292
|
|
|
|
|
|
|
|
293
|
95
|
100
|
|
|
|
713
|
$spill = decode($enc, $spill, FB_CROAK) if $spill; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
111
|
100
|
|
|
|
1731
|
if ($mode) { |
297
|
13
|
100
|
|
13
|
|
160
|
binmode($fh, $mode) |
|
13
|
|
|
|
|
28
|
|
|
13
|
|
|
|
|
134
|
|
|
96
|
|
|
|
|
2680
|
|
298
|
|
|
|
|
|
|
or croak "Couldn't set binmode of handle opened on '$filename' " |
299
|
|
|
|
|
|
|
. "to '$mode': $!"; |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
|
302
|
110
|
100
|
|
|
|
21596
|
return wantarray ? ($enc, $spill) : $enc; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
=head2 defuse |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
$enc = defuse(FH); |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
($enc, $spill) = defuse(FH); |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
FH should be a filehandle opened for reading, it will have the relevant encoding |
312
|
|
|
|
|
|
|
layer pushed onto it be binmode if a BOM is found. Spillage should be Unicode, |
313
|
|
|
|
|
|
|
not bytes. |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
Any uncaptured spillage will be silently lost. If the handle is unseekable, use |
316
|
|
|
|
|
|
|
list context to avoid data loss. |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
If no BOM is found, the mode will be unaffected. |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=cut |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub defuse (*) { |
323
|
102
|
|
|
102
|
1
|
9490860
|
my $fh = qualify_to_ref(shift, caller); |
324
|
|
|
|
|
|
|
|
325
|
102
|
|
|
|
|
3178
|
my($enc, $spill) = get_encoding_from_filehandle($fh); |
326
|
|
|
|
|
|
|
|
327
|
102
|
100
|
|
|
|
332
|
if ($enc) { |
328
|
90
|
|
|
|
|
2616
|
binmode($fh, ":encoding($enc)"); |
329
|
90
|
100
|
|
|
|
8136
|
$spill = decode($enc, $spill, FB_CROAK) if $spill; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
102
|
100
|
|
|
|
1252
|
return wantarray ? ($enc, $spill) : $enc; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head2 decode_from_bom |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
$unicode_string = decode_from_bom($string, $default, $check) |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
($unicode_string, $encoding) = decode_from_bom($string, $default, $check) |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
Reads a BOM from the beginning of $string, decodes $string (minus the BOM) and |
342
|
|
|
|
|
|
|
returns it to you as a perl unicode string. |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
if $string doesn't have a BOM, $default is used instead. |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
$check, if supplied, is passed to Encode::decode as the third argument. |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
If there's no BOM and no default, the original string is returned and encoding |
349
|
|
|
|
|
|
|
is ''. |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
See L |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=cut |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
sub decode_from_bom ($;$$) { |
356
|
219
|
|
|
219
|
1
|
208893
|
my($string, $default, $check) = @_; |
357
|
|
|
|
|
|
|
|
358
|
219
|
100
|
|
|
|
645
|
croak "No string" unless defined $string; |
359
|
|
|
|
|
|
|
|
360
|
216
|
|
|
|
|
456
|
my($enc, $off) = get_encoding_from_bom($string); |
361
|
216
|
|
100
|
|
|
624
|
$enc ||= $default; |
362
|
|
|
|
|
|
|
|
363
|
216
|
|
|
|
|
312
|
my $out; |
364
|
216
|
100
|
|
|
|
480
|
if (defined $enc) { |
365
|
204
|
|
|
|
|
684
|
$out = decode($enc, substr($string, $off), $check); |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
else { |
368
|
12
|
|
|
|
|
48
|
$out = $string; |
369
|
12
|
|
|
|
|
36
|
$enc = ''; |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
|
372
|
216
|
100
|
|
|
|
11112
|
return wantarray ? ($out, $enc) : $out; |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=head2 get_encoding_from_filehandle |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
$encoding = get_encoding_from_filehandle(HANDLE) |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
($encoding, $spillage) = get_encoding_from_filehandle(HANDLE) |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
Returns the encoding found in the given filehandle. |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
The handle should be opened in a non-unicode way (e.g. mode '<:bytes') so that |
384
|
|
|
|
|
|
|
the BOM can be read in its natural state. |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
After calling, the handle will be set to read at a point after the BOM (or at |
387
|
|
|
|
|
|
|
the beginning of the file if no BOM was found) |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
If called in scalar context, unseekable handles cause a croak(). |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
If called in list context, unseekable handles will be read byte-by-byte and any |
392
|
|
|
|
|
|
|
spillage will be returned. See get_encoding_from_stream() |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=cut |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
sub get_encoding_from_filehandle (*) { |
397
|
303
|
|
|
303
|
1
|
96758
|
my $fh = qualify_to_ref(shift, caller); |
398
|
|
|
|
|
|
|
|
399
|
303
|
|
|
|
|
5039
|
my $enc; |
400
|
303
|
|
|
|
|
972
|
my $spill = ''; |
401
|
303
|
100
|
|
|
|
3712
|
if (seek($fh, 0, SEEK_SET)) { |
|
|
100
|
|
|
|
|
|
402
|
234
|
|
|
|
|
753
|
$enc = _get_encoding_seekable($fh); |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
elsif (wantarray) { |
405
|
67
|
|
|
|
|
1429
|
($enc, $spill) = _get_encoding_unseekable($fh); |
406
|
|
|
|
|
|
|
} |
407
|
|
|
|
|
|
|
else { |
408
|
2
|
|
|
|
|
40
|
croak "Unseekable handle: $!"; |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
|
411
|
301
|
100
|
|
|
|
1599
|
return wantarray ? ($enc, $spill) : $enc; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=head2 get_encoding_from_stream |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
($encoding, $spillage) = get_encoding_from_stream(*FH); |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
Read a BOM from an unrewindable source. This means reading the stream one byte |
419
|
|
|
|
|
|
|
at a time until either a BOM is found or every possible BOM is ruled out. Any |
420
|
|
|
|
|
|
|
non-BOM bytes read from the handle will be returned in $spillage. |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
If a BOM is found and the spillage contains a partial character (judging by the |
423
|
|
|
|
|
|
|
expected character width for the encoding) more bytes will be read from the |
424
|
|
|
|
|
|
|
handle to ensure that a complete character is returned. |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
Spillage is always in bytes, not characters. |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
This function is less efficient than get_encoding_from_filehandle, but should |
429
|
|
|
|
|
|
|
work just as well on a seekable handle as on an unseekable one. |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=cut |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
sub get_encoding_from_stream (*) { |
434
|
72
|
|
|
72
|
1
|
80424
|
my $fh = qualify_to_ref(shift, caller); |
435
|
|
|
|
|
|
|
|
436
|
72
|
|
|
|
|
1764
|
_get_encoding_unseekable($fh); |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
# internal: |
440
|
|
|
|
|
|
|
# |
441
|
|
|
|
|
|
|
# Return encoding and seek to position after BOM |
442
|
|
|
|
|
|
|
sub _get_encoding_seekable (*) { |
443
|
239
|
|
|
239
|
|
9404
|
my $fh = shift; |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
# This doesn't work on all platforms: |
446
|
|
|
|
|
|
|
# defined(read($fh, my $bom, $MAX_BOM_LENGTH)) |
447
|
|
|
|
|
|
|
# or croak "Couldn't read from handle: $!"; |
448
|
|
|
|
|
|
|
|
449
|
239
|
|
|
|
|
394
|
my $bom = eval { _safe_read($fh, $MAX_BOM_LENGTH) }; |
|
239
|
|
|
|
|
536
|
|
450
|
239
|
100
|
|
|
|
612
|
croak "Couldn't read from handle: $@" if $@; |
451
|
|
|
|
|
|
|
|
452
|
236
|
|
|
|
|
591
|
my($enc, $off) = get_encoding_from_bom($bom); |
453
|
|
|
|
|
|
|
|
454
|
236
|
100
|
|
|
|
3360
|
seek($fh, $off, SEEK_SET) or croak "Couldn't reset read position: $!"; |
455
|
|
|
|
|
|
|
|
456
|
234
|
|
|
|
|
792
|
return $enc; |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
# internal: |
460
|
|
|
|
|
|
|
# |
461
|
|
|
|
|
|
|
# Return encoding and non-BOM overspill |
462
|
|
|
|
|
|
|
sub _get_encoding_unseekable (*) { |
463
|
142
|
|
|
142
|
|
2752
|
my $fh = shift; |
464
|
|
|
|
|
|
|
|
465
|
142
|
|
|
|
|
728
|
my $so_far = ''; |
466
|
142
|
|
|
|
|
4065
|
for my $c (1 .. $MAX_BOM_LENGTH) { |
467
|
|
|
|
|
|
|
# defined(read($fh, my $byte, 1)) or croak "Couldn't read byte: $!"; |
468
|
406
|
|
|
|
|
5090
|
my $byte = eval { _safe_read($fh, 1) }; |
|
406
|
|
|
|
|
1230
|
|
469
|
406
|
100
|
|
|
|
915
|
croak "Couldn't read byte: $@" if $@; |
470
|
|
|
|
|
|
|
|
471
|
403
|
|
|
|
|
701
|
$so_far .= $byte; |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
# find matching BOMs |
474
|
403
|
|
|
|
|
3112
|
my @possible = grep { $so_far eq substr($_, 0, $c) } keys %bom2enc; |
|
2015
|
|
|
|
|
16122
|
|
475
|
|
|
|
|
|
|
|
476
|
403
|
100
|
100
|
|
|
4279
|
if (@possible == 1 and my $enc = $bom2enc{$so_far}) { |
|
|
100
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# There's only one match, this must be it |
478
|
96
|
|
|
|
|
2296
|
return ($enc, ''); |
479
|
|
|
|
|
|
|
} |
480
|
|
|
|
|
|
|
elsif (@possible == 0) { |
481
|
|
|
|
|
|
|
# might need to backtrack one byte |
482
|
43
|
|
|
|
|
300
|
my $spill = chop $so_far; |
483
|
|
|
|
|
|
|
|
484
|
43
|
100
|
|
|
|
657
|
if (my $enc = $bom2enc{$so_far}) { |
485
|
29
|
|
|
|
|
505
|
my $char_length = _get_char_length($enc, $spill); |
486
|
|
|
|
|
|
|
|
487
|
29
|
|
|
|
|
79
|
my $extra = eval { |
488
|
29
|
|
|
|
|
99
|
_safe_read($fh, $char_length - length $spill); |
489
|
|
|
|
|
|
|
}; |
490
|
29
|
50
|
|
|
|
102
|
croak "Coudln't read byte: $@" if $@; |
491
|
29
|
|
|
|
|
50
|
$spill .= $extra; |
492
|
|
|
|
|
|
|
|
493
|
29
|
|
|
|
|
301
|
return ($enc, $spill); |
494
|
|
|
|
|
|
|
} |
495
|
|
|
|
|
|
|
else { |
496
|
|
|
|
|
|
|
# no BOM |
497
|
14
|
|
|
|
|
275
|
return ('', $so_far . $spill); |
498
|
|
|
|
|
|
|
} |
499
|
|
|
|
|
|
|
} |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
sub _safe_read { |
504
|
674
|
|
|
674
|
|
2120
|
my ($fh, $count) = @_; |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# read is supposed to return undef on error, but on some platforms it |
507
|
|
|
|
|
|
|
# seems to just return 0 and set $! |
508
|
674
|
|
|
|
|
6849
|
local $!; |
509
|
674
|
|
|
|
|
7981
|
my $status = read($fh, my $out, $count); |
510
|
|
|
|
|
|
|
|
511
|
674
|
100
|
100
|
|
|
2099
|
die $! if !$status && $!; |
512
|
|
|
|
|
|
|
|
513
|
668
|
|
|
|
|
3458
|
return $out; |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=head2 get_encoding_from_bom |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
($encoding, $offset) = get_encoding_from_bom($string) |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
Returns the encoding and length in bytes of the BOM in $string. |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
If there is no BOM, an empty string is returned and $offset is zero. |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
To get the data from the string, the following should work: |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
use Encode; |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
my($encoding, $offset) = get_encoding_from_bom($string); |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
if ($encoding) { |
531
|
|
|
|
|
|
|
$string = decode($encoding, substr($string, $offset)) |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=cut |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
sub get_encoding_from_bom ($) { |
537
|
524
|
|
|
524
|
1
|
1031
|
my $bom = shift; |
538
|
|
|
|
|
|
|
|
539
|
524
|
|
|
|
|
755
|
my $encoding = ''; |
540
|
524
|
|
|
|
|
646
|
my $offset = 0; |
541
|
|
|
|
|
|
|
|
542
|
524
|
100
|
|
|
|
1975
|
if (my($found) = $bom =~ $bom_re) { |
543
|
434
|
|
|
|
|
6064
|
$encoding = $bom2enc{$found}; |
544
|
434
|
|
|
|
|
3143
|
$offset = length($found); |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
|
547
|
524
|
|
|
|
|
2269
|
return ($encoding, $offset); |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
# Internal: |
551
|
|
|
|
|
|
|
# Work out character length for given encoding and spillage byte |
552
|
|
|
|
|
|
|
sub _get_char_length ($$) { |
553
|
34
|
|
|
34
|
|
304058
|
my($enc, $byte) = @_; |
554
|
|
|
|
|
|
|
|
555
|
34
|
100
|
|
|
|
829
|
if ($enc eq 'UTF-8') { |
|
|
100
|
|
|
|
|
|
556
|
3
|
100
|
|
|
|
31
|
if (($byte & 0x80) == 0) { |
557
|
1
|
|
|
|
|
15
|
return 1; |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
else { |
560
|
2
|
|
|
|
|
9
|
my $length = 0; |
561
|
|
|
|
|
|
|
|
562
|
2
|
|
|
|
|
32
|
1 while (($byte << $length++) & 0xc0) == 0xc0; |
563
|
|
|
|
|
|
|
|
564
|
2
|
|
|
|
|
86
|
return $length; |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
elsif ($enc =~ /^UTF-(16|32)/) { |
568
|
30
|
|
|
|
|
443
|
return $1 / 8; |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
else { |
571
|
1
|
|
|
|
|
11
|
return; |
572
|
|
|
|
|
|
|
} |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
=head1 PerlIO::via interface |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
File::BOM can be used as a PerlIO::via interface. |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
open(HANDLE, '<:via(File::BOM)', 'my_file.txt'); |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
open(HANDLE, '>:encoding(UTF-16LE):via(File::BOM)', 'out_file.txt'); |
582
|
|
|
|
|
|
|
print "foo\n"; # BOM is written to file here |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
This method is less prone to errors on non-seekable files as spillage is |
585
|
|
|
|
|
|
|
incorporated into an internal buffer, but it doesn't give you any information |
586
|
|
|
|
|
|
|
about the encoding being used, or indeed whether or not a BOM |
587
|
|
|
|
|
|
|
was present. |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
There are a few known problems with this interface, especially surrounding |
590
|
|
|
|
|
|
|
seek() and tell(), please see the BUGS section for more details about this. |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=head2 Reading |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
The via(File::BOM) layer must be added before the handle is read from, otherwise |
595
|
|
|
|
|
|
|
any BOM will be missed. If there is no BOM, no decoding will be done. |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
Because of a limitation in PerlIO::via, read() always works on bytes, not characters. BOM decoding will still be done but output will be bytes of UTF-8. |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
open(BOM, '<:via(File::BOM)', $file); |
600
|
|
|
|
|
|
|
$bytes_read = read(BOM, $buffer, $length); |
601
|
|
|
|
|
|
|
$unicode = decode('UTF-8', $buffer, Encode::FB_QUIET); |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
# Now $unicode is valid unicode and $buffer contains any left-over bytes |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
=head2 Writing |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
Add the via(File::BOM) layer on top of a unicode encoding layer to print a BOM |
608
|
|
|
|
|
|
|
at the start of the output file. This needs to be done before any data is |
609
|
|
|
|
|
|
|
written. The BOM is written as part of the first print command on the handle, so |
610
|
|
|
|
|
|
|
if you don't print anything to the handle, you won't get a BOM. |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
There is a "Wide character in print" warning generated when the via(File::BOM) |
613
|
|
|
|
|
|
|
layer doesn't receive utf8 on writing. This glitch was resolved in perl version |
614
|
|
|
|
|
|
|
5.8.7, but if your perl version is older than that, you'll need to make sure |
615
|
|
|
|
|
|
|
that the via(File::BOM) layer receives utf8 like this: |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
# This works OK |
618
|
|
|
|
|
|
|
open(FH, '>:encoding(UTF-16LE):via(File::BOM):utf8', $filename) |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
# This generates warnings with older perls |
621
|
|
|
|
|
|
|
open(FH, '>:encoding(UTF-16LE):via(File::BOM)', $filename) |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=head2 Seeking |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
Seeking with SEEK_SET results in an offset equal to the length of any detected |
626
|
|
|
|
|
|
|
BOM being applied to the position parameter. Thus: |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
# Seek to end of BOM (not start of file!) |
629
|
|
|
|
|
|
|
seek(FILE_BOM_HANDLE, 0, SEEK_SET) |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
=head2 Telling |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
In order to work correctly with seek(), tell() also returns a postion adjusted |
634
|
|
|
|
|
|
|
by the length of the BOM. |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
=cut |
637
|
|
|
|
|
|
|
|
638
|
28
|
50
|
|
28
|
0
|
12527
|
sub PUSHED { bless({offset => 0}, $_[0]) || -1 } |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
sub UTF8 { |
641
|
|
|
|
|
|
|
# There is a bug with this method previous to 5.8.7 |
642
|
|
|
|
|
|
|
|
643
|
28
|
50
|
|
28
|
0
|
85
|
if ($] >= 5.008007) { |
644
|
28
|
|
|
|
|
1329
|
return 1; |
645
|
|
|
|
|
|
|
} |
646
|
|
|
|
|
|
|
else { |
647
|
0
|
|
|
|
|
0
|
return 0; |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
} |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
sub FILL { |
652
|
28
|
|
|
28
|
0
|
4742
|
my($self, $fh) = @_; |
653
|
|
|
|
|
|
|
|
654
|
28
|
|
|
|
|
43
|
my $line; |
655
|
28
|
100
|
|
|
|
71
|
if (not defined $self->{enc}) { |
656
|
15
|
|
|
|
|
37
|
($self->{enc}, my $spill) = get_encoding_from_filehandle($fh); |
657
|
|
|
|
|
|
|
|
658
|
15
|
100
|
|
|
|
44
|
if ($self->{enc} ne '') { |
659
|
14
|
|
|
1
|
|
188
|
binmode($fh, ":encoding($self->{enc})"); |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6
|
|
660
|
14
|
50
|
|
|
|
1537
|
$line .= decode($self->{enc}, $spill, FB_CROAK) if $spill; |
661
|
|
|
|
|
|
|
|
662
|
14
|
|
|
|
|
69
|
$self->{offset} = length $enc2bom{$self->{enc}}; |
663
|
|
|
|
|
|
|
} |
664
|
|
|
|
|
|
|
|
665
|
15
|
|
|
|
|
353
|
$line .= <$fh>; |
666
|
|
|
|
|
|
|
} |
667
|
|
|
|
|
|
|
else { |
668
|
13
|
|
|
|
|
62
|
$line = <$fh>; |
669
|
|
|
|
|
|
|
} |
670
|
|
|
|
|
|
|
|
671
|
28
|
|
|
|
|
192
|
return $line; |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
sub WRITE { |
675
|
21
|
|
|
21
|
|
5758
|
my($self, $buf, $fh) = @_; |
676
|
|
|
|
|
|
|
|
677
|
21
|
100
|
66
|
|
|
388
|
if (tell $fh == 0 and not $self->{wrote_bom}) { |
678
|
13
|
|
|
|
|
68
|
print $fh "\x{feff}"; |
679
|
13
|
|
|
|
|
26
|
$self->{wrote_bom} = 1; |
680
|
|
|
|
|
|
|
} |
681
|
|
|
|
|
|
|
|
682
|
21
|
|
|
|
|
81
|
$buf = decode('UTF-8', $buf, FB_CROAK); |
683
|
|
|
|
|
|
|
|
684
|
21
|
|
|
|
|
1154
|
print $fh $buf; |
685
|
|
|
|
|
|
|
|
686
|
21
|
|
|
|
|
74
|
return 1; |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
|
689
|
28
|
|
|
28
|
0
|
12482
|
sub FLUSH { 0 } |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
sub SEEK { |
692
|
2
|
|
|
2
|
|
13
|
my $self = shift; |
693
|
|
|
|
|
|
|
|
694
|
2
|
|
|
|
|
5
|
my($pos, $whence, $fh) = @_; |
695
|
|
|
|
|
|
|
|
696
|
2
|
50
|
|
|
|
6
|
if ($whence == SEEK_SET) { |
697
|
2
|
|
|
|
|
6
|
$pos += $self->{offset}; |
698
|
|
|
|
|
|
|
} |
699
|
|
|
|
|
|
|
|
700
|
2
|
50
|
|
|
|
24
|
if (seek($fh, $pos, $whence)) { |
701
|
2
|
|
|
|
|
9
|
return 0; |
702
|
|
|
|
|
|
|
} |
703
|
|
|
|
|
|
|
else { |
704
|
0
|
|
|
|
|
0
|
return -1; |
705
|
|
|
|
|
|
|
} |
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
sub TELL { |
709
|
1
|
|
|
1
|
|
12
|
my($self, $fh) = @_; |
710
|
|
|
|
|
|
|
|
711
|
1
|
|
|
|
|
7
|
my $pos = tell $fh; |
712
|
|
|
|
|
|
|
|
713
|
1
|
50
|
|
|
|
5
|
if ($pos == -1) { |
714
|
0
|
|
|
|
|
0
|
return -1; |
715
|
|
|
|
|
|
|
} |
716
|
|
|
|
|
|
|
else { |
717
|
1
|
|
|
|
|
4
|
return $pos - $self->{offset}; |
718
|
|
|
|
|
|
|
} |
719
|
|
|
|
|
|
|
} |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
1; |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
__END__ |