line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package File::GetLineMaxLength; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
File::GetLineMaxLength - Get lines from a file, up to a maximum line length |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use File::GetLineMaxLength; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
$FML = File::GetLineMaxLength->new(STDIN); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# Read lines, up to 1024 chars |
16
|
|
|
|
|
|
|
while (my $Line = $FML->getline(1024, $Excess)) { |
17
|
|
|
|
|
|
|
} |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 DESCRIPTION |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
While generally reading lines of data is easy in perl (eg C$FhE>), |
22
|
|
|
|
|
|
|
there's apparently no easy way to limit the read line to a maximum length |
23
|
|
|
|
|
|
|
(as in the C call C). This can |
24
|
|
|
|
|
|
|
lead to potential DOS situations in your code where an attacker can send |
25
|
|
|
|
|
|
|
an arbitrarily large line and use up all your memory. Of course you can |
26
|
|
|
|
|
|
|
use things like BSD::Resource to stop your program using all memory, |
27
|
|
|
|
|
|
|
but that just kills off the process and gives you no more information |
28
|
|
|
|
|
|
|
about what was causing the problem. |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
This question was raised on perlmonks, and the general |
31
|
|
|
|
|
|
|
response seemed to be "roll your own using the C |
32
|
|
|
|
|
|
|
call." L |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
This module basically does that, but makes it reusable, so you can wrap |
35
|
|
|
|
|
|
|
any handle and get line length limited IO. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 IMPLEMENTATION |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
It basically creates an internal buffer, and uses C to read up to |
40
|
|
|
|
|
|
|
4096 bytes at a time, looking for the appropriate EOL marker. When found, |
41
|
|
|
|
|
|
|
it returns the line and leaves the remaining data in the internal buffer |
42
|
|
|
|
|
|
|
for the next call. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Because of this internal buffering, you should NOT mix calling |
45
|
|
|
|
|
|
|
C via this class and any other standard IO calls on the file |
46
|
|
|
|
|
|
|
handle you passed to C, you'll get surprising results. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 PERFORMANCE |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
The code tries to be pretty careful performance wise (single buffer, |
51
|
|
|
|
|
|
|
no copying, use index to find EOL), but because it's |
52
|
|
|
|
|
|
|
perl, a tight loop is still an order of magnitude slower. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
For instance, just a loop reading a file with 10,000 50 char or so |
55
|
|
|
|
|
|
|
lines, 100 times: |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
read: 0.588507 |
58
|
|
|
|
|
|
|
glml read: 4.654946 |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
However, if you do any work in the loop at all, that time difference |
61
|
|
|
|
|
|
|
becomes quite a bit less. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Same as above, but do C<@_ = split / /> in the loop |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
read: 8.688189 |
66
|
|
|
|
|
|
|
glml read: 12.529909 |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
So basically any "work" you do will probably easily swamp the read time |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=cut |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Use modules {{{ |
73
|
|
|
|
|
|
|
our $VERSION = '1.01'; |
74
|
|
|
|
|
|
|
|
75
|
2
|
|
|
2
|
|
49715
|
use strict; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
905
|
|
76
|
|
|
|
|
|
|
# }}} |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head1 METHODS |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=over 4 |
81
|
|
|
|
|
|
|
=cut |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=item I |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Wrap handle and return object which you can call C on. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Note: See above about not calling any other IO calls on the passed handle |
88
|
|
|
|
|
|
|
after you pass it to this C call. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=cut |
91
|
|
|
|
|
|
|
sub new { |
92
|
111
|
|
|
111
|
1
|
579933
|
my $Proto = shift; |
93
|
111
|
|
33
|
|
|
2044
|
my $Class = ref($Proto) || $Proto; |
94
|
|
|
|
|
|
|
|
95
|
111
|
50
|
33
|
|
|
900
|
@_ >= 1 && @_ <= 2 |
96
|
|
|
|
|
|
|
|| die "Must call $Class->new(HANDLE)"; |
97
|
|
|
|
|
|
|
|
98
|
111
|
|
|
|
|
919
|
my $Self = bless { }, $Class; |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# Save file handle |
101
|
111
|
|
|
|
|
396
|
my $Fd = $Self->{Fd} = shift; |
102
|
|
|
|
|
|
|
# Get current EOL chars for this file handle |
103
|
111
|
|
|
|
|
731
|
my $ofh = select($Fd); $Self->{EOL} = $/; select ($ofh); |
|
111
|
|
|
|
|
447
|
|
|
111
|
|
|
|
|
843
|
|
104
|
|
|
|
|
|
|
# Initialise empty read buffer |
105
|
111
|
|
|
|
|
496
|
$Self->{Buffer} = ''; |
106
|
|
|
|
|
|
|
|
107
|
111
|
|
100
|
|
|
1581
|
$Self->{ReadSize} = int(shift || 0) || 4096; |
108
|
|
|
|
|
|
|
|
109
|
111
|
|
|
|
|
340
|
return $Self; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=item I |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Get a line of data from the file handle, up to $max_length |
115
|
|
|
|
|
|
|
bytes long. If no $max_length passed, works just like |
116
|
|
|
|
|
|
|
standard perl <$fh>. If the $was_long_line variable is passed, |
117
|
|
|
|
|
|
|
it's set to 0 or 1 depending on whether the line was |
118
|
|
|
|
|
|
|
very long and has been truncated. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Note: Actually this might return up to $maxlength + length(EOL) |
121
|
|
|
|
|
|
|
chars as the EOL chars are not considered part of the line |
122
|
|
|
|
|
|
|
length. The current EOL chars for the file handle are gotten |
123
|
|
|
|
|
|
|
via $/ when you called C above |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=cut |
126
|
|
|
|
|
|
|
sub getline { |
127
|
1000570
|
|
|
1000570
|
1
|
3135253
|
my ($Self, $MaxLength) = (shift, shift); |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Get EOL char and reference to current line buffer |
130
|
1000570
|
|
|
|
|
1298781
|
my $EOL = $Self->{EOL}; |
131
|
1000570
|
|
|
|
|
1234333
|
my $Buffer = \$Self->{Buffer}; |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Reset "line was long" marker if passed |
134
|
1000570
|
100
|
|
|
|
1791797
|
$_[0] = 0 if @_; |
135
|
|
|
|
|
|
|
|
136
|
1000570
|
|
|
|
|
888316
|
while (1) { |
137
|
|
|
|
|
|
|
# Search for EOL chars in current buffer |
138
|
1008648
|
|
|
|
|
1241475
|
my $FoundLineLen = index($$Buffer, $EOL); |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# If EOL found... |
141
|
1008648
|
100
|
|
|
|
1375500
|
if ($FoundLineLen != -1) { |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# If no maxlen, or line is <= max length, just rip from buffer and return it |
144
|
1000452
|
100
|
100
|
|
|
1982879
|
if (!$MaxLength || $FoundLineLen <= $MaxLength) { |
145
|
1000264
|
|
|
|
|
2506183
|
return substr($$Buffer, 0, $FoundLineLen + length($EOL), ''); |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# Otherwise, set $was_long_line param, and return up to max length chars |
148
|
|
|
|
|
|
|
} else { |
149
|
188
|
50
|
|
|
|
416
|
$_[0] = 1 if @_; |
150
|
188
|
|
|
|
|
780
|
return substr($$Buffer, 0, $MaxLength, ''); |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# No EOL found... |
154
|
|
|
|
|
|
|
} else { |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# Already > max length chars available, just return it |
157
|
8196
|
100
|
100
|
|
|
19360
|
if ($MaxLength && length($$Buffer) > $MaxLength + length($EOL)) { |
158
|
6
|
50
|
|
|
|
19
|
$_[0] = 1 if @_; |
159
|
6
|
|
|
|
|
22
|
return substr($$Buffer, 0, $MaxLength, ''); |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# Otherwise grab more data and add to buffer |
164
|
8190
|
|
|
|
|
75241
|
my $BytesRead = read($Self->{Fd}, $$Buffer, $Self->{ReadSize}, length($$Buffer)); |
165
|
8190
|
50
|
|
|
|
16567
|
defined($BytesRead) || die "getline failed: $!"; |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# Reached EOF? Just return remnants from buffer |
168
|
8190
|
100
|
|
|
|
19899
|
if ($BytesRead == 0) { |
169
|
112
|
|
|
|
|
558
|
return substr($$Buffer, 0, length($$Buffer), ''); |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
0
|
|
|
|
|
|
die "Unexpected exit from loop"; |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=back |
178
|
|
|
|
|
|
|
=cut |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head1 SEE ALSO |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
L, L |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
Latest news/details can also be found at: |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
http://cpan.robm.fastmail.fm/filegetlinemaxlength/ |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=cut |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head1 AUTHOR |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Rob Mueller Ecpan@robm.fastmail.fmE. |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=cut |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
Copyright (C) 2004-2007 by FastMail IP Partners |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
201
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
204
|
|
|
|
|
|
|
|