| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package File::GetLineMaxLength; |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
File::GetLineMaxLength - Get lines from a file, up to a maximum line length |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use File::GetLineMaxLength; |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
$FML = File::GetLineMaxLength->new(STDIN); |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# Read lines, up to 1024 chars |
|
16
|
|
|
|
|
|
|
while (my $Line = $FML->getline(1024, $Excess)) { |
|
17
|
|
|
|
|
|
|
} |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
While generally reading lines of data is easy in perl (eg C$FhE>), |
|
22
|
|
|
|
|
|
|
there's apparently no easy way to limit the read line to a maximum length |
|
23
|
|
|
|
|
|
|
(as in the C call C). This can |
|
24
|
|
|
|
|
|
|
lead to potential DOS situations in your code where an attacker can send |
|
25
|
|
|
|
|
|
|
an arbitrarily large line and use up all your memory. Of course you can |
|
26
|
|
|
|
|
|
|
use things like BSD::Resource to stop your program using all memory, |
|
27
|
|
|
|
|
|
|
but that just kills off the process and gives you no more information |
|
28
|
|
|
|
|
|
|
about what was causing the problem. |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
This question was raised on perlmonks, and the general |
|
31
|
|
|
|
|
|
|
response seemed to be "roll your own using the C |
|
32
|
|
|
|
|
|
|
call." L |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
This module basically does that, but makes it reusable, so you can wrap |
|
35
|
|
|
|
|
|
|
any handle and get line length limited IO. |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 IMPLEMENTATION |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
It basically creates an internal buffer, and uses C to read up to |
|
40
|
|
|
|
|
|
|
4096 bytes at a time, looking for the appropriate EOL marker. When found, |
|
41
|
|
|
|
|
|
|
it returns the line and leaves the remaining data in the internal buffer |
|
42
|
|
|
|
|
|
|
for the next call. |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Because of this internal buffering, you should NOT mix calling |
|
45
|
|
|
|
|
|
|
C via this class and any other standard IO calls on the file |
|
46
|
|
|
|
|
|
|
handle you passed to C, you'll get surprising results. |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 PERFORMANCE |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
The code tries to be pretty careful performance wise (single buffer, |
|
51
|
|
|
|
|
|
|
no copying, use index to find EOL), but because it's |
|
52
|
|
|
|
|
|
|
perl, a tight loop is still an order of magnitude slower. |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
For instance, just a loop reading a file with 10,000 50 char or so |
|
55
|
|
|
|
|
|
|
lines, 100 times: |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
read: 0.588507 |
|
58
|
|
|
|
|
|
|
glml read: 4.654946 |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
However, if you do any work in the loop at all, that time difference |
|
61
|
|
|
|
|
|
|
becomes quite a bit less. |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Same as above, but do C<@_ = split / /> in the loop |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
read: 8.688189 |
|
66
|
|
|
|
|
|
|
glml read: 12.529909 |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
So basically any "work" you do will probably easily swamp the read time |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=cut |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Use modules {{{ |
|
73
|
|
|
|
|
|
|
our $VERSION = '1.01'; |
|
74
|
|
|
|
|
|
|
|
|
75
|
2
|
|
|
2
|
|
49715
|
use strict; |
|
|
2
|
|
|
|
|
6
|
|
|
|
2
|
|
|
|
|
905
|
|
|
76
|
|
|
|
|
|
|
# }}} |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head1 METHODS |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=over 4 |
|
81
|
|
|
|
|
|
|
=cut |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=item I |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Wrap handle and return object which you can call C on. |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Note: See above about not calling any other IO calls on the passed handle |
|
88
|
|
|
|
|
|
|
after you pass it to this C call. |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=cut |
|
91
|
|
|
|
|
|
|
sub new { |
|
92
|
111
|
|
|
111
|
1
|
579933
|
my $Proto = shift; |
|
93
|
111
|
|
33
|
|
|
2044
|
my $Class = ref($Proto) || $Proto; |
|
94
|
|
|
|
|
|
|
|
|
95
|
111
|
50
|
33
|
|
|
900
|
@_ >= 1 && @_ <= 2 |
|
96
|
|
|
|
|
|
|
|| die "Must call $Class->new(HANDLE)"; |
|
97
|
|
|
|
|
|
|
|
|
98
|
111
|
|
|
|
|
919
|
my $Self = bless { }, $Class; |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# Save file handle |
|
101
|
111
|
|
|
|
|
396
|
my $Fd = $Self->{Fd} = shift; |
|
102
|
|
|
|
|
|
|
# Get current EOL chars for this file handle |
|
103
|
111
|
|
|
|
|
731
|
my $ofh = select($Fd); $Self->{EOL} = $/; select ($ofh); |
|
|
111
|
|
|
|
|
447
|
|
|
|
111
|
|
|
|
|
843
|
|
|
104
|
|
|
|
|
|
|
# Initialise empty read buffer |
|
105
|
111
|
|
|
|
|
496
|
$Self->{Buffer} = ''; |
|
106
|
|
|
|
|
|
|
|
|
107
|
111
|
|
100
|
|
|
1581
|
$Self->{ReadSize} = int(shift || 0) || 4096; |
|
108
|
|
|
|
|
|
|
|
|
109
|
111
|
|
|
|
|
340
|
return $Self; |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=item I |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Get a line of data from the file handle, up to $max_length |
|
115
|
|
|
|
|
|
|
bytes long. If no $max_length passed, works just like |
|
116
|
|
|
|
|
|
|
standard perl <$fh>. If the $was_long_line variable is passed, |
|
117
|
|
|
|
|
|
|
it's set to 0 or 1 depending on whether the line was |
|
118
|
|
|
|
|
|
|
very long and has been truncated. |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Note: Actually this might return up to $maxlength + length(EOL) |
|
121
|
|
|
|
|
|
|
chars as the EOL chars are not considered part of the line |
|
122
|
|
|
|
|
|
|
length. The current EOL chars for the file handle are gotten |
|
123
|
|
|
|
|
|
|
via $/ when you called C above |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=cut |
|
126
|
|
|
|
|
|
|
sub getline { |
|
127
|
1000570
|
|
|
1000570
|
1
|
3135253
|
my ($Self, $MaxLength) = (shift, shift); |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Get EOL char and reference to current line buffer |
|
130
|
1000570
|
|
|
|
|
1298781
|
my $EOL = $Self->{EOL}; |
|
131
|
1000570
|
|
|
|
|
1234333
|
my $Buffer = \$Self->{Buffer}; |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Reset "line was long" marker if passed |
|
134
|
1000570
|
100
|
|
|
|
1791797
|
$_[0] = 0 if @_; |
|
135
|
|
|
|
|
|
|
|
|
136
|
1000570
|
|
|
|
|
888316
|
while (1) { |
|
137
|
|
|
|
|
|
|
# Search for EOL chars in current buffer |
|
138
|
1008648
|
|
|
|
|
1241475
|
my $FoundLineLen = index($$Buffer, $EOL); |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# If EOL found... |
|
141
|
1008648
|
100
|
|
|
|
1375500
|
if ($FoundLineLen != -1) { |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# If no maxlen, or line is <= max length, just rip from buffer and return it |
|
144
|
1000452
|
100
|
100
|
|
|
1982879
|
if (!$MaxLength || $FoundLineLen <= $MaxLength) { |
|
145
|
1000264
|
|
|
|
|
2506183
|
return substr($$Buffer, 0, $FoundLineLen + length($EOL), ''); |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# Otherwise, set $was_long_line param, and return up to max length chars |
|
148
|
|
|
|
|
|
|
} else { |
|
149
|
188
|
50
|
|
|
|
416
|
$_[0] = 1 if @_; |
|
150
|
188
|
|
|
|
|
780
|
return substr($$Buffer, 0, $MaxLength, ''); |
|
151
|
|
|
|
|
|
|
} |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# No EOL found... |
|
154
|
|
|
|
|
|
|
} else { |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# Already > max length chars available, just return it |
|
157
|
8196
|
100
|
100
|
|
|
19360
|
if ($MaxLength && length($$Buffer) > $MaxLength + length($EOL)) { |
|
158
|
6
|
50
|
|
|
|
19
|
$_[0] = 1 if @_; |
|
159
|
6
|
|
|
|
|
22
|
return substr($$Buffer, 0, $MaxLength, ''); |
|
160
|
|
|
|
|
|
|
} |
|
161
|
|
|
|
|
|
|
} |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# Otherwise grab more data and add to buffer |
|
164
|
8190
|
|
|
|
|
75241
|
my $BytesRead = read($Self->{Fd}, $$Buffer, $Self->{ReadSize}, length($$Buffer)); |
|
165
|
8190
|
50
|
|
|
|
16567
|
defined($BytesRead) || die "getline failed: $!"; |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# Reached EOF? Just return remnants from buffer |
|
168
|
8190
|
100
|
|
|
|
19899
|
if ($BytesRead == 0) { |
|
169
|
112
|
|
|
|
|
558
|
return substr($$Buffer, 0, length($$Buffer), ''); |
|
170
|
|
|
|
|
|
|
} |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
|
|
174
|
0
|
|
|
|
|
|
die "Unexpected exit from loop"; |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=back |
|
178
|
|
|
|
|
|
|
=cut |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
L, L |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
Latest news/details can also be found at: |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
http://cpan.robm.fastmail.fm/filegetlinemaxlength/ |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=cut |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head1 AUTHOR |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Rob Mueller Ecpan@robm.fastmail.fmE. |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=cut |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
Copyright (C) 2004-2007 by FastMail IP Partners |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
|
201
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
|
204
|
|
|
|
|
|
|
|