File Coverage

blib/lib/File/GetLineMaxLength.pm
Criterion Covered Total %
statement 32 33 96.9
branch 14 18 77.7
condition 10 14 71.4
subroutine 3 3 100.0
pod 2 2 100.0
total 61 70 87.1


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             package File::GetLineMaxLength;
4              
5             =head1 NAME
6              
7             File::GetLineMaxLength - Get lines from a file, up to a maximum line length
8              
9             =head1 SYNOPSIS
10              
11             use File::GetLineMaxLength;
12              
13             $FML = File::GetLineMaxLength->new(STDIN);
14              
15             # Read lines, up to 1024 chars
16             while (my $Line = $FML->getline(1024, $Excess)) {
17             }
18              
19             =head1 DESCRIPTION
20              
21             While generally reading lines of data is easy in perl (eg C$FhE>),
22             there's apparently no easy way to limit the read line to a maximum length
23             (as in the C call C). This can
24             lead to potential DOS situations in your code where an attacker can send
25             an arbitrarily large line and use up all your memory. Of course you can
26             use things like BSD::Resource to stop your program using all memory,
27             but that just kills off the process and gives you no more information
28             about what was causing the problem.
29              
30             This question was raised on perlmonks, and the general
31             response seemed to be "roll your own using the C
32             call." L
33              
34             This module basically does that, but makes it reusable, so you can wrap
35             any handle and get line length limited IO.
36              
37             =head1 IMPLEMENTATION
38              
39             It basically creates an internal buffer, and uses C to read up to
40             4096 bytes at a time, looking for the appropriate EOL marker. When found,
41             it returns the line and leaves the remaining data in the internal buffer
42             for the next call.
43              
44             Because of this internal buffering, you should NOT mix calling
45             C via this class and any other standard IO calls on the file
46             handle you passed to C, you'll get surprising results.
47              
48             =head1 PERFORMANCE
49              
50             The code tries to be pretty careful performance wise (single buffer,
51             no copying, use index to find EOL), but because it's
52             perl, a tight loop is still an order of magnitude slower.
53              
54             For instance, just a loop reading a file with 10,000 50 char or so
55             lines, 100 times:
56              
57             read: 0.588507
58             glml read: 4.654946
59              
60             However, if you do any work in the loop at all, that time difference
61             becomes quite a bit less.
62              
63             Same as above, but do C<@_ = split / /> in the loop
64              
65             read: 8.688189
66             glml read: 12.529909
67              
68             So basically any "work" you do will probably easily swamp the read time
69              
70             =cut
71              
72             # Use modules {{{
73             our $VERSION = '1.01';
74              
75 2     2   49715 use strict;
  2         6  
  2         905  
76             # }}}
77              
78             =head1 METHODS
79              
80             =over 4
81             =cut
82              
83             =item I
84              
85             Wrap handle and return object which you can call C on.
86              
87             Note: See above about not calling any other IO calls on the passed handle
88             after you pass it to this C call.
89              
90             =cut
91             sub new {
92 111     111 1 579933 my $Proto = shift;
93 111   33     2044 my $Class = ref($Proto) || $Proto;
94              
95 111 50 33     900 @_ >= 1 && @_ <= 2
96             || die "Must call $Class->new(HANDLE)";
97              
98 111         919 my $Self = bless { }, $Class;
99              
100             # Save file handle
101 111         396 my $Fd = $Self->{Fd} = shift;
102             # Get current EOL chars for this file handle
103 111         731 my $ofh = select($Fd); $Self->{EOL} = $/; select ($ofh);
  111         447  
  111         843  
104             # Initialise empty read buffer
105 111         496 $Self->{Buffer} = '';
106              
107 111   100     1581 $Self->{ReadSize} = int(shift || 0) || 4096;
108              
109 111         340 return $Self;
110             }
111              
112             =item I
113              
114             Get a line of data from the file handle, up to $max_length
115             bytes long. If no $max_length passed, works just like
116             standard perl <$fh>. If the $was_long_line variable is passed,
117             it's set to 0 or 1 depending on whether the line was
118             very long and has been truncated.
119              
120             Note: Actually this might return up to $maxlength + length(EOL)
121             chars as the EOL chars are not considered part of the line
122             length. The current EOL chars for the file handle are gotten
123             via $/ when you called C above
124              
125             =cut
126             sub getline {
127 1000570     1000570 1 3135253 my ($Self, $MaxLength) = (shift, shift);
128              
129             # Get EOL char and reference to current line buffer
130 1000570         1298781 my $EOL = $Self->{EOL};
131 1000570         1234333 my $Buffer = \$Self->{Buffer};
132              
133             # Reset "line was long" marker if passed
134 1000570 100       1791797 $_[0] = 0 if @_;
135              
136 1000570         888316 while (1) {
137             # Search for EOL chars in current buffer
138 1008648         1241475 my $FoundLineLen = index($$Buffer, $EOL);
139              
140             # If EOL found...
141 1008648 100       1375500 if ($FoundLineLen != -1) {
142              
143             # If no maxlen, or line is <= max length, just rip from buffer and return it
144 1000452 100 100     1982879 if (!$MaxLength || $FoundLineLen <= $MaxLength) {
145 1000264         2506183 return substr($$Buffer, 0, $FoundLineLen + length($EOL), '');
146              
147             # Otherwise, set $was_long_line param, and return up to max length chars
148             } else {
149 188 50       416 $_[0] = 1 if @_;
150 188         780 return substr($$Buffer, 0, $MaxLength, '');
151             }
152              
153             # No EOL found...
154             } else {
155              
156             # Already > max length chars available, just return it
157 8196 100 100     19360 if ($MaxLength && length($$Buffer) > $MaxLength + length($EOL)) {
158 6 50       19 $_[0] = 1 if @_;
159 6         22 return substr($$Buffer, 0, $MaxLength, '');
160             }
161             }
162              
163             # Otherwise grab more data and add to buffer
164 8190         75241 my $BytesRead = read($Self->{Fd}, $$Buffer, $Self->{ReadSize}, length($$Buffer));
165 8190 50       16567 defined($BytesRead) || die "getline failed: $!";
166              
167             # Reached EOF? Just return remnants from buffer
168 8190 100       19899 if ($BytesRead == 0) {
169 112         558 return substr($$Buffer, 0, length($$Buffer), '');
170             }
171              
172             }
173              
174 0           die "Unexpected exit from loop";
175             }
176              
177             =back
178             =cut
179              
180             =head1 SEE ALSO
181              
182             L, L
183              
184             Latest news/details can also be found at:
185              
186             http://cpan.robm.fastmail.fm/filegetlinemaxlength/
187              
188             =cut
189              
190             =head1 AUTHOR
191              
192             Rob Mueller Ecpan@robm.fastmail.fmE.
193              
194             =cut
195              
196             =head1 COPYRIGHT AND LICENSE
197              
198             Copyright (C) 2004-2007 by FastMail IP Partners
199              
200             This library is free software; you can redistribute it and/or modify
201             it under the same terms as Perl itself.
202              
203             =cut
204