File Coverage

lib/File/CountLines.pm

Criterion	Covered	Total	%
statement	53	53	100.0
branch	17	22	77.2
condition	3	6	50.0
subroutine	8	8	100.0
pod	0	1	0.0
total	81	90	90.0

line	stmt	bran	cond	sub	pod	time	code
1							package File::CountLines;
2	1			1		70550	use strict;
	1					2
	1					29
3	1			1		6	use warnings;
	1					2
	1					75
4
5							our $VERSION = '0.0.3';
6							our @EXPORT_OK = qw(count_lines);
7
8	1			1		5	use Exporter 5.057;
	1					36
	1					55
9							Exporter->import('import');
10
11	1			1		5	use Carp qw(croak);
	1					2
	1					82
12	1			1		1663	use charnames qw(:full);
	1					39552
	1					7
13
14							our %StyleMap = (
15							'cr' => "\N{CARRIAGE RETURN}",
16							'lf' => "\N{LINE FEED}",
17							'crlf' => "\N{CARRIAGE RETURN}\N{LINE FEED}",
18							'native' => "\n",
19							);
20
21							our $BlockSize = 4096;
22
23							sub count_lines {
24	21			21	0	9691	my $filename = shift;
25	21	100				308	croak 'expected filename in call to count_lines()'
26							unless defined $filename;
27	19					54	my %options = @_;
28	19					29	my $sep = $options{separator};
29	19	100				43	unless (defined $sep) {
30	9	100				44	my $style = exists $options{style} ? $options{style} : 'native';
31	9					21	$sep = $StyleMap{$style};
32	9	50				25	die "Don't know how to map style '$style'" unless defined $sep;
33							}
34	19	100				53	if (length($sep) > 1) {
35	6		66			57	return _cl_sysread_multiple_chars(
36							$filename,
37							$sep,
38							$options{blocksize} \|\| $BlockSize,
39							);
40							} else {
41	13		33			68	return _cl_sysread_one_char(
42							$filename,
43							$sep,
44							$options{blocksize} \|\| $BlockSize,
45							);
46							}
47							}
48
49							sub _cl_sysread_one_char {
50	13			13		25	my ($filename, $sep, $blocksize) = @_;
51	13					21	local $Carp::CarpLevel = 1;
52	13	100				1062	open my $handle, '<:raw', $filename
53							or croak "Can't open file `$filename' for reading: $!";
54	12					26	binmode $handle;
55	12					18	my $lines = 0;
56	12					43	$sep =~ s/([\\{}])/\\$1/g;
57							# need eval here because tr/// doesn't interpolate
58	12					13	my $sysread_status;
59	12					2061	eval qq[
60							while (\$sysread_status = sysread \$handle, my \$buffer, $blocksize) {
61							\$lines += (\$buffer =~ tr{$sep}{});
62							}
63							];
64	12	50				98	die "Can't sysread() from file `$filename': $!"
65							unless defined ($sysread_status);
66	12	50				131	close $handle or croak "Can't close file `$filename': $!";
67	12					105	return $lines;
68							}
69
70							sub _cl_sysread_multiple_chars {
71	6			6		38	my ($filename, $sep, $blocksize) = @_;
72	6					11	local $Carp::CarpLevel = 1;
73	6	100				582	open my $handle, '<:raw', $filename
74							or croak "Can't open file `$filename' for reading: $!";
75	5					11	binmode $handle;
76	5					8	my $len = length($sep);
77	5					8	my $lines = 0;
78	5					8	my $buffer = '';
79	5					6	my $sysread_status;
80	5					45	while ($sysread_status = sysread $handle, $buffer, $blocksize, length($buffer)) {
81	10					17	my $offset = -$len;
82	10					30	while (-1 != ($offset = index $buffer, $sep, $offset + $len)) {
83	13					30	$lines++;
84							}
85							# we assume $len >= 2; otherwise use _cl_sysread_one_char()
86	10					66	$buffer = substr $buffer, 1 - $len;
87							}
88	5	50				12	die "Can't sysread() from file `$filename': $!"
89							unless defined ($sysread_status);
90	5	50				54	close $handle or croak "Can't close file `$filename': $!";
91	5					46	return $lines;
92							}
93
94							1;
95
96							__END__
97
98							=head1 NAME
99
100							File::CountLines - efficiently count the number of line breaks in a file.
101
102							=head1 SYNOPSIS
103
104							use File::CountLines qw(count_lines);
105							my $no_of_lines = count_lines('/etc/passwd');
106
107							# other uses
108							my $carriage_returns = count_lines(
109							'path/to/file.txt',
110							style => 'cr',
111							);
112							# possible styles are 'native' (the default), 'cr', 'lf'
113
114							=head1 DESCRIPTION
115
116							L<perlfaq5> answers the question on how to count the number of lines
117							in a file. This module is a convenient wrapper around that method, with
118							additional options.
119
120							More specifically, it counts the number of I<line breaks> rather than lines.
121							On Unix systems nearlly all text files end with a newline (by convention), so
122							usually the number of lines and number of line breaks is equal.
123
124							Since different operating systems have different ideas of what a newline is,
125							you can specifiy a C<style> option, which can be one of the following values:
126
127							=over
128
129							=item C<native>
130
131							This takes Perl's C<\n> as the line separator, which should be the right thing in most cases. See L<perlport> for details. This is the default.
132
133							=item C<cr>
134
135							Take a carriage return as line separator (MacOS style)
136
137							=item C<lf>
138
139							Take a line feed as line separator (Unix style)
140
141							=item C<crlf>
142
143							Take a carriage return followed by a line feed as separator (Microsoft
144							Windows style)
145
146							=back
147
148							Alternatively you can specify an arbitrary separator like this:
149
150							my $lists = count_lines($file, separator => '\end{itemize}');
151
152							It is taken verbatim and searched for in the file.
153
154							The file is read in equally sized blocks. The size of the blocks
155							can be supplied with the C<blocksize> option. The default is 4096,
156							and can be changed by setting C<$File::CountLines::BlockSize>.
157
158							Do not use a block size smaller than the length of the separator, that
159							might produce wrong results. (In general there's no reason to chose a
160							smaller block size at all. Depending on your size a larger block size
161							might speed up things a bit.)
162
163							=head1 Character Encodings
164
165							If you supply a separator yourself, it should not be a decoded string.
166
167							The file is read in binary mode, which implies that this module
168							works fine for text files in ASCII-compatible encodings, including
169							ASCII itself, UTF-8 and all the ISO-8859-* encodings (aka Latin-1,
170							Latin-2, ...).
171
172							Note that the multi byte encodings like UTF-32, UTF-16le, UTF-16be
173							and UCS-2 encode a line feed character in a way that the C<0x0A> byte
174							is a substring of the encoded character, but if you search blindly for
175							that byte you will get false positives. For example the I<LATIN CAPITAL
176							LETTER C WITH DOT ABOVE>, U+010A has the byte sequence C<0x0A 0x01> when
177							encoded as UTF-16le, so it would be counted as a newline. Even search for
178							C<0x0A 0x00> might give false positives.
179
180							So the summary is that for now you can't use this module in a meaningful
181							way to count lines of text files in encodings that are not ASCII-compatible.
182							If there's demand for, I can implement that though.
183
184							=head1 Extending
185
186							You can add your own EOL styles by adding them to the
187							C<%File::CountLines::StyleMap> hash, with the name of the style as hash key
188							and the separator as the value.
189
190							=head1 AUTHOR
191
192							Moritz Lenz L<http://perlgeek.de>, L<mailto:moritz@faui2k3.org>
193
194							=head1 COPYRIGHT AND LICENSE
195
196							Copyright (C) 2008 by Moritz A. Lenz. This module is free software.
197							You may use, redistribute and modify it under the same terms as perl itself.
198
199							Example code included in this package may be used as if it were in the Public
200							Domain.
201
202							=head1 DEVELOPMENT
203
204							You can obtain the latest development version from L<http://github.com/moritz/File-CountLines>:
205
206							git clone git://github.com/moritz/File-CountLines.git
207
208							=cut
209