File Coverage

blib/lib/File/LoadLines.pm
Criterion Covered Total %
statement 62 94 65.9
branch 35 64 54.6
condition 7 10 70.0
subroutine 7 7 100.0
pod 1 1 100.0
total 112 176 63.6


line stmt bran cond sub pod time code
1             #! perl
2              
3             package File::LoadLines;
4              
5 8     8   596261 use warnings;
  8         87  
  8         291  
6 8     8   107 use strict;
  8         15  
  8         197  
7 8     8   42 use base 'Exporter';
  8         15  
  8         1503  
8             our @EXPORT = qw( loadlines );
9 8     8   4236 use Encode;
  8         77099  
  8         629  
10 8     8   66 use Carp;
  8         19  
  8         530  
11 8     8   647 use utf8;
  8         27  
  8         54  
12              
13             =head1 NAME
14              
15             File::LoadLines - Load lines from file
16              
17             =cut
18              
19             our $VERSION = '1.02';
20              
21             =head1 SYNOPSIS
22              
23             use File::LoadLines;
24              
25             my @lines = loadlines("mydata.txt");
26             ...
27              
28             =head1 DESCRIPTION
29              
30             File::LoadLines provides an easy way to load the contents of a text
31             file into an array of lines. It is intended for relatively small files
32             like config files that are often produced by weird tools (and users).
33              
34             It automatically handles ASCII, Latin-1 and UTF-8 text.
35             When the file has a BOM, it handles UTF-8, UTF-16 LE and BE, and
36             UTF-32 LE and BE.
37              
38             Recognized line terminators are NL (Unix, Linux), CRLF (DOS, Windows)
39             and CR (Mac)
40              
41             =head1 EXPORT
42              
43             =head2 loadlines
44              
45             =head1 FUNCTIONS
46              
47             =head2 loadlines
48              
49             my @lines = loadlines("mydata.txt");
50             my @lines = loadlines("mydata.txt", $options);
51              
52             Basically, the file is opened, read, decoded and split into lines
53             that are returned in the result array. Line terminators are removed.
54              
55             In scalar context, returns an array reference.
56              
57             The first argument may be the name of a file, an opened file handle,
58             or a reference to a string that contains the data.
59              
60             The second argument can be used to influence the behaviour.
61             It is a hash reference of option settings.
62              
63             Note that loadlines() is a I, it reads the whole file into
64             memory and requires temporarily memory for twice the size of the
65             file.
66              
67             =over
68              
69             =item split
70              
71             Enabled by default.
72              
73             If set to zero, the data is not split into lines but returned as a
74             single string.
75              
76             =item chomp
77              
78             Enabled by default.
79              
80             If set to zero, the line terminators are not removed from the
81             resultant lines.
82              
83             =item encoding
84              
85             If specified, loadlines() will use this encoding to decode the file
86             data if it cannot automatically detect the encoding.
87              
88             If you pass an options hash, File::LoadLines will set C to
89             the encoding it detected and used for this file data.
90              
91             =back
92              
93             =cut
94              
95             sub loadlines {
96 25     25 1 55947 my ( $filename, $options ) = @_;
97 25 100       110 croak("Missing filename.\n") unless defined $filename;
98 24 100 100     113 croak("Invalid options.\n") if (defined $options && (ref($options) ne "HASH"));
99              
100 23   50     155 $options->{split} //= 1;
101 23   50     102 $options->{chomp} //= 1;
102              
103 23         43 my $data; # slurped file data
104             my $encoded; # already encoded
105              
106             # Gather data from the input.
107 23 50       87 if ( ref($filename) ) {
    50          
108 0 0       0 if ( ref($filename) eq 'GLOB' ) {
109 0         0 binmode( $filename, ':raw' );
110 0         0 $data = do { local $/; <$filename> };
  0         0  
  0         0  
111 0         0 $filename = "__GLOB__";
112             }
113             else {
114 0         0 $data = $$filename;
115 0         0 $filename = "__STRING__";
116 0         0 $encoded++;
117             }
118             }
119             elsif ( $filename eq '-' ) {
120 0         0 $filename = "__STDIN__";
121 0         0 $data = do { local $/; };
  0         0  
  0         0  
122             }
123             else {
124 23         45 my $name = $filename;
125 23         85 $filename = decode_utf8($name);
126             # On MS Windows, non-latin (wide) filenames need special treatment.
127 23 50 66     791 if ( $filename ne $name && $^O =~ /mswin/i ) {
128 0         0 require Win32API::File;
129 0         0 my $fn = encode('UTF-16LE', "$filename").chr(0).chr(0);
130 0         0 my $fh = Win32API::File::CreateFileW
131             ( $fn, Win32API::File::FILE_READ_DATA(), 0, [],
132             Win32API::File::OPEN_EXISTING(), 0, []);
133 0 0       0 croak("$filename: $^E (Win32)\n") if $^E;
134 0 0       0 Win32API::File::OsFHandleOpen( 'FILE', $fh, "r")
135             or croak("$filename: $!\n");
136 0         0 $data = do { local $/; readline(\*FILE) };
  0         0  
  0         0  
137             # warn("$filename³: len=", length($data), "\n");
138             }
139             else {
140 23 50       1121 open( my $f, '<', $filename )
141             or croak("$filename: $!\n");
142 23         70 $data = do { local $/; <$f> };
  23         111  
  23         928  
143             }
144             }
145 23 50       129 $options->{_filesource} = $filename if $options;
146              
147 23         111 my $name = encode_utf8($filename);
148 23 50       477 if ( $encoded ) {
    100          
    50          
    50          
    100          
    50          
    100          
149             # Nothing to do, already dealt with.
150 0         0 $options->{encoding} = 'Perl';
151             }
152              
153             # Detect Byte Order Mark.
154             elsif ( $data =~ /^\xEF\xBB\xBF/ ) {
155 6 50       27 warn("$name is UTF-8 (BOM)\n") if $options->{debug};
156 6         14 $options->{encoding} = 'UTF-8';
157 6         25 $data = decode( "UTF-8", substr($data, 3) );
158             }
159             elsif ( $data =~ /^\xFE\xFF/ ) {
160 0 0       0 warn("$name is UTF-16BE (BOM)\n") if $options->{debug};
161 0         0 $options->{encoding} = 'UTF-16BE';
162 0         0 $data = decode( "UTF-16BE", substr($data, 2) );
163             }
164             elsif ( $data =~ /^\xFF\xFE\x00\x00/ ) {
165 0 0       0 warn("$name is UTF-32LE (BOM)\n") if $options->{debug};
166 0         0 $options->{encoding} = 'UTF-32LE';
167 0         0 $data = decode( "UTF-32LE", substr($data, 4) );
168             }
169             elsif ( $data =~ /^\xFF\xFE/ ) {
170 6 50       26 warn("$name is UTF-16LE (BOM)\n") if $options->{debug};
171 6         17 $options->{encoding} = 'UTF-16LE';
172 6         24 $data = decode( "UTF-16LE", substr($data, 2) );
173             }
174             elsif ( $data =~ /^\x00\x00\xFE\xFF/ ) {
175 0 0       0 warn("$name is UTF-32BE (BOM)\n") if $options->{debug};
176 0         0 $options->{encoding} = 'UTF-32BE';
177 0         0 $data = decode( "UTF-32BE", substr($data, 4) );
178             }
179              
180             # No BOM, did user specify an encoding?
181             elsif ( $options->{encoding} ) {
182             warn("$name is ", $options->{encoding}, " (fallback)\n")
183 1 50       5 if $options->{debug};
184 1         7 $data = decode( $options->{encoding}, $data, 1 );
185             }
186              
187             # Try UTF8, fallback to ISO-8895.1.
188             else {
189 10         24 my $d = eval { decode( "UTF-8", $data, 1 ) };
  10         37  
190 10 100       1322 if ( $@ ) {
    100          
191 1 50       4 warn("$name is ISO-8859.1 (assumed)\n") if $options->{debug};
192 1         3 $options->{encoding} = 'ISO-8859-1';
193 1         4 $data = decode( "iso-8859-1", $data );
194             }
195             elsif ( $d !~ /[^[:ascii:]]/ ) {
196 2 50       10 warn("$name is ASCII (detected)\n") if $options->{debug};
197 2         6 $options->{encoding} = 'ASCII';
198 2         4 $data = $d;
199             }
200             else {
201 7 50       26 warn("$name is UTF-8 (detected)\n") if $options->{debug};
202 7         18 $options->{encoding} = 'UTF-8';
203 7         15 $data = $d;
204             }
205             }
206              
207 23 50       11383 return $data unless $options->{split};
208              
209             # Split in lines;
210 23         44 my @lines;
211 23 50       59 if ( $options->{chomp} ) {
212             # Unless empty, make sure there is a final newline.
213 23 100       185 $data .= "\n" if $data =~ /.(?!\r\n|\n|\r)\z/;
214             # We need to maintain trailing newlines.
215 23         478 push( @lines, $1 ) while $data =~ /(.*?)(?:\r\n|\n|\r)/g;
216             }
217             else {
218             # We need to maintain trailing newlines.
219 0         0 push( @lines, $1 ) while $data =~ /(.*?(?:\r\n|\n|\r))/g;
220             }
221 23         63 undef $data;
222 23 100       140 return wantarray ? @lines : \@lines;
223             }
224              
225             =head1 SEE ALSO
226              
227             There are currently no other modules that handle BOM detection and
228             line splitting.
229              
230             I have a faint hope that future versions of Perl and Raku will deal
231             with this transparently, but I fear the worst.
232              
233             =head1 HINTS
234              
235             When you have raw file data (e.g. from a zip), you can use loadlines()
236             to decode and unpack:
237              
238             open( my $data, '<', \$contents );
239             $lines = loadlines( $data, $options );
240              
241             =head1 AUTHOR
242              
243             Johan Vromans, C<< >>
244              
245             =head1 SUPPORT AND DOCUMENTATION
246              
247             Development of this module takes place on GitHub:
248             https://github.com/sciurius/perl-File-LoadLines.
249              
250             You can find documentation for this module with the perldoc command.
251              
252             perldoc File::LoadLines
253              
254             Please report any bugs or feature requests using the issue tracker on
255             GitHub.
256              
257             =head1 COPYRIGHT & LICENSE
258              
259             Copyright 2018,2020 Johan Vromans, all rights reserved.
260              
261             This program is free software; you can redistribute it and/or modify it
262             under the same terms as Perl itself.
263              
264             =cut
265              
266             1; # End of File::LoadLines