| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | #! perl | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | package File::LoadLines; | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 8 |  |  | 8 |  | 596261 | use warnings; | 
|  | 8 |  |  |  |  | 87 |  | 
|  | 8 |  |  |  |  | 291 |  | 
| 6 | 8 |  |  | 8 |  | 107 | use strict; | 
|  | 8 |  |  |  |  | 15 |  | 
|  | 8 |  |  |  |  | 197 |  | 
| 7 | 8 |  |  | 8 |  | 42 | use base 'Exporter'; | 
|  | 8 |  |  |  |  | 15 |  | 
|  | 8 |  |  |  |  | 1503 |  | 
| 8 |  |  |  |  |  |  | our @EXPORT = qw( loadlines ); | 
| 9 | 8 |  |  | 8 |  | 4236 | use Encode; | 
|  | 8 |  |  |  |  | 77099 |  | 
|  | 8 |  |  |  |  | 629 |  | 
| 10 | 8 |  |  | 8 |  | 66 | use Carp; | 
|  | 8 |  |  |  |  | 19 |  | 
|  | 8 |  |  |  |  | 530 |  | 
| 11 | 8 |  |  | 8 |  | 647 | use utf8; | 
|  | 8 |  |  |  |  | 27 |  | 
|  | 8 |  |  |  |  | 54 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | =head1 NAME | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | File::LoadLines - Load lines from file | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | =cut | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | our $VERSION = '1.02'; | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | use File::LoadLines; | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | my @lines = loadlines("mydata.txt"); | 
| 26 |  |  |  |  |  |  | ... | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | File::LoadLines provides an easy way to load the contents of a text | 
| 31 |  |  |  |  |  |  | file into an array of lines. It is intended for relatively small files | 
| 32 |  |  |  |  |  |  | like config files that are often produced by weird tools (and users). | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | It automatically handles ASCII, Latin-1 and UTF-8 text. | 
| 35 |  |  |  |  |  |  | When the file has a BOM, it handles UTF-8, UTF-16 LE and BE, and | 
| 36 |  |  |  |  |  |  | UTF-32 LE and BE. | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | Recognized line terminators are NL (Unix, Linux), CRLF (DOS, Windows) | 
| 39 |  |  |  |  |  |  | and CR (Mac) | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | =head1 EXPORT | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | =head2 loadlines | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | =head1 FUNCTIONS | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | =head2 loadlines | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | my @lines = loadlines("mydata.txt"); | 
| 50 |  |  |  |  |  |  | my @lines = loadlines("mydata.txt", $options); | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | Basically, the file is opened, read, decoded and split into lines | 
| 53 |  |  |  |  |  |  | that are returned in the result array. Line terminators are removed. | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | In scalar context, returns an array reference. | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | The first argument may be the name of a file, an opened file handle, | 
| 58 |  |  |  |  |  |  | or a reference to a string that contains the data. | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | The second argument can be used to influence the behaviour. | 
| 61 |  |  |  |  |  |  | It is a hash reference of option settings. | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | Note that loadlines() is a I, it reads the whole file into | 
| 64 |  |  |  |  |  |  | memory and requires temporarily memory for twice the size of the | 
| 65 |  |  |  |  |  |  | file. | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | =over | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | =item split | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | Enabled by default. | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | If set to zero, the data is not split into lines but returned as a | 
| 74 |  |  |  |  |  |  | single string. | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | =item chomp | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | Enabled by default. | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | If set to zero, the line terminators are not removed from the | 
| 81 |  |  |  |  |  |  | resultant lines. | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | =item encoding | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  | If specified, loadlines() will use this encoding to decode the file | 
| 86 |  |  |  |  |  |  | data if it cannot automatically detect the encoding. | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | If you pass an options hash, File::LoadLines will set C to | 
| 89 |  |  |  |  |  |  | the encoding it detected and used for this file data. | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | =back | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  | =cut | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | sub loadlines { | 
| 96 | 25 |  |  | 25 | 1 | 55947 | my ( $filename, $options ) = @_; | 
| 97 | 25 | 100 |  |  |  | 110 | croak("Missing filename.\n") unless defined $filename; | 
| 98 | 24 | 100 | 100 |  |  | 113 | croak("Invalid options.\n")  if (defined $options && (ref($options) ne "HASH")); | 
| 99 |  |  |  |  |  |  |  | 
| 100 | 23 |  | 50 |  |  | 155 | $options->{split} //= 1; | 
| 101 | 23 |  | 50 |  |  | 102 | $options->{chomp} //= 1; | 
| 102 |  |  |  |  |  |  |  | 
| 103 | 23 |  |  |  |  | 43 | my $data;			# slurped file data | 
| 104 |  |  |  |  |  |  | my $encoded;		# already encoded | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | # Gather data from the input. | 
| 107 | 23 | 50 |  |  |  | 87 | if ( ref($filename) ) { | 
|  |  | 50 |  |  |  |  |  | 
| 108 | 0 | 0 |  |  |  | 0 | if ( ref($filename) eq 'GLOB' ) { | 
| 109 | 0 |  |  |  |  | 0 | binmode( $filename, ':raw' ); | 
| 110 | 0 |  |  |  |  | 0 | $data = do { local $/; <$filename> }; | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 111 | 0 |  |  |  |  | 0 | $filename = "__GLOB__"; | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  | else { | 
| 114 | 0 |  |  |  |  | 0 | $data = $$filename; | 
| 115 | 0 |  |  |  |  | 0 | $filename = "__STRING__"; | 
| 116 | 0 |  |  |  |  | 0 | $encoded++; | 
| 117 |  |  |  |  |  |  | } | 
| 118 |  |  |  |  |  |  | } | 
| 119 |  |  |  |  |  |  | elsif ( $filename eq '-' ) { | 
| 120 | 0 |  |  |  |  | 0 | $filename = "__STDIN__"; | 
| 121 | 0 |  |  |  |  | 0 | $data = do { local $/;  }; | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  | else { | 
| 124 | 23 |  |  |  |  | 45 | my $name = $filename; | 
| 125 | 23 |  |  |  |  | 85 | $filename = decode_utf8($name); | 
| 126 |  |  |  |  |  |  | # On MS Windows, non-latin (wide) filenames need special treatment. | 
| 127 | 23 | 50 | 66 |  |  | 791 | if ( $filename ne $name && $^O =~ /mswin/i ) { | 
| 128 | 0 |  |  |  |  | 0 | require Win32API::File; | 
| 129 | 0 |  |  |  |  | 0 | my $fn = encode('UTF-16LE', "$filename").chr(0).chr(0); | 
| 130 | 0 |  |  |  |  | 0 | my $fh = Win32API::File::CreateFileW | 
| 131 |  |  |  |  |  |  | ( $fn, Win32API::File::FILE_READ_DATA(), 0, [], | 
| 132 |  |  |  |  |  |  | Win32API::File::OPEN_EXISTING(), 0, []); | 
| 133 | 0 | 0 |  |  |  | 0 | croak("$filename: $^E (Win32)\n") if $^E; | 
| 134 | 0 | 0 |  |  |  | 0 | Win32API::File::OsFHandleOpen( 'FILE', $fh, "r") | 
| 135 |  |  |  |  |  |  | or croak("$filename: $!\n"); | 
| 136 | 0 |  |  |  |  | 0 | $data = do { local $/; readline(\*FILE) }; | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 137 |  |  |  |  |  |  | # warn("$filename³: len=", length($data), "\n"); | 
| 138 |  |  |  |  |  |  | } | 
| 139 |  |  |  |  |  |  | else { | 
| 140 | 23 | 50 |  |  |  | 1121 | open( my $f, '<', $filename ) | 
| 141 |  |  |  |  |  |  | or croak("$filename: $!\n"); | 
| 142 | 23 |  |  |  |  | 70 | $data = do { local $/; <$f> }; | 
|  | 23 |  |  |  |  | 111 |  | 
|  | 23 |  |  |  |  | 928 |  | 
| 143 |  |  |  |  |  |  | } | 
| 144 |  |  |  |  |  |  | } | 
| 145 | 23 | 50 |  |  |  | 129 | $options->{_filesource} = $filename if $options; | 
| 146 |  |  |  |  |  |  |  | 
| 147 | 23 |  |  |  |  | 111 | my $name = encode_utf8($filename); | 
| 148 | 23 | 50 |  |  |  | 477 | if ( $encoded ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | # Nothing to do, already dealt with. | 
| 150 | 0 |  |  |  |  | 0 | $options->{encoding} = 'Perl'; | 
| 151 |  |  |  |  |  |  | } | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | # Detect Byte Order Mark. | 
| 154 |  |  |  |  |  |  | elsif ( $data =~ /^\xEF\xBB\xBF/ ) { | 
| 155 | 6 | 50 |  |  |  | 27 | warn("$name is UTF-8 (BOM)\n") if $options->{debug}; | 
| 156 | 6 |  |  |  |  | 14 | $options->{encoding} = 'UTF-8'; | 
| 157 | 6 |  |  |  |  | 25 | $data = decode( "UTF-8", substr($data, 3) ); | 
| 158 |  |  |  |  |  |  | } | 
| 159 |  |  |  |  |  |  | elsif ( $data =~ /^\xFE\xFF/ ) { | 
| 160 | 0 | 0 |  |  |  | 0 | warn("$name is UTF-16BE (BOM)\n") if $options->{debug}; | 
| 161 | 0 |  |  |  |  | 0 | $options->{encoding} = 'UTF-16BE'; | 
| 162 | 0 |  |  |  |  | 0 | $data = decode( "UTF-16BE", substr($data, 2) ); | 
| 163 |  |  |  |  |  |  | } | 
| 164 |  |  |  |  |  |  | elsif ( $data =~ /^\xFF\xFE\x00\x00/ ) { | 
| 165 | 0 | 0 |  |  |  | 0 | warn("$name is UTF-32LE (BOM)\n") if $options->{debug}; | 
| 166 | 0 |  |  |  |  | 0 | $options->{encoding} = 'UTF-32LE'; | 
| 167 | 0 |  |  |  |  | 0 | $data = decode( "UTF-32LE", substr($data, 4) ); | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  | elsif ( $data =~ /^\xFF\xFE/ ) { | 
| 170 | 6 | 50 |  |  |  | 26 | warn("$name is UTF-16LE (BOM)\n") if $options->{debug}; | 
| 171 | 6 |  |  |  |  | 17 | $options->{encoding} = 'UTF-16LE'; | 
| 172 | 6 |  |  |  |  | 24 | $data = decode( "UTF-16LE", substr($data, 2) ); | 
| 173 |  |  |  |  |  |  | } | 
| 174 |  |  |  |  |  |  | elsif ( $data =~ /^\x00\x00\xFE\xFF/ ) { | 
| 175 | 0 | 0 |  |  |  | 0 | warn("$name is UTF-32BE (BOM)\n") if $options->{debug}; | 
| 176 | 0 |  |  |  |  | 0 | $options->{encoding} = 'UTF-32BE'; | 
| 177 | 0 |  |  |  |  | 0 | $data = decode( "UTF-32BE", substr($data, 4) ); | 
| 178 |  |  |  |  |  |  | } | 
| 179 |  |  |  |  |  |  |  | 
| 180 |  |  |  |  |  |  | # No BOM, did user specify an encoding? | 
| 181 |  |  |  |  |  |  | elsif ( $options->{encoding} ) { | 
| 182 |  |  |  |  |  |  | warn("$name is ", $options->{encoding}, " (fallback)\n") | 
| 183 | 1 | 50 |  |  |  | 5 | if $options->{debug}; | 
| 184 | 1 |  |  |  |  | 7 | $data = decode( $options->{encoding}, $data, 1 ); | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  |  | 
| 187 |  |  |  |  |  |  | # Try UTF8, fallback to ISO-8895.1. | 
| 188 |  |  |  |  |  |  | else { | 
| 189 | 10 |  |  |  |  | 24 | my $d = eval { decode( "UTF-8", $data, 1 ) }; | 
|  | 10 |  |  |  |  | 37 |  | 
| 190 | 10 | 100 |  |  |  | 1322 | if ( $@ ) { | 
|  |  | 100 |  |  |  |  |  | 
| 191 | 1 | 50 |  |  |  | 4 | warn("$name is ISO-8859.1 (assumed)\n") if $options->{debug}; | 
| 192 | 1 |  |  |  |  | 3 | $options->{encoding} = 'ISO-8859-1'; | 
| 193 | 1 |  |  |  |  | 4 | $data = decode( "iso-8859-1", $data ); | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  | elsif ( $d !~ /[^[:ascii:]]/ ) { | 
| 196 | 2 | 50 |  |  |  | 10 | warn("$name is ASCII (detected)\n") if $options->{debug}; | 
| 197 | 2 |  |  |  |  | 6 | $options->{encoding} = 'ASCII'; | 
| 198 | 2 |  |  |  |  | 4 | $data = $d; | 
| 199 |  |  |  |  |  |  | } | 
| 200 |  |  |  |  |  |  | else { | 
| 201 | 7 | 50 |  |  |  | 26 | warn("$name is UTF-8 (detected)\n") if $options->{debug}; | 
| 202 | 7 |  |  |  |  | 18 | $options->{encoding} = 'UTF-8'; | 
| 203 | 7 |  |  |  |  | 15 | $data = $d; | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  | } | 
| 206 |  |  |  |  |  |  |  | 
| 207 | 23 | 50 |  |  |  | 11383 | return $data unless $options->{split}; | 
| 208 |  |  |  |  |  |  |  | 
| 209 |  |  |  |  |  |  | # Split in lines; | 
| 210 | 23 |  |  |  |  | 44 | my @lines; | 
| 211 | 23 | 50 |  |  |  | 59 | if ( $options->{chomp} ) { | 
| 212 |  |  |  |  |  |  | # Unless empty, make sure there is a final newline. | 
| 213 | 23 | 100 |  |  |  | 185 | $data .= "\n" if $data =~ /.(?!\r\n|\n|\r)\z/; | 
| 214 |  |  |  |  |  |  | # We need to maintain trailing newlines. | 
| 215 | 23 |  |  |  |  | 478 | push( @lines, $1 ) while $data =~ /(.*?)(?:\r\n|\n|\r)/g; | 
| 216 |  |  |  |  |  |  | } | 
| 217 |  |  |  |  |  |  | else { | 
| 218 |  |  |  |  |  |  | # We need to maintain trailing newlines. | 
| 219 | 0 |  |  |  |  | 0 | push( @lines, $1 ) while $data =~ /(.*?(?:\r\n|\n|\r))/g; | 
| 220 |  |  |  |  |  |  | } | 
| 221 | 23 |  |  |  |  | 63 | undef $data; | 
| 222 | 23 | 100 |  |  |  | 140 | return wantarray ? @lines : \@lines; | 
| 223 |  |  |  |  |  |  | } | 
| 224 |  |  |  |  |  |  |  | 
| 225 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 226 |  |  |  |  |  |  |  | 
| 227 |  |  |  |  |  |  | There are currently no other modules that handle BOM detection and | 
| 228 |  |  |  |  |  |  | line splitting. | 
| 229 |  |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | I have a faint hope that future versions of Perl and Raku will deal | 
| 231 |  |  |  |  |  |  | with this transparently, but I fear the worst. | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  | =head1 HINTS | 
| 234 |  |  |  |  |  |  |  | 
| 235 |  |  |  |  |  |  | When you have raw file data (e.g. from a zip), you can use loadlines() | 
| 236 |  |  |  |  |  |  | to decode and unpack: | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | open( my $data, '<', \$contents ); | 
| 239 |  |  |  |  |  |  | $lines = loadlines( $data, $options ); | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | =head1 AUTHOR | 
| 242 |  |  |  |  |  |  |  | 
| 243 |  |  |  |  |  |  | Johan Vromans, C<<  >> | 
| 244 |  |  |  |  |  |  |  | 
| 245 |  |  |  |  |  |  | =head1 SUPPORT AND DOCUMENTATION | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | Development of this module takes place on GitHub: | 
| 248 |  |  |  |  |  |  | https://github.com/sciurius/perl-File-LoadLines. | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | You can find documentation for this module with the perldoc command. | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | perldoc File::LoadLines | 
| 253 |  |  |  |  |  |  |  | 
| 254 |  |  |  |  |  |  | Please report any bugs or feature requests using the issue tracker on | 
| 255 |  |  |  |  |  |  | GitHub. | 
| 256 |  |  |  |  |  |  |  | 
| 257 |  |  |  |  |  |  | =head1 COPYRIGHT & LICENSE | 
| 258 |  |  |  |  |  |  |  | 
| 259 |  |  |  |  |  |  | Copyright 2018,2020 Johan Vromans, all rights reserved. | 
| 260 |  |  |  |  |  |  |  | 
| 261 |  |  |  |  |  |  | This program is free software; you can redistribute it and/or modify it | 
| 262 |  |  |  |  |  |  | under the same terms as Perl itself. | 
| 263 |  |  |  |  |  |  |  | 
| 264 |  |  |  |  |  |  | =cut | 
| 265 |  |  |  |  |  |  |  | 
| 266 |  |  |  |  |  |  | 1; # End of File::LoadLines |