line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
2
|
|
|
2
|
|
142859
|
use 5.006; |
|
2
|
|
|
|
|
9
|
|
|
2
|
|
|
|
|
83
|
|
2
|
2
|
|
|
2
|
|
11
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
159
|
|
3
|
2
|
|
|
2
|
|
13
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
107
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
package File::RandomLine; |
6
|
|
|
|
|
|
|
# ABSTRACT: Retrieve random lines from a file |
7
|
|
|
|
|
|
|
our $VERSION = '0.20'; # VERSION |
8
|
|
|
|
|
|
|
|
9
|
2
|
|
|
2
|
|
10
|
use Carp; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
151
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# Required modules |
12
|
2
|
|
|
2
|
|
2213
|
use Want 'howmany'; |
|
2
|
|
|
|
|
5645
|
|
|
2
|
|
|
|
|
20544
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub new { |
17
|
6
|
|
|
6
|
1
|
13613
|
my ($class, $filename, $args) = @_; |
18
|
6
|
100
|
|
|
|
52
|
croak "new requires a filename parameter" unless $filename; |
19
|
5
|
|
100
|
|
|
33
|
my $algo = $args->{algorithm} || q{}; |
20
|
5
|
100
|
100
|
|
|
71
|
croak "unknown algorithm '$algo'" if $algo && $algo !~ /fast|uniform/i; |
21
|
4
|
100
|
|
|
|
208
|
open(my $fh, "<", $filename) or croak "Can't read $filename"; |
22
|
3
|
100
|
|
|
|
16
|
my $line_index = lc $algo eq 'uniform' ? _index_file($fh) : undef ; |
23
|
3
|
|
|
|
|
28
|
my $filesize = -s $fh; |
24
|
3
|
100
|
|
|
|
25
|
my $self = { |
25
|
|
|
|
|
|
|
fh => $fh, |
26
|
|
|
|
|
|
|
line_index => $line_index, |
27
|
|
|
|
|
|
|
line_count => $line_index ? scalar @$line_index : undef, |
28
|
|
|
|
|
|
|
filesize => $filesize |
29
|
|
|
|
|
|
|
}; |
30
|
3
|
100
|
|
|
|
32
|
return bless( $self, ref($class) ? ref($class) : $class ); |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
34
|
|
|
|
|
|
|
# _index_file |
35
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
sub _index_file { |
38
|
1
|
|
|
1
|
|
3
|
my ($fh) = @_; |
39
|
1
|
|
|
|
|
2
|
my @index; |
40
|
1
|
|
|
|
|
30
|
while (! eof $fh) { |
41
|
4
|
|
|
|
|
8
|
push @index, tell $fh; |
42
|
4
|
|
|
|
|
19
|
<$fh>; |
43
|
|
|
|
|
|
|
} |
44
|
1
|
|
|
|
|
4
|
return \@index; |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
48
|
|
|
|
|
|
|
# next() |
49
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub next { |
53
|
60
|
|
|
60
|
1
|
89697
|
my ($self,$n) = @_; |
54
|
|
|
|
|
|
|
# behavior copied from File::Random |
55
|
60
|
100
|
100
|
|
|
619
|
if (!defined($n) and wantarray) { |
56
|
4
|
|
|
|
|
18
|
$n = howmany(); |
57
|
4
|
|
100
|
|
|
317
|
$n ||= 1; |
58
|
|
|
|
|
|
|
} |
59
|
60
|
100
|
100
|
|
|
282
|
unless (!defined($n) or $n =~ /^\d+$/) { |
60
|
6
|
|
|
|
|
105
|
croak "Number of random_lines should be a positive integer, not '$n'"; |
61
|
|
|
|
|
|
|
} |
62
|
54
|
100
|
100
|
|
|
322
|
carp "Strange call to File::Random->next(): 0 random lines requested" |
63
|
|
|
|
|
|
|
if defined($n) and $n == 0; |
64
|
54
|
|
100
|
|
|
1985
|
$n ||= 1; |
65
|
54
|
|
|
|
|
154
|
my @sample; |
66
|
54
|
|
|
|
|
141
|
while (@sample < $n) { |
67
|
60
|
100
|
|
|
|
255
|
push @sample, $self->{line_index} ? $self->_uniform : $self->_fast; |
68
|
|
|
|
|
|
|
} |
69
|
54
|
|
|
|
|
128
|
chomp @sample; |
70
|
54
|
100
|
|
|
|
544
|
return wantarray ? @sample : shift @sample; |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
75
|
|
|
|
|
|
|
# Fast Algorithm |
76
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub _fast { |
79
|
48
|
|
|
48
|
|
61
|
my $self = shift; |
80
|
48
|
|
|
|
|
109
|
my $fh = $self->{fh}; |
81
|
48
|
|
|
|
|
183
|
seek($fh,int(rand($self->{filesize})),0); |
82
|
48
|
|
|
|
|
6848
|
<$fh>; # skip this fragment of a line |
83
|
48
|
100
|
|
|
|
282
|
seek($fh,0,0) if eof $fh; # wrap if hit EOF |
84
|
48
|
|
|
|
|
438
|
return scalar <$fh>; # get the next line |
85
|
|
|
|
|
|
|
} |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
88
|
|
|
|
|
|
|
# Uniform Algorithm |
89
|
|
|
|
|
|
|
#--------------------------------------------------------------------------# |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sub _uniform { |
92
|
12
|
|
|
12
|
|
79
|
my $self = shift; |
93
|
12
|
|
|
|
|
16
|
my $fh = $self->{fh}; |
94
|
12
|
|
|
|
|
42
|
my $start = $self->{line_index}[int(rand($self->{line_count}))]; |
95
|
12
|
|
|
|
|
331
|
seek($fh,$start,0); |
96
|
12
|
|
|
|
|
125
|
return scalar <$fh>; # get the next line |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
1; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
__END__ |