| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package App::PickRandomLines; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
335161
|
use 5.010001; |
|
|
1
|
|
|
|
|
4
|
|
|
4
|
1
|
|
|
1
|
|
7
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
30
|
|
|
5
|
1
|
|
|
1
|
|
11
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
638
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY |
|
8
|
|
|
|
|
|
|
our $DATE = '2023-11-20'; # DATE |
|
9
|
|
|
|
|
|
|
our $DIST = 'App-PickRandomLines'; # DIST |
|
10
|
|
|
|
|
|
|
our $VERSION = '0.021'; # VERSION |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our %SPEC; |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
$SPEC{pick_random_lines} = { |
|
15
|
|
|
|
|
|
|
v => 1.1, |
|
16
|
|
|
|
|
|
|
summary => 'Pick one or more random lines from input', |
|
17
|
|
|
|
|
|
|
description => <<'_', |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
TODO: |
|
20
|
|
|
|
|
|
|
* option to allow or disallow duplicates |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
_ |
|
23
|
|
|
|
|
|
|
args => { |
|
24
|
|
|
|
|
|
|
files => { |
|
25
|
|
|
|
|
|
|
schema => ['array*', of=>'filename*'], |
|
26
|
|
|
|
|
|
|
'x.name.is_plural' => 1, |
|
27
|
|
|
|
|
|
|
pos => 0, |
|
28
|
|
|
|
|
|
|
greedy => 1, |
|
29
|
|
|
|
|
|
|
description => <<'_', |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
If none is specified, will get input from stdin. |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
_ |
|
34
|
|
|
|
|
|
|
}, |
|
35
|
|
|
|
|
|
|
algorithm => { |
|
36
|
|
|
|
|
|
|
schema => ['str*', in=>[qw/scan seek/]], |
|
37
|
|
|
|
|
|
|
default => 'scan', |
|
38
|
|
|
|
|
|
|
description => <<'_', |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
`scan` is the algorithm described in the `perlfaq` manual (`perldoc -q "random |
|
41
|
|
|
|
|
|
|
line"). This algorithm scans the whole input once and picks one or more lines |
|
42
|
|
|
|
|
|
|
randomly from it. |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
`seek` is the algorithm employed by the Perl module `File::RandomLine`. It works |
|
45
|
|
|
|
|
|
|
by seeking a file randomly and finding the next line (repeated `n` number of |
|
46
|
|
|
|
|
|
|
times). This algorithm is faster when the input is very large as it avoids |
|
47
|
|
|
|
|
|
|
having to scan the whole input. But it requires that the input is seekable (a |
|
48
|
|
|
|
|
|
|
single file, stdin is not supported and currently multiple files are not |
|
49
|
|
|
|
|
|
|
supported as well). *Might produce duplicate lines*. |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
_ |
|
52
|
|
|
|
|
|
|
}, |
|
53
|
|
|
|
|
|
|
num_lines => { |
|
54
|
|
|
|
|
|
|
schema => ['int*', min=>1], |
|
55
|
|
|
|
|
|
|
default => 1, |
|
56
|
|
|
|
|
|
|
cmdline_aliases => {n=>{}}, |
|
57
|
|
|
|
|
|
|
description => <<'_', |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
If input contains less lines than the requested number of lines, then will only |
|
60
|
|
|
|
|
|
|
return as many lines as the input contains. |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
_ |
|
63
|
|
|
|
|
|
|
}, |
|
64
|
|
|
|
|
|
|
}, |
|
65
|
|
|
|
|
|
|
links => [ |
|
66
|
|
|
|
|
|
|
{url=>'pm:Data::Unixish::pick'}, |
|
67
|
|
|
|
|
|
|
{ |
|
68
|
|
|
|
|
|
|
url=>'prog:shuf', |
|
69
|
|
|
|
|
|
|
summary=>'The venerable Unix utility', |
|
70
|
|
|
|
|
|
|
description => <<'MARKDOWN' |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
`shuf -n` is a Unix idiom for when wanting to pick one or several lines from an |
|
73
|
|
|
|
|
|
|
input. Our `pick` is generally slower than the optimized C-based utility, but |
|
74
|
|
|
|
|
|
|
offers several pick algorithms like `scan` (which does not need to hold the |
|
75
|
|
|
|
|
|
|
entire input in memory for shuffling) and `seek` (which does not need to scan |
|
76
|
|
|
|
|
|
|
the entire input). |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
MARKDOWN |
|
79
|
|
|
|
|
|
|
}, |
|
80
|
|
|
|
|
|
|
], |
|
81
|
|
|
|
|
|
|
}; |
|
82
|
|
|
|
|
|
|
sub pick_random_lines { |
|
83
|
0
|
|
|
0
|
1
|
|
my %args = @_; |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# XXX schema |
|
86
|
0
|
|
0
|
|
|
|
my $n = $args{num_lines} // 1; |
|
87
|
0
|
0
|
|
|
|
|
$n > 0 or return [400, "Please specify a positive number of lines"]; |
|
88
|
0
|
|
0
|
|
|
|
my $files = $args{files} // []; |
|
89
|
0
|
|
0
|
|
|
|
my $algo = $args{algorithm} // 'scan'; |
|
90
|
0
|
0
|
0
|
|
|
|
$algo = 'scan' if !@$files || @$files > 1; |
|
91
|
|
|
|
|
|
|
|
|
92
|
0
|
|
|
|
|
|
my @lines; |
|
93
|
0
|
0
|
|
|
|
|
if ($algo eq 'scan') { |
|
94
|
0
|
|
|
|
|
|
require File::Random::Pick; |
|
95
|
0
|
|
|
|
|
|
my $path; |
|
96
|
0
|
0
|
|
|
|
|
if (!@$files) { |
|
|
|
0
|
|
|
|
|
|
|
97
|
0
|
|
|
|
|
|
$path = \*STDIN; |
|
98
|
|
|
|
|
|
|
} elsif (@$files > 1) { |
|
99
|
0
|
|
|
|
|
|
$path = \*ARGV; |
|
100
|
|
|
|
|
|
|
} else { |
|
101
|
0
|
|
|
|
|
|
$path = $files->[0]; |
|
102
|
|
|
|
|
|
|
} |
|
103
|
0
|
|
|
|
|
|
@lines = File::Random::Pick::random_line($path, $n); |
|
104
|
|
|
|
|
|
|
} else { |
|
105
|
0
|
|
|
|
|
|
require File::RandomLine; |
|
106
|
0
|
|
|
|
|
|
my $rl = File::RandomLine->new($files->[0]); |
|
107
|
0
|
|
|
|
|
|
for (1..$n) { push @lines, $rl->next } |
|
|
0
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
} |
|
109
|
0
|
|
|
|
|
|
chomp @lines; |
|
110
|
0
|
|
|
|
|
|
[200, "OK", \@lines]; |
|
111
|
|
|
|
|
|
|
} |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
1; |
|
114
|
|
|
|
|
|
|
# ABSTRACT: Pick one or more random lines from input |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
__END__ |