File Coverage

blib/lib/App/PickRandomLines.pm

Criterion	Covered	Total	%
statement	8	29	27.5
branch	0	10	0.0
condition	0	9	0.0
subroutine	3	4	75.0
pod	1	1	100.0
total	12	53	22.6

line	stmt	bran	cond	sub	pod	time	code
1							package App::PickRandomLines;
2
3	1			1		335161	use 5.010001;
	1					4
4	1			1		7	use strict;
	1					2
	1					30
5	1			1		11	use warnings;
	1					2
	1					638
6
7							our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8							our $DATE = '2023-11-20'; # DATE
9							our $DIST = 'App-PickRandomLines'; # DIST
10							our $VERSION = '0.021'; # VERSION
11
12							our %SPEC;
13
14							$SPEC{pick_random_lines} = {
15							v => 1.1,
16							summary => 'Pick one or more random lines from input',
17							description => <<'_',
18
19							TODO:
20							* option to allow or disallow duplicates
21
22							_
23							args => {
24							files => {
25							schema => ['array', of=>'filename'],
26							'x.name.is_plural' => 1,
27							pos => 0,
28							greedy => 1,
29							description => <<'_',
30
31							If none is specified, will get input from stdin.
32
33							_
34							},
35							algorithm => {
36							schema => ['str*', in=>[qw/scan seek/]],
37							default => 'scan',
38							description => <<'_',
39
40							`scan` is the algorithm described in the `perlfaq` manual (`perldoc -q "random
41							line"). This algorithm scans the whole input once and picks one or more lines
42							randomly from it.
43
44							`seek` is the algorithm employed by the Perl module `File::RandomLine`. It works
45							by seeking a file randomly and finding the next line (repeated `n` number of
46							times). This algorithm is faster when the input is very large as it avoids
47							having to scan the whole input. But it requires that the input is seekable (a
48							single file, stdin is not supported and currently multiple files are not
49							supported as well). Might produce duplicate lines.
50
51							_
52							},
53							num_lines => {
54							schema => ['int*', min=>1],
55							default => 1,
56							cmdline_aliases => {n=>{}},
57							description => <<'_',
58
59							If input contains less lines than the requested number of lines, then will only
60							return as many lines as the input contains.
61
62							_
63							},
64							},
65							links => [
66							{url=>'pm:Data::Unixish::pick'},
67							{
68							url=>'prog:shuf',
69							summary=>'The venerable Unix utility',
70							description => <<'MARKDOWN'
71
72							`shuf -n` is a Unix idiom for when wanting to pick one or several lines from an
73							input. Our `pick` is generally slower than the optimized C-based utility, but
74							offers several pick algorithms like `scan` (which does not need to hold the
75							entire input in memory for shuffling) and `seek` (which does not need to scan
76							the entire input).
77
78							MARKDOWN
79							},
80							],
81							};
82							sub pick_random_lines {
83	0			0	1		my %args = @_;
84
85							# XXX schema
86	0		0				my $n = $args{num_lines} // 1;
87	0	0					$n > 0 or return [400, "Please specify a positive number of lines"];
88	0		0				my $files = $args{files} // [];
89	0		0				my $algo = $args{algorithm} // 'scan';
90	0	0	0				$algo = 'scan' if !@$files || @$files > 1;
91
92	0						my @lines;
93	0	0					if ($algo eq 'scan') {
94	0						require File::Random::Pick;
95	0						my $path;
96	0	0					if (!@$files) {
		0
97	0						$path = \*STDIN;
98							} elsif (@$files > 1) {
99	0						$path = \*ARGV;
100							} else {
101	0						$path = $files->[0];
102							}
103	0						@lines = File::Random::Pick::random_line($path, $n);
104							} else {
105	0						require File::RandomLine;
106	0						my $rl = File::RandomLine->new($files->[0]);
107	0						for (1..$n) { push @lines, $rl->next }
	0
108							}
109	0						chomp @lines;
110	0						[200, "OK", \@lines];
111							}
112
113							1;
114							# ABSTRACT: Pick one or more random lines from input
115
116							__END__