File Coverage

blib/lib/App/PickRandomLines.pm
Criterion Covered Total %
statement 8 29 27.5
branch 0 10 0.0
condition 0 9 0.0
subroutine 3 4 75.0
pod 1 1 100.0
total 12 53 22.6


line stmt bran cond sub pod time code
1             package App::PickRandomLines;
2              
3 1     1   335161 use 5.010001;
  1         4  
4 1     1   7 use strict;
  1         2  
  1         30  
5 1     1   11 use warnings;
  1         2  
  1         638  
6              
7             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8             our $DATE = '2023-11-20'; # DATE
9             our $DIST = 'App-PickRandomLines'; # DIST
10             our $VERSION = '0.021'; # VERSION
11              
12             our %SPEC;
13              
14             $SPEC{pick_random_lines} = {
15             v => 1.1,
16             summary => 'Pick one or more random lines from input',
17             description => <<'_',
18              
19             TODO:
20             * option to allow or disallow duplicates
21              
22             _
23             args => {
24             files => {
25             schema => ['array*', of=>'filename*'],
26             'x.name.is_plural' => 1,
27             pos => 0,
28             greedy => 1,
29             description => <<'_',
30              
31             If none is specified, input is read from stdin.
32              
33             _
34             },
35             algorithm => {
36             schema => ['str*', in=>[qw/scan seek/]],
37             default => 'scan',
38             description => <<'_',
39              
40             `scan` is the algorithm described in the `perlfaq` manual (`perldoc -q "random
41             line"`). This algorithm scans the whole input once and picks one or more lines
42             randomly from it.
43              
44             `seek` is the algorithm employed by the Perl module `File::RandomLine`. It works
45             by seeking a file randomly and finding the next line (repeated `n` number of
46             times). This algorithm is faster when the input is very large as it avoids
47             having to scan the whole input. But it requires that the input be seekable (a
48             single file; stdin is not supported, and multiple files are currently not
49             supported either). *Might produce duplicate lines*.
50              
51             _
52             },
53             num_lines => {
54             schema => ['int*', min=>1],
55             default => 1,
56             cmdline_aliases => {n=>{}},
57             description => <<'_',
58              
59             If the input contains fewer lines than the requested number of lines, only as
60             many lines as the input contains will be returned.
61              
62             _
63             },
64             },
65             links => [
66             {url=>'pm:Data::Unixish::pick'},
67             {
68             url=>'prog:shuf',
69             summary=>'The venerable Unix utility',
70             description => <<'MARKDOWN'
71              
72             `shuf -n` is a Unix idiom for picking one or several lines from an
73             input. Our `pick` is generally slower than the optimized C-based utility, but
74             offers several pick algorithms like `scan` (which does not need to hold the
75             entire input in memory for shuffling) and `seek` (which does not need to scan
76             the entire input).
77              
78             MARKDOWN
79             },
80             ],
81             };
82             sub pick_random_lines {
83 0     0 1   my %args = @_;
84              
85             # XXX schema
86 0   0       my $n = $args{num_lines} // 1;
87 0 0         $n > 0 or return [400, "Please specify a positive number of lines"];
88 0   0       my $files = $args{files} // [];
89 0   0       my $algo = $args{algorithm} // 'scan';
90 0 0 0       $algo = 'scan' if !@$files || @$files > 1;
91              
92 0           my @lines;
93 0 0         if ($algo eq 'scan') {
94 0           require File::Random::Pick;
95 0           my $path;
96 0 0         if (!@$files) {
    0          
97 0           $path = \*STDIN;
98             } elsif (@$files > 1) {
99 0           $path = \*ARGV;
100             } else {
101 0           $path = $files->[0];
102             }
103 0           @lines = File::Random::Pick::random_line($path, $n);
104             } else {
105 0           require File::RandomLine;
106 0           my $rl = File::RandomLine->new($files->[0]);
107 0           for (1..$n) { push @lines, $rl->next }
  0            
108             }
109 0           chomp @lines;
110 0           [200, "OK", \@lines];
111             }
112              
113             1;
114             # ABSTRACT: Pick one or more random lines from input
115              
116             __END__