| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Role::TinyCommons::Collection::PickItems::RandomSeekLines; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
397752
|
use Role::Tiny; |
|
|
1
|
|
|
|
|
7959
|
|
|
|
1
|
|
|
|
|
9
|
|
|
4
|
1
|
|
|
1
|
|
917
|
use Role::Tiny::With; |
|
|
1
|
|
|
|
|
342
|
|
|
|
1
|
|
|
|
|
709
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY |
|
7
|
|
|
|
|
|
|
our $DATE = '2024-01-16'; # DATE |
|
8
|
|
|
|
|
|
|
our $DIST = 'Role-TinyCommons-Collection'; # DIST |
|
9
|
|
|
|
|
|
|
our $VERSION = '0.010'; # VERSION |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
requires 'fh'; |
|
12
|
|
|
|
|
|
|
# optionally depended methods |
|
13
|
|
|
|
|
|
|
# fh_min_offset |
|
14
|
|
|
|
|
|
|
# fh_max_offset |
|
15
|
|
|
|
|
|
|
# cmp_items |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
with 'Role::TinyCommons::Collection::PickItems'; |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
sub pick_items { |
|
20
|
0
|
|
|
0
|
0
|
|
my ($self, %args) = @_; |
|
21
|
0
|
|
0
|
|
|
|
my $n = $args{n} || 1; |
|
22
|
0
|
0
|
|
|
|
|
my $allow_resampling = defined $args{allow_resampling} ? $args{allow_resampling} : 0; |
|
23
|
0
|
|
0
|
|
|
|
my $max_attempts = $args{max_attempts} || 10_000; |
|
24
|
|
|
|
|
|
|
|
|
25
|
0
|
|
|
|
|
|
my $fh = $self->fh; |
|
26
|
0
|
0
|
|
|
|
|
my $fh_min_offset = $self->can('fh_min_offset') ? $self->fh_min_offset : 0; |
|
27
|
0
|
0
|
|
|
|
|
my $fh_max_offset = $self->can('fh_max_offset') ? $self->fh_max_offset : undef; |
|
28
|
0
|
0
|
|
|
|
|
unless (defined $fh_max_offset) { |
|
29
|
0
|
0
|
|
|
|
|
my @st = stat($fh) or die "Can't stat filehandle: $!"; |
|
30
|
0
|
|
|
|
|
|
$fh_max_offset = $st[7]-1; |
|
31
|
|
|
|
|
|
|
} |
|
32
|
|
|
|
|
|
|
#print "D:fh_min_offset=$fh_min_offset, fh_max_offset=$fh_max_offset\n"; |
|
33
|
|
|
|
|
|
|
|
|
34
|
0
|
0
|
0
|
|
|
|
return () unless $fh_max_offset >= 0 && $fh_max_offset > $fh_min_offset; |
|
35
|
|
|
|
|
|
|
|
|
36
|
0
|
|
|
|
|
|
my @items; |
|
37
|
|
|
|
|
|
|
my %used_pos; |
|
38
|
0
|
|
|
|
|
|
my $attempts = 0; |
|
39
|
|
|
|
|
|
|
PICK: |
|
40
|
0
|
|
|
|
|
|
while (@items < $n) { |
|
41
|
0
|
0
|
|
|
|
|
if ($attempts++ > $max_attempts) { |
|
42
|
0
|
|
|
|
|
|
warn "max_attempts exceeded, only picked ".scalar(@items)." out of $n"; |
|
43
|
0
|
|
|
|
|
|
last PICK; |
|
44
|
|
|
|
|
|
|
} |
|
45
|
|
|
|
|
|
|
|
|
46
|
0
|
|
|
|
|
|
my ($line, $pos); |
|
47
|
|
|
|
|
|
|
GET_RANDOM_LINE: { |
|
48
|
0
|
|
|
|
|
|
my $pos0 = int(rand($fh_max_offset-$fh_min_offset+1)) + $fh_min_offset; |
|
|
0
|
|
|
|
|
|
|
|
49
|
0
|
|
|
|
|
|
seek $fh, $pos0, 0; # XXX this is random *bytes* |
|
50
|
0
|
0
|
|
|
|
|
if ($pos0 > $fh_min_offset) { |
|
51
|
|
|
|
|
|
|
# discard partial line |
|
52
|
0
|
|
|
|
|
|
<$fh>; |
|
53
|
|
|
|
|
|
|
} |
|
54
|
0
|
|
|
|
|
|
$line = <$fh>; |
|
55
|
0
|
|
|
|
|
|
$pos = tell $fh; |
|
56
|
0
|
0
|
|
|
|
|
die "Can't tell filehandle position: $!" if $pos < 0; |
|
57
|
0
|
0
|
0
|
|
|
|
next PICK if !defined($line) || ($pos >= $fh_max_offset && $line !~ /\R\z/); |
|
|
|
|
0
|
|
|
|
|
|
58
|
|
|
|
|
|
|
#print "D:line=<$line>\n"; |
|
59
|
0
|
|
|
|
|
|
chomp($line); |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
|
|
62
|
0
|
0
|
|
|
|
|
unless ($allow_resampling) { |
|
63
|
0
|
0
|
|
|
|
|
next if $used_pos{$pos}++; |
|
64
|
|
|
|
|
|
|
} |
|
65
|
0
|
|
|
|
|
|
push @items, $line; |
|
66
|
|
|
|
|
|
|
} |
|
67
|
0
|
|
|
|
|
|
@items; |
|
68
|
|
|
|
|
|
|
} |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
1; |
|
71
|
|
|
|
|
|
|
# ABSTRACT: Provide pick_items() that picks items by random seeking lines in a (file)handle |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
__END__ |