| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package LTSV::LINQ; |
|
2
|
|
|
|
|
|
|
###################################################################### |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# LTSV::LINQ - LINQ-style query interface for LTSV files |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# https://metacpan.org/dist/LTSV-LINQ |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# Copyright (c) 2026 INABA Hitoshi |
|
9
|
|
|
|
|
|
|
###################################################################### |
|
10
|
|
|
|
|
|
|
|
|
11
|
12
|
|
|
12
|
|
93050
|
use 5.00503; # Universal Consensus 1998 for primetools |
|
|
12
|
|
|
|
|
73
|
|
|
12
|
|
|
|
|
|
|
# Perl 5.005_03 compatibility for historical toolchains |
|
13
|
|
|
|
|
|
|
# use 5.008001; # Lancaster Consensus 2013 for toolchains |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
$VERSION = '1.05'; |
|
16
|
|
|
|
|
|
|
$VERSION = $VERSION; |
|
17
|
|
|
|
|
|
|
# VERSION policy: avoid `our` for 5.005_03 compatibility. |
|
18
|
|
|
|
|
|
|
# Self-assignment prevents "used only once" warning under `use strict`. |
|
19
|
|
|
|
|
|
|
|
|
20
|
12
|
50
|
|
12
|
|
425
|
BEGIN { pop @INC if $INC[-1] eq '.' } # CVE-2016-1238: Important unsafe module load path flaw |
|
21
|
12
|
|
|
12
|
|
72
|
use strict; |
|
|
12
|
|
|
|
|
86
|
|
|
|
12
|
|
|
|
|
888
|
|
|
22
|
12
|
50
|
|
12
|
|
364
|
BEGIN { if ($] < 5.006) { $INC{'warnings.pm'} = 'stub'; eval 'package warnings; sub import {}' } } use warnings; local $^W=1; |
|
|
0
|
|
|
12
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
12
|
|
|
|
|
68
|
|
|
|
12
|
|
|
|
|
20
|
|
|
|
12
|
|
|
|
|
116553
|
|
|
23
|
|
|
|
|
|
|
# warnings.pm compatibility: stub with import() for Perl < 5.6 |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
26
|
|
|
|
|
|
|
# Constructor and Iterator Infrastructure |
|
27
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
sub new { |
|
30
|
487
|
|
|
487
|
1
|
715
|
my($class, $iterator) = @_; |
|
31
|
487
|
|
|
|
|
6609
|
return bless { iterator => $iterator }, $class; |
|
32
|
|
|
|
|
|
|
} |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub iterator { |
|
35
|
625
|
|
|
625
|
1
|
732
|
my $self = $_[0]; |
|
36
|
|
|
|
|
|
|
# If this object was created by _from_snapshot, _factory provides |
|
37
|
|
|
|
|
|
|
# a fresh iterator closure each time iterator() is called. |
|
38
|
625
|
100
|
|
|
|
1094
|
if (exists $self->{_factory}) { |
|
39
|
141
|
|
|
|
|
197
|
return $self->{_factory}->(); |
|
40
|
|
|
|
|
|
|
} |
|
41
|
484
|
|
|
|
|
660
|
return $self->{iterator}; |
|
42
|
|
|
|
|
|
|
} |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
45
|
|
|
|
|
|
|
# Data Source Methods |
|
46
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# From - create query from array |
|
49
|
|
|
|
|
|
|
sub From { |
|
50
|
316
|
|
|
316
|
1
|
1606012
|
my($class, $source) = @_; |
|
51
|
|
|
|
|
|
|
|
|
52
|
316
|
50
|
|
|
|
680
|
if (ref($source) eq 'ARRAY') { |
|
53
|
316
|
|
|
|
|
350
|
my $i = 0; |
|
54
|
|
|
|
|
|
|
return $class->new(sub { |
|
55
|
1191
|
100
|
|
1191
|
|
2073
|
return undef if $i >= scalar(@$source); |
|
56
|
903
|
|
|
|
|
1382
|
return $source->[$i++]; |
|
57
|
316
|
|
|
|
|
1312
|
}); |
|
58
|
|
|
|
|
|
|
} |
|
59
|
|
|
|
|
|
|
|
|
60
|
0
|
|
|
|
|
0
|
die "From() requires ARRAY reference"; |
|
61
|
|
|
|
|
|
|
} |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# FromLTSV - read from LTSV file |
|
64
|
|
|
|
|
|
|
sub FromLTSV { |
|
65
|
6
|
|
|
6
|
1
|
122933
|
my($class, $file) = @_; |
|
66
|
|
|
|
|
|
|
|
|
67
|
6
|
|
|
|
|
8
|
my $fh; |
|
68
|
6
|
50
|
|
|
|
14
|
if ($] >= 5.006) { |
|
69
|
|
|
|
|
|
|
# Avoid "Too many arguments for open at" error when running with Perl 5.005_03 |
|
70
|
6
|
50
|
|
|
|
393
|
eval q{ open($fh, '<', $file) } or die "Cannot open '$file': $!"; |
|
71
|
|
|
|
|
|
|
} |
|
72
|
|
|
|
|
|
|
else { |
|
73
|
0
|
|
|
|
|
0
|
$fh = \do { local *_ }; |
|
|
0
|
|
|
|
|
0
|
|
|
74
|
0
|
0
|
|
|
|
0
|
open($fh, "< $file") or die "Cannot open '$file': $!"; |
|
75
|
|
|
|
|
|
|
} |
|
76
|
6
|
|
|
|
|
26
|
binmode $fh; # Treat as raw bytes; handles all multibyte encodings |
|
77
|
|
|
|
|
|
|
# and prevents \r\n -> \n translation on Windows |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
return $class->new(sub { |
|
80
|
20
|
|
|
20
|
|
161
|
while (my $line = <$fh>) { |
|
81
|
15
|
|
|
|
|
17
|
chomp $line; |
|
82
|
15
|
|
|
|
|
33
|
$line =~ s/\r\z//; # Remove CR for CRLF files on any platform |
|
83
|
15
|
100
|
|
|
|
20
|
next unless length $line; |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
my %record = map { |
|
86
|
14
|
50
|
|
|
|
52
|
/\A(.+?):(.*)\z/ ? ($1, $2) : () |
|
|
48
|
|
|
|
|
156
|
|
|
87
|
|
|
|
|
|
|
} split /\t/, $line; |
|
88
|
|
|
|
|
|
|
|
|
89
|
14
|
50
|
|
|
|
59
|
return \%record if %record; |
|
90
|
|
|
|
|
|
|
} |
|
91
|
6
|
|
|
|
|
67
|
close $fh; |
|
92
|
6
|
|
|
|
|
16
|
return undef; |
|
93
|
6
|
|
|
|
|
38
|
}); |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# Range - generate sequence of integers |
|
97
|
|
|
|
|
|
|
sub Range { |
|
98
|
4
|
|
|
4
|
1
|
181691
|
my($class, $start, $count) = @_; |
|
99
|
|
|
|
|
|
|
|
|
100
|
4
|
|
|
|
|
8
|
my $current = $start; |
|
101
|
4
|
|
|
|
|
6
|
my $remaining = $count; |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
return $class->new(sub { |
|
104
|
37
|
100
|
|
37
|
|
76
|
return undef if $remaining <= 0; |
|
105
|
33
|
|
|
|
|
28
|
$remaining--; |
|
106
|
33
|
|
|
|
|
33
|
return $current++; |
|
107
|
4
|
|
|
|
|
26
|
}); |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# Empty - return empty sequence |
|
111
|
|
|
|
|
|
|
sub Empty { |
|
112
|
3
|
|
|
3
|
1
|
175658
|
my($class) = @_; |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
return $class->new(sub { |
|
115
|
4
|
|
|
4
|
|
13
|
return undef; |
|
116
|
3
|
|
|
|
|
21
|
}); |
|
117
|
|
|
|
|
|
|
} |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# Repeat - repeat element specified number of times |
|
120
|
|
|
|
|
|
|
sub Repeat { |
|
121
|
4
|
|
|
4
|
1
|
152
|
my($class, $element, $count) = @_; |
|
122
|
|
|
|
|
|
|
|
|
123
|
4
|
|
|
|
|
7
|
my $remaining = $count; |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
return $class->new(sub { |
|
126
|
22
|
100
|
|
22
|
|
44
|
return undef if $remaining <= 0; |
|
127
|
18
|
|
|
|
|
22
|
$remaining--; |
|
128
|
18
|
|
|
|
|
30
|
return $element; |
|
129
|
4
|
|
|
|
|
22
|
}); |
|
130
|
|
|
|
|
|
|
} |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
133
|
|
|
|
|
|
|
# Filtering Methods |
|
134
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
# Where - filter elements |
|
137
|
|
|
|
|
|
|
sub Where { |
|
138
|
26
|
|
|
26
|
1
|
82
|
my($self, @args) = @_; |
|
139
|
26
|
|
|
|
|
55
|
my $iter = $self->iterator; |
|
140
|
26
|
|
|
|
|
45
|
my $class = ref($self); |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Support both code reference and DSL form |
|
143
|
26
|
|
|
|
|
34
|
my $cond; |
|
144
|
26
|
100
|
66
|
|
|
128
|
if (@args == 1 && ref($args[0]) eq 'CODE') { |
|
145
|
17
|
|
|
|
|
27
|
$cond = $args[0]; |
|
146
|
|
|
|
|
|
|
} |
|
147
|
|
|
|
|
|
|
else { |
|
148
|
|
|
|
|
|
|
# DSL form: Where(key => value, ...) |
|
149
|
9
|
|
|
|
|
21
|
my %match = @args; |
|
150
|
|
|
|
|
|
|
$cond = sub { |
|
151
|
34
|
|
|
34
|
|
40
|
my $row = shift; |
|
152
|
34
|
|
|
|
|
57
|
for my $k (keys %match) { |
|
153
|
36
|
50
|
|
|
|
64
|
return 0 unless defined $row->{$k}; |
|
154
|
36
|
100
|
|
|
|
99
|
return 0 unless $row->{$k} eq $match{$k}; |
|
155
|
|
|
|
|
|
|
} |
|
156
|
16
|
|
|
|
|
49
|
return 1; |
|
157
|
9
|
|
|
|
|
28
|
}; |
|
158
|
|
|
|
|
|
|
} |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
return $class->new(sub { |
|
161
|
88
|
|
|
88
|
|
119
|
while (1) { |
|
162
|
141
|
|
|
|
|
233
|
my $item = $iter->(); |
|
163
|
141
|
100
|
|
|
|
242
|
return undef unless defined $item; |
|
164
|
116
|
100
|
|
|
|
162
|
return $item if $cond->($item); |
|
165
|
|
|
|
|
|
|
} |
|
166
|
26
|
|
|
|
|
162
|
}); |
|
167
|
|
|
|
|
|
|
} |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
170
|
|
|
|
|
|
|
# Projection Methods |
|
171
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# Select - transform elements |
|
174
|
|
|
|
|
|
|
sub Select { |
|
175
|
29
|
|
|
29
|
1
|
83
|
my($self, $selector) = @_; |
|
176
|
29
|
|
|
|
|
82
|
my $iter = $self->iterator; |
|
177
|
29
|
|
|
|
|
52
|
my $class = ref($self); |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
return $class->new(sub { |
|
180
|
151
|
|
|
151
|
|
167
|
my $item = $iter->(); |
|
181
|
151
|
100
|
|
|
|
279
|
return undef unless defined $item; |
|
182
|
123
|
|
|
|
|
171
|
return $selector->($item); |
|
183
|
29
|
|
|
|
|
134
|
}); |
|
184
|
|
|
|
|
|
|
} |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# SelectMany - flatten sequences |
|
187
|
|
|
|
|
|
|
sub SelectMany { |
|
188
|
5
|
|
|
5
|
1
|
4
|
my($self, $selector) = @_; |
|
189
|
5
|
|
|
|
|
5
|
my $iter = $self->iterator; |
|
190
|
5
|
|
|
|
|
4
|
my $class = ref($self); |
|
191
|
|
|
|
|
|
|
|
|
192
|
5
|
|
|
|
|
4
|
my @buffer; |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
return $class->new(sub { |
|
195
|
12
|
|
|
12
|
|
8
|
while (1) { |
|
196
|
21
|
100
|
|
|
|
23
|
if (@buffer) { |
|
197
|
7
|
|
|
|
|
10
|
return shift @buffer; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
|
|
200
|
14
|
|
|
|
|
15
|
my $item = $iter->(); |
|
201
|
14
|
100
|
|
|
|
19
|
return undef unless defined $item; |
|
202
|
|
|
|
|
|
|
|
|
203
|
11
|
|
|
|
|
14
|
my $result = $selector->($item); |
|
204
|
11
|
100
|
|
|
|
24
|
unless (ref($result) eq 'ARRAY') { |
|
205
|
2
|
|
|
|
|
15
|
die "SelectMany: selector must return an ARRAY reference"; |
|
206
|
|
|
|
|
|
|
} |
|
207
|
9
|
|
|
|
|
12
|
@buffer = @$result; |
|
208
|
|
|
|
|
|
|
} |
|
209
|
5
|
|
|
|
|
11
|
}); |
|
210
|
|
|
|
|
|
|
} |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# Concat - concatenate two sequences |
|
213
|
|
|
|
|
|
|
sub Concat { |
|
214
|
10
|
|
|
10
|
1
|
33
|
my($self, $second) = @_; |
|
215
|
10
|
|
|
|
|
17
|
my $class = ref($self); |
|
216
|
|
|
|
|
|
|
|
|
217
|
10
|
|
|
|
|
19
|
my $first_iter = $self->iterator; |
|
218
|
10
|
|
|
|
|
14
|
my $second_iter; |
|
219
|
10
|
|
|
|
|
16
|
my $first_done = 0; |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
return $class->new(sub { |
|
222
|
44
|
100
|
|
44
|
|
82
|
if (!$first_done) { |
|
223
|
28
|
|
|
|
|
42
|
my $item = $first_iter->(); |
|
224
|
28
|
100
|
|
|
|
68
|
if (defined $item) { |
|
225
|
18
|
|
|
|
|
39
|
return $item; |
|
226
|
|
|
|
|
|
|
} |
|
227
|
10
|
|
|
|
|
11
|
$first_done = 1; |
|
228
|
10
|
|
|
|
|
31
|
$second_iter = $second->iterator; |
|
229
|
|
|
|
|
|
|
} |
|
230
|
|
|
|
|
|
|
|
|
231
|
26
|
50
|
|
|
|
56
|
return $second_iter ? $second_iter->() : undef; |
|
232
|
10
|
|
|
|
|
43
|
}); |
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# Zip - combine two sequences element-wise |
|
236
|
|
|
|
|
|
|
sub Zip { |
|
237
|
5
|
|
|
5
|
1
|
12
|
my($self, $second, $result_selector) = @_; |
|
238
|
|
|
|
|
|
|
|
|
239
|
5
|
|
|
|
|
16
|
my $iter1 = $self->iterator; |
|
240
|
5
|
|
|
|
|
10
|
my $iter2 = $second->iterator; |
|
241
|
5
|
|
|
|
|
9
|
my $class = ref($self); |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
return $class->new(sub { |
|
244
|
17
|
|
|
17
|
|
26
|
my $item1 = $iter1->(); |
|
245
|
17
|
|
|
|
|
30
|
my $item2 = $iter2->(); |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# Return undef if either sequence ends |
|
248
|
17
|
100
|
100
|
|
|
63
|
return undef unless defined($item1) && defined($item2); |
|
249
|
|
|
|
|
|
|
|
|
250
|
12
|
|
|
|
|
29
|
return $result_selector->($item1, $item2); |
|
251
|
5
|
|
|
|
|
22
|
}); |
|
252
|
|
|
|
|
|
|
} |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
255
|
|
|
|
|
|
|
# Partitioning Methods |
|
256
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
# Take - take first N elements |
|
259
|
|
|
|
|
|
|
sub Take { |
|
260
|
7
|
|
|
7
|
1
|
14
|
my($self, $count) = @_; |
|
261
|
7
|
|
|
|
|
14
|
my $iter = $self->iterator; |
|
262
|
7
|
|
|
|
|
12
|
my $class = ref($self); |
|
263
|
7
|
|
|
|
|
14
|
my $taken = 0; |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
return $class->new(sub { |
|
266
|
26
|
100
|
|
26
|
|
46
|
return undef if $taken >= $count; |
|
267
|
19
|
|
|
|
|
26
|
my $item = $iter->(); |
|
268
|
19
|
50
|
|
|
|
39
|
return undef unless defined $item; |
|
269
|
19
|
|
|
|
|
18
|
$taken++; |
|
270
|
19
|
|
|
|
|
29
|
return $item; |
|
271
|
7
|
|
|
|
|
41
|
}); |
|
272
|
|
|
|
|
|
|
} |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# Skip - skip first N elements |
|
275
|
|
|
|
|
|
|
sub Skip { |
|
276
|
3
|
|
|
3
|
1
|
13
|
my($self, $count) = @_; |
|
277
|
3
|
|
|
|
|
7
|
my $iter = $self->iterator; |
|
278
|
3
|
|
|
|
|
6
|
my $class = ref($self); |
|
279
|
3
|
|
|
|
|
4
|
my $skipped = 0; |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
return $class->new(sub { |
|
282
|
9
|
|
|
9
|
|
19
|
while ($skipped < $count) { |
|
283
|
6
|
|
|
|
|
7
|
my $item = $iter->(); |
|
284
|
6
|
50
|
|
|
|
9
|
return undef unless defined $item; |
|
285
|
6
|
|
|
|
|
25
|
$skipped++; |
|
286
|
|
|
|
|
|
|
} |
|
287
|
9
|
|
|
|
|
13
|
return $iter->(); |
|
288
|
3
|
|
|
|
|
12
|
}); |
|
289
|
|
|
|
|
|
|
} |
|
290
|
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# TakeWhile - take while condition is true |
|
292
|
|
|
|
|
|
|
sub TakeWhile { |
|
293
|
2
|
|
|
2
|
1
|
4
|
my($self, $predicate) = @_; |
|
294
|
2
|
|
|
|
|
7
|
my $iter = $self->iterator; |
|
295
|
2
|
|
|
|
|
5
|
my $class = ref($self); |
|
296
|
2
|
|
|
|
|
4
|
my $done = 0; |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
return $class->new(sub { |
|
299
|
7
|
50
|
|
7
|
|
16
|
return undef if $done; |
|
300
|
7
|
|
|
|
|
33
|
my $item = $iter->(); |
|
301
|
7
|
50
|
|
|
|
21
|
return undef unless defined $item; |
|
302
|
|
|
|
|
|
|
|
|
303
|
7
|
100
|
|
|
|
16
|
if ($predicate->($item)) { |
|
304
|
5
|
|
|
|
|
24
|
return $item; |
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
else { |
|
307
|
2
|
|
|
|
|
7
|
$done = 1; |
|
308
|
2
|
|
|
|
|
6
|
return undef; |
|
309
|
|
|
|
|
|
|
} |
|
310
|
2
|
|
|
|
|
10
|
}); |
|
311
|
|
|
|
|
|
|
} |
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
# SkipWhile - skip elements while predicate is true |
|
314
|
|
|
|
|
|
|
sub SkipWhile { |
|
315
|
4
|
|
|
4
|
1
|
9
|
my($self, $predicate) = @_; |
|
316
|
4
|
|
|
|
|
7
|
my $iter = $self->iterator; |
|
317
|
4
|
|
|
|
|
7
|
my $class = ref($self); |
|
318
|
4
|
|
|
|
|
4
|
my $skipping = 1; |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
return $class->new(sub { |
|
321
|
12
|
|
|
12
|
|
15
|
while (1) { |
|
322
|
19
|
|
|
|
|
47
|
my $item = $iter->(); |
|
323
|
19
|
100
|
|
|
|
37
|
return undef unless defined $item; |
|
324
|
|
|
|
|
|
|
|
|
325
|
16
|
100
|
|
|
|
25
|
if ($skipping) { |
|
326
|
10
|
100
|
|
|
|
18
|
if (!$predicate->($item)) { |
|
327
|
3
|
|
|
|
|
11
|
$skipping = 0; |
|
328
|
3
|
|
|
|
|
7
|
return $item; |
|
329
|
|
|
|
|
|
|
} |
|
330
|
|
|
|
|
|
|
} |
|
331
|
|
|
|
|
|
|
else { |
|
332
|
6
|
|
|
|
|
13
|
return $item; |
|
333
|
|
|
|
|
|
|
} |
|
334
|
|
|
|
|
|
|
} |
|
335
|
4
|
|
|
|
|
27
|
}); |
|
336
|
|
|
|
|
|
|
} |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
339
|
|
|
|
|
|
|
# Ordering Methods |
|
340
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
# OrderBy - sort ascending (smart: numeric when both keys look numeric) |
|
343
|
|
|
|
|
|
|
sub OrderBy { |
|
344
|
21
|
|
|
21
|
1
|
30
|
my($self, $key_selector) = @_; |
|
345
|
21
|
|
|
|
|
50
|
my @items = $self->ToArray(); |
|
346
|
21
|
|
|
|
|
122
|
return LTSV::LINQ::Ordered->_new_ordered( |
|
347
|
|
|
|
|
|
|
\@items, |
|
348
|
|
|
|
|
|
|
[{ sel => $key_selector, dir => 1, type => 'smart' }] |
|
349
|
|
|
|
|
|
|
); |
|
350
|
|
|
|
|
|
|
} |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
# OrderByDescending - sort descending (smart comparison) |
|
353
|
|
|
|
|
|
|
sub OrderByDescending { |
|
354
|
4
|
|
|
4
|
1
|
9
|
my($self, $key_selector) = @_; |
|
355
|
4
|
|
|
|
|
17
|
my @items = $self->ToArray(); |
|
356
|
4
|
|
|
|
|
29
|
return LTSV::LINQ::Ordered->_new_ordered( |
|
357
|
|
|
|
|
|
|
\@items, |
|
358
|
|
|
|
|
|
|
[{ sel => $key_selector, dir => -1, type => 'smart' }] |
|
359
|
|
|
|
|
|
|
); |
|
360
|
|
|
|
|
|
|
} |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# OrderByStr - sort ascending by string comparison |
|
363
|
|
|
|
|
|
|
sub OrderByStr { |
|
364
|
21
|
|
|
21
|
1
|
31
|
my($self, $key_selector) = @_; |
|
365
|
21
|
|
|
|
|
1162
|
my @items = $self->ToArray(); |
|
366
|
21
|
|
|
|
|
77
|
return LTSV::LINQ::Ordered->_new_ordered( |
|
367
|
|
|
|
|
|
|
\@items, |
|
368
|
|
|
|
|
|
|
[{ sel => $key_selector, dir => 1, type => 'str' }] |
|
369
|
|
|
|
|
|
|
); |
|
370
|
|
|
|
|
|
|
} |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
# OrderByStrDescending - sort descending by string comparison |
|
373
|
|
|
|
|
|
|
sub OrderByStrDescending { |
|
374
|
5
|
|
|
5
|
1
|
6
|
my($self, $key_selector) = @_; |
|
375
|
5
|
|
|
|
|
9
|
my @items = $self->ToArray(); |
|
376
|
5
|
|
|
|
|
29
|
return LTSV::LINQ::Ordered->_new_ordered( |
|
377
|
|
|
|
|
|
|
\@items, |
|
378
|
|
|
|
|
|
|
[{ sel => $key_selector, dir => -1, type => 'str' }] |
|
379
|
|
|
|
|
|
|
); |
|
380
|
|
|
|
|
|
|
} |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# OrderByNum - sort ascending by numeric comparison |
|
383
|
|
|
|
|
|
|
sub OrderByNum { |
|
384
|
14
|
|
|
14
|
1
|
15
|
my($self, $key_selector) = @_; |
|
385
|
14
|
|
|
|
|
19
|
my @items = $self->ToArray(); |
|
386
|
14
|
|
|
|
|
35
|
return LTSV::LINQ::Ordered->_new_ordered( |
|
387
|
|
|
|
|
|
|
\@items, |
|
388
|
|
|
|
|
|
|
[{ sel => $key_selector, dir => 1, type => 'num' }] |
|
389
|
|
|
|
|
|
|
); |
|
390
|
|
|
|
|
|
|
} |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
# OrderByNumDescending - sort descending by numeric comparison |
|
393
|
|
|
|
|
|
|
sub OrderByNumDescending { |
|
394
|
5
|
|
|
5
|
1
|
6
|
my($self, $key_selector) = @_; |
|
395
|
5
|
|
|
|
|
10
|
my @items = $self->ToArray(); |
|
396
|
5
|
|
|
|
|
12
|
return LTSV::LINQ::Ordered->_new_ordered( |
|
397
|
|
|
|
|
|
|
\@items, |
|
398
|
|
|
|
|
|
|
[{ sel => $key_selector, dir => -1, type => 'num' }] |
|
399
|
|
|
|
|
|
|
); |
|
400
|
|
|
|
|
|
|
} |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
# Reverse - reverse order |
|
403
|
|
|
|
|
|
|
sub Reverse { |
|
404
|
1
|
|
|
1
|
1
|
3
|
my($self) = @_; |
|
405
|
1
|
|
|
|
|
4
|
my @items = reverse $self->ToArray(); |
|
406
|
1
|
|
|
|
|
3
|
my $class = ref($self); |
|
407
|
1
|
|
|
|
|
4
|
return $class->From(\@items); |
|
408
|
|
|
|
|
|
|
} |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
411
|
|
|
|
|
|
|
# Grouping Methods |
|
412
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# GroupBy - group elements by key |
|
415
|
|
|
|
|
|
|
sub GroupBy { |
|
416
|
5
|
|
|
5
|
1
|
14
|
my($self, $key_selector, $element_selector) = @_; |
|
417
|
5
|
|
33
|
21
|
|
31
|
$element_selector ||= sub { $_[0] }; |
|
|
21
|
|
|
|
|
58
|
|
|
418
|
|
|
|
|
|
|
|
|
419
|
5
|
|
|
|
|
10
|
my %groups; |
|
420
|
|
|
|
|
|
|
my @key_order; |
|
421
|
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
$self->ForEach(sub { |
|
423
|
21
|
|
|
21
|
|
27
|
my $item = shift; |
|
424
|
21
|
|
|
|
|
53
|
my $key = $key_selector->($item); |
|
425
|
21
|
50
|
|
|
|
85
|
$key = '' unless defined $key; |
|
426
|
21
|
100
|
|
|
|
43
|
unless (exists $groups{$key}) { |
|
427
|
13
|
|
|
|
|
19
|
push @key_order, $key; |
|
428
|
|
|
|
|
|
|
} |
|
429
|
21
|
|
|
|
|
53
|
push @{$groups{$key}}, $element_selector->($item); |
|
|
21
|
|
|
|
|
91
|
|
|
430
|
5
|
|
|
|
|
30
|
}); |
|
431
|
|
|
|
|
|
|
|
|
432
|
5
|
|
|
|
|
27
|
my @result; |
|
433
|
5
|
|
|
|
|
11
|
for my $key (@key_order) { |
|
434
|
|
|
|
|
|
|
push @result, { |
|
435
|
|
|
|
|
|
|
Key => $key, |
|
436
|
13
|
|
|
|
|
43
|
Elements => $groups{$key}, |
|
437
|
|
|
|
|
|
|
}; |
|
438
|
|
|
|
|
|
|
} |
|
439
|
|
|
|
|
|
|
|
|
440
|
5
|
|
|
|
|
11
|
my $class = ref($self); |
|
441
|
5
|
|
|
|
|
14
|
return $class->From(\@result); |
|
442
|
|
|
|
|
|
|
} |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
445
|
|
|
|
|
|
|
# Set Operations |
|
446
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
# Distinct - remove duplicates |
|
449
|
|
|
|
|
|
|
sub Distinct { |
|
450
|
12
|
|
|
12
|
1
|
26
|
my($self, $key_selector) = @_; |
|
451
|
12
|
|
|
|
|
22
|
my $iter = $self->iterator; |
|
452
|
12
|
|
|
|
|
20
|
my $class = ref($self); |
|
453
|
12
|
|
|
|
|
18
|
my %seen; |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
return $class->new(sub { |
|
456
|
47
|
|
|
47
|
|
56
|
while (1) { |
|
457
|
62
|
|
|
|
|
98
|
my $item = $iter->(); |
|
458
|
62
|
100
|
|
|
|
129
|
return undef unless defined $item; |
|
459
|
|
|
|
|
|
|
|
|
460
|
50
|
100
|
|
|
|
128
|
my $key = $key_selector ? $key_selector->($item) : _make_key($item); |
|
461
|
50
|
50
|
|
|
|
107
|
$key = '' unless defined $key; |
|
462
|
|
|
|
|
|
|
|
|
463
|
50
|
100
|
|
|
|
141
|
unless ($seen{$key}++) { |
|
464
|
35
|
|
|
|
|
84
|
return $item; |
|
465
|
|
|
|
|
|
|
} |
|
466
|
|
|
|
|
|
|
} |
|
467
|
12
|
|
|
|
|
75
|
}); |
|
468
|
|
|
|
|
|
|
} |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
# Internal helper for set operations - make key from item |
|
471
|
|
|
|
|
|
|
sub _make_key { |
|
472
|
99
|
|
|
99
|
|
149
|
my($item) = @_; |
|
473
|
|
|
|
|
|
|
|
|
474
|
99
|
50
|
|
|
|
169
|
return '' unless defined $item; |
|
475
|
|
|
|
|
|
|
|
|
476
|
99
|
100
|
|
|
|
242
|
if (ref($item) eq 'HASH') { |
|
|
|
100
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# Hash to stable key |
|
478
|
9
|
|
|
|
|
15
|
my @pairs = (); |
|
479
|
9
|
|
|
|
|
30
|
for my $k (sort keys %$item) { |
|
480
|
14
|
50
|
|
|
|
35
|
my $v = defined($item->{$k}) ? $item->{$k} : ''; |
|
481
|
14
|
|
|
|
|
38
|
push @pairs, "$k\x1F$v"; # \x1F = Unit Separator |
|
482
|
|
|
|
|
|
|
} |
|
483
|
9
|
|
|
|
|
34
|
return join("\x1E", @pairs); # \x1E = Record Separator |
|
484
|
|
|
|
|
|
|
} |
|
485
|
|
|
|
|
|
|
elsif (ref($item) eq 'ARRAY') { |
|
486
|
|
|
|
|
|
|
# Array to key |
|
487
|
3
|
50
|
|
|
|
7
|
return join("\x1E", map { defined($_) ? $_ : '' } @$item); |
|
|
6
|
|
|
|
|
19
|
|
|
488
|
|
|
|
|
|
|
} |
|
489
|
|
|
|
|
|
|
else { |
|
490
|
|
|
|
|
|
|
# Scalar |
|
491
|
87
|
|
|
|
|
139
|
return $item; |
|
492
|
|
|
|
|
|
|
} |
|
493
|
|
|
|
|
|
|
} |
|
494
|
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
# _from_snapshot - internal helper for GroupJoin. |
|
496
|
|
|
|
|
|
|
# Returns a LTSV::LINQ object backed by a plain array that can be iterated |
|
497
|
|
|
|
|
|
|
# multiple times within a single result_selector call. |
|
498
|
|
|
|
|
|
|
# Each LINQ terminal method (Count, Sum, ToArray, etc.) calls iterator() |
|
499
|
|
|
|
|
|
|
# to get a fresh iterator. We achieve re-iterability by overriding the |
|
500
|
|
|
|
|
|
|
# iterator() method so it always creates a new closure over the same array. |
|
501
|
|
|
|
|
|
|
sub _from_snapshot { |
|
502
|
68
|
|
|
68
|
|
61
|
my($class_or_self, $aref) = @_; |
|
503
|
|
|
|
|
|
|
|
|
504
|
68
|
|
33
|
|
|
102
|
my $class = ref($class_or_self) || $class_or_self; |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# Build a sentinel sub that, when called, returns a brand-new |
|
507
|
|
|
|
|
|
|
# index-based iterator every time. |
|
508
|
|
|
|
|
|
|
my $iter_factory = sub { |
|
509
|
139
|
|
|
139
|
|
83
|
my $i = 0; |
|
510
|
|
|
|
|
|
|
return sub { |
|
511
|
133
|
100
|
|
|
|
179
|
return undef if $i >= scalar(@$aref); |
|
512
|
71
|
|
|
|
|
84
|
return $aref->[$i++]; |
|
513
|
139
|
|
|
|
|
252
|
}; |
|
514
|
68
|
|
|
|
|
102
|
}; |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
# The object stores the factory in place of a plain iterator. |
|
517
|
|
|
|
|
|
|
# The iterator() accessor returns the result of calling the factory, |
|
518
|
|
|
|
|
|
|
# so every consumer gets its own fresh iterator starting at index 0. |
|
519
|
68
|
|
|
|
|
61
|
my $obj = bless { |
|
520
|
|
|
|
|
|
|
iterator => $iter_factory->(), |
|
521
|
|
|
|
|
|
|
_factory => $iter_factory, |
|
522
|
|
|
|
|
|
|
}, $class; |
|
523
|
|
|
|
|
|
|
|
|
524
|
68
|
|
|
|
|
72
|
return $obj; |
|
525
|
|
|
|
|
|
|
} |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
# Union - set union with distinct |
|
528
|
|
|
|
|
|
|
sub Union { |
|
529
|
5
|
|
|
5
|
1
|
11
|
my($self, $second, $key_selector) = @_; |
|
530
|
|
|
|
|
|
|
|
|
531
|
5
|
|
|
|
|
17
|
return $self->Concat($second)->Distinct($key_selector); |
|
532
|
|
|
|
|
|
|
} |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
# Intersect - set intersection |
|
535
|
|
|
|
|
|
|
sub Intersect { |
|
536
|
5
|
|
|
5
|
1
|
14
|
my($self, $second, $key_selector) = @_; |
|
537
|
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
# Build hash of second sequence |
|
539
|
5
|
|
|
|
|
10
|
my %second_set = (); |
|
540
|
|
|
|
|
|
|
$second->ForEach(sub { |
|
541
|
13
|
|
|
13
|
|
21
|
my $item = shift; |
|
542
|
13
|
50
|
|
|
|
27
|
my $key = $key_selector ? $key_selector->($item) : _make_key($item); |
|
543
|
13
|
|
|
|
|
39
|
$second_set{$key} = $item; |
|
544
|
5
|
|
|
|
|
24
|
}); |
|
545
|
|
|
|
|
|
|
|
|
546
|
5
|
|
|
|
|
24
|
my $class = ref($self); |
|
547
|
5
|
|
|
|
|
12
|
my $iter = $self->iterator; |
|
548
|
5
|
|
|
|
|
11
|
my %seen = (); |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
return $class->new(sub { |
|
551
|
11
|
|
|
11
|
|
21
|
while (defined(my $item = $iter->())) { |
|
552
|
14
|
50
|
|
|
|
31
|
my $key = $key_selector ? $key_selector->($item) : _make_key($item); |
|
553
|
|
|
|
|
|
|
|
|
554
|
14
|
100
|
|
|
|
47
|
next if $seen{$key}++; # Skip duplicates |
|
555
|
12
|
100
|
|
|
|
38
|
return $item if exists $second_set{$key}; |
|
556
|
|
|
|
|
|
|
} |
|
557
|
5
|
|
|
|
|
10
|
return undef; |
|
558
|
5
|
|
|
|
|
26
|
}); |
|
559
|
|
|
|
|
|
|
} |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
# Except - set difference |
|
562
|
|
|
|
|
|
|
sub Except { |
|
563
|
5
|
|
|
5
|
1
|
11
|
my($self, $second, $key_selector) = @_; |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
# Build hash of second sequence |
|
566
|
5
|
|
|
|
|
9
|
my %second_set = (); |
|
567
|
|
|
|
|
|
|
$second->ForEach(sub { |
|
568
|
11
|
|
|
11
|
|
17
|
my $item = shift; |
|
569
|
11
|
50
|
|
|
|
27
|
my $key = $key_selector ? $key_selector->($item) : _make_key($item); |
|
570
|
11
|
|
|
|
|
30
|
$second_set{$key} = 1; |
|
571
|
5
|
|
|
|
|
41
|
}); |
|
572
|
|
|
|
|
|
|
|
|
573
|
5
|
|
|
|
|
22
|
my $class = ref($self); |
|
574
|
5
|
|
|
|
|
11
|
my $iter = $self->iterator; |
|
575
|
5
|
|
|
|
|
9
|
my %seen = (); |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
return $class->new(sub { |
|
578
|
13
|
|
|
13
|
|
22
|
while (defined(my $item = $iter->())) { |
|
579
|
17
|
50
|
|
|
|
34
|
my $key = $key_selector ? $key_selector->($item) : _make_key($item); |
|
580
|
|
|
|
|
|
|
|
|
581
|
17
|
100
|
|
|
|
51
|
next if $seen{$key}++; # Skip duplicates |
|
582
|
15
|
100
|
|
|
|
43
|
return $item unless exists $second_set{$key}; |
|
583
|
|
|
|
|
|
|
} |
|
584
|
5
|
|
|
|
|
10
|
return undef; |
|
585
|
5
|
|
|
|
|
26
|
}); |
|
586
|
|
|
|
|
|
|
} |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
# Join - correlates elements of two sequences |
|
589
|
|
|
|
|
|
|
sub Join { |
|
590
|
6
|
|
|
6
|
1
|
16
|
my($self, $inner, $outer_key_selector, $inner_key_selector, $result_selector) = @_; |
|
591
|
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
# Build hash table from inner sequence |
|
593
|
6
|
|
|
|
|
11
|
my %inner_hash = (); |
|
594
|
|
|
|
|
|
|
$inner->ForEach(sub { |
|
595
|
12
|
|
|
12
|
|
17
|
my $item = shift; |
|
596
|
12
|
|
|
|
|
49
|
my $key = $inner_key_selector->($item); |
|
597
|
12
|
50
|
|
|
|
44
|
$key = _make_key($key) if ref($key); |
|
598
|
12
|
|
|
|
|
14
|
push @{$inner_hash{$key}}, $item; |
|
|
12
|
|
|
|
|
42
|
|
|
599
|
6
|
|
|
|
|
32
|
}); |
|
600
|
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
# Process outer sequence with lazy evaluation |
|
602
|
6
|
|
|
|
|
29
|
my $class = ref($self); |
|
603
|
6
|
|
|
|
|
14
|
my $iter = $self->iterator; |
|
604
|
6
|
|
|
|
|
9
|
my @buffer = (); |
|
605
|
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
return $class->new(sub { |
|
607
|
15
|
|
|
15
|
|
18
|
while (1) { |
|
608
|
|
|
|
|
|
|
# Return from buffer if available |
|
609
|
24
|
100
|
|
|
|
75
|
return shift @buffer if @buffer; |
|
610
|
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
# Get next outer element |
|
612
|
15
|
|
|
|
|
21
|
my $outer_item = $iter->(); |
|
613
|
15
|
100
|
|
|
|
33
|
return undef unless defined $outer_item; |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# Find matching inner elements |
|
616
|
9
|
|
|
|
|
32
|
my $key = $outer_key_selector->($outer_item); |
|
617
|
9
|
50
|
|
|
|
30
|
$key = _make_key($key) if ref($key); |
|
618
|
|
|
|
|
|
|
|
|
619
|
9
|
100
|
|
|
|
24
|
if (exists $inner_hash{$key}) { |
|
620
|
7
|
|
|
|
|
7
|
for my $inner_item (@{$inner_hash{$key}}) { |
|
|
7
|
|
|
|
|
16
|
|
|
621
|
9
|
|
|
|
|
23
|
push @buffer, $result_selector->($outer_item, $inner_item); |
|
622
|
|
|
|
|
|
|
} |
|
623
|
|
|
|
|
|
|
} |
|
624
|
|
|
|
|
|
|
# If no match, continue to next outer element |
|
625
|
|
|
|
|
|
|
} |
|
626
|
6
|
|
|
|
|
32
|
}); |
|
627
|
|
|
|
|
|
|
} |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
# GroupJoin - group join (LEFT OUTER JOIN-like operation) |
|
630
|
|
|
|
|
|
|
sub GroupJoin { |
|
631
|
29
|
|
|
29
|
1
|
43
|
my($self, $inner, $outer_key_selector, $inner_key_selector, $result_selector) = @_; |
|
632
|
29
|
|
|
|
|
30
|
my $class = ref($self); |
|
633
|
29
|
|
|
|
|
31
|
my $outer_iter = $self->iterator; |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# 1. Build lookup table from inner sequence. |
|
636
|
|
|
|
|
|
|
# Group all inner items by their keys for efficient lookup. |
|
637
|
|
|
|
|
|
|
# The inner sequence is fully materialized into memory here. |
|
638
|
29
|
|
|
|
|
25
|
my %inner_lookup = (); |
|
639
|
|
|
|
|
|
|
$inner->ForEach(sub { |
|
640
|
78
|
|
|
78
|
|
48
|
my $item = shift; |
|
641
|
78
|
|
|
|
|
78
|
my $key = $inner_key_selector->($item); |
|
642
|
78
|
50
|
|
|
|
149
|
$key = _make_key($key) if ref($key); |
|
643
|
78
|
100
|
|
|
|
75
|
$key = '' unless defined $key; |
|
644
|
78
|
|
|
|
|
45
|
push @{$inner_lookup{$key}}, $item; |
|
|
78
|
|
|
|
|
113
|
|
|
645
|
29
|
|
|
|
|
80
|
}); |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
# 2. Return lazy iterator over outer sequence |
|
648
|
|
|
|
|
|
|
return $class->new(sub { |
|
649
|
96
|
|
|
96
|
|
86
|
my $outer_item = $outer_iter->(); |
|
650
|
96
|
100
|
|
|
|
114
|
return undef unless defined $outer_item; |
|
651
|
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
# Get key from outer item |
|
653
|
68
|
|
|
|
|
69
|
my $key = $outer_key_selector->($outer_item); |
|
654
|
68
|
50
|
|
|
|
136
|
$key = _make_key($key) if ref($key); |
|
655
|
68
|
100
|
|
|
|
75
|
$key = '' unless defined $key; |
|
656
|
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
# Get matching inner items (empty array ref if no matches) |
|
658
|
68
|
100
|
|
|
|
81
|
my $matched_inners = exists $inner_lookup{$key} ? $inner_lookup{$key} : []; |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
# Snapshot the matched items into a plain array. |
|
661
|
|
|
|
|
|
|
# We create a LTSV::LINQ object whose iterator sub always reads |
|
662
|
|
|
|
|
|
|
# from a fresh index variable, so the group can be traversed |
|
663
|
|
|
|
|
|
|
# multiple times inside result_selector (e.g. Count() then Sum()). |
|
664
|
68
|
|
|
|
|
65
|
my @snapshot = @$matched_inners; |
|
665
|
68
|
|
|
|
|
74
|
my $inner_group = $class->_from_snapshot(\@snapshot); |
|
666
|
|
|
|
|
|
|
|
|
667
|
68
|
|
|
|
|
70
|
return $result_selector->($outer_item, $inner_group); |
|
668
|
29
|
|
|
|
|
96
|
}); |
|
669
|
|
|
|
|
|
|
} |
|
670
|
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
672
|
|
|
|
|
|
|
# Quantifier Methods |
|
673
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
674
|
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
# All - test if all elements satisfy condition |
|
676
|
|
|
|
|
|
|
sub All { |
|
677
|
4
|
|
|
4
|
1
|
7
|
my($self, $predicate) = @_; |
|
678
|
4
|
|
|
|
|
6
|
my $iter = $self->iterator; |
|
679
|
|
|
|
|
|
|
|
|
680
|
4
|
|
|
|
|
5
|
while (defined(my $item = $iter->())) { |
|
681
|
8
|
100
|
|
|
|
8
|
return 0 unless $predicate->($item); |
|
682
|
|
|
|
|
|
|
} |
|
683
|
3
|
|
|
|
|
5
|
return 1; |
|
684
|
|
|
|
|
|
|
} |
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
# Any - test if any element satisfies condition |
|
687
|
|
|
|
|
|
|
sub Any { |
|
688
|
19
|
|
|
19
|
1
|
42
|
my($self, $predicate) = @_; |
|
689
|
19
|
|
|
|
|
26
|
my $iter = $self->iterator; |
|
690
|
|
|
|
|
|
|
|
|
691
|
19
|
100
|
|
|
|
28
|
if ($predicate) { |
|
692
|
6
|
|
|
|
|
13
|
while (defined(my $item = $iter->())) { |
|
693
|
9
|
100
|
|
|
|
36
|
return 1 if $predicate->($item); |
|
694
|
|
|
|
|
|
|
} |
|
695
|
2
|
|
|
|
|
13
|
return 0; |
|
696
|
|
|
|
|
|
|
} |
|
697
|
|
|
|
|
|
|
else { |
|
698
|
13
|
|
|
|
|
13
|
my $item = $iter->(); |
|
699
|
13
|
100
|
|
|
|
36
|
return defined($item) ? 1 : 0; |
|
700
|
|
|
|
|
|
|
} |
|
701
|
|
|
|
|
|
|
} |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
# Contains - check if sequence contains element |
|
704
|
|
|
|
|
|
|
sub Contains { |
|
705
|
5
|
|
|
5
|
1
|
12
|
my($self, $value, $comparer) = @_; |
|
706
|
|
|
|
|
|
|
|
|
707
|
5
|
100
|
|
|
|
13
|
if ($comparer) { |
|
708
|
1
|
|
|
1
|
|
5
|
return $self->Any(sub { $comparer->($_[0], $value) }); |
|
|
1
|
|
|
|
|
4
|
|
|
709
|
|
|
|
|
|
|
} |
|
710
|
|
|
|
|
|
|
else { |
|
711
|
|
|
|
|
|
|
return $self->Any(sub { |
|
712
|
6
|
|
|
6
|
|
10
|
my $item = $_[0]; |
|
713
|
6
|
|
33
|
|
|
61
|
return (!defined($item) && !defined($value)) || |
|
714
|
|
|
|
|
|
|
(defined($item) && defined($value) && $item eq $value); |
|
715
|
4
|
|
|
|
|
16
|
}); |
|
716
|
|
|
|
|
|
|
} |
|
717
|
|
|
|
|
|
|
} |
|
718
|
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
# SequenceEqual - compare two sequences for equality |
|
720
|
|
|
|
|
|
|
sub SequenceEqual { |
|
721
|
5
|
|
|
5
|
1
|
10
|
my($self, $second, $comparer) = @_; |
|
722
|
|
|
|
|
|
|
$comparer ||= sub { |
|
723
|
8
|
|
|
8
|
|
14
|
my($a, $b) = @_; |
|
724
|
8
|
|
33
|
|
|
67
|
return (!defined($a) && !defined($b)) || |
|
725
|
|
|
|
|
|
|
(defined($a) && defined($b) && $a eq $b); |
|
726
|
5
|
|
66
|
|
|
66
|
}; |
|
727
|
|
|
|
|
|
|
|
|
728
|
5
|
|
|
|
|
15
|
my $iter1 = $self->iterator; |
|
729
|
5
|
|
|
|
|
10
|
my $iter2 = $second->iterator; |
|
730
|
|
|
|
|
|
|
|
|
731
|
5
|
|
|
|
|
7
|
while (1) { |
|
732
|
14
|
|
|
|
|
32
|
my $item1 = $iter1->(); |
|
733
|
14
|
|
|
|
|
23
|
my $item2 = $iter2->(); |
|
734
|
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
# Both ended - equal |
|
736
|
14
|
100
|
100
|
|
|
48
|
return 1 if !defined($item1) && !defined($item2); |
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
# One ended - not equal |
|
739
|
11
|
100
|
66
|
|
|
45
|
return 0 if !defined($item1) || !defined($item2); |
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
# Compare items |
|
742
|
10
|
100
|
|
|
|
18
|
return 0 unless $comparer->($item1, $item2); |
|
743
|
|
|
|
|
|
|
} |
|
744
|
|
|
|
|
|
|
} |
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
747
|
|
|
|
|
|
|
# Element Access Methods |
|
748
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
# First - get first element |
|
751
|
|
|
|
|
|
|
sub First { |
|
752
|
8
|
|
|
8
|
1
|
31
|
my($self, $predicate) = @_; |
|
753
|
8
|
|
|
|
|
15
|
my $iter = $self->iterator; |
|
754
|
|
|
|
|
|
|
|
|
755
|
8
|
100
|
|
|
|
21
|
if ($predicate) { |
|
756
|
3
|
|
|
|
|
4
|
while (defined(my $item = $iter->())) { |
|
757
|
9
|
100
|
|
|
|
12
|
return $item if $predicate->($item); |
|
758
|
|
|
|
|
|
|
} |
|
759
|
1
|
|
|
|
|
11
|
die "No element satisfies the condition"; |
|
760
|
|
|
|
|
|
|
} |
|
761
|
|
|
|
|
|
|
else { |
|
762
|
5
|
|
|
|
|
12
|
my $item = $iter->(); |
|
763
|
5
|
100
|
|
|
|
23
|
return $item if defined $item; |
|
764
|
1
|
|
|
|
|
8
|
die "Sequence contains no elements"; |
|
765
|
|
|
|
|
|
|
} |
|
766
|
|
|
|
|
|
|
} |
|
767
|
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
# FirstOrDefault - get first element or default |
|
769
|
|
|
|
|
|
|
sub FirstOrDefault { |
|
770
|
4
|
|
|
4
|
1
|
9
|
my $self = shift; |
|
771
|
4
|
|
|
|
|
7
|
my($predicate, $default); |
|
772
|
|
|
|
|
|
|
|
|
773
|
4
|
100
|
|
|
|
19
|
if (@_ >= 2) { |
|
|
|
50
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
# Two arguments: ($predicate, $default) |
|
775
|
2
|
|
|
|
|
3
|
($predicate, $default) = @_; |
|
776
|
|
|
|
|
|
|
} |
|
777
|
|
|
|
|
|
|
elsif (@_ == 1) { |
|
778
|
|
|
|
|
|
|
# One argument: distinguish CODE (predicate) vs non-CODE (default) |
|
779
|
2
|
50
|
|
|
|
5
|
if (ref($_[0]) eq 'CODE') { |
|
780
|
0
|
|
|
|
|
0
|
$predicate = $_[0]; |
|
781
|
|
|
|
|
|
|
} |
|
782
|
|
|
|
|
|
|
else { |
|
783
|
2
|
|
|
|
|
3
|
$default = $_[0]; |
|
784
|
|
|
|
|
|
|
} |
|
785
|
|
|
|
|
|
|
} |
|
786
|
|
|
|
|
|
|
|
|
787
|
4
|
|
|
|
|
7
|
my $result = eval { $self->First($predicate) }; |
|
|
4
|
|
|
|
|
8
|
|
|
788
|
4
|
100
|
|
|
|
16
|
return $@ ? $default : $result; |
|
789
|
|
|
|
|
|
|
} |
|
790
|
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
# Last - get last element |
|
792
|
|
|
|
|
|
|
sub Last { |
|
793
|
3
|
|
|
3
|
1
|
9
|
my($self, $predicate) = @_; |
|
794
|
3
|
|
|
|
|
7
|
my @items = $self->ToArray(); |
|
795
|
|
|
|
|
|
|
|
|
796
|
3
|
50
|
|
|
|
10
|
if ($predicate) { |
|
797
|
0
|
|
|
|
|
0
|
for (my $i = $#items; $i >= 0; $i--) { |
|
798
|
0
|
0
|
|
|
|
0
|
return $items[$i] if $predicate->($items[$i]); |
|
799
|
|
|
|
|
|
|
} |
|
800
|
0
|
|
|
|
|
0
|
die "No element satisfies the condition"; |
|
801
|
|
|
|
|
|
|
} |
|
802
|
|
|
|
|
|
|
else { |
|
803
|
3
|
50
|
|
|
|
12
|
die "Sequence contains no elements" unless @items; |
|
804
|
3
|
|
|
|
|
10
|
return $items[-1]; |
|
805
|
|
|
|
|
|
|
} |
|
806
|
|
|
|
|
|
|
} |
|
807
|
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
# LastOrDefault - return last element or default |
|
809
|
|
|
|
|
|
|
sub LastOrDefault { |
|
810
|
9
|
|
|
9
|
1
|
41
|
my $self = shift; |
|
811
|
9
|
|
|
|
|
14
|
my($predicate, $default); |
|
812
|
|
|
|
|
|
|
|
|
813
|
9
|
100
|
|
|
|
31
|
if (@_ >= 2) { |
|
|
|
100
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
# Two arguments: ($predicate, $default) |
|
815
|
2
|
|
|
|
|
6
|
($predicate, $default) = @_; |
|
816
|
|
|
|
|
|
|
} |
|
817
|
|
|
|
|
|
|
elsif (@_ == 1) { |
|
818
|
|
|
|
|
|
|
# One argument: distinguish CODE (predicate) vs non-CODE (default) |
|
819
|
3
|
100
|
|
|
|
8
|
if (ref($_[0]) eq 'CODE') { |
|
820
|
2
|
|
|
|
|
3
|
$predicate = $_[0]; |
|
821
|
|
|
|
|
|
|
} |
|
822
|
|
|
|
|
|
|
else { |
|
823
|
1
|
|
|
|
|
2
|
$default = $_[0]; |
|
824
|
|
|
|
|
|
|
} |
|
825
|
|
|
|
|
|
|
} |
|
826
|
|
|
|
|
|
|
|
|
827
|
9
|
|
|
|
|
22
|
my @items = $self->ToArray(); |
|
828
|
|
|
|
|
|
|
|
|
829
|
9
|
100
|
|
|
|
15
|
if ($predicate) { |
|
830
|
3
|
|
|
|
|
12
|
for (my $i = $#items; $i >= 0; $i--) { |
|
831
|
7
|
100
|
|
|
|
49
|
return $items[$i] if $predicate->($items[$i]); |
|
832
|
|
|
|
|
|
|
} |
|
833
|
2
|
|
|
|
|
12
|
return $default; |
|
834
|
|
|
|
|
|
|
} |
|
835
|
|
|
|
|
|
|
else { |
|
836
|
6
|
100
|
|
|
|
24
|
return @items ? $items[-1] : $default; |
|
837
|
|
|
|
|
|
|
} |
|
838
|
|
|
|
|
|
|
} |
|
839
|
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
# Single - return the only element |
|
841
|
|
|
|
|
|
|
sub Single { |
|
842
|
5
|
|
|
5
|
1
|
10
|
my($self, $predicate) = @_; |
|
843
|
5
|
|
|
|
|
11
|
my $iter = $self->iterator; |
|
844
|
5
|
|
|
|
|
8
|
my $found; |
|
845
|
5
|
|
|
|
|
8
|
my $count = 0; |
|
846
|
|
|
|
|
|
|
|
|
847
|
5
|
|
|
|
|
10
|
while (defined(my $item = $iter->())) { |
|
848
|
8
|
100
|
100
|
|
|
24
|
next if $predicate && !$predicate->($item); |
|
849
|
|
|
|
|
|
|
|
|
850
|
6
|
|
|
|
|
18
|
$count++; |
|
851
|
6
|
100
|
|
|
|
13
|
if ($count > 1) { |
|
852
|
2
|
|
|
|
|
19
|
die "Sequence contains more than one element"; |
|
853
|
|
|
|
|
|
|
} |
|
854
|
4
|
|
|
|
|
8
|
$found = $item; |
|
855
|
|
|
|
|
|
|
} |
|
856
|
|
|
|
|
|
|
|
|
857
|
3
|
100
|
|
|
|
18
|
die "Sequence contains no elements" if $count == 0; |
|
858
|
2
|
|
|
|
|
4
|
return $found; |
|
859
|
|
|
|
|
|
|
} |
|
860
|
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
# SingleOrDefault - return the only element or undef |
|
862
|
|
|
|
|
|
|
sub SingleOrDefault { |
|
863
|
4
|
|
|
4
|
1
|
9
|
my($self, $predicate) = @_; |
|
864
|
4
|
|
|
|
|
9
|
my $iter = $self->iterator; |
|
865
|
4
|
|
|
|
|
6
|
my $found; |
|
866
|
4
|
|
|
|
|
7
|
my $count = 0; |
|
867
|
|
|
|
|
|
|
|
|
868
|
4
|
|
|
|
|
7
|
while (defined(my $item = $iter->())) { |
|
869
|
6
|
100
|
100
|
|
|
18
|
next if $predicate && !$predicate->($item); |
|
870
|
|
|
|
|
|
|
|
|
871
|
4
|
|
|
|
|
8
|
$count++; |
|
872
|
4
|
100
|
|
|
|
9
|
if ($count > 1) { |
|
873
|
1
|
|
|
|
|
3
|
return undef; # More than one element |
|
874
|
|
|
|
|
|
|
} |
|
875
|
3
|
|
|
|
|
6
|
$found = $item; |
|
876
|
|
|
|
|
|
|
} |
|
877
|
|
|
|
|
|
|
|
|
878
|
3
|
100
|
|
|
|
37
|
return $count == 1 ? $found : undef; |
|
879
|
|
|
|
|
|
|
} |
|
880
|
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
# ElementAt - return element at specified index |
|
882
|
|
|
|
|
|
|
sub ElementAt { |
|
883
|
4
|
|
|
4
|
1
|
8
|
my($self, $index) = @_; |
|
884
|
4
|
100
|
|
|
|
43
|
die "Index must be non-negative" if $index < 0; |
|
885
|
|
|
|
|
|
|
|
|
886
|
3
|
|
|
|
|
6
|
my $iter = $self->iterator; |
|
887
|
3
|
|
|
|
|
4
|
my $current = 0; |
|
888
|
|
|
|
|
|
|
|
|
889
|
3
|
|
|
|
|
7
|
while (defined(my $item = $iter->())) { |
|
890
|
5
|
100
|
|
|
|
14
|
return $item if $current == $index; |
|
891
|
3
|
|
|
|
|
24
|
$current++; |
|
892
|
|
|
|
|
|
|
} |
|
893
|
|
|
|
|
|
|
|
|
894
|
1
|
|
|
|
|
10
|
die "Index out of range"; |
|
895
|
|
|
|
|
|
|
} |
|
896
|
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
# ElementAtOrDefault - return element at index or undef |
|
898
|
|
|
|
|
|
|
sub ElementAtOrDefault { |
|
899
|
3
|
|
|
3
|
1
|
5
|
my($self, $index) = @_; |
|
900
|
3
|
100
|
|
|
|
8
|
return undef if $index < 0; |
|
901
|
|
|
|
|
|
|
|
|
902
|
2
|
|
|
|
|
6
|
my $iter = $self->iterator; |
|
903
|
2
|
|
|
|
|
15
|
my $current = 0; |
|
904
|
|
|
|
|
|
|
|
|
905
|
2
|
|
|
|
|
7
|
while (defined(my $item = $iter->())) { |
|
906
|
4
|
100
|
|
|
|
11
|
return $item if $current == $index; |
|
907
|
3
|
|
|
|
|
4
|
$current++; |
|
908
|
|
|
|
|
|
|
} |
|
909
|
|
|
|
|
|
|
|
|
910
|
1
|
|
|
|
|
4
|
return undef; |
|
911
|
|
|
|
|
|
|
} |
|
912
|
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
914
|
|
|
|
|
|
|
# Aggregation Methods |
|
915
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
916
|
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
# Count - count elements |
|
918
|
|
|
|
|
|
|
sub Count { |
|
919
|
45
|
|
|
45
|
1
|
145
|
my($self, $predicate) = @_; |
|
920
|
|
|
|
|
|
|
|
|
921
|
45
|
50
|
|
|
|
87
|
if ($predicate) { |
|
922
|
0
|
|
|
|
|
0
|
return $self->Where($predicate)->Count(); |
|
923
|
|
|
|
|
|
|
} |
|
924
|
|
|
|
|
|
|
|
|
925
|
45
|
|
|
|
|
37
|
my $count = 0; |
|
926
|
45
|
|
|
|
|
68
|
my $iter = $self->iterator; |
|
927
|
45
|
|
|
|
|
50
|
$count++ while defined $iter->(); |
|
928
|
45
|
|
|
|
|
204
|
return $count; |
|
929
|
|
|
|
|
|
|
} |
|
930
|
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
# Sum - calculate sum |
|
932
|
|
|
|
|
|
|
sub Sum { |
|
933
|
9
|
|
|
9
|
1
|
21
|
my($self, $selector) = @_; |
|
934
|
9
|
|
66
|
10
|
|
30
|
$selector ||= sub { $_[0] }; |
|
|
10
|
|
|
|
|
40
|
|
|
935
|
|
|
|
|
|
|
|
|
936
|
9
|
|
|
|
|
13
|
my $sum = 0; |
|
937
|
|
|
|
|
|
|
$self->ForEach(sub { |
|
938
|
19
|
|
|
19
|
|
1167
|
$sum += $selector->(shift); |
|
939
|
9
|
|
|
|
|
1027
|
}); |
|
940
|
9
|
|
|
|
|
42
|
return $sum; |
|
941
|
|
|
|
|
|
|
} |
|
942
|
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
# Min - find minimum |
|
944
|
|
|
|
|
|
|
sub Min { |
|
945
|
1
|
|
|
1
|
1
|
2
|
my($self, $selector) = @_; |
|
946
|
1
|
|
33
|
5
|
|
5
|
$selector ||= sub { $_[0] }; |
|
|
5
|
|
|
|
|
5
|
|
|
947
|
|
|
|
|
|
|
|
|
948
|
1
|
|
|
|
|
2
|
my $min; |
|
949
|
|
|
|
|
|
|
$self->ForEach(sub { |
|
950
|
5
|
|
|
5
|
|
5
|
my $val = $selector->(shift); |
|
951
|
5
|
100
|
100
|
|
|
13
|
$min = $val if !defined($min) || $val < $min; |
|
952
|
1
|
|
|
|
|
3
|
}); |
|
953
|
1
|
|
|
|
|
3
|
return $min; |
|
954
|
|
|
|
|
|
|
} |
|
955
|
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
# Max - find maximum |
|
957
|
|
|
|
|
|
|
sub Max { |
|
958
|
1
|
|
|
1
|
1
|
2
|
my($self, $selector) = @_; |
|
959
|
1
|
|
33
|
5
|
|
7
|
$selector ||= sub { $_[0] }; |
|
|
5
|
|
|
|
|
4
|
|
|
960
|
|
|
|
|
|
|
|
|
961
|
1
|
|
|
|
|
1
|
my $max; |
|
962
|
|
|
|
|
|
|
$self->ForEach(sub { |
|
963
|
5
|
|
|
5
|
|
6
|
my $val = $selector->(shift); |
|
964
|
5
|
100
|
100
|
|
|
10
|
$max = $val if !defined($max) || $val > $max; |
|
965
|
1
|
|
|
|
|
4
|
}); |
|
966
|
1
|
|
|
|
|
4
|
return $max; |
|
967
|
|
|
|
|
|
|
} |
|
968
|
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
# Average - calculate average |
|
970
|
|
|
|
|
|
|
sub Average { |
|
971
|
1
|
|
|
1
|
1
|
2
|
my($self, $selector) = @_; |
|
972
|
1
|
|
33
|
3
|
|
4
|
$selector ||= sub { $_[0] }; |
|
|
3
|
|
|
|
|
1
|
|
|
973
|
|
|
|
|
|
|
|
|
974
|
1
|
|
|
|
|
1
|
my $sum = 0; |
|
975
|
1
|
|
|
|
|
1
|
my $count = 0; |
|
976
|
|
|
|
|
|
|
$self->ForEach(sub { |
|
977
|
3
|
|
|
3
|
|
4
|
$sum += $selector->(shift); |
|
978
|
3
|
|
|
|
|
9
|
$count++; |
|
979
|
1
|
|
|
|
|
4
|
}); |
|
980
|
|
|
|
|
|
|
|
|
981
|
1
|
50
|
|
|
|
4
|
die "Sequence contains no elements" if $count == 0; |
|
982
|
1
|
|
|
|
|
2
|
return $sum / $count; |
|
983
|
|
|
|
|
|
|
} |
|
984
|
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
# AverageOrDefault - calculate average or return undef if empty |
|
986
|
|
|
|
|
|
|
sub AverageOrDefault { |
|
987
|
2
|
|
|
2
|
1
|
12
|
my($self, $selector) = @_; |
|
988
|
2
|
|
33
|
3
|
|
9
|
$selector ||= sub { $_[0] }; |
|
|
3
|
|
|
|
|
5
|
|
|
989
|
|
|
|
|
|
|
|
|
990
|
2
|
|
|
|
|
3
|
my $sum = 0; |
|
991
|
2
|
|
|
|
|
3
|
my $count = 0; |
|
992
|
|
|
|
|
|
|
$self->ForEach(sub { |
|
993
|
3
|
|
|
3
|
|
4
|
$sum += $selector->(shift); |
|
994
|
3
|
|
|
|
|
4
|
$count++; |
|
995
|
2
|
|
|
|
|
7
|
}); |
|
996
|
|
|
|
|
|
|
|
|
997
|
2
|
100
|
|
|
|
8
|
return undef if $count == 0; |
|
998
|
1
|
|
|
|
|
5
|
return $sum / $count; |
|
999
|
|
|
|
|
|
|
} |
|
1000
|
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
# Aggregate - apply accumulator function over sequence |
|
1002
|
|
|
|
|
|
|
sub Aggregate { |
|
1003
|
7
|
|
|
7
|
1
|
22
|
my($self, @args) = @_; |
|
1004
|
|
|
|
|
|
|
|
|
1005
|
7
|
|
|
|
|
10
|
my($seed, $func, $result_selector); |
|
1006
|
|
|
|
|
|
|
|
|
1007
|
7
|
100
|
|
|
|
47
|
if (@args == 1) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
# Aggregate($func) - use first element as seed |
|
1009
|
2
|
|
|
|
|
4
|
$func = $args[0]; |
|
1010
|
2
|
|
|
|
|
5
|
my $iter = $self->iterator; |
|
1011
|
2
|
|
|
|
|
5
|
$seed = $iter->(); |
|
1012
|
2
|
100
|
|
|
|
18
|
die "Sequence contains no elements" unless defined $seed; |
|
1013
|
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
# Continue with rest of elements |
|
1015
|
1
|
|
|
|
|
5
|
while (defined(my $item = $iter->())) { |
|
1016
|
3
|
|
|
|
|
8
|
$seed = $func->($seed, $item); |
|
1017
|
|
|
|
|
|
|
} |
|
1018
|
|
|
|
|
|
|
} |
|
1019
|
|
|
|
|
|
|
elsif (@args == 2) { |
|
1020
|
|
|
|
|
|
|
# Aggregate($seed, $func) |
|
1021
|
4
|
|
|
|
|
16
|
($seed, $func) = @args; |
|
1022
|
|
|
|
|
|
|
$self->ForEach(sub { |
|
1023
|
11
|
|
|
11
|
|
24
|
$seed = $func->($seed, shift); |
|
1024
|
4
|
|
|
|
|
58
|
}); |
|
1025
|
|
|
|
|
|
|
} |
|
1026
|
|
|
|
|
|
|
elsif (@args == 3) { |
|
1027
|
|
|
|
|
|
|
# Aggregate($seed, $func, $result_selector) |
|
1028
|
1
|
|
|
|
|
2
|
($seed, $func, $result_selector) = @args; |
|
1029
|
|
|
|
|
|
|
$self->ForEach(sub { |
|
1030
|
3
|
|
|
3
|
|
6
|
$seed = $func->($seed, shift); |
|
1031
|
1
|
|
|
|
|
25
|
}); |
|
1032
|
|
|
|
|
|
|
} |
|
1033
|
|
|
|
|
|
|
else { |
|
1034
|
0
|
|
|
|
|
0
|
die "Invalid number of arguments for Aggregate"; |
|
1035
|
|
|
|
|
|
|
} |
|
1036
|
|
|
|
|
|
|
|
|
1037
|
6
|
100
|
|
|
|
32
|
return $result_selector ? $result_selector->($seed) : $seed; |
|
1038
|
|
|
|
|
|
|
} |
|
1039
|
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
1041
|
|
|
|
|
|
|
# Conversion Methods |
|
1042
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
1043
|
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
# ToArray - convert to array |
|
1045
|
|
|
|
|
|
|
sub ToArray { |
|
1046
|
264
|
|
|
264
|
1
|
451
|
my($self) = @_; |
|
1047
|
264
|
|
|
|
|
248
|
my @result; |
|
1048
|
264
|
|
|
|
|
365
|
my $iter = $self->iterator; |
|
1049
|
|
|
|
|
|
|
|
|
1050
|
264
|
|
|
|
|
386
|
while (defined(my $item = $iter->())) { |
|
1051
|
848
|
|
|
|
|
1762
|
push @result, $item; |
|
1052
|
|
|
|
|
|
|
} |
|
1053
|
262
|
|
|
|
|
804
|
return @result; |
|
1054
|
|
|
|
|
|
|
} |
|
1055
|
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
# ToList - convert to array reference |
|
1057
|
|
|
|
|
|
|
sub ToList { |
|
1058
|
0
|
|
|
0
|
1
|
0
|
my($self) = @_; |
|
1059
|
0
|
|
|
|
|
0
|
return [$self->ToArray()]; |
|
1060
|
|
|
|
|
|
|
} |
|
1061
|
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
# ToDictionary - convert sequence to hash reference |
|
1063
|
|
|
|
|
|
|
sub ToDictionary { |
|
1064
|
5
|
|
|
5
|
1
|
11
|
my($self, $key_selector, $value_selector) = @_; |
|
1065
|
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
# Default value selector returns the element itself |
|
1067
|
5
|
|
66
|
2
|
|
40
|
$value_selector ||= sub { $_[0] }; |
|
|
2
|
|
|
|
|
5
|
|
|
1068
|
|
|
|
|
|
|
|
|
1069
|
5
|
|
|
|
|
12
|
my %dictionary = (); |
|
1070
|
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
$self->ForEach(sub { |
|
1072
|
11
|
|
|
11
|
|
18
|
my $item = shift; |
|
1073
|
11
|
|
|
|
|
20
|
my $key = $key_selector->($item); |
|
1074
|
11
|
|
|
|
|
35
|
my $value = $value_selector->($item); |
|
1075
|
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
# Convert undef key to empty string |
|
1077
|
11
|
50
|
|
|
|
35
|
$key = '' unless defined $key; |
|
1078
|
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
# Later values overwrite earlier ones (Perl hash behavior) |
|
1080
|
11
|
|
|
|
|
32
|
$dictionary{$key} = $value; |
|
1081
|
5
|
|
|
|
|
24
|
}); |
|
1082
|
|
|
|
|
|
|
|
|
1083
|
5
|
|
|
|
|
24
|
return \%dictionary; |
|
1084
|
|
|
|
|
|
|
} |
|
1085
|
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
# ToLookup - convert sequence to hash of arrays |
|
1087
|
|
|
|
|
|
|
sub ToLookup { |
|
1088
|
5
|
|
|
5
|
1
|
11
|
my($self, $key_selector, $value_selector) = @_; |
|
1089
|
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
# Default value selector returns the element itself |
|
1091
|
5
|
|
66
|
5
|
|
23
|
$value_selector ||= sub { $_[0] }; |
|
|
5
|
|
|
|
|
8
|
|
|
1092
|
|
|
|
|
|
|
|
|
1093
|
5
|
|
|
|
|
8
|
my %lookup = (); |
|
1094
|
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
$self->ForEach(sub { |
|
1096
|
9
|
|
|
9
|
|
12
|
my $item = shift; |
|
1097
|
9
|
|
|
|
|
18
|
my $key = $key_selector->($item); |
|
1098
|
9
|
|
|
|
|
29
|
my $value = $value_selector->($item); |
|
1099
|
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
# Convert undef key to empty string |
|
1101
|
9
|
50
|
|
|
|
26
|
$key = '' unless defined $key; |
|
1102
|
|
|
|
|
|
|
|
|
1103
|
9
|
|
|
|
|
12
|
push @{$lookup{$key}}, $value; |
|
|
9
|
|
|
|
|
31
|
|
|
1104
|
5
|
|
|
|
|
23
|
}); |
|
1105
|
|
|
|
|
|
|
|
|
1106
|
5
|
|
|
|
|
29
|
return \%lookup; |
|
1107
|
|
|
|
|
|
|
} |
|
1108
|
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
# DefaultIfEmpty - return default value if empty |
|
1110
|
|
|
|
|
|
|
sub DefaultIfEmpty { |
|
1111
|
6
|
|
|
6
|
1
|
11
|
my($self, $default_value) = @_; |
|
1112
|
|
|
|
|
|
|
# default_value defaults to undef |
|
1113
|
6
|
|
|
|
|
14
|
my $has_default_arg = @_ > 1; |
|
1114
|
6
|
100
|
|
|
|
19
|
if (!$has_default_arg) { |
|
1115
|
1
|
|
|
|
|
2
|
$default_value = undef; |
|
1116
|
|
|
|
|
|
|
} |
|
1117
|
|
|
|
|
|
|
|
|
1118
|
6
|
|
|
|
|
9
|
my $class = ref($self); |
|
1119
|
6
|
|
|
|
|
1093
|
my $iter = $self->iterator; |
|
1120
|
6
|
|
|
|
|
9
|
my $has_elements = 0; |
|
1121
|
6
|
|
|
|
|
9
|
my $returned_default = 0; |
|
1122
|
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
return $class->new(sub { |
|
1124
|
11
|
|
|
11
|
|
22
|
my $item = $iter->(); |
|
1125
|
11
|
100
|
|
|
|
24
|
if (defined $item) { |
|
1126
|
2
|
|
|
|
|
11
|
$has_elements = 1; |
|
1127
|
2
|
|
|
|
|
5
|
return $item; |
|
1128
|
|
|
|
|
|
|
} |
|
1129
|
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
# EOF reached |
|
1131
|
9
|
100
|
100
|
|
|
38
|
if (!$has_elements && !$returned_default) { |
|
1132
|
5
|
|
|
|
|
8
|
$returned_default = 1; |
|
1133
|
5
|
|
|
|
|
41
|
return $default_value; |
|
1134
|
|
|
|
|
|
|
} |
|
1135
|
|
|
|
|
|
|
|
|
1136
|
4
|
|
|
|
|
12
|
return undef; |
|
1137
|
6
|
|
|
|
|
31
|
}); |
|
1138
|
|
|
|
|
|
|
} |
|
1139
|
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
# ToLTSV - write to LTSV file |
|
1141
|
|
|
|
|
|
|
sub ToLTSV { |
|
1142
|
6
|
|
|
6
|
1
|
7
|
my($self, $filename) = @_; |
|
1143
|
|
|
|
|
|
|
|
|
1144
|
6
|
|
|
|
|
11
|
my $fh; |
|
1145
|
6
|
50
|
|
|
|
7
|
if ($] >= 5.006) { |
|
1146
|
|
|
|
|
|
|
# Avoid "Too many arguments for open at" error when running with Perl 5.005_03 |
|
1147
|
6
|
50
|
|
|
|
334
|
eval q{ open($fh, '>', $filename) } or die "Cannot open '$filename': $!"; |
|
1148
|
|
|
|
|
|
|
} |
|
1149
|
|
|
|
|
|
|
else { |
|
1150
|
0
|
|
|
|
|
0
|
$fh = \do { local *_ }; |
|
|
0
|
|
|
|
|
0
|
|
|
1151
|
0
|
0
|
|
|
|
0
|
open($fh, "> $filename") or die "Cannot open '$filename': $!"; |
|
1152
|
|
|
|
|
|
|
} |
|
1153
|
6
|
|
|
|
|
26
|
binmode $fh; # Write raw bytes; prevents \r\n translation on Windows |
|
1154
|
|
|
|
|
|
|
# and is consistent with FromLTSV |
|
1155
|
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
$self->ForEach(sub { |
|
1157
|
7
|
|
|
7
|
|
8
|
my $record = shift; |
|
1158
|
|
|
|
|
|
|
# LTSV spec: tab is the field separator; newline terminates the record. |
|
1159
|
|
|
|
|
|
|
# Sanitize values to prevent structural corruption of the output file. |
|
1160
|
|
|
|
|
|
|
my $line = join("\t", map { |
|
1161
|
7
|
100
|
|
|
|
20
|
my $v = defined($record->{$_}) ? $record->{$_} : ''; |
|
|
10
|
|
|
|
|
18
|
|
|
1162
|
10
|
|
|
|
|
22
|
$v =~ s/[\t\n\r]/ /g; |
|
1163
|
10
|
|
|
|
|
27
|
"$_:$v" |
|
1164
|
|
|
|
|
|
|
} sort keys %$record); |
|
1165
|
7
|
|
|
|
|
37
|
print $fh $line, "\n"; |
|
1166
|
6
|
|
|
|
|
34
|
}); |
|
1167
|
|
|
|
|
|
|
|
|
1168
|
6
|
|
|
|
|
307
|
close $fh; |
|
1169
|
6
|
|
|
|
|
25
|
return 1; |
|
1170
|
|
|
|
|
|
|
} |
|
1171
|
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
1173
|
|
|
|
|
|
|
# Utility Methods |
|
1174
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
1175
|
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
# ForEach - execute action for each element |
|
1177
|
|
|
|
|
|
|
sub ForEach { |
|
1178
|
85
|
|
|
85
|
1
|
156
|
my($self, $action) = @_; |
|
1179
|
85
|
|
|
|
|
122
|
my $iter = $self->iterator; |
|
1180
|
|
|
|
|
|
|
|
|
1181
|
85
|
|
|
|
|
120
|
while (defined(my $item = $iter->())) { |
|
1182
|
211
|
|
|
|
|
269
|
$action->($item); |
|
1183
|
|
|
|
|
|
|
} |
|
1184
|
85
|
|
|
|
|
132
|
return; |
|
1185
|
|
|
|
|
|
|
} |
|
1186
|
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
1; |
|
1188
|
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
###################################################################### |
|
1190
|
|
|
|
|
|
|
# |
|
1191
|
|
|
|
|
|
|
# LTSV::LINQ::Ordered - Ordered query supporting ThenBy/ThenByDescending |
|
1192
|
|
|
|
|
|
|
# |
|
1193
|
|
|
|
|
|
|
# Returned by OrderBy* methods. Inherits all LTSV::LINQ methods via @ISA. |
|
1194
|
|
|
|
|
|
|
# ThenBy* methods are only available on this class, mirroring the way |
|
1195
|
|
|
|
|
|
|
# .NET LINQ's IOrderedEnumerable exposes ThenBy/ThenByDescending while |
|
1196
|
|
|
|
|
|
|
# plain IEnumerable does not. |
|
1197
|
|
|
|
|
|
|
# |
|
1198
|
|
|
|
|
|
|
# Stability guarantee: every sort uses a Schwartzian-Transform-style |
|
1199
|
|
|
|
|
|
|
# decorated array that appends the original element index as a final |
|
1200
|
|
|
|
|
|
|
# tie-breaker. This makes the multi-key sort completely stable on all |
|
1201
|
|
|
|
|
|
|
# Perl versions including 5.005_03, where built-in sort stability is not |
|
1202
|
|
|
|
|
|
|
# guaranteed. |
|
1203
|
|
|
|
|
|
|
###################################################################### |
|
1204
|
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
package LTSV::LINQ::Ordered; |
|
1206
|
|
|
|
|
|
|
|
|
1207
|
|
|
|
|
|
|
# 5.005_03-compatible inheritance (no 'use parent', no 'our') |
|
1208
|
|
|
|
|
|
|
@LTSV::LINQ::Ordered::ISA = ('LTSV::LINQ'); |
|
1209
|
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
# _new_ordered($items_aref, $specs_aref) - internal constructor |
|
1211
|
|
|
|
|
|
|
# |
|
1212
|
|
|
|
|
|
|
# $specs_aref is an arrayref of sort-spec hashrefs: |
|
1213
|
|
|
|
|
|
|
# { sel => $code_ref, # key selector: ($item) -> $key |
|
1214
|
|
|
|
|
|
|
# dir => 1 or -1, # 1 = ascending, -1 = descending |
|
1215
|
|
|
|
|
|
|
# type => 'smart'|'str'|'num' # comparison family |
|
1216
|
|
|
|
|
|
|
# } |
|
1217
|
|
|
|
|
|
|
sub _new_ordered { |
|
1218
|
97
|
|
|
97
|
|
116
|
my($class, $items, $specs) = @_; |
|
1219
|
|
|
|
|
|
|
# Use _factory so that iterator() returns a fresh sorted iterator on |
|
1220
|
|
|
|
|
|
|
# each call (enables re-iteration, e.g. in GroupJoin result selectors). |
|
1221
|
|
|
|
|
|
|
# Methods like Take/Where/Select that call ref($self)->new(sub{...}) |
|
1222
|
|
|
|
|
|
|
# will create a plain object with an {iterator} field (no _factory), |
|
1223
|
|
|
|
|
|
|
# so they are unaffected by this override. |
|
1224
|
|
|
|
|
|
|
return bless { |
|
1225
|
|
|
|
|
|
|
_items => $items, |
|
1226
|
|
|
|
|
|
|
_specs => $specs, |
|
1227
|
|
|
|
|
|
|
_factory => sub { |
|
1228
|
70
|
|
|
70
|
|
129
|
my @sorted = _perform_sort($items, $specs); |
|
1229
|
70
|
|
|
|
|
73
|
my $i = 0; |
|
1230
|
70
|
100
|
|
|
|
248
|
return sub { $i < scalar(@sorted) ? $sorted[$i++] : undef }; |
|
|
350
|
|
|
|
|
552
|
|
|
1231
|
|
|
|
|
|
|
}, |
|
1232
|
97
|
|
|
|
|
647
|
}, $class; |
|
1233
|
|
|
|
|
|
|
} |
|
1234
|
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
# _perform_sort($items_aref, $specs_aref) - core stable multi-key sort |
|
1236
|
|
|
|
|
|
|
# |
|
1237
|
|
|
|
|
|
|
# Decorated-array (Schwartzian Transform) technique: |
|
1238
|
|
|
|
|
|
|
# 1. Build [ orig_index, [key1, key2, ..., keyN], item ] per element |
|
1239
|
|
|
|
|
|
|
# 2. Sort by key1..keyN in sequence; original index as final tie-breaker |
|
1240
|
|
|
|
|
|
|
# 3. Strip decoration and return plain item list |
|
1241
|
|
|
|
|
|
|
# |
|
1242
|
|
|
|
|
|
|
# The original-index tie-breaker guarantees stability on every Perl version. |
|
1243
|
|
|
|
|
|
|
sub _perform_sort { |
|
1244
|
70
|
|
|
70
|
|
84
|
my($items, $specs) = @_; |
|
1245
|
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
# Step 1: decorate |
|
1247
|
|
|
|
|
|
|
my @decorated = map { |
|
1248
|
309
|
|
|
|
|
230
|
my $idx = $_; |
|
1249
|
309
|
|
|
|
|
255
|
my $item = $items->[$idx]; |
|
1250
|
309
|
|
|
|
|
235
|
my @keys = map { _extract_key($_->{sel}->($item), $_->{type}) } @{$specs}; |
|
|
464
|
|
|
|
|
581
|
|
|
|
309
|
|
|
|
|
277
|
|
|
1251
|
309
|
|
|
|
|
501
|
[$idx, \@keys, $item] |
|
1252
|
70
|
|
|
|
|
118
|
} 0 .. $#{$items}; |
|
|
70
|
|
|
|
|
134
|
|
|
1253
|
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
# Step 2: sort |
|
1255
|
|
|
|
|
|
|
my @sorted_dec = sort { |
|
1256
|
70
|
|
|
|
|
206
|
my $r = 0; |
|
|
457
|
|
|
|
|
470
|
|
|
1257
|
457
|
|
|
|
|
362
|
for my $i (0 .. $#{$specs}) { |
|
|
457
|
|
|
|
|
539
|
|
|
1258
|
563
|
|
|
|
|
687
|
my $cmp = _compare_keys($a->[1][$i], $b->[1][$i], $specs->[$i]{type}); |
|
1259
|
563
|
100
|
|
|
|
701
|
if ($specs->[$i]{dir} < 0) { $cmp = -$cmp } |
|
|
88
|
|
|
|
|
71
|
|
|
1260
|
563
|
100
|
|
|
|
675
|
if ($cmp != 0) { $r = $cmp; last } |
|
|
438
|
|
|
|
|
305
|
|
|
|
438
|
|
|
|
|
387
|
|
|
1261
|
|
|
|
|
|
|
} |
|
1262
|
457
|
100
|
|
|
|
632
|
$r != 0 ? $r : ($a->[0] <=> $b->[0]); |
|
1263
|
|
|
|
|
|
|
} @decorated; |
|
1264
|
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
# Step 3: undecorate |
|
1266
|
70
|
|
|
|
|
83
|
return map { $_->[2] } @sorted_dec; |
|
|
309
|
|
|
|
|
501
|
|
|
1267
|
|
|
|
|
|
|
} |
|
1268
|
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
# _extract_key($raw_value, $type) - normalise one sort key |
|
1270
|
|
|
|
|
|
|
# |
|
1271
|
|
|
|
|
|
|
# Returns a scalar (num/str) or a two-element arrayref [flag, value] |
|
1272
|
|
|
|
|
|
|
# for 'smart' type: |
|
1273
|
|
|
|
|
|
|
# [0, $numeric_val] - key is numeric |
|
1274
|
|
|
|
|
|
|
# [1, $string_val ] - key is string |
|
1275
|
|
|
|
|
|
|
sub _extract_key { |
|
1276
|
464
|
|
|
464
|
|
999
|
my($val, $type) = @_; |
|
1277
|
464
|
100
|
|
|
|
536
|
$val = '' unless defined $val; |
|
1278
|
464
|
100
|
|
|
|
590
|
if ($type eq 'num') { |
|
|
|
100
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
# Force numeric; undef/empty/non-numeric treated as 0 |
|
1280
|
113
|
100
|
66
|
|
|
421
|
return defined($val) && length($val) ? $val + 0 : 0; |
|
1281
|
|
|
|
|
|
|
} |
|
1282
|
|
|
|
|
|
|
elsif ($type eq 'str') { |
|
1283
|
155
|
|
|
|
|
233
|
return "$val"; |
|
1284
|
|
|
|
|
|
|
} |
|
1285
|
|
|
|
|
|
|
else { |
|
1286
|
|
|
|
|
|
|
# smart: detect whether value looks like a number |
|
1287
|
196
|
|
|
|
|
208
|
my $t = $val; |
|
1288
|
196
|
|
|
|
|
491
|
$t =~ s/^\s+|\s+$//g; |
|
1289
|
196
|
100
|
|
|
|
400
|
if ($t =~ /^[+-]?(?:\d+\.?\d*|\d*\.\d+)(?:[eE][+-]?\d+)?$/) { |
|
1290
|
45
|
|
|
|
|
154
|
return [0, $t + 0]; |
|
1291
|
|
|
|
|
|
|
} |
|
1292
|
|
|
|
|
|
|
else { |
|
1293
|
151
|
|
|
|
|
304
|
return [1, "$val"]; |
|
1294
|
|
|
|
|
|
|
} |
|
1295
|
|
|
|
|
|
|
} |
|
1296
|
|
|
|
|
|
|
} |
|
1297
|
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
# _compare_keys($ka, $kb, $type) - compare two extracted keys |
|
1299
|
|
|
|
|
|
|
sub _compare_keys { |
|
1300
|
563
|
|
|
563
|
|
588
|
my($ka, $kb, $type) = @_; |
|
1301
|
563
|
100
|
|
|
|
708
|
if ($type eq 'num') { |
|
|
|
100
|
|
|
|
|
|
|
1302
|
124
|
|
|
|
|
131
|
return $ka <=> $kb; |
|
1303
|
|
|
|
|
|
|
} |
|
1304
|
|
|
|
|
|
|
elsif ($type eq 'str') { |
|
1305
|
178
|
|
|
|
|
189
|
return $ka cmp $kb; |
|
1306
|
|
|
|
|
|
|
} |
|
1307
|
|
|
|
|
|
|
else { |
|
1308
|
|
|
|
|
|
|
# smart: both are [flag, value] arrayrefs |
|
1309
|
261
|
|
|
|
|
286
|
my $fa = $ka->[0]; my $va = $ka->[1]; |
|
|
261
|
|
|
|
|
235
|
|
|
1310
|
261
|
|
|
|
|
209
|
my $fb = $kb->[0]; my $vb = $kb->[1]; |
|
|
261
|
|
|
|
|
206
|
|
|
1311
|
261
|
100
|
66
|
|
|
648
|
if ($fa == 0 && $fb == 0) { return $va <=> $vb } # both numeric |
|
|
60
|
50
|
33
|
|
|
87
|
|
|
1312
|
201
|
|
|
|
|
286
|
elsif ($fa == 1 && $fb == 1) { return $va cmp $vb } # both string |
|
1313
|
0
|
|
|
|
|
0
|
else { return $fa <=> $fb } # mixed: numeric before string |
|
1314
|
|
|
|
|
|
|
} |
|
1315
|
|
|
|
|
|
|
} |
|
1316
|
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
# (No iterator() override needed: _factory in {_items,_specs,_factory} objects |
|
1318
|
|
|
|
|
|
|
# is handled by LTSV::LINQ::iterator(), which calls _factory->() each time. |
|
1319
|
|
|
|
|
|
|
# Objects produced by Take/Where/Select etc. via ref($self)->new(sub{...}) |
|
1320
|
|
|
|
|
|
|
# store their closure in {iterator} and do not have _factory, so they use |
|
1321
|
|
|
|
|
|
|
# the normal non-re-entrant path.) |
|
1322
|
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
# _thenby($key_selector, $dir, $type) - shared implementation for all ThenBy* |
|
1324
|
|
|
|
|
|
|
# |
|
1325
|
|
|
|
|
|
|
# Non-destructive: builds a new spec list and returns a new |
|
1326
|
|
|
|
|
|
|
# LTSV::LINQ::Ordered object. The original object is unchanged, so |
|
1327
|
|
|
|
|
|
|
# branching sort chains work correctly: |
|
1328
|
|
|
|
|
|
|
# |
|
1329
|
|
|
|
|
|
|
# my $by_dept = From(\@data)->OrderBy(sub { $_[0]{dept} }); |
|
1330
|
|
|
|
|
|
|
# my $by_dept_name = $by_dept->ThenBy(sub { $_[0]{name} }); |
|
1331
|
|
|
|
|
|
|
# my $by_dept_salary = $by_dept->ThenByNum(sub { $_[0]{salary} }); |
|
1332
|
|
|
|
|
|
|
# # $by_dept_name and $by_dept_salary are independent queries |
|
1333
|
|
|
|
|
|
|
sub _thenby { |
|
1334
|
27
|
|
|
27
|
|
37
|
my($self, $key_selector, $dir, $type) = @_; |
|
1335
|
27
|
|
|
|
|
27
|
my @new_specs = (@{$self->{_specs}}, { sel => $key_selector, dir => $dir, type => $type }); |
|
|
27
|
|
|
|
|
70
|
|
|
1336
|
27
|
|
|
|
|
39
|
return LTSV::LINQ::Ordered->_new_ordered($self->{_items}, \@new_specs); |
|
1337
|
|
|
|
|
|
|
} |
|
1338
|
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
# ThenBy - ascending secondary key, smart comparison |
|
1340
|
12
|
|
|
12
|
|
19
|
sub ThenBy { my($s,$k)=@_; $s->_thenby($k, 1, 'smart') } |
|
|
12
|
|
|
|
|
25
|
|
|
1341
|
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
# ThenByDescending - descending secondary key, smart comparison |
|
1343
|
1
|
|
|
1
|
|
2
|
sub ThenByDescending { my($s,$k)=@_; $s->_thenby($k, -1, 'smart') } |
|
|
1
|
|
|
|
|
4
|
|
|
1344
|
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
# ThenByStr - ascending secondary key, string comparison |
|
1346
|
6
|
|
|
6
|
|
9
|
sub ThenByStr { my($s,$k)=@_; $s->_thenby($k, 1, 'str') } |
|
|
6
|
|
|
|
|
12
|
|
|
1347
|
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
# ThenByStrDescending - descending secondary key, string comparison |
|
1349
|
1
|
|
|
1
|
|
3
|
sub ThenByStrDescending { my($s,$k)=@_; $s->_thenby($k, -1, 'str') } |
|
|
1
|
|
|
|
|
4
|
|
|
1350
|
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
# ThenByNum - ascending secondary key, numeric comparison |
|
1352
|
5
|
|
|
5
|
|
30
|
sub ThenByNum { my($s,$k)=@_; $s->_thenby($k, 1, 'num') } |
|
|
5
|
|
|
|
|
12
|
|
|
1353
|
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
# ThenByNumDescending - descending secondary key, numeric comparison |
|
1355
|
2
|
|
|
2
|
|
3
|
sub ThenByNumDescending { my($s,$k)=@_; $s->_thenby($k, -1, 'num') } |
|
|
2
|
|
|
|
|
5
|
|
|
1356
|
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
1; |
|
1358
|
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
=encoding utf8 |
|
1360
|
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
=head1 NAME |
|
1362
|
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
LTSV::LINQ - LINQ-style query interface for LTSV files |
|
1364
|
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
=head1 VERSION |
|
1366
|
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
Version 1.05 |
|
1368
|
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
1370
|
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
use LTSV::LINQ; |
|
1372
|
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
# Read LTSV file and query |
|
1374
|
|
|
|
|
|
|
my @results = LTSV::LINQ->FromLTSV("access.log") |
|
1375
|
|
|
|
|
|
|
->Where(sub { $_[0]{status} eq '200' }) |
|
1376
|
|
|
|
|
|
|
->Select(sub { $_[0]{url} }) |
|
1377
|
|
|
|
|
|
|
->Distinct() |
|
1378
|
|
|
|
|
|
|
->ToArray(); |
|
1379
|
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
# DSL syntax for simple filtering |
|
1381
|
|
|
|
|
|
|
my @errors = LTSV::LINQ->FromLTSV("access.log") |
|
1382
|
|
|
|
|
|
|
->Where(status => '404') |
|
1383
|
|
|
|
|
|
|
->ToArray(); |
|
1384
|
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
# Grouping and aggregation |
|
1386
|
|
|
|
|
|
|
my @stats = LTSV::LINQ->FromLTSV("access.log") |
|
1387
|
|
|
|
|
|
|
->GroupBy(sub { $_[0]{status} }) |
|
1388
|
|
|
|
|
|
|
->Select(sub { |
|
1389
|
|
|
|
|
|
|
my $g = shift; |
|
1390
|
|
|
|
|
|
|
return { |
|
1391
|
|
|
|
|
|
|
Status => $g->{Key}, |
|
1392
|
|
|
|
|
|
|
Count => scalar(@{$g->{Elements}}) |
|
1393
|
|
|
|
|
|
|
}; |
|
1394
|
|
|
|
|
|
|
}) |
|
1395
|
|
|
|
|
|
|
->OrderByDescending(sub { $_[0]{Count} }) |
|
1396
|
|
|
|
|
|
|
->ToArray(); |
|
1397
|
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
=head1 TABLE OF CONTENTS |
|
1399
|
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
=over 4 |
|
1401
|
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
=item * L |
|
1403
|
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
=item * L</METHODS> - Complete method reference (60 methods) |
|
1405
|
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
=item * L - 8 practical examples |
|
1407
|
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
=item * L - Lazy evaluation, method chaining, DSL |
|
1409
|
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
=item * L - Iterator design, execution flow |
|
1411
|
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
=item * L - Memory usage, optimization tips |
|
1413
|
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
=item * L - Perl 5.005+ support, pure Perl |
|
1415
|
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
=item * L - Error messages |
|
1417
|
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
=item * L - Common questions and answers |
|
1419
|
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
=item * L - Common patterns |
|
1421
|
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
=item * L - Related resources |
|
1423
|
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
=item * L |
|
1425
|
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
=item * L |
|
1427
|
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
=back |
|
1429
|
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
1431
|
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
LTSV::LINQ provides a LINQ-style query interface for LTSV (Labeled |
|
1433
|
|
|
|
|
|
|
Tab-Separated Values) files. It offers a fluent, chainable API for |
|
1434
|
|
|
|
|
|
|
filtering, transforming, and aggregating LTSV data. |
|
1435
|
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
Key features: |
|
1437
|
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
=over 4 |
|
1439
|
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
=item * B - O(1) memory usage for most operations |
|
1441
|
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
=item * B - Fluent, readable query composition |
|
1443
|
|
|
|
|
|
|
|
|
1444
|
|
|
|
|
|
|
=item * B - Simple key-value filtering |
|
1445
|
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
=item * B<60 LINQ methods> - Comprehensive query capabilities |
|
1447
|
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
=item * B - No XS dependencies |
|
1449
|
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
=item * B - Works on ancient and modern Perl |
|
1451
|
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
=back |
|
1453
|
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
=head2 What is LTSV? |
|
1455
|
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
LTSV (Labeled Tab-Separated Values) is a text format for structured logs and |
|
1457
|
|
|
|
|
|
|
data records. Each line consists of tab-separated fields, where each field is |
|
1458
|
|
|
|
|
|
|
a C<label:value> pair. A single LTSV record occupies exactly one line. |
|
1459
|
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
B |
|
1461
|
|
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
|
time:2026-02-13T10:00:00 host:192.0.2.1 status:200 url:/index.html bytes:1024 |
|
1463
|
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
=head3 LTSV Characteristics |
|
1465
|
|
|
|
|
|
|
|
|
1466
|
|
|
|
|
|
|
=over 4 |
|
1467
|
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
=item * B |
|
1469
|
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
A complete record is always a single newline-terminated line. This makes |
|
1471
|
|
|
|
|
|
|
streaming processing trivial: read a line, parse it, process it, discard it. |
|
1472
|
|
|
|
|
|
|
There is no multi-line quoting problem, no block parser required. |
|
1473
|
|
|
|
|
|
|
|
|
1474
|
|
|
|
|
|
|
=item * B |
|
1475
|
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
Fields are separated by a single horizontal tab character (C<0x09>). |
|
1477
|
|
|
|
|
|
|
The tab is a C0 control character in the ASCII range (C<0x00>-C<0x7F>), |
|
1478
|
|
|
|
|
|
|
which has an important consequence for multibyte character encodings. |
|
1479
|
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
=item * B |
|
1481
|
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
Within each field, the label and value are separated by a single colon |
|
1483
|
|
|
|
|
|
|
(C<0x3A>, US-ASCII C<:>). This is also a plain ASCII character with the same |
|
1484
|
|
|
|
|
|
|
multibyte-safety guarantees as the tab. |
|
1485
|
|
|
|
|
|
|
|
|
1486
|
|
|
|
|
|
|
=back |
|
1487
|
|
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
|
=head3 LTSV Advantages |
|
1489
|
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
=over 4 |
|
1491
|
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
=item * B |
|
1493
|
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
This is perhaps the most important technical advantage of LTSV over formats |
|
1495
|
|
|
|
|
|
|
such as CSV (comma-delimited) or TSV without labels. |
|
1496
|
|
|
|
|
|
|
|
|
1497
|
|
|
|
|
|
|
In many multibyte character encodings used across Asia and beyond, a |
|
1498
|
|
|
|
|
|
|
single logical character is represented by a sequence of two or more bytes. |
|
1499
|
|
|
|
|
|
|
The danger in older encodings is that a byte within a multibyte sequence can |
|
1500
|
|
|
|
|
|
|
coincidentally equal the byte value of an ASCII delimiter, causing a naive |
|
1501
|
|
|
|
|
|
|
byte-level parser to split the field in the wrong place. |
|
1502
|
|
|
|
|
|
|
|
|
1503
|
|
|
|
|
|
|
The following table shows well-known encodings and their byte ranges: |
|
1504
|
|
|
|
|
|
|
|
|
1505
|
|
|
|
|
|
|
Encoding First byte range Following byte range |
|
1506
|
|
|
|
|
|
|
---------- -------------------- ------------------------------- |
|
1507
|
|
|
|
|
|
|
Big5 0x81-0xFE 0x40-0x7E, 0xA1-0xFE |
|
1508
|
|
|
|
|
|
|
Big5-HKSCS 0x81-0xFE 0x40-0x7E, 0xA1-0xFE |
|
1509
|
|
|
|
|
|
|
CP932X 0x81-0x9F, 0xE0-0xFC 0x40-0x7E, 0x80-0xFC |
|
1510
|
|
|
|
|
|
|
EUC-JP 0x8E-0x8F, 0xA1-0xFE 0xA1-0xFE |
|
1511
|
|
|
|
|
|
|
GB 18030 0x81-0xFE 0x30-0x39, 0x40-0x7E, 0x80-0xFE |
|
1512
|
|
|
|
|
|
|
GBK 0x81-0xFE 0x40-0x7E, 0x80-0xFE |
|
1513
|
|
|
|
|
|
|
Shift_JIS 0x81-0x9F, 0xE0-0xFC 0x40-0x7E, 0x80-0xFC |
|
1514
|
|
|
|
|
|
|
RFC 2279 0xC2-0xF4 0x80-0xBF |
|
1515
|
|
|
|
|
|
|
UHC 0x81-0xFE 0x41-0x5A, 0x61-0x7A, 0x81-0xFE |
|
1516
|
|
|
|
|
|
|
UTF-8 0xC2-0xF4 0x80-0xBF |
|
1517
|
|
|
|
|
|
|
WTF-8 0xC2-0xF4 0x80-0xBF |
|
1518
|
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
The tab character is C<0x09>. The colon is C<0x3A>. Both values are |
|
1520
|
|
|
|
|
|
|
strictly below C<0x40> and outside GB 18030's C<0x30>-C<0x39>, so neither is a following byte in the encodings |
|
1521
|
|
|
|
|
|
|
listed above. Neither C<0x09> nor C<0x3A> appears anywhere as a first byte |
|
1522
|
|
|
|
|
|
|
either. Therefore: |
|
1523
|
|
|
|
|
|
|
|
|
1524
|
|
|
|
|
|
|
TAB (0x09) never appears as a byte within any multibyte character |
|
1525
|
|
|
|
|
|
|
in Big5, Big5-HKSCS, CP932X, EUC-JP, GB 18030, GBK, Shift_JIS, |
|
1526
|
|
|
|
|
|
|
RFC 2279, UHC, UTF-8, or WTF-8. |
|
1527
|
|
|
|
|
|
|
':' (0x3A) never appears as a byte within any multibyte character |
|
1528
|
|
|
|
|
|
|
in the same set of encodings. |
|
1529
|
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
This means that LTSV files containing values in B<any> of those encodings |
|
1531
|
|
|
|
|
|
|
can be parsed correctly by a B<simple byte-level split> on tab and colon, |
|
1532
|
|
|
|
|
|
|
with no knowledge of the encoding whatsoever. There is no need to decode |
|
1533
|
|
|
|
|
|
|
the text before parsing, and no risk of a misidentified delimiter. |
|
1534
|
|
|
|
|
|
|
|
|
1535
|
|
|
|
|
|
|
By contrast, CSV has encoding problems of a different kind. |
|
1536
|
|
|
|
|
|
|
The comma (C<0x2C>) and the double-quote (C<0x22>) do B<not> appear as |
|
1537
|
|
|
|
|
|
|
following bytes in Shift_JIS or Big5, so they are not directly confused with |
|
1538
|
|
|
|
|
|
|
multibyte character content. However, the backslash (C<0x5C>) B<does> |
|
1539
|
|
|
|
|
|
|
appear as a valid following byte in both Shift_JIS (following byte range |
|
1540
|
|
|
|
|
|
|
C<0x40>-C<0x7E> includes C<0x5C>) and Big5 (same range). Many CSV |
|
1541
|
|
|
|
|
|
|
parsers and the C runtime on Windows use backslash or backslash-like |
|
1542
|
|
|
|
|
|
|
sequences for escaping, so a naive byte-level search for the escape |
|
1543
|
|
|
|
|
|
|
character can be misled by a multibyte character whose second byte is |
|
1544
|
|
|
|
|
|
|
C<0x5C>. Beyond this, CSV's quoting rules are underspecified (RFC 4180 |
|
1545
|
|
|
|
|
|
|
vs. Excel vs. custom dialects differ), which makes writing a correct, |
|
1546
|
|
|
|
|
|
|
encoding-aware CSV parser considerably harder than parsing LTSV. |
|
1547
|
|
|
|
|
|
|
LTSV sidesteps all of these issues by choosing delimiters (tab and colon) |
|
1548
|
|
|
|
|
|
|
that fall below C<0x40>, outside every following-byte range of every traditional |
|
1549
|
|
|
|
|
|
|
multibyte encoding. |
|
1550
|
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
UTF-8 is safe for all ASCII delimiters because continuation bytes are |
|
1552
|
|
|
|
|
|
|
always in the range C<0x80>-C<0xBF>, never overlapping ASCII. But LTSV's |
|
1553
|
|
|
|
|
|
|
choice of tab and colon also makes it safe for the traditional multibyte |
|
1554
|
|
|
|
|
|
|
encodings that predate Unicode, which is critical for systems that still |
|
1555
|
|
|
|
|
|
|
operate on traditional-encoded data. |
|
1556
|
|
|
|
|
|
|
|
|
1557
|
|
|
|
|
|
|
=item * B |
|
1558
|
|
|
|
|
|
|
|
|
1559
|
|
|
|
|
|
|
Every field carries its own label. A record is human-readable without a |
|
1560
|
|
|
|
|
|
|
separate schema or header line. Fields can appear in any order, and |
|
1561
|
|
|
|
|
|
|
optional fields can simply be omitted. Adding a new field to some records |
|
1562
|
|
|
|
|
|
|
does not break parsers that do not know about it. |
|
1563
|
|
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
|
=item * B |
|
1565
|
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
Because each record is one line, LTSV files can be processed with line-by-line |
|
1567
|
|
|
|
|
|
|
streaming. Memory usage is proportional to the longest single record, not |
|
1568
|
|
|
|
|
|
|
the total file size. This is why C<FromLTSV> in this module uses a lazy |
|
1569
|
|
|
|
|
|
|
iterator rather than loading the whole file. |
|
1570
|
|
|
|
|
|
|
|
|
1571
|
|
|
|
|
|
|
=item * B |
|
1572
|
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
Standard Unix text tools (C, C, C, C, C) work |
|
1574
|
|
|
|
|
|
|
naturally on LTSV files. A field can be located with a pattern like |
|
1575
|
|
|
|
|
|
|
C without any special parser. This makes ad-hoc |
|
1576
|
|
|
|
|
|
|
analysis and shell scripting straightforward. |
|
1577
|
|
|
|
|
|
|
|
|
1578
|
|
|
|
|
|
|
=item * B |
|
1579
|
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
CSV requires quoting fields that contain commas or newlines, and the quoting |
|
1581
|
|
|
|
|
|
|
rules differ between implementations (RFC 4180 vs. Microsoft Excel vs. others). |
|
1582
|
|
|
|
|
|
|
LTSV has no quoting: the tab delimiter and the colon separator do not appear |
|
1583
|
|
|
|
|
|
|
inside values in any of the supported encodings (by the multibyte-safety |
|
1584
|
|
|
|
|
|
|
argument above), so no escaping mechanism is needed. |
|
1585
|
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
=item * B |
|
1587
|
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
LTSV originated in the Japanese web industry as a structured log format for |
|
1589
|
|
|
|
|
|
|
HTTP access logs. Many web servers (Apache, Nginx) and log aggregation tools |
|
1590
|
|
|
|
|
|
|
support LTSV output or parsing. The format is particularly popular for |
|
1591
|
|
|
|
|
|
|
application and infrastructure logging where grep-ability and streaming |
|
1592
|
|
|
|
|
|
|
analysis matter. |
|
1593
|
|
|
|
|
|
|
|
|
1594
|
|
|
|
|
|
|
=back |
|
1595
|
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
For the formal LTSV specification, see L<http://ltsv.org/>. |
|
1597
|
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
=head2 What is LINQ? |
|
1599
|
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
LINQ (Language Integrated Query) is a set of query capabilities introduced |
|
1601
|
|
|
|
|
|
|
in the .NET Framework 3.5 (C# 3.0, 2007) by Microsoft. It defines a |
|
1602
|
|
|
|
|
|
|
unified model for querying and transforming data from diverse sources -- |
|
1603
|
|
|
|
|
|
|
in-memory collections, relational databases (LINQ to SQL), XML documents |
|
1604
|
|
|
|
|
|
|
(LINQ to XML), and more -- using a single, consistent API. |
|
1605
|
|
|
|
|
|
|
|
|
1606
|
|
|
|
|
|
|
This module brings LINQ-style querying to Perl, applied specifically to |
|
1607
|
|
|
|
|
|
|
LTSV data sources. |
|
1608
|
|
|
|
|
|
|
|
|
1609
|
|
|
|
|
|
|
=head3 LINQ Characteristics |
|
1610
|
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
=over 4 |
|
1612
|
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
=item * B |
|
1614
|
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
LINQ provides a single set of operators that works uniformly across |
|
1616
|
|
|
|
|
|
|
data sources. Whether the source is an array, a file, or a database, |
|
1617
|
|
|
|
|
|
|
the same C, C |
|
1618
|
|
|
|
|
|
|
LTSV::LINQ follows this principle: the same methods work on in-memory |
|
1619
|
|
|
|
|
|
|
arrays (C<From>) and LTSV files (C<FromLTSV>) alike. |
|
1620
|
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
=item * B |
|
1622
|
|
|
|
|
|
|
|
|
1623
|
|
|
|
|
|
|
LINQ queries express I<what> to retrieve, not I<how> to retrieve it. |
|
1624
|
|
|
|
|
|
|
A query like C<-E<gt>Where(sub { $_[0]{status} E<gt>= 400 })-E<gt>Select(...)> |
|
1625
|
|
|
|
|
|
|
describes the intent clearly, without explicit loop management. |
|
1626
|
|
|
|
|
|
|
This reduces cognitive overhead and makes queries easier to read and verify. |
|
1627
|
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
=item * B |
|
1629
|
|
|
|
|
|
|
|
|
1630
|
|
|
|
|
|
|
Each LINQ operator takes a sequence and returns a new sequence (or a |
|
1631
|
|
|
|
|
|
|
scalar result for terminal operators). Because operators are ordinary |
|
1632
|
|
|
|
|
|
|
method calls that return objects, they compose naturally: |
|
1633
|
|
|
|
|
|
|
|
|
1634
|
|
|
|
|
|
|
$query->Where(...)->Select(...)->OrderBy(...)->GroupBy(...)->ToArray() |
|
1635
|
|
|
|
|
|
|
|
|
1636
|
|
|
|
|
|
|
Any intermediate result is itself a valid query object, ready for |
|
1637
|
|
|
|
|
|
|
further transformation or immediate consumption. |
|
1638
|
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
=item * B |
|
1640
|
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
Intermediate operators (C, C |
|
1642
|
|
|
|
|
|
|
execute immediately. They construct a chain of iterator closures. |
|
1643
|
|
|
|
|
|
|
Evaluation is deferred until a terminal operator (C, C, |
|
1644
|
|
|
|
|
|
|
C, C, C, etc.) pulls items through the chain. |
|
1645
|
|
|
|
|
|
|
This means: |
|
1646
|
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
=over 4 |
|
1648
|
|
|
|
|
|
|
|
|
1649
|
|
|
|
|
|
|
=item - Memory usage is bounded by the window of data in flight, not by the |
|
1650
|
|
|
|
|
|
|
total data size. A C<Where-E<gt>Select-E<gt>Take(10)> over a million-line |
|
1651
|
|
|
|
|
|
|
file reads at most 10 records past the first matching one. |
|
1652
|
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
=item - Short-circuiting is free. C<First> stops at the first match. |
|
1654
|
|
|
|
|
|
|
C<Any> stops as soon as one match is found. |
|
1655
|
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
=item - Pipelines can be built without executing them, and executed |
|
1657
|
|
|
|
|
|
|
multiple times by wrapping in a factory (see C<_from_snapshot>). |
|
1658
|
|
|
|
|
|
|
|
|
1659
|
|
|
|
|
|
|
=back |
|
1660
|
|
|
|
|
|
|
|
|
1661
|
|
|
|
|
|
|
=item * B |
|
1662
|
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
LINQ's design makes chaining natural. In C# this is supported by |
|
1664
|
|
|
|
|
|
|
extension methods; in Perl it is supported by returning C<$self>-class |
|
1665
|
|
|
|
|
|
|
objects from every intermediate operator. The result is readable, |
|
1666
|
|
|
|
|
|
|
left-to-right query expressions. |
|
1667
|
|
|
|
|
|
|
|
|
1668
|
|
|
|
|
|
|
=item * B |
|
1669
|
|
|
|
|
|
|
|
|
1670
|
|
|
|
|
|
|
A LINQ query object is a description of a computation, not its result. |
|
1671
|
|
|
|
|
|
|
You can pass query objects around, inspect them, extend them, and decide |
|
1672
|
|
|
|
|
|
|
later when to execute them. This separation is valuable in library and |
|
1673
|
|
|
|
|
|
|
framework code. |
|
1674
|
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
=back |
|
1676
|
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
=head3 LINQ Advantages for LTSV Processing |
|
1678
|
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
=over 4 |
|
1680
|
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
=item * B |
|
1682
|
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
LTSV log analysis often involves the same logical steps: filter records |
|
1684
|
|
|
|
|
|
|
by a condition, extract a field, aggregate. LINQ methods map directly |
|
1685
|
|
|
|
|
|
|
onto these steps, making the code read like a description of the analysis. |
|
1686
|
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
=item * B |
|
1688
|
|
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
|
Web server access logs can be gigabytes in size. LTSV::LINQ's lazy |
|
1690
|
|
|
|
|
|
|
C iterator reads one line at a time. Combined with C |
|
1691
|
|
|
|
|
|
|
and C, only the needed records are ever in memory simultaneously. |
|
1692
|
|
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
|
=item * B |
|
1694
|
|
|
|
|
|
|
|
|
1695
|
|
|
|
|
|
|
Unlike C# LINQ (which has query comprehension syntax C
|
|
1696
|
|
|
|
|
|
|
select ...>), LTSV::LINQ works with ordinary Perl method calls and |
|
1697
|
|
|
|
|
|
|
anonymous subroutines. There is no source filter, no parser extension, |
|
1698
|
|
|
|
|
|
|
and no dependency on modern Perl features. The same code runs on Perl |
|
1699
|
|
|
|
|
|
|
5.005_03 and Perl 5.40. |
|
1700
|
|
|
|
|
|
|
|
|
1701
|
|
|
|
|
|
|
=item * B |
|
1702
|
|
|
|
|
|
|
|
|
1703
|
|
|
|
|
|
|
A C<Where> clause stored in a variable can be applied to multiple |
|
1704
|
|
|
|
|
|
|
data sources. Query logic can be parameterized and reused across scripts. |
|
1705
|
|
|
|
|
|
|
|
|
1706
|
|
|
|
|
|
|
=back |
|
1707
|
|
|
|
|
|
|
|
|
1708
|
|
|
|
|
|
|
For the original LINQ documentation, see |
|
1709
|
|
|
|
|
|
|
L. |
|
1710
|
|
|
|
|
|
|
|
|
1711
|
|
|
|
|
|
|
=head1 METHODS |
|
1712
|
|
|
|
|
|
|
|
|
1713
|
|
|
|
|
|
|
=head2 Complete Method Reference |
|
1714
|
|
|
|
|
|
|
|
|
1715
|
|
|
|
|
|
|
This module implements 60 LINQ-style methods organized into 15 categories: |
|
1716
|
|
|
|
|
|
|
|
|
1717
|
|
|
|
|
|
|
=over 4 |
|
1718
|
|
|
|
|
|
|
|
|
1719
|
|
|
|
|
|
|
=item * B: From, FromLTSV, Range, Empty, Repeat |
|
1720
|
|
|
|
|
|
|
|
|
1721
|
|
|
|
|
|
|
=item * B: Where (with DSL) |
|
1722
|
|
|
|
|
|
|
|
|
1723
|
|
|
|
|
|
|
=item * B: Select, SelectMany |
|
1724
|
|
|
|
|
|
|
|
|
1725
|
|
|
|
|
|
|
=item * B: Concat, Zip |
|
1726
|
|
|
|
|
|
|
|
|
1727
|
|
|
|
|
|
|
=item * B: Take, Skip, TakeWhile, SkipWhile |
|
1728
|
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
=item * B: OrderBy, OrderByDescending, OrderByStr, OrderByStrDescending, OrderByNum, OrderByNumDescending, Reverse, ThenBy, ThenByDescending, ThenByStr, ThenByStrDescending, ThenByNum, ThenByNumDescending |
|
1730
|
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
=item * B: GroupBy |
|
1732
|
|
|
|
|
|
|
|
|
1733
|
|
|
|
|
|
|
=item * B: Distinct, Union, Intersect, Except |
|
1734
|
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
=item * B: Join, GroupJoin |
|
1736
|
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
=item * B: All, Any, Contains |
|
1738
|
|
|
|
|
|
|
|
|
1739
|
|
|
|
|
|
|
=item * B: SequenceEqual |
|
1740
|
|
|
|
|
|
|
|
|
1741
|
|
|
|
|
|
|
=item * B: First, FirstOrDefault, Last, LastOrDefault, Single, SingleOrDefault, ElementAt, ElementAtOrDefault |
|
1742
|
|
|
|
|
|
|
|
|
1743
|
|
|
|
|
|
|
=item * B: Count, Sum, Min, Max, Average, AverageOrDefault, Aggregate |
|
1744
|
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
=item * B: ToArray, ToList, ToDictionary, ToLookup, ToLTSV, DefaultIfEmpty |
|
1746
|
|
|
|
|
|
|
|
|
1747
|
|
|
|
|
|
|
=item * B: ForEach |
|
1748
|
|
|
|
|
|
|
|
|
1749
|
|
|
|
|
|
|
=back |
|
1750
|
|
|
|
|
|
|
|
|
1751
|
|
|
|
|
|
|
B |
|
1752
|
|
|
|
|
|
|
|
|
1753
|
|
|
|
|
|
|
Method Category Lazy? Returns |
|
1754
|
|
|
|
|
|
|
===================== ============== ===== ================ |
|
1755
|
|
|
|
|
|
|
From Data Source Yes Query |
|
1756
|
|
|
|
|
|
|
FromLTSV Data Source Yes Query |
|
1757
|
|
|
|
|
|
|
Range Data Source Yes Query |
|
1758
|
|
|
|
|
|
|
Empty Data Source Yes Query |
|
1759
|
|
|
|
|
|
|
Repeat Data Source Yes Query |
|
1760
|
|
|
|
|
|
|
Where Filtering Yes Query |
|
1761
|
|
|
|
|
|
|
Select Projection Yes Query |
|
1762
|
|
|
|
|
|
|
SelectMany Projection Yes Query |
|
1763
|
|
|
|
|
|
|
Concat Concatenation Yes Query |
|
1764
|
|
|
|
|
|
|
Zip Concatenation Yes Query |
|
1765
|
|
|
|
|
|
|
Take Partitioning Yes Query |
|
1766
|
|
|
|
|
|
|
Skip Partitioning Yes Query |
|
1767
|
|
|
|
|
|
|
TakeWhile Partitioning Yes Query |
|
1768
|
|
|
|
|
|
|
SkipWhile Partitioning Yes Query |
|
1769
|
|
|
|
|
|
|
OrderBy Ordering No* OrderedQuery |
|
1770
|
|
|
|
|
|
|
OrderByDescending Ordering No* OrderedQuery |
|
1771
|
|
|
|
|
|
|
OrderByStr Ordering No* OrderedQuery |
|
1772
|
|
|
|
|
|
|
OrderByStrDescending Ordering No* OrderedQuery |
|
1773
|
|
|
|
|
|
|
OrderByNum Ordering No* OrderedQuery |
|
1774
|
|
|
|
|
|
|
OrderByNumDescending Ordering No* OrderedQuery |
|
1775
|
|
|
|
|
|
|
Reverse Ordering No* Query |
|
1776
|
|
|
|
|
|
|
ThenBy Ordering No* OrderedQuery |
|
1777
|
|
|
|
|
|
|
ThenByDescending Ordering No* OrderedQuery |
|
1778
|
|
|
|
|
|
|
ThenByStr Ordering No* OrderedQuery |
|
1779
|
|
|
|
|
|
|
ThenByStrDescending Ordering No* OrderedQuery |
|
1780
|
|
|
|
|
|
|
ThenByNum Ordering No* OrderedQuery |
|
1781
|
|
|
|
|
|
|
ThenByNumDescending Ordering No* OrderedQuery |
|
1782
|
|
|
|
|
|
|
GroupBy Grouping No* Query |
|
1783
|
|
|
|
|
|
|
Distinct Set Operation Yes Query |
|
1784
|
|
|
|
|
|
|
Union Set Operation No* Query |
|
1785
|
|
|
|
|
|
|
Intersect Set Operation No* Query |
|
1786
|
|
|
|
|
|
|
Except Set Operation No* Query |
|
1787
|
|
|
|
|
|
|
Join Join No* Query |
|
1788
|
|
|
|
|
|
|
GroupJoin Join No* Query |
|
1789
|
|
|
|
|
|
|
All Quantifier No Boolean |
|
1790
|
|
|
|
|
|
|
Any Quantifier No Boolean |
|
1791
|
|
|
|
|
|
|
Contains Quantifier No Boolean |
|
1792
|
|
|
|
|
|
|
SequenceEqual Comparison No Boolean |
|
1793
|
|
|
|
|
|
|
First Element Access No Element |
|
1794
|
|
|
|
|
|
|
FirstOrDefault Element Access No Element |
|
1795
|
|
|
|
|
|
|
Last Element Access No* Element |
|
1796
|
|
|
|
|
|
|
LastOrDefault Element Access No* Element or undef |
|
1797
|
|
|
|
|
|
|
Single Element Access No* Element |
|
1798
|
|
|
|
|
|
|
SingleOrDefault Element Access No* Element or undef |
|
1799
|
|
|
|
|
|
|
ElementAt Element Access No* Element |
|
1800
|
|
|
|
|
|
|
ElementAtOrDefault Element Access No* Element or undef |
|
1801
|
|
|
|
|
|
|
Count Aggregation No Integer |
|
1802
|
|
|
|
|
|
|
Sum Aggregation No Number |
|
1803
|
|
|
|
|
|
|
Min Aggregation No Number |
|
1804
|
|
|
|
|
|
|
Max Aggregation No Number |
|
1805
|
|
|
|
|
|
|
Average Aggregation No Number |
|
1806
|
|
|
|
|
|
|
AverageOrDefault Aggregation No Number or undef |
|
1807
|
|
|
|
|
|
|
Aggregate Aggregation No Any |
|
1808
|
|
|
|
|
|
|
DefaultIfEmpty Conversion Yes Query |
|
1809
|
|
|
|
|
|
|
ToArray Conversion No Array |
|
1810
|
|
|
|
|
|
|
ToList Conversion No ArrayRef |
|
1811
|
|
|
|
|
|
|
ToDictionary Conversion No HashRef |
|
1812
|
|
|
|
|
|
|
ToLookup Conversion No HashRef |
|
1813
|
|
|
|
|
|
|
ToLTSV Conversion No Boolean |
|
1814
|
|
|
|
|
|
|
ForEach Utility No Void |
|
1815
|
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
* Materializing operation (loads all data into memory) |
|
1817
|
|
|
|
|
|
|
OrderedQuery = LTSV::LINQ::Ordered (subclass of LTSV::LINQ; |
|
1818
|
|
|
|
|
|
|
all LTSV::LINQ methods available plus ThenBy* methods) |
|
1819
|
|
|
|
|
|
|
|
|
1820
|
|
|
|
|
|
|
=head2 Data Source Methods |
|
1821
|
|
|
|
|
|
|
|
|
1822
|
|
|
|
|
|
|
=over 4 |
|
1823
|
|
|
|
|
|
|
|
|
1824
|
|
|
|
|
|
|
=item B |
|
1825
|
|
|
|
|
|
|
|
|
1826
|
|
|
|
|
|
|
Create a query from an array. |
|
1827
|
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
my $query = LTSV::LINQ->From([{name => 'Alice'}, {name => 'Bob'}]); |
|
1829
|
|
|
|
|
|
|
|
|
1830
|
|
|
|
|
|
|
=item B |
|
1831
|
|
|
|
|
|
|
|
|
1832
|
|
|
|
|
|
|
Create a query from an LTSV file. |
|
1833
|
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
my $query = LTSV::LINQ->FromLTSV("access.log"); |
|
1835
|
|
|
|
|
|
|
|
|
1836
|
|
|
|
|
|
|
B C opens the file immediately and |
|
1837
|
|
|
|
|
|
|
holds the file handle open until the iterator reaches end-of-file. |
|
1838
|
|
|
|
|
|
|
If the query is not fully consumed (e.g. you call C<First> or C<Take> |
|
1839
|
|
|
|
|
|
|
and stop early), the file handle remains open until the query object |
|
1840
|
|
|
|
|
|
|
is garbage collected. |
|
1841
|
|
|
|
|
|
|
|
|
1842
|
|
|
|
|
|
|
This is harmless for a small number of files, but if you open many |
|
1843
|
|
|
|
|
|
|
LTSV files concurrently without consuming them fully, you may exhaust |
|
1844
|
|
|
|
|
|
|
the OS file descriptor limit. In such cases, consume the query fully |
|
1845
|
|
|
|
|
|
|
or use C<ToArray()> to materialise the data and close the file |
|
1846
|
|
|
|
|
|
|
immediately: |
|
1847
|
|
|
|
|
|
|
|
|
1848
|
|
|
|
|
|
|
# File closed as soon as all records are loaded |
|
1849
|
|
|
|
|
|
|
my @records = LTSV::LINQ->FromLTSV("access.log")->ToArray(); |
|
1850
|
|
|
|
|
|
|
|
|
1851
|
|
|
|
|
|
|
=item B |
|
1852
|
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
Generate a sequence of integers. |
|
1854
|
|
|
|
|
|
|
|
|
1855
|
|
|
|
|
|
|
my $query = LTSV::LINQ->Range(1, 10); # 1, 2, ..., 10 |
|
1856
|
|
|
|
|
|
|
|
|
1857
|
|
|
|
|
|
|
=item B |
|
1858
|
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
Create an empty sequence. |
|
1860
|
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
B Empty LTSV::LINQ query |
|
1862
|
|
|
|
|
|
|
|
|
1863
|
|
|
|
|
|
|
B |
|
1864
|
|
|
|
|
|
|
|
|
1865
|
|
|
|
|
|
|
my $empty = LTSV::LINQ->Empty(); |
|
1866
|
|
|
|
|
|
|
$empty->Count(); # 0 |
|
1867
|
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
# Conditional empty sequence |
|
1869
|
|
|
|
|
|
|
my $result = $condition ? $query : LTSV::LINQ->Empty(); |
|
1870
|
|
|
|
|
|
|
|
|
1871
|
|
|
|
|
|
|
B<Note:> Equivalent to C<From([])> but more explicit. |
|
1872
|
|
|
|
|
|
|
|
|
1873
|
|
|
|
|
|
|
=item B |
|
1874
|
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
Repeat the same element a specified number of times. |
|
1876
|
|
|
|
|
|
|
|
|
1877
|
|
|
|
|
|
|
B |
|
1878
|
|
|
|
|
|
|
|
|
1879
|
|
|
|
|
|
|
=over 4 |
|
1880
|
|
|
|
|
|
|
|
|
1881
|
|
|
|
|
|
|
=item * C<$element> - Element to repeat |
|
1882
|
|
|
|
|
|
|
|
|
1883
|
|
|
|
|
|
|
=item * C<$count> - Number of times to repeat |
|
1884
|
|
|
|
|
|
|
|
|
1885
|
|
|
|
|
|
|
=back |
|
1886
|
|
|
|
|
|
|
|
|
1887
|
|
|
|
|
|
|
B LTSV::LINQ query with repeated elements |
|
1888
|
|
|
|
|
|
|
|
|
1889
|
|
|
|
|
|
|
B |
|
1890
|
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
# Repeat scalar |
|
1892
|
|
|
|
|
|
|
LTSV::LINQ->Repeat('x', 5)->ToArray(); # ('x', 'x', 'x', 'x', 'x') |
|
1893
|
|
|
|
|
|
|
|
|
1894
|
|
|
|
|
|
|
# Repeat reference (same reference repeated) |
|
1895
|
|
|
|
|
|
|
my $item = {id => 1}; |
|
1896
|
|
|
|
|
|
|
LTSV::LINQ->Repeat($item, 3)->ToArray(); # ($item, $item, $item) |
|
1897
|
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
# Generate default values |
|
1899
|
|
|
|
|
|
|
LTSV::LINQ->Repeat(0, 10)->ToArray(); # (0, 0, 0, ..., 0) |
|
1900
|
|
|
|
|
|
|
|
|
1901
|
|
|
|
|
|
|
B The element reference is repeated, not cloned. |
|
1902
|
|
|
|
|
|
|
|
|
1903
|
|
|
|
|
|
|
=back |
|
1904
|
|
|
|
|
|
|
|
|
1905
|
|
|
|
|
|
|
=head2 Filtering Methods |
|
1906
|
|
|
|
|
|
|
|
|
1907
|
|
|
|
|
|
|
=over 4 |
|
1908
|
|
|
|
|
|
|
|
|
1909
|
|
|
|
|
|
|
=item B |
|
1910
|
|
|
|
|
|
|
|
|
1911
|
|
|
|
|
|
|
=item B<Where(key =E<gt> value, ...)> |
|
1912
|
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
Filter elements. Accepts either a code reference or DSL form. |
|
1914
|
|
|
|
|
|
|
|
|
1915
|
|
|
|
|
|
|
B |
|
1916
|
|
|
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
->Where(sub { $_[0]{status} == 200 }) |
|
1918
|
|
|
|
|
|
|
->Where(sub { $_[0]{status} >= 400 && $_[0]{bytes} > 1000 }) |
|
1919
|
|
|
|
|
|
|
|
|
1920
|
|
|
|
|
|
|
The code reference receives each element as C<$_[0]> and should return |
|
1921
|
|
|
|
|
|
|
true to include the element, false to exclude it. |
|
1922
|
|
|
|
|
|
|
|
|
1923
|
|
|
|
|
|
|
B |
|
1924
|
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
The DSL (Domain Specific Language) form provides a concise syntax for |
|
1926
|
|
|
|
|
|
|
simple equality comparisons. All conditions are combined with AND logic. |
|
1927
|
|
|
|
|
|
|
|
|
1928
|
|
|
|
|
|
|
# Single condition |
|
1929
|
|
|
|
|
|
|
->Where(status => '200') |
|
1930
|
|
|
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
# Multiple conditions (AND) |
|
1932
|
|
|
|
|
|
|
->Where(status => '200', method => 'GET') |
|
1933
|
|
|
|
|
|
|
|
|
1934
|
|
|
|
|
|
|
# Equivalent to: |
|
1935
|
|
|
|
|
|
|
->Where(sub { |
|
1936
|
|
|
|
|
|
|
$_[0]{status} eq '200' && $_[0]{method} eq 'GET' |
|
1937
|
|
|
|
|
|
|
}) |
|
1938
|
|
|
|
|
|
|
|
|
1939
|
|
|
|
|
|
|
B |
|
1940
|
|
|
|
|
|
|
|
|
1941
|
|
|
|
|
|
|
=over 4 |
|
1942
|
|
|
|
|
|
|
|
|
1943
|
|
|
|
|
|
|
=item * Arguments must be an even number of C<key =E<gt> value> pairs |
|
1944
|
|
|
|
|
|
|
|
|
1945
|
|
|
|
|
|
|
The DSL form interprets its arguments as a flat list of key-value pairs. |
|
1946
|
|
|
|
|
|
|
Passing an odd number of arguments produces a Perl warning |
|
1947
|
|
|
|
|
|
|
(C<Odd number of elements in hash assignment>) and the unpaired key |
|
1948
|
|
|
|
|
|
|
receives C<undef> as its value, which will never match. Always use |
|
1949
|
|
|
|
|
|
|
complete pairs: |
|
1950
|
|
|
|
|
|
|
|
|
1951
|
|
|
|
|
|
|
->Where(status => '200') # correct: 1 pair |
|
1952
|
|
|
|
|
|
|
->Where(status => '200', method => 'GET') # correct: 2 pairs |
|
1953
|
|
|
|
|
|
|
->Where(status => '200', 'method') # wrong: 3 args, Perl warning |
|
1954
|
|
|
|
|
|
|
|
|
1955
|
|
|
|
|
|
|
=item * All comparisons are string equality (C<eq>) |
|
1956
|
|
|
|
|
|
|
|
|
1957
|
|
|
|
|
|
|
=item * All conditions are combined with AND |
|
1958
|
|
|
|
|
|
|
|
|
1959
|
|
|
|
|
|
|
=item * Undefined values are treated as failures |
|
1960
|
|
|
|
|
|
|
|
|
1961
|
|
|
|
|
|
|
=item * For numeric or OR logic, use code reference form |
|
1962
|
|
|
|
|
|
|
|
|
1963
|
|
|
|
|
|
|
=back |
|
1964
|
|
|
|
|
|
|
|
|
1965
|
|
|
|
|
|
|
B |
|
1966
|
|
|
|
|
|
|
|
|
1967
|
|
|
|
|
|
|
# DSL: Simple and readable |
|
1968
|
|
|
|
|
|
|
->Where(status => '200') |
|
1969
|
|
|
|
|
|
|
->Where(user => 'alice', role => 'admin') |
|
1970
|
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
# Code ref: Complex logic |
|
1972
|
|
|
|
|
|
|
->Where(sub { $_[0]{status} >= 400 && $_[0]{status} < 500 }) |
|
1973
|
|
|
|
|
|
|
->Where(sub { $_[0]{user} eq 'alice' || $_[0]{user} eq 'bob' }) |
|
1974
|
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
=back |
|
1976
|
|
|
|
|
|
|
|
|
1977
|
|
|
|
|
|
|
=head2 Projection Methods |
|
1978
|
|
|
|
|
|
|
|
|
1979
|
|
|
|
|
|
|
=over 4 |
|
1980
|
|
|
|
|
|
|
|
|
1981
|
|
|
|
|
|
|
=item B |
|
1982
|
|
|
|
|
|
|
|
|
1983
|
|
|
|
|
|
|
Transform each element using the provided selector function. |
|
1984
|
|
|
|
|
|
|
|
|
1985
|
|
|
|
|
|
|
The selector receives each element as C<$_[0]> and should return |
|
1986
|
|
|
|
|
|
|
the transformed value. |
|
1987
|
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
B |
|
1989
|
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
=over 4 |
|
1991
|
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
=item * C<$selector> - Code reference that transforms each element |
|
1993
|
|
|
|
|
|
|
|
|
1994
|
|
|
|
|
|
|
=back |
|
1995
|
|
|
|
|
|
|
|
|
1996
|
|
|
|
|
|
|
B<Returns:> New query with transformed elements (lazy) |
|
1997
|
|
|
|
|
|
|
|
|
1998
|
|
|
|
|
|
|
B<Examples:> |
|
1999
|
|
|
|
|
|
|
|
|
2000
|
|
|
|
|
|
|
# Extract single field |
|
2001
|
|
|
|
|
|
|
->Select(sub { $_[0]{url} }) |
|
2002
|
|
|
|
|
|
|
|
|
2003
|
|
|
|
|
|
|
# Transform to new structure |
|
2004
|
|
|
|
|
|
|
->Select(sub { |
|
2005
|
|
|
|
|
|
|
{ |
|
2006
|
|
|
|
|
|
|
path => $_[0]{url}, |
|
2007
|
|
|
|
|
|
|
code => $_[0]{status} |
|
2008
|
|
|
|
|
|
|
} |
|
2009
|
|
|
|
|
|
|
}) |
|
2010
|
|
|
|
|
|
|
|
|
2011
|
|
|
|
|
|
|
# Calculate derived values |
|
2012
|
|
|
|
|
|
|
->Select(sub { $_[0]{bytes} * 8 }) # bytes to bits |
|
2013
|
|
|
|
|
|
|
|
|
2014
|
|
|
|
|
|
|
B<Note:> Select preserves one-to-one mapping. For one-to-many, use |
|
2015
|
|
|
|
|
|
|
SelectMany. |
|
2016
|
|
|
|
|
|
|
|
|
2017
|
|
|
|
|
|
|
=item B<SelectMany($selector)> |
|
2018
|
|
|
|
|
|
|
|
|
2019
|
|
|
|
|
|
|
Flatten nested sequences into a single sequence. |
|
2020
|
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
The selector should return an array reference. All arrays are flattened |
|
2022
|
|
|
|
|
|
|
into a single sequence. |
|
2023
|
|
|
|
|
|
|
|
|
2024
|
|
|
|
|
|
|
B<Parameters:> |
|
2025
|
|
|
|
|
|
|
|
|
2026
|
|
|
|
|
|
|
=over 4 |
|
2027
|
|
|
|
|
|
|
|
|
2028
|
|
|
|
|
|
|
=item * C<$selector> - Code reference returning array reference |
|
2029
|
|
|
|
|
|
|
|
|
2030
|
|
|
|
|
|
|
=back |
|
2031
|
|
|
|
|
|
|
|
|
2032
|
|
|
|
|
|
|
B<Returns:> New query with flattened elements (lazy) |
|
2033
|
|
|
|
|
|
|
|
|
2034
|
|
|
|
|
|
|
B<Examples:> |
|
2035
|
|
|
|
|
|
|
|
|
2036
|
|
|
|
|
|
|
# Flatten array of arrays |
|
2037
|
|
|
|
|
|
|
my @nested = ([1, 2], [3, 4], [5]); |
|
2038
|
|
|
|
|
|
|
LTSV::LINQ->From(\@nested) |
|
2039
|
|
|
|
|
|
|
->SelectMany(sub { $_[0] }) |
|
2040
|
|
|
|
|
|
|
->ToArray(); # (1, 2, 3, 4, 5) |
|
2041
|
|
|
|
|
|
|
|
|
2042
|
|
|
|
|
|
|
# Expand related records |
|
2043
|
|
|
|
|
|
|
->SelectMany(sub { |
|
2044
|
|
|
|
|
|
|
my $user = shift; |
|
2045
|
|
|
|
|
|
|
return [ map { |
|
2046
|
|
|
|
|
|
|
{ user => $user->{name}, role => $_ } |
|
2047
|
|
|
|
|
|
|
} @{$user->{roles}} ]; |
|
2048
|
|
|
|
|
|
|
}) |
|
2049
|
|
|
|
|
|
|
|
|
2050
|
|
|
|
|
|
|
B<Use Cases:> |
|
2051
|
|
|
|
|
|
|
|
|
2052
|
|
|
|
|
|
|
=over 4 |
|
2053
|
|
|
|
|
|
|
|
|
2054
|
|
|
|
|
|
|
=item * Flattening nested arrays |
|
2055
|
|
|
|
|
|
|
|
|
2056
|
|
|
|
|
|
|
=item * Expanding one-to-many relationships |
|
2057
|
|
|
|
|
|
|
|
|
2058
|
|
|
|
|
|
|
=item * Generating multiple outputs per input |
|
2059
|
|
|
|
|
|
|
|
|
2060
|
|
|
|
|
|
|
=back |
|
2061
|
|
|
|
|
|
|
|
|
2062
|
|
|
|
|
|
|
B<Important:> The selector B<must> return an ARRAY reference. If it returns |
|
2063
|
|
|
|
|
|
|
any other value (e.g. a hashref or scalar), this method throws an exception: |
|
2064
|
|
|
|
|
|
|
|
|
2065
|
|
|
|
|
|
|
die "SelectMany: selector must return an ARRAY reference" |
|
2066
|
|
|
|
|
|
|
|
|
2067
|
|
|
|
|
|
|
This matches the behaviour of .NET LINQ's C<SelectMany>, which requires |
|
2068
|
|
|
|
|
|
|
the selector to return an C<IEnumerable>. Always wrap results in C<[...]>: |
|
2069
|
|
|
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
->SelectMany(sub { [ $_[0]{items} ] }) # correct: arrayref |
|
2071
|
|
|
|
|
|
|
->SelectMany(sub { $_[0]{items} }) # wrong: dies at runtime |
|
2072
|
|
|
|
|
|
|
|
|
2073
|
|
|
|
|
|
|
=back |
|
2074
|
|
|
|
|
|
|
|
|
2075
|
|
|
|
|
|
|
=head2 Concatenation Methods |
|
2076
|
|
|
|
|
|
|
|
|
2077
|
|
|
|
|
|
|
=over 4 |
|
2078
|
|
|
|
|
|
|
|
|
2079
|
|
|
|
|
|
|
=item B<Concat($second)> |
|
2080
|
|
|
|
|
|
|
|
|
2081
|
|
|
|
|
|
|
Concatenate two sequences into one. |
|
2082
|
|
|
|
|
|
|
|
|
2083
|
|
|
|
|
|
|
B<Parameters:> |
|
2084
|
|
|
|
|
|
|
|
|
2085
|
|
|
|
|
|
|
=over 4 |
|
2086
|
|
|
|
|
|
|
|
|
2087
|
|
|
|
|
|
|
=item * C<$second> - Second sequence (LTSV::LINQ object) |
|
2088
|
|
|
|
|
|
|
|
|
2089
|
|
|
|
|
|
|
=back |
|
2090
|
|
|
|
|
|
|
|
|
2091
|
|
|
|
|
|
|
B<Returns:> New query with both sequences concatenated (lazy) |
|
2092
|
|
|
|
|
|
|
|
|
2093
|
|
|
|
|
|
|
B<Examples:> |
|
2094
|
|
|
|
|
|
|
|
|
2095
|
|
|
|
|
|
|
# Combine two data sources |
|
2096
|
|
|
|
|
|
|
my $q1 = LTSV::LINQ->From([1, 2, 3]); |
|
2097
|
|
|
|
|
|
|
my $q2 = LTSV::LINQ->From([4, 5, 6]); |
|
2098
|
|
|
|
|
|
|
$q1->Concat($q2)->ToArray(); # (1, 2, 3, 4, 5, 6) |
|
2099
|
|
|
|
|
|
|
|
|
2100
|
|
|
|
|
|
|
# Merge LTSV files |
|
2101
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV("jan.log") |
|
2102
|
|
|
|
|
|
|
->Concat(LTSV::LINQ->FromLTSV("feb.log")) |
|
2103
|
|
|
|
|
|
|
->Where(status => '500') |
|
2104
|
|
|
|
|
|
|
|
|
2105
|
|
|
|
|
|
|
B<Note:> This operation is lazy - sequences are read on-demand. |
|
2106
|
|
|
|
|
|
|
|
|
2107
|
|
|
|
|
|
|
=item B<Zip($second, $result_selector)> |
|
2108
|
|
|
|
|
|
|
|
|
2109
|
|
|
|
|
|
|
Combine two sequences element-wise using a result selector function. |
|
2110
|
|
|
|
|
|
|
|
|
2111
|
|
|
|
|
|
|
B<Parameters:> |
|
2112
|
|
|
|
|
|
|
|
|
2113
|
|
|
|
|
|
|
=over 4 |
|
2114
|
|
|
|
|
|
|
|
|
2115
|
|
|
|
|
|
|
=item * C<$second> - Second sequence (LTSV::LINQ object) |
|
2116
|
|
|
|
|
|
|
|
|
2117
|
|
|
|
|
|
|
=item * C<$result_selector> - Function to combine elements: ($first, $second) -> $result |
|
2118
|
|
|
|
|
|
|
|
|
2119
|
|
|
|
|
|
|
=back |
|
2120
|
|
|
|
|
|
|
|
|
2121
|
|
|
|
|
|
|
B<Returns:> New query with combined elements (lazy) |
|
2122
|
|
|
|
|
|
|
|
|
2123
|
|
|
|
|
|
|
B<Examples:> |
|
2124
|
|
|
|
|
|
|
|
|
2125
|
|
|
|
|
|
|
# Combine numbers |
|
2126
|
|
|
|
|
|
|
my $numbers = LTSV::LINQ->From([1, 2, 3]); |
|
2127
|
|
|
|
|
|
|
my $letters = LTSV::LINQ->From(['a', 'b', 'c']); |
|
2128
|
|
|
|
|
|
|
$numbers->Zip($letters, sub { |
|
2129
|
|
|
|
|
|
|
my($num, $letter) = @_; |
|
2130
|
|
|
|
|
|
|
return "$num-$letter"; |
|
2131
|
|
|
|
|
|
|
})->ToArray(); # ('1-a', '2-b', '3-c') |
|
2132
|
|
|
|
|
|
|
|
|
2133
|
|
|
|
|
|
|
# Create key-value pairs |
|
2134
|
|
|
|
|
|
|
my $keys = LTSV::LINQ->From(['name', 'age', 'city']); |
|
2135
|
|
|
|
|
|
|
my $values = LTSV::LINQ->From(['Alice', 30, 'NYC']); |
|
2136
|
|
|
|
|
|
|
$keys->Zip($values, sub { |
|
2137
|
|
|
|
|
|
|
return {$_[0] => $_[1]}; |
|
2138
|
|
|
|
|
|
|
})->ToArray(); |
|
2139
|
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
# Stops at shorter sequence |
|
2141
|
|
|
|
|
|
|
LTSV::LINQ->From([1, 2, 3, 4]) |
|
2142
|
|
|
|
|
|
|
->Zip(LTSV::LINQ->From(['a', 'b']), sub { [$_[0], $_[1]] }) |
|
2143
|
|
|
|
|
|
|
->ToArray(); # ([1, 'a'], [2, 'b']) |
|
2144
|
|
|
|
|
|
|
|
|
2145
|
|
|
|
|
|
|
B<Note:> Iteration stops when either sequence ends. |
|
2146
|
|
|
|
|
|
|
|
|
2147
|
|
|
|
|
|
|
=back |
|
2148
|
|
|
|
|
|
|
|
|
2149
|
|
|
|
|
|
|
=head2 Partitioning Methods |
|
2150
|
|
|
|
|
|
|
|
|
2151
|
|
|
|
|
|
|
=over 4 |
|
2152
|
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
=item B<Take($count)> |
|
2154
|
|
|
|
|
|
|
|
|
2155
|
|
|
|
|
|
|
Take the first N elements from the sequence. |
|
2156
|
|
|
|
|
|
|
|
|
2157
|
|
|
|
|
|
|
B<Parameters:> |
|
2158
|
|
|
|
|
|
|
|
|
2159
|
|
|
|
|
|
|
=over 4 |
|
2160
|
|
|
|
|
|
|
|
|
2161
|
|
|
|
|
|
|
=item * C<$count> - Number of elements to take (integer >= 0) |
|
2162
|
|
|
|
|
|
|
|
|
2163
|
|
|
|
|
|
|
=back |
|
2164
|
|
|
|
|
|
|
|
|
2165
|
|
|
|
|
|
|
B<Returns:> New query limited to first N elements (lazy) |
|
2166
|
|
|
|
|
|
|
|
|
2167
|
|
|
|
|
|
|
B<Examples:> |
|
2168
|
|
|
|
|
|
|
|
|
2169
|
|
|
|
|
|
|
# Top 10 results |
|
2170
|
|
|
|
|
|
|
->OrderByDescending(sub { $_[0]{score} }) |
|
2171
|
|
|
|
|
|
|
->Take(10) |
|
2172
|
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
# First record only |
|
2174
|
|
|
|
|
|
|
->Take(1)->ToArray() |
|
2175
|
|
|
|
|
|
|
|
|
2176
|
|
|
|
|
|
|
# Limit large file processing |
|
2177
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV("huge.log")->Take(1000) |
|
2178
|
|
|
|
|
|
|
|
|
2179
|
|
|
|
|
|
|
B<Note:> Take(0) returns empty sequence. Negative values treated as 0. |
|
2180
|
|
|
|
|
|
|
|
|
2181
|
|
|
|
|
|
|
=item B<Skip($count)> |
|
2182
|
|
|
|
|
|
|
|
|
2183
|
|
|
|
|
|
|
Skip the first N elements, return the rest. |
|
2184
|
|
|
|
|
|
|
|
|
2185
|
|
|
|
|
|
|
B<Parameters:> |
|
2186
|
|
|
|
|
|
|
|
|
2187
|
|
|
|
|
|
|
=over 4 |
|
2188
|
|
|
|
|
|
|
|
|
2189
|
|
|
|
|
|
|
=item * C<$count> - Number of elements to skip (integer >= 0) |
|
2190
|
|
|
|
|
|
|
|
|
2191
|
|
|
|
|
|
|
=back |
|
2192
|
|
|
|
|
|
|
|
|
2193
|
|
|
|
|
|
|
B<Returns:> New query skipping first N elements (lazy) |
|
2194
|
|
|
|
|
|
|
|
|
2195
|
|
|
|
|
|
|
B<Examples:> |
|
2196
|
|
|
|
|
|
|
|
|
2197
|
|
|
|
|
|
|
# Skip header row |
|
2198
|
|
|
|
|
|
|
->Skip(1) |
|
2199
|
|
|
|
|
|
|
|
|
2200
|
|
|
|
|
|
|
# Pagination: page 3, size 20 |
|
2201
|
|
|
|
|
|
|
->Skip(40)->Take(20) |
|
2202
|
|
|
|
|
|
|
|
|
2203
|
|
|
|
|
|
|
# Skip first batch |
|
2204
|
|
|
|
|
|
|
->Skip(1000)->ForEach(sub { ... }) |
|
2205
|
|
|
|
|
|
|
|
|
2206
|
|
|
|
|
|
|
B<Use Cases:> |
|
2207
|
|
|
|
|
|
|
|
|
2208
|
|
|
|
|
|
|
=over 4 |
|
2209
|
|
|
|
|
|
|
|
|
2210
|
|
|
|
|
|
|
=item * Pagination |
|
2211
|
|
|
|
|
|
|
|
|
2212
|
|
|
|
|
|
|
=item * Skipping header rows |
|
2213
|
|
|
|
|
|
|
|
|
2214
|
|
|
|
|
|
|
=item * Processing in batches |
|
2215
|
|
|
|
|
|
|
|
|
2216
|
|
|
|
|
|
|
=back |
|
2217
|
|
|
|
|
|
|
|
|
2218
|
|
|
|
|
|
|
=item B<TakeWhile($predicate)> |
|
2219
|
|
|
|
|
|
|
|
|
2220
|
|
|
|
|
|
|
Take elements while the predicate is true. Stops at first false. |
|
2221
|
|
|
|
|
|
|
|
|
2222
|
|
|
|
|
|
|
B<Parameters:> |
|
2223
|
|
|
|
|
|
|
|
|
2224
|
|
|
|
|
|
|
=over 4 |
|
2225
|
|
|
|
|
|
|
|
|
2226
|
|
|
|
|
|
|
=item * C<$predicate> - Code reference returning boolean |
|
2227
|
|
|
|
|
|
|
|
|
2228
|
|
|
|
|
|
|
=back |
|
2229
|
|
|
|
|
|
|
|
|
2230
|
|
|
|
|
|
|
B<Returns:> New query taking elements while predicate holds (lazy) |
|
2231
|
|
|
|
|
|
|
|
|
2232
|
|
|
|
|
|
|
B<Examples:> |
|
2233
|
|
|
|
|
|
|
|
|
2234
|
|
|
|
|
|
|
# Take while value is small |
|
2235
|
|
|
|
|
|
|
->TakeWhile(sub { $_[0]{count} < 100 }) |
|
2236
|
|
|
|
|
|
|
|
|
2237
|
|
|
|
|
|
|
# Take while timestamp is in range |
|
2238
|
|
|
|
|
|
|
->TakeWhile(sub { $_[0]{time} lt '2026-02-01' }) |
|
2239
|
|
|
|
|
|
|
|
|
2240
|
|
|
|
|
|
|
# Process until error |
|
2241
|
|
|
|
|
|
|
->TakeWhile(sub { $_[0]{status} < 400 }) |
|
2242
|
|
|
|
|
|
|
|
|
2243
|
|
|
|
|
|
|
B<Note:> TakeWhile stops immediately when predicate returns false. |
|
2244
|
|
|
|
|
|
|
It does NOT filter - it terminates the sequence. |
|
2245
|
|
|
|
|
|
|
|
|
2246
|
|
|
|
|
|
|
# Different from Where: |
|
2247
|
|
|
|
|
|
|
->TakeWhile(sub { $_[0] < 5 }) # 1,2,3,4 then STOP |
|
2248
|
|
|
|
|
|
|
->Where(sub { $_[0] < 5 }) # 1,2,3,4 (checks all) |
|
2249
|
|
|
|
|
|
|
|
|
2250
|
|
|
|
|
|
|
=item B<SkipWhile($predicate)> |
|
2251
|
|
|
|
|
|
|
|
|
2252
|
|
|
|
|
|
|
Skip elements while the predicate is true. Returns rest after first false. |
|
2253
|
|
|
|
|
|
|
|
|
2254
|
|
|
|
|
|
|
B<Parameters:> |
|
2255
|
|
|
|
|
|
|
|
|
2256
|
|
|
|
|
|
|
=over 4 |
|
2257
|
|
|
|
|
|
|
|
|
2258
|
|
|
|
|
|
|
=item * C<$predicate> - Code reference returning boolean |
|
2259
|
|
|
|
|
|
|
|
|
2260
|
|
|
|
|
|
|
=back |
|
2261
|
|
|
|
|
|
|
|
|
2262
|
|
|
|
|
|
|
B<Returns:> New query skipping initial elements (lazy) |
|
2263
|
|
|
|
|
|
|
|
|
2264
|
|
|
|
|
|
|
B<Examples:> |
|
2265
|
|
|
|
|
|
|
|
|
2266
|
|
|
|
|
|
|
# Skip header lines |
|
2267
|
|
|
|
|
|
|
->SkipWhile(sub { $_[0]{line} =~ /^#/ }) |
|
2268
|
|
|
|
|
|
|
|
|
2269
|
|
|
|
|
|
|
# Skip while value is small |
|
2270
|
|
|
|
|
|
|
->SkipWhile(sub { $_[0]{count} < 100 }) |
|
2271
|
|
|
|
|
|
|
|
|
2272
|
|
|
|
|
|
|
# Process after certain timestamp |
|
2273
|
|
|
|
|
|
|
->SkipWhile(sub { $_[0]{time} lt '2026-02-01' }) |
|
2274
|
|
|
|
|
|
|
|
|
2275
|
|
|
|
|
|
|
B<Note:> SkipWhile only skips initial elements. Once predicate is |
|
2276
|
|
|
|
|
|
|
false, all remaining elements are included. |
|
2277
|
|
|
|
|
|
|
|
|
2278
|
|
|
|
|
|
|
[1,2,3,4,5,2,1]->SkipWhile(sub { $_[0] < 4 }) # (4,5,2,1) |
|
2279
|
|
|
|
|
|
|
|
|
2280
|
|
|
|
|
|
|
=back |
|
2281
|
|
|
|
|
|
|
|
|
2282
|
|
|
|
|
|
|
=head2 Ordering Methods |
|
2283
|
|
|
|
|
|
|
|
|
2284
|
|
|
|
|
|
|
B<Sort stability:> C<OrderBy*> and C<ThenBy*> use a Schwartzian-Transform |
|
2285
|
|
|
|
|
|
|
decorated-array technique that appends the original element index as a |
|
2286
|
|
|
|
|
|
|
final tie-breaker. This guarantees completely stable multi-key sorting on |
|
2287
|
|
|
|
|
|
|
B<all Perl versions>, where built-in C<sort> stability |
|
2288
|
|
|
|
|
|
|
is not guaranteed. |
|
2289
|
|
|
|
|
|
|
|
|
2290
|
|
|
|
|
|
|
B LTSV::LINQ provides three families: |
|
2291
|
|
|
|
|
|
|
|
|
2292
|
|
|
|
|
|
|
=over 4 |
|
2293
|
|
|
|
|
|
|
|
|
2294
|
|
|
|
|
|
|
=item * C<OrderBy> / C<OrderByDescending> / C<ThenBy> / C<ThenByDescending> |
|
2295
|
|
|
|
|
|
|
|
|
2296
|
|
|
|
|
|
|
Smart comparison: numeric (C<E<lt>=E<gt>>) when both keys look numeric, |
|
2297
|
|
|
|
|
|
|
string (C<cmp>) otherwise. Convenient for LTSV data where field values |
|
2298
|
|
|
|
|
|
|
are always strings but commonly hold numbers. |
|
2299
|
|
|
|
|
|
|
|
|
2300
|
|
|
|
|
|
|
=item * C<OrderByStr> / C<OrderByStrDescending> / C<ThenByStr> / C<ThenByStrDescending> |
|
2301
|
|
|
|
|
|
|
|
|
2302
|
|
|
|
|
|
|
Unconditional string comparison (C<cmp>). Use when keys must sort |
|
2303
|
|
|
|
|
|
|
lexicographically regardless of content (e.g. version strings, codes). |
|
2304
|
|
|
|
|
|
|
|
|
2305
|
|
|
|
|
|
|
=item * C<OrderByNum> / C<OrderByNumDescending> / C<ThenByNum> / C<ThenByNumDescending> |
|
2306
|
|
|
|
|
|
|
|
|
2307
|
|
|
|
|
|
|
Unconditional numeric comparison (C<E<lt>=E<gt>>). Use when keys are |
|
2308
|
|
|
|
|
|
|
always numeric. Undefined or empty values are treated as C<0>. |
|
2309
|
|
|
|
|
|
|
|
|
2310
|
|
|
|
|
|
|
=back |
|
2311
|
|
|
|
|
|
|
|
|
2312
|
|
|
|
|
|
|
B C methods return a C |
|
2313
|
|
|
|
|
|
|
object (a subclass of C). This mirrors the way .NET LINQ's |
|
2314
|
|
|
|
|
|
|
C returns C>, which exposes C and |
|
2315
|
|
|
|
|
|
|
C. All C methods (C, C |
|
2316
|
|
|
|
|
|
|
C, etc.) are available on the returned object through inheritance. |
|
2317
|
|
|
|
|
|
|
C methods are B available on C objects, |
|
2318
|
|
|
|
|
|
|
not on plain C objects. |
|
2319
|
|
|
|
|
|
|
|
|
2320
|
|
|
|
|
|
|
B C always returns a B C |
|
2321
|
|
|
|
|
|
|
object; the original is unchanged. Branching sort chains work correctly: |
|
2322
|
|
|
|
|
|
|
|
|
2323
|
|
|
|
|
|
|
my $by_dept = LTSV::LINQ->From(\@data)->OrderBy(sub { $_[0]{dept} }); |
|
2324
|
|
|
|
|
|
|
my $asc = $by_dept->ThenBy(sub { $_[0]{name} }); |
|
2325
|
|
|
|
|
|
|
my $desc = $by_dept->ThenByNum(sub { $_[0]{salary} }); |
|
2326
|
|
|
|
|
|
|
# $asc and $desc are completely independent queries |
|
2327
|
|
|
|
|
|
|
|
|
2328
|
|
|
|
|
|
|
=over 4 |
|
2329
|
|
|
|
|
|
|
|
|
2330
|
|
|
|
|
|
|
=item B |
|
2331
|
|
|
|
|
|
|
|
|
2332
|
|
|
|
|
|
|
Sort in ascending order using smart comparison: if both keys look like |
|
2333
|
|
|
|
|
|
|
numbers (integers, decimals, negative, or exponential notation), numeric |
|
2334
|
|
|
|
|
|
|
comparison (C<E<lt>=E<gt>>) is used; otherwise string comparison (C<cmp>) |
|
2335
|
|
|
|
|
|
|
is used. Returns a C object. |
|
2336
|
|
|
|
|
|
|
|
|
2337
|
|
|
|
|
|
|
->OrderBy(sub { $_[0]{timestamp} }) # string keys: lexicographic |
|
2338
|
|
|
|
|
|
|
->OrderBy(sub { $_[0]{bytes} }) # "1024", "256" -> numeric (256, 1024) |
|
2339
|
|
|
|
|
|
|
|
|
2340
|
|
|
|
|
|
|
B<Note:> When you need explicit control over the comparison type, use |
|
2341
|
|
|
|
|
|
|
C<OrderByStr> (always C<cmp>) or C<OrderByNum> (always C<E<lt>=E<gt>>). |
|
2342
|
|
|
|
|
|
|
|
|
2343
|
|
|
|
|
|
|
=item B |
|
2344
|
|
|
|
|
|
|
|
|
2345
|
|
|
|
|
|
|
Sort in descending order using the same smart comparison as C<OrderBy>. |
|
2346
|
|
|
|
|
|
|
Returns a C object. |
|
2347
|
|
|
|
|
|
|
|
|
2348
|
|
|
|
|
|
|
->OrderByDescending(sub { $_[0]{count} }) |
|
2349
|
|
|
|
|
|
|
|
|
2350
|
|
|
|
|
|
|
=item B |
|
2351
|
|
|
|
|
|
|
|
|
2352
|
|
|
|
|
|
|
Sort in ascending order using string comparison (C<cmp>) unconditionally. |
|
2353
|
|
|
|
|
|
|
Returns a C object. |
|
2354
|
|
|
|
|
|
|
|
|
2355
|
|
|
|
|
|
|
->OrderByStr(sub { $_[0]{code} }) # "10" lt "9" (lexicographic) |
|
2356
|
|
|
|
|
|
|
|
|
2357
|
|
|
|
|
|
|
=item B |
|
2358
|
|
|
|
|
|
|
|
|
2359
|
|
|
|
|
|
|
Sort in descending order using string comparison (C<cmp>) unconditionally. |
|
2360
|
|
|
|
|
|
|
Returns a C object. |
|
2361
|
|
|
|
|
|
|
|
|
2362
|
|
|
|
|
|
|
->OrderByStrDescending(sub { $_[0]{name} }) |
|
2363
|
|
|
|
|
|
|
|
|
2364
|
|
|
|
|
|
|
=item B |
|
2365
|
|
|
|
|
|
|
|
|
2366
|
|
|
|
|
|
|
Sort in ascending order using numeric comparison (C<E<lt>=E<gt>>) |
|
2367
|
|
|
|
|
|
|
unconditionally. Returns a C object. |
|
2368
|
|
|
|
|
|
|
|
|
2369
|
|
|
|
|
|
|
->OrderByNum(sub { $_[0]{bytes} }) # 9 < 10 (numeric) |
|
2370
|
|
|
|
|
|
|
|
|
2371
|
|
|
|
|
|
|
B<Note:> Undefined or empty values are treated as C<0>. |
|
2372
|
|
|
|
|
|
|
|
|
2373
|
|
|
|
|
|
|
=item B |
|
2374
|
|
|
|
|
|
|
|
|
2375
|
|
|
|
|
|
|
Sort in descending order using numeric comparison (C<E<lt>=E<gt>>) |
|
2376
|
|
|
|
|
|
|
unconditionally. Returns a C object. |
|
2377
|
|
|
|
|
|
|
|
|
2378
|
|
|
|
|
|
|
->OrderByNumDescending(sub { $_[0]{response_time} }) |
|
2379
|
|
|
|
|
|
|
|
|
2380
|
|
|
|
|
|
|
=item B<Reverse()> |
|
2381
|
|
|
|
|
|
|
|
|
2382
|
|
|
|
|
|
|
Reverse the order. |
|
2383
|
|
|
|
|
|
|
|
|
2384
|
|
|
|
|
|
|
->Reverse() |
|
2385
|
|
|
|
|
|
|
|
|
2386
|
|
|
|
|
|
|
=item B |
|
2387
|
|
|
|
|
|
|
|
|
2388
|
|
|
|
|
|
|
Add an ascending secondary sort key using smart comparison. Must be |
|
2389
|
|
|
|
|
|
|
called on a C object (i.e., after C). |
|
2390
|
|
|
|
|
|
|
Returns a new C object; the original is unchanged. |
|
2391
|
|
|
|
|
|
|
|
|
2392
|
|
|
|
|
|
|
->OrderBy(sub { $_[0]{dept} })->ThenBy(sub { $_[0]{name} }) |
|
2393
|
|
|
|
|
|
|
|
|
2394
|
|
|
|
|
|
|
=item B |
|
2395
|
|
|
|
|
|
|
|
|
2396
|
|
|
|
|
|
|
Add a descending secondary sort key using smart comparison. |
|
2397
|
|
|
|
|
|
|
|
|
2398
|
|
|
|
|
|
|
->OrderBy(sub { $_[0]{dept} })->ThenByDescending(sub { $_[0]{salary} }) |
|
2399
|
|
|
|
|
|
|
|
|
2400
|
|
|
|
|
|
|
=item B |
|
2401
|
|
|
|
|
|
|
|
|
2402
|
|
|
|
|
|
|
Add an ascending secondary sort key using string comparison (C<cmp>). |
|
2403
|
|
|
|
|
|
|
|
|
2404
|
|
|
|
|
|
|
->OrderByStr(sub { $_[0]{dept} })->ThenByStr(sub { $_[0]{code} }) |
|
2405
|
|
|
|
|
|
|
|
|
2406
|
|
|
|
|
|
|
=item B |
|
2407
|
|
|
|
|
|
|
|
|
2408
|
|
|
|
|
|
|
Add a descending secondary sort key using string comparison (C<cmp>). |
|
2409
|
|
|
|
|
|
|
|
|
2410
|
|
|
|
|
|
|
->OrderByStr(sub { $_[0]{dept} })->ThenByStrDescending(sub { $_[0]{name} }) |
|
2411
|
|
|
|
|
|
|
|
|
2412
|
|
|
|
|
|
|
=item B |
|
2413
|
|
|
|
|
|
|
|
|
2414
|
|
|
|
|
|
|
Add an ascending secondary sort key using numeric comparison (C<E<lt>=E<gt>>). |
|
2415
|
|
|
|
|
|
|
|
|
2416
|
|
|
|
|
|
|
->OrderByStr(sub { $_[0]{dept} })->ThenByNum(sub { $_[0]{salary} }) |
|
2417
|
|
|
|
|
|
|
|
|
2418
|
|
|
|
|
|
|
=item B |
|
2419
|
|
|
|
|
|
|
|
|
2420
|
|
|
|
|
|
|
Add a descending secondary sort key using numeric comparison (C<E<lt>=E<gt>>). |
|
2421
|
|
|
|
|
|
|
Undefined or empty values are treated as C<0>. |
|
2422
|
|
|
|
|
|
|
|
|
2423
|
|
|
|
|
|
|
->OrderByStr(sub { $_[0]{host} })->ThenByNumDescending(sub { $_[0]{bytes} }) |
|
2424
|
|
|
|
|
|
|
|
|
2425
|
|
|
|
|
|
|
=back |
|
2426
|
|
|
|
|
|
|
|
|
2427
|
|
|
|
|
|
|
=head2 Grouping Methods |
|
2428
|
|
|
|
|
|
|
|
|
2429
|
|
|
|
|
|
|
=over 4 |
|
2430
|
|
|
|
|
|
|
|
|
2431
|
|
|
|
|
|
|
=item B |
|
2432
|
|
|
|
|
|
|
|
|
2433
|
|
|
|
|
|
|
Group elements by key. |
|
2434
|
|
|
|
|
|
|
|
|
2435
|
|
|
|
|
|
|
B<Returns:> New query where each element is a hashref with two fields: |
|
2436
|
|
|
|
|
|
|
|
|
2437
|
|
|
|
|
|
|
=over 4 |
|
2438
|
|
|
|
|
|
|
|
|
2439
|
|
|
|
|
|
|
=item * C<Key> - The group key (string) |
|
2440
|
|
|
|
|
|
|
|
|
2441
|
|
|
|
|
|
|
=item * C<Elements> - Array reference of elements in the group |
|
2442
|
|
|
|
|
|
|
|
|
2443
|
|
|
|
|
|
|
=back |
|
2444
|
|
|
|
|
|
|
|
|
2445
|
|
|
|
|
|
|
B<Note:> This operation is eager - the entire sequence is loaded into memory |
|
2446
|
|
|
|
|
|
|
immediately. Groups are returned in the order their keys first appear in |
|
2447
|
|
|
|
|
|
|
the source sequence, matching the behaviour of .NET LINQ's C<GroupBy>. |
|
2448
|
|
|
|
|
|
|
|
|
2449
|
|
|
|
|
|
|
B<Examples:> |
|
2450
|
|
|
|
|
|
|
|
|
2451
|
|
|
|
|
|
|
# Group access log by status code |
|
2452
|
|
|
|
|
|
|
my @groups = LTSV::LINQ->FromLTSV('access.log') |
|
2453
|
|
|
|
|
|
|
->GroupBy(sub { $_[0]{status} }) |
|
2454
|
|
|
|
|
|
|
->ToArray(); |
|
2455
|
|
|
|
|
|
|
|
|
2456
|
|
|
|
|
|
|
for my $g (@groups) { |
|
2457
|
|
|
|
|
|
|
printf "status=%s count=%d\n", $g->{Key}, scalar @{$g->{Elements}}; |
|
2458
|
|
|
|
|
|
|
} |
|
2459
|
|
|
|
|
|
|
|
|
2460
|
|
|
|
|
|
|
# With element selector |
|
2461
|
|
|
|
|
|
|
->GroupBy(sub { $_[0]{status} }, sub { $_[0]{path} }) |
|
2462
|
|
|
|
|
|
|
|
|
2463
|
|
|
|
|
|
|
B<Note:> C<Elements> is a plain array reference, not a LTSV::LINQ object. |
|
2464
|
|
|
|
|
|
|
To apply further LINQ operations on a group, wrap it with C<From>: |
|
2465
|
|
|
|
|
|
|
|
|
2466
|
|
|
|
|
|
|
for my $g (@groups) { |
|
2467
|
|
|
|
|
|
|
my $total = LTSV::LINQ->From($g->{Elements}) |
|
2468
|
|
|
|
|
|
|
->Sum(sub { $_[0]{bytes} }); |
|
2469
|
|
|
|
|
|
|
printf "status=%s total_bytes=%d\n", $g->{Key}, $total; |
|
2470
|
|
|
|
|
|
|
} |
|
2471
|
|
|
|
|
|
|
|
|
2472
|
|
|
|
|
|
|
=back |
|
2473
|
|
|
|
|
|
|
|
|
2474
|
|
|
|
|
|
|
=head2 Set Operations |
|
2475
|
|
|
|
|
|
|
|
|
2476
|
|
|
|
|
|
|
B |
|
2477
|
|
|
|
|
|
|
|
|
2478
|
|
|
|
|
|
|
=over 4 |
|
2479
|
|
|
|
|
|
|
|
|
2480
|
|
|
|
|
|
|
=item * C<Distinct> is fully lazy: elements are tested one by one as the |
|
2481
|
|
|
|
|
|
|
output sequence is consumed. |
|
2482
|
|
|
|
|
|
|
|
|
2483
|
|
|
|
|
|
|
=item * C<Union>, C<Intersect>, C<Except> are B<partially eager>: when |
|
2484
|
|
|
|
|
|
|
the method is called, the B<second> sequence is consumed in full and |
|
2485
|
|
|
|
|
|
|
stored in an in-memory hash for O(1) lookup. The B<first> sequence is |
|
2486
|
|
|
|
|
|
|
then iterated lazily. This matches the behaviour of .NET LINQ, which |
|
2487
|
|
|
|
|
|
|
also buffers the second (hash-side) sequence up front. |
|
2488
|
|
|
|
|
|
|
|
|
2489
|
|
|
|
|
|
|
=back |
|
2490
|
|
|
|
|
|
|
|
|
2491
|
|
|
|
|
|
|
=over 4 |
|
2492
|
|
|
|
|
|
|
|
|
2493
|
|
|
|
|
|
|
=item B |
|
2494
|
|
|
|
|
|
|
|
|
2495
|
|
|
|
|
|
|
Remove duplicate elements. |
|
2496
|
|
|
|
|
|
|
|
|
2497
|
|
|
|
|
|
|
B<Parameters:> |
|
2498
|
|
|
|
|
|
|
|
|
2499
|
|
|
|
|
|
|
=over 4 |
|
2500
|
|
|
|
|
|
|
|
|
2501
|
|
|
|
|
|
|
=item * C<$key_selector> - (Optional) Code ref: C<($element) -E<gt> $key>. |
|
2502
|
|
|
|
|
|
|
Extracts a comparison key from each element. This is a single-argument |
|
2503
|
|
|
|
|
|
|
function (unlike Perl's C<sort> comparator), and is I<not> a two-argument |
|
2504
|
|
|
|
|
|
|
comparison function. |
|
2505
|
|
|
|
|
|
|
|
|
2506
|
|
|
|
|
|
|
=back |
|
2507
|
|
|
|
|
|
|
|
|
2508
|
|
|
|
|
|
|
->Distinct() |
|
2509
|
|
|
|
|
|
|
->Distinct(sub { lc($_[0]) }) # case-insensitive strings |
|
2510
|
|
|
|
|
|
|
->Distinct(sub { $_[0]{id} }) # hashref: dedupe by field |
|
2511
|
|
|
|
|
|
|
|
|
2512
|
|
|
|
|
|
|
=item B |
|
2513
|
|
|
|
|
|
|
|
|
2514
|
|
|
|
|
|
|
Produce set union of two sequences (no duplicates). |
|
2515
|
|
|
|
|
|
|
|
|
2516
|
|
|
|
|
|
|
B<Parameters:> |
|
2517
|
|
|
|
|
|
|
|
|
2518
|
|
|
|
|
|
|
=over 4 |
|
2519
|
|
|
|
|
|
|
|
|
2520
|
|
|
|
|
|
|
=item * C<$second> - Second sequence (LTSV::LINQ object) |
|
2521
|
|
|
|
|
|
|
|
|
2522
|
|
|
|
|
|
|
=item * C<$key_selector> - (Optional) Code ref: C<($element) -E<gt> $key>. |
|
2523
|
|
|
|
|
|
|
Single-argument key extraction function (not a two-argument sort comparator). |
|
2524
|
|
|
|
|
|
|
|
|
2525
|
|
|
|
|
|
|
=back |
|
2526
|
|
|
|
|
|
|
|
|
2527
|
|
|
|
|
|
|
B<Returns:> New query with elements from both sequences (distinct) |
|
2528
|
|
|
|
|
|
|
|
|
2529
|
|
|
|
|
|
|
B B The first sequence is iterated lazily; |
|
2530
|
|
|
|
|
|
|
the second is fully consumed at call time and stored in memory. |
|
2531
|
|
|
|
|
|
|
|
|
2532
|
|
|
|
|
|
|
B<Examples:> |
|
2533
|
|
|
|
|
|
|
|
|
2534
|
|
|
|
|
|
|
# Simple union |
|
2535
|
|
|
|
|
|
|
my $q1 = LTSV::LINQ->From([1, 2, 3]); |
|
2536
|
|
|
|
|
|
|
my $q2 = LTSV::LINQ->From([3, 4, 5]); |
|
2537
|
|
|
|
|
|
|
$q1->Union($q2)->ToArray(); # (1, 2, 3, 4, 5) |
|
2538
|
|
|
|
|
|
|
|
|
2539
|
|
|
|
|
|
|
# Case-insensitive union |
|
2540
|
|
|
|
|
|
|
->Union($other, sub { lc($_[0]) }) |
|
2541
|
|
|
|
|
|
|
|
|
2542
|
|
|
|
|
|
|
B<Note:> Equivalent to Concat()->Distinct(). Automatically removes duplicates. |
|
2543
|
|
|
|
|
|
|
|
|
2544
|
|
|
|
|
|
|
=item B |
|
2545
|
|
|
|
|
|
|
|
|
2546
|
|
|
|
|
|
|
Produce set intersection of two sequences. |
|
2547
|
|
|
|
|
|
|
|
|
2548
|
|
|
|
|
|
|
B<Parameters:> |
|
2549
|
|
|
|
|
|
|
|
|
2550
|
|
|
|
|
|
|
=over 4 |
|
2551
|
|
|
|
|
|
|
|
|
2552
|
|
|
|
|
|
|
=item * C<$second> - Second sequence (LTSV::LINQ object) |
|
2553
|
|
|
|
|
|
|
|
|
2554
|
|
|
|
|
|
|
=item * C<$key_selector> - (Optional) Code ref: C<($element) -E<gt> $key>. |
|
2555
|
|
|
|
|
|
|
Single-argument key extraction function (not a two-argument sort comparator). |
|
2556
|
|
|
|
|
|
|
|
|
2557
|
|
|
|
|
|
|
=back |
|
2558
|
|
|
|
|
|
|
|
|
2559
|
|
|
|
|
|
|
B<Returns:> New query with common elements only (distinct) |
|
2560
|
|
|
|
|
|
|
|
|
2561
|
|
|
|
|
|
|
B B The second sequence is fully consumed |
|
2562
|
|
|
|
|
|
|
at call time and stored in a hash; the first is iterated lazily. |
|
2563
|
|
|
|
|
|
|
|
|
2564
|
|
|
|
|
|
|
B<Examples:> |
|
2565
|
|
|
|
|
|
|
|
|
2566
|
|
|
|
|
|
|
# Common elements |
|
2567
|
|
|
|
|
|
|
LTSV::LINQ->From([1, 2, 3]) |
|
2568
|
|
|
|
|
|
|
->Intersect(LTSV::LINQ->From([2, 3, 4])) |
|
2569
|
|
|
|
|
|
|
->ToArray(); # (2, 3) |
|
2570
|
|
|
|
|
|
|
|
|
2571
|
|
|
|
|
|
|
# Find users in both lists |
|
2572
|
|
|
|
|
|
|
$users1->Intersect($users2, sub { $_[0]{id} }) |
|
2573
|
|
|
|
|
|
|
|
|
2574
|
|
|
|
|
|
|
B<Note:> Only includes elements present in both sequences. |
|
2575
|
|
|
|
|
|
|
|
|
2576
|
|
|
|
|
|
|
=item B |
|
2577
|
|
|
|
|
|
|
|
|
2578
|
|
|
|
|
|
|
Produce set difference (elements in first but not in second). |
|
2579
|
|
|
|
|
|
|
|
|
2580
|
|
|
|
|
|
|
B<Parameters:> |
|
2581
|
|
|
|
|
|
|
|
|
2582
|
|
|
|
|
|
|
=over 4 |
|
2583
|
|
|
|
|
|
|
|
|
2584
|
|
|
|
|
|
|
=item * C<$second> - Second sequence (LTSV::LINQ object) |
|
2585
|
|
|
|
|
|
|
|
|
2586
|
|
|
|
|
|
|
=item * C<$key_selector> - (Optional) Code ref: C<($element) -E<gt> $key>. |
|
2587
|
|
|
|
|
|
|
Single-argument key extraction function (not a two-argument sort comparator). |
|
2588
|
|
|
|
|
|
|
|
|
2589
|
|
|
|
|
|
|
=back |
|
2590
|
|
|
|
|
|
|
|
|
2591
|
|
|
|
|
|
|
B<Returns:> New query with elements only in first sequence (distinct) |
|
2592
|
|
|
|
|
|
|
|
|
2593
|
|
|
|
|
|
|
B B The second sequence is fully consumed |
|
2594
|
|
|
|
|
|
|
at call time and stored in a hash; the first is iterated lazily. |
|
2595
|
|
|
|
|
|
|
|
|
2596
|
|
|
|
|
|
|
B<Examples:> |
|
2597
|
|
|
|
|
|
|
|
|
2598
|
|
|
|
|
|
|
# Set difference |
|
2599
|
|
|
|
|
|
|
LTSV::LINQ->From([1, 2, 3]) |
|
2600
|
|
|
|
|
|
|
->Except(LTSV::LINQ->From([2, 3, 4])) |
|
2601
|
|
|
|
|
|
|
->ToArray(); # (1) |
|
2602
|
|
|
|
|
|
|
|
|
2603
|
|
|
|
|
|
|
# Find users in first list but not second |
|
2604
|
|
|
|
|
|
|
$all_users->Except($inactive_users, sub { $_[0]{id} }) |
|
2605
|
|
|
|
|
|
|
|
|
2606
|
|
|
|
|
|
|
B<Note:> Returns elements from first sequence not present in second. |
|
2607
|
|
|
|
|
|
|
|
|
2608
|
|
|
|
|
|
|
=back |
|
2609
|
|
|
|
|
|
|
|
|
2610
|
|
|
|
|
|
|
=head2 Join Operations |
|
2611
|
|
|
|
|
|
|
|
|
2612
|
|
|
|
|
|
|
B<Note:> Both C<Join> and C<GroupJoin> are B<partially eager>: |
|
2613
|
|
|
|
|
|
|
when the method is called, the B<inner> sequence is consumed in full and |
|
2614
|
|
|
|
|
|
|
stored in an in-memory lookup table (hash of arrays, keyed by inner key). |
|
2615
|
|
|
|
|
|
|
The B<outer> sequence is then iterated lazily, producing results on demand. |
|
2616
|
|
|
|
|
|
|
|
|
2617
|
|
|
|
|
|
|
This matches the behaviour of .NET LINQ's hash-join implementation. |
|
2618
|
|
|
|
|
|
|
The memory cost is O(inner size); for very large inner sequences, consider |
|
2619
|
|
|
|
|
|
|
reversing the join or pre-filtering the inner sequence before passing it. |
|
2620
|
|
|
|
|
|
|
|
|
2621
|
|
|
|
|
|
|
=over 4 |
|
2622
|
|
|
|
|
|
|
|
|
2623
|
|
|
|
|
|
|
=item B<Join($inner, $outer_key_selector, $inner_key_selector, $result_selector)> |
|
2624
|
|
|
|
|
|
|
|
|
2625
|
|
|
|
|
|
|
Correlate elements of two sequences based on matching keys (inner join). |
|
2626
|
|
|
|
|
|
|
|
|
2627
|
|
|
|
|
|
|
B<Parameters:> |
|
2628
|
|
|
|
|
|
|
|
|
2629
|
|
|
|
|
|
|
=over 4 |
|
2630
|
|
|
|
|
|
|
|
|
2631
|
|
|
|
|
|
|
=item * C<$inner> - Inner sequence (LTSV::LINQ object) |
|
2632
|
|
|
|
|
|
|
|
|
2633
|
|
|
|
|
|
|
=item * C<$outer_key_selector> - Function to extract key from outer element |
|
2634
|
|
|
|
|
|
|
|
|
2635
|
|
|
|
|
|
|
=item * C<$inner_key_selector> - Function to extract key from inner element |
|
2636
|
|
|
|
|
|
|
|
|
2637
|
|
|
|
|
|
|
=item * C<$result_selector> - Function to create result: ($outer_item, $inner_item) -> $result |
|
2638
|
|
|
|
|
|
|
|
|
2639
|
|
|
|
|
|
|
=back |
|
2640
|
|
|
|
|
|
|
|
|
2641
|
|
|
|
|
|
|
B<Returns:> Query with joined results |
|
2642
|
|
|
|
|
|
|
|
|
2643
|
|
|
|
|
|
|
B<Examples:> |
|
2644
|
|
|
|
|
|
|
|
|
2645
|
|
|
|
|
|
|
# Join users with their orders |
|
2646
|
|
|
|
|
|
|
my $users = LTSV::LINQ->From([ |
|
2647
|
|
|
|
|
|
|
{id => 1, name => 'Alice'}, |
|
2648
|
|
|
|
|
|
|
{id => 2, name => 'Bob'} |
|
2649
|
|
|
|
|
|
|
]); |
|
2650
|
|
|
|
|
|
|
|
|
2651
|
|
|
|
|
|
|
my $orders = LTSV::LINQ->From([ |
|
2652
|
|
|
|
|
|
|
{user_id => 1, product => 'Book'}, |
|
2653
|
|
|
|
|
|
|
{user_id => 1, product => 'Pen'}, |
|
2654
|
|
|
|
|
|
|
{user_id => 2, product => 'Notebook'} |
|
2655
|
|
|
|
|
|
|
]); |
|
2656
|
|
|
|
|
|
|
|
|
2657
|
|
|
|
|
|
|
$users->Join( |
|
2658
|
|
|
|
|
|
|
$orders, |
|
2659
|
|
|
|
|
|
|
sub { $_[0]{id} }, # outer key |
|
2660
|
|
|
|
|
|
|
sub { $_[0]{user_id} }, # inner key |
|
2661
|
|
|
|
|
|
|
sub { |
|
2662
|
|
|
|
|
|
|
my($user, $order) = @_; |
|
2663
|
|
|
|
|
|
|
return { |
|
2664
|
|
|
|
|
|
|
name => $user->{name}, |
|
2665
|
|
|
|
|
|
|
product => $order->{product} |
|
2666
|
|
|
|
|
|
|
}; |
|
2667
|
|
|
|
|
|
|
} |
|
2668
|
|
|
|
|
|
|
)->ToArray(); |
|
2669
|
|
|
|
|
|
|
# [{name => 'Alice', product => 'Book'}, |
|
2670
|
|
|
|
|
|
|
# {name => 'Alice', product => 'Pen'}, |
|
2671
|
|
|
|
|
|
|
# {name => 'Bob', product => 'Notebook'}] |
|
2672
|
|
|
|
|
|
|
|
|
2673
|
|
|
|
|
|
|
# Join LTSV files by request ID |
|
2674
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV('access.log')->Join( |
|
2675
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV('error.log'), |
|
2676
|
|
|
|
|
|
|
sub { $_[0]{request_id} }, |
|
2677
|
|
|
|
|
|
|
sub { $_[0]{request_id} }, |
|
2678
|
|
|
|
|
|
|
sub { |
|
2679
|
|
|
|
|
|
|
my($access, $error) = @_; |
|
2680
|
|
|
|
|
|
|
return { |
|
2681
|
|
|
|
|
|
|
url => $access->{url}, |
|
2682
|
|
|
|
|
|
|
error => $error->{message} |
|
2683
|
|
|
|
|
|
|
}; |
|
2684
|
|
|
|
|
|
|
} |
|
2685
|
|
|
|
|
|
|
) |
|
2686
|
|
|
|
|
|
|
|
|
2687
|
|
|
|
|
|
|
B<Note:> This is an inner join - only matching elements are returned. |
|
2688
|
|
|
|
|
|
|
The inner sequence is fully loaded into memory. |
|
2689
|
|
|
|
|
|
|
|
|
2690
|
|
|
|
|
|
|
=item B<GroupJoin($inner, $outer_key_selector, $inner_key_selector, $result_selector)> |
|
2691
|
|
|
|
|
|
|
|
|
2692
|
|
|
|
|
|
|
Correlates elements of two sequences with group join (LEFT OUTER JOIN-like). |
|
2693
|
|
|
|
|
|
|
Each outer element is matched with a group of inner elements (possibly empty). |
|
2694
|
|
|
|
|
|
|
|
|
2695
|
|
|
|
|
|
|
B<Parameters:> |
|
2696
|
|
|
|
|
|
|
|
|
2697
|
|
|
|
|
|
|
=over 4 |
|
2698
|
|
|
|
|
|
|
|
|
2699
|
|
|
|
|
|
|
=item * C<$inner> - Inner sequence (LTSV::LINQ object) |
|
2700
|
|
|
|
|
|
|
|
|
2701
|
|
|
|
|
|
|
=item * C<$outer_key_selector> - Function to extract key from outer element |
|
2702
|
|
|
|
|
|
|
|
|
2703
|
|
|
|
|
|
|
=item * C<$inner_key_selector> - Function to extract key from inner element |
|
2704
|
|
|
|
|
|
|
|
|
2705
|
|
|
|
|
|
|
=item * C<$result_selector> - Function: ($outer_item, $inner_group) -> $result. |
|
2706
|
|
|
|
|
|
|
The C<$inner_group> is a LTSV::LINQ object containing matched inner elements |
|
2707
|
|
|
|
|
|
|
(empty sequence if no matches). |
|
2708
|
|
|
|
|
|
|
|
|
2709
|
|
|
|
|
|
|
=back |
|
2710
|
|
|
|
|
|
|
|
|
2711
|
|
|
|
|
|
|
B<Returns:> New query with one result per outer element (lazy) |
|
2712
|
|
|
|
|
|
|
|
|
2713
|
|
|
|
|
|
|
B<Examples:> |
|
2714
|
|
|
|
|
|
|
|
|
2715
|
|
|
|
|
|
|
# Order count per user (including users with no orders) |
|
2716
|
|
|
|
|
|
|
my $users = LTSV::LINQ->From([ |
|
2717
|
|
|
|
|
|
|
{id => 1, name => 'Alice'}, |
|
2718
|
|
|
|
|
|
|
{id => 2, name => 'Bob'}, |
|
2719
|
|
|
|
|
|
|
{id => 3, name => 'Carol'} |
|
2720
|
|
|
|
|
|
|
]); |
|
2721
|
|
|
|
|
|
|
|
|
2722
|
|
|
|
|
|
|
my $orders = LTSV::LINQ->From([ |
|
2723
|
|
|
|
|
|
|
{user_id => 1, product => 'Book', amount => 10}, |
|
2724
|
|
|
|
|
|
|
{user_id => 1, product => 'Pen', amount => 5}, |
|
2725
|
|
|
|
|
|
|
{user_id => 2, product => 'Notebook', amount => 15} |
|
2726
|
|
|
|
|
|
|
]); |
|
2727
|
|
|
|
|
|
|
|
|
2728
|
|
|
|
|
|
|
$users->GroupJoin( |
|
2729
|
|
|
|
|
|
|
$orders, |
|
2730
|
|
|
|
|
|
|
sub { $_[0]{id} }, |
|
2731
|
|
|
|
|
|
|
sub { $_[0]{user_id} }, |
|
2732
|
|
|
|
|
|
|
sub { |
|
2733
|
|
|
|
|
|
|
my($user, $orders) = @_; |
|
2734
|
|
|
|
|
|
|
return { |
|
2735
|
|
|
|
|
|
|
name => $user->{name}, |
|
2736
|
|
|
|
|
|
|
count => $orders->Count(), |
|
2737
|
|
|
|
|
|
|
total => $orders->Sum(sub { $_[0]{amount} }) |
|
2738
|
|
|
|
|
|
|
}; |
|
2739
|
|
|
|
|
|
|
} |
|
2740
|
|
|
|
|
|
|
)->ToArray(); |
|
2741
|
|
|
|
|
|
|
# [ |
|
2742
|
|
|
|
|
|
|
# {name => 'Alice', count => 2, total => 15}, |
|
2743
|
|
|
|
|
|
|
# {name => 'Bob', count => 1, total => 15}, |
|
2744
|
|
|
|
|
|
|
# {name => 'Carol', count => 0, total => 0}, # no orders |
|
2745
|
|
|
|
|
|
|
# ] |
|
2746
|
|
|
|
|
|
|
|
|
2747
|
|
|
|
|
|
|
# Flat list with no-match rows included (LEFT OUTER JOIN, cf. Join for inner join) |
|
2748
|
|
|
|
|
|
|
$users->GroupJoin( |
|
2749
|
|
|
|
|
|
|
$orders, |
|
2750
|
|
|
|
|
|
|
sub { $_[0]{id} }, |
|
2751
|
|
|
|
|
|
|
sub { $_[0]{user_id} }, |
|
2752
|
|
|
|
|
|
|
sub { |
|
2753
|
|
|
|
|
|
|
my($user, $user_orders) = @_; |
|
2754
|
|
|
|
|
|
|
my @rows = $user_orders->ToArray(); |
|
2755
|
|
|
|
|
|
|
return @rows |
|
2756
|
|
|
|
|
|
|
? [ map { {name => $user->{name}, product => $_->{product}} } @rows ] |
|
2757
|
|
|
|
|
|
|
: [ {name => $user->{name}, product => 'none'} ]; |
|
2758
|
|
|
|
|
|
|
} |
|
2759
|
|
|
|
|
|
|
)->SelectMany(sub { $_[0] }) # Flatten the array references |
|
2760
|
|
|
|
|
|
|
->ToArray(); |
|
2761
|
|
|
|
|
|
|
|
|
2762
|
|
|
|
|
|
|
B<Note:> Unlike Join, every outer element appears in the result even when |
|
2763
|
|
|
|
|
|
|
there are no matching inner elements (LEFT OUTER JOIN semantics). |
|
2764
|
|
|
|
|
|
|
The inner sequence is fully loaded into memory. |
|
2765
|
|
|
|
|
|
|
|
|
2766
|
|
|
|
|
|
|
B<Note:> The C<$inner_group> LTSV::LINQ object is highly flexible. |
|
2767
|
|
|
|
|
|
|
It is specifically designed to be iterated multiple times within the |
|
2768
|
|
|
|
|
|
|
result selector (e.g., calling C<Count> followed by C<Sum>) because |
|
2769
|
|
|
|
|
|
|
it generates a fresh iterator for every terminal operation. |
|
2770
|
|
|
|
|
|
|
|
|
2771
|
|
|
|
|
|
|
=back |
|
2772
|
|
|
|
|
|
|
|
|
2773
|
|
|
|
|
|
|
=head2 Quantifier Methods |
|
2774
|
|
|
|
|
|
|
|
|
2775
|
|
|
|
|
|
|
=over 4 |
|
2776
|
|
|
|
|
|
|
|
|
2777
|
|
|
|
|
|
|
=item B<All($predicate)> |
|
2778
|
|
|
|
|
|
|
|
|
2779
|
|
|
|
|
|
|
Test if all elements satisfy condition. |
|
2780
|
|
|
|
|
|
|
|
|
2781
|
|
|
|
|
|
|
->All(sub { $_[0]{status} == 200 }) |
|
2782
|
|
|
|
|
|
|
|
|
2783
|
|
|
|
|
|
|
=item B<Any([$predicate])> |
|
2784
|
|
|
|
|
|
|
|
|
2785
|
|
|
|
|
|
|
Test if any element satisfies condition. |
|
2786
|
|
|
|
|
|
|
|
|
2787
|
|
|
|
|
|
|
->Any(sub { $_[0]{status} >= 400 }) |
|
2788
|
|
|
|
|
|
|
->Any() # Test if sequence is non-empty |
|
2789
|
|
|
|
|
|
|
|
|
2790
|
|
|
|
|
|
|
=item B<Contains($value [, $comparer])> |
|
2791
|
|
|
|
|
|
|
|
|
2792
|
|
|
|
|
|
|
Check if sequence contains specified element. |
|
2793
|
|
|
|
|
|
|
|
|
2794
|
|
|
|
|
|
|
B<Parameters:> |
|
2795
|
|
|
|
|
|
|
|
|
2796
|
|
|
|
|
|
|
=over 4 |
|
2797
|
|
|
|
|
|
|
|
|
2798
|
|
|
|
|
|
|
=item * C<$value> - Value to search for |
|
2799
|
|
|
|
|
|
|
|
|
2800
|
|
|
|
|
|
|
=item * C<$comparer> - (Optional) Custom comparison function |
|
2801
|
|
|
|
|
|
|
|
|
2802
|
|
|
|
|
|
|
=back |
|
2803
|
|
|
|
|
|
|
|
|
2804
|
|
|
|
|
|
|
B<Returns:> Boolean (1 or 0) |
|
2805
|
|
|
|
|
|
|
|
|
2806
|
|
|
|
|
|
|
B<Examples:> |
|
2807
|
|
|
|
|
|
|
|
|
2808
|
|
|
|
|
|
|
# Simple search |
|
2809
|
|
|
|
|
|
|
->Contains(5) # 1 if found, 0 otherwise |
|
2810
|
|
|
|
|
|
|
|
|
2811
|
|
|
|
|
|
|
# Case-insensitive search |
|
2812
|
|
|
|
|
|
|
->Contains('foo', sub { lc($_[0]) eq lc($_[1]) }) |
|
2813
|
|
|
|
|
|
|
|
|
2814
|
|
|
|
|
|
|
# Check for undef |
|
2815
|
|
|
|
|
|
|
->Contains(undef) |
|
2816
|
|
|
|
|
|
|
|
|
2817
|
|
|
|
|
|
|
=item B<SequenceEqual($second [, $comparer])> |
|
2818
|
|
|
|
|
|
|
|
|
2819
|
|
|
|
|
|
|
Determine if two sequences are equal (same elements in same order). |
|
2820
|
|
|
|
|
|
|
|
|
2821
|
|
|
|
|
|
|
B<Parameters:> |
|
2822
|
|
|
|
|
|
|
|
|
2823
|
|
|
|
|
|
|
=over 4 |
|
2824
|
|
|
|
|
|
|
|
|
2825
|
|
|
|
|
|
|
=item * C<$second> - Second sequence (LTSV::LINQ object) |
|
2826
|
|
|
|
|
|
|
|
|
2827
|
|
|
|
|
|
|
=item * C<$comparer> - (Optional) Comparison function ($a, $b) -> boolean |
|
2828
|
|
|
|
|
|
|
|
|
2829
|
|
|
|
|
|
|
=back |
|
2830
|
|
|
|
|
|
|
|
|
2831
|
|
|
|
|
|
|
B<Returns:> Boolean (1 if equal, 0 otherwise) |
|
2832
|
|
|
|
|
|
|
|
|
2833
|
|
|
|
|
|
|
B<Examples:> |
|
2834
|
|
|
|
|
|
|
|
|
2835
|
|
|
|
|
|
|
# Same sequences |
|
2836
|
|
|
|
|
|
|
LTSV::LINQ->From([1, 2, 3]) |
|
2837
|
|
|
|
|
|
|
->SequenceEqual(LTSV::LINQ->From([1, 2, 3])) # 1 (true) |
|
2838
|
|
|
|
|
|
|
|
|
2839
|
|
|
|
|
|
|
# Different elements |
|
2840
|
|
|
|
|
|
|
LTSV::LINQ->From([1, 2, 3]) |
|
2841
|
|
|
|
|
|
|
->SequenceEqual(LTSV::LINQ->From([1, 2, 4])) # 0 (false) |
|
2842
|
|
|
|
|
|
|
|
|
2843
|
|
|
|
|
|
|
# Different lengths |
|
2844
|
|
|
|
|
|
|
LTSV::LINQ->From([1, 2]) |
|
2845
|
|
|
|
|
|
|
->SequenceEqual(LTSV::LINQ->From([1, 2, 3])) # 0 (false) |
|
2846
|
|
|
|
|
|
|
|
|
2847
|
|
|
|
|
|
|
# Case-insensitive comparison |
|
2848
|
|
|
|
|
|
|
$seq1->SequenceEqual($seq2, sub { lc($_[0]) eq lc($_[1]) }) |
|
2849
|
|
|
|
|
|
|
|
|
2850
|
|
|
|
|
|
|
B<Note:> Order matters. Both content AND order must match. |
|
2851
|
|
|
|
|
|
|
|
|
2852
|
|
|
|
|
|
|
=back |
|
2853
|
|
|
|
|
|
|
|
|
2854
|
|
|
|
|
|
|
=head2 Element Access Methods |
|
2855
|
|
|
|
|
|
|
|
|
2856
|
|
|
|
|
|
|
=over 4 |
|
2857
|
|
|
|
|
|
|
|
|
2858
|
|
|
|
|
|
|
=item B<First([$predicate])> |
|
2859
|
|
|
|
|
|
|
|
|
2860
|
|
|
|
|
|
|
Get first element. Dies if empty. |
|
2861
|
|
|
|
|
|
|
|
|
2862
|
|
|
|
|
|
|
->First() |
|
2863
|
|
|
|
|
|
|
->First(sub { $_[0]{status} == 404 }) |
|
2864
|
|
|
|
|
|
|
|
|
2865
|
|
|
|
|
|
|
=item B<FirstOrDefault([$predicate [, $default]])> |
|
2866
|
|
|
|
|
|
|
|
|
2867
|
|
|
|
|
|
|
Get first element or default value. |
|
2868
|
|
|
|
|
|
|
|
|
2869
|
|
|
|
|
|
|
->FirstOrDefault(undef, {}) |
|
2870
|
|
|
|
|
|
|
|
|
2871
|
|
|
|
|
|
|
=item B<Last()> |
|
2872
|
|
|
|
|
|
|
|
|
2873
|
|
|
|
|
|
|
Get last element. Dies if empty. |
|
2874
|
|
|
|
|
|
|
|
|
2875
|
|
|
|
|
|
|
->Last() |
|
2876
|
|
|
|
|
|
|
|
|
2877
|
|
|
|
|
|
|
=item B<LastOrDefault([$predicate [, $default]])> |
|
2878
|
|
|
|
|
|
|
|
|
2879
|
|
|
|
|
|
|
Get last element or default value. Never throws exceptions. |
|
2880
|
|
|
|
|
|
|
|
|
2881
|
|
|
|
|
|
|
B<Parameters:> |
|
2882
|
|
|
|
|
|
|
|
|
2883
|
|
|
|
|
|
|
=over 4 |
|
2884
|
|
|
|
|
|
|
|
|
2885
|
|
|
|
|
|
|
=item * C<$predicate> - (Optional) Condition |
|
2886
|
|
|
|
|
|
|
|
|
2887
|
|
|
|
|
|
|
=item * C<$default> - (Optional) Value to return when no element is found. |
|
2888
|
|
|
|
|
|
|
Defaults to C<undef> when omitted. |
|
2889
|
|
|
|
|
|
|
|
|
2890
|
|
|
|
|
|
|
=back |
|
2891
|
|
|
|
|
|
|
|
|
2892
|
|
|
|
|
|
|
B<Returns:> Last element or C<$default> |
|
2893
|
|
|
|
|
|
|
|
|
2894
|
|
|
|
|
|
|
B<Examples:> |
|
2895
|
|
|
|
|
|
|
|
|
2896
|
|
|
|
|
|
|
# Get last element (undef if empty) |
|
2897
|
|
|
|
|
|
|
->LastOrDefault() |
|
2898
|
|
|
|
|
|
|
|
|
2899
|
|
|
|
|
|
|
# Specify a default value |
|
2900
|
|
|
|
|
|
|
LTSV::LINQ->From([])->LastOrDefault(undef, 0) # 0 |
|
2901
|
|
|
|
|
|
|
|
|
2902
|
|
|
|
|
|
|
# With predicate and default |
|
2903
|
|
|
|
|
|
|
->LastOrDefault(sub { $_[0] % 2 == 0 }, -1) # Last even, or -1 |
|
2904
|
|
|
|
|
|
|
|
|
2905
|
|
|
|
|
|
|
=item B<Single([$predicate])> |
|
2906
|
|
|
|
|
|
|
|
|
2907
|
|
|
|
|
|
|
Get the only element. Dies if sequence has zero or more than one element. |
|
2908
|
|
|
|
|
|
|
|
|
2909
|
|
|
|
|
|
|
B<Parameters:> |
|
2910
|
|
|
|
|
|
|
|
|
2911
|
|
|
|
|
|
|
=over 4 |
|
2912
|
|
|
|
|
|
|
|
|
2913
|
|
|
|
|
|
|
=item * C<$predicate> - (Optional) Condition |
|
2914
|
|
|
|
|
|
|
|
|
2915
|
|
|
|
|
|
|
=back |
|
2916
|
|
|
|
|
|
|
|
|
2917
|
|
|
|
|
|
|
B<Returns:> Single element |
|
2918
|
|
|
|
|
|
|
|
|
2919
|
|
|
|
|
|
|
B<Exceptions:> |
|
2920
|
|
|
|
|
|
|
- Dies with "Sequence contains no elements" if empty |
|
2921
|
|
|
|
|
|
|
- Dies with "Sequence contains more than one element" if multiple elements |
|
2922
|
|
|
|
|
|
|
|
|
2923
|
|
|
|
|
|
|
B<.NET LINQ Compatibility:> Exception messages match .NET LINQ behavior exactly. |
|
2924
|
|
|
|
|
|
|
|
|
2925
|
|
|
|
|
|
|
B<Performance:> Uses lazy evaluation. Stops iterating immediately when |
|
2926
|
|
|
|
|
|
|
second element is found (does not load entire sequence). |
|
2927
|
|
|
|
|
|
|
|
|
2928
|
|
|
|
|
|
|
B<Examples:> |
|
2929
|
|
|
|
|
|
|
|
|
2930
|
|
|
|
|
|
|
# Exactly one element |
|
2931
|
|
|
|
|
|
|
LTSV::LINQ->From([5])->Single() # 5 |
|
2932
|
|
|
|
|
|
|
|
|
2933
|
|
|
|
|
|
|
# With predicate |
|
2934
|
|
|
|
|
|
|
->Single(sub { $_[0] > 10 }) |
|
2935
|
|
|
|
|
|
|
|
|
2936
|
|
|
|
|
|
|
# Memory-efficient: stops at 2nd element |
|
2937
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV("huge.log")->Single(sub { $_[0]{id} eq '999' }) |
|
2938
|
|
|
|
|
|
|
|
|
2939
|
|
|
|
|
|
|
=item B<SingleOrDefault()> |
|
2940
|
|
|
|
|
|
|
|
|
2941
|
|
|
|
|
|
|
Get the only element, or undef if zero or multiple elements. |
|
2942
|
|
|
|
|
|
|
|
|
2943
|
|
|
|
|
|
|
B<Returns:> Single element or undef (if 0 or 2+ elements) |
|
2944
|
|
|
|
|
|
|
|
|
2945
|
|
|
|
|
|
|
B<.NET LINQ Compatibility:> B<Note:> .NET's C<SingleOrDefault> throws |
|
2946
|
|
|
|
|
|
|
C<InvalidOperationException> when the sequence contains more than one |
|
2947
|
|
|
|
|
|
|
element. LTSV::LINQ returns C<undef> in that case instead of throwing, |
|
2948
|
|
|
|
|
|
|
which makes it more convenient for Perl code that checks return values. |
|
2949
|
|
|
|
|
|
|
If you require the strict .NET behaviour (exception on multiple elements), |
|
2950
|
|
|
|
|
|
|
use C<Single> wrapped in C<eval>. |
|
2951
|
|
|
|
|
|
|
|
|
2952
|
|
|
|
|
|
|
B<Performance:> Uses lazy evaluation. Memory-efficient. |
|
2953
|
|
|
|
|
|
|
|
|
2954
|
|
|
|
|
|
|
B<Examples:> |
|
2955
|
|
|
|
|
|
|
|
|
2956
|
|
|
|
|
|
|
LTSV::LINQ->From([5])->SingleOrDefault() # 5 |
|
2957
|
|
|
|
|
|
|
LTSV::LINQ->From([])->SingleOrDefault() # undef (empty) |
|
2958
|
|
|
|
|
|
|
LTSV::LINQ->From([1,2])->SingleOrDefault() # undef (multiple) |
|
2959
|
|
|
|
|
|
|
|
|
2960
|
|
|
|
|
|
|
=item B<ElementAt($index)> |
|
2961
|
|
|
|
|
|
|
|
|
2962
|
|
|
|
|
|
|
Get element at specified index. Dies if out of range. |
|
2963
|
|
|
|
|
|
|
|
|
2964
|
|
|
|
|
|
|
B<Parameters:> |
|
2965
|
|
|
|
|
|
|
|
|
2966
|
|
|
|
|
|
|
=over 4 |
|
2967
|
|
|
|
|
|
|
|
|
2968
|
|
|
|
|
|
|
=item * C<$index> - Zero-based index |
|
2969
|
|
|
|
|
|
|
|
|
2970
|
|
|
|
|
|
|
=back |
|
2971
|
|
|
|
|
|
|
|
|
2972
|
|
|
|
|
|
|
B<Returns:> Element at index |
|
2973
|
|
|
|
|
|
|
|
|
2974
|
|
|
|
|
|
|
B<Exceptions:> Dies if index is negative or out of range |
|
2975
|
|
|
|
|
|
|
|
|
2976
|
|
|
|
|
|
|
B<Performance:> Uses lazy evaluation (iterator-based). Does NOT load |
|
2977
|
|
|
|
|
|
|
entire sequence into memory. Stops iterating once target index is reached. |
|
2978
|
|
|
|
|
|
|
|
|
2979
|
|
|
|
|
|
|
B<Examples:> |
|
2980
|
|
|
|
|
|
|
|
|
2981
|
|
|
|
|
|
|
->ElementAt(0) # First element |
|
2982
|
|
|
|
|
|
|
->ElementAt(2) # Third element |
|
2983
|
|
|
|
|
|
|
|
|
2984
|
|
|
|
|
|
|
# Memory-efficient for large files |
|
2985
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV("huge.log")->ElementAt(10) # Reads only 11 lines |
|
2986
|
|
|
|
|
|
|
|
|
2987
|
|
|
|
|
|
|
=item B<ElementAtOrDefault($index)> |
|
2988
|
|
|
|
|
|
|
|
|
2989
|
|
|
|
|
|
|
Get element at index, or undef if out of range. |
|
2990
|
|
|
|
|
|
|
|
|
2991
|
|
|
|
|
|
|
B<Returns:> Element or undef |
|
2992
|
|
|
|
|
|
|
|
|
2993
|
|
|
|
|
|
|
B<Performance:> Uses lazy evaluation (iterator-based). Memory-efficient. |
|
2994
|
|
|
|
|
|
|
|
|
2995
|
|
|
|
|
|
|
B<Examples:> |
|
2996
|
|
|
|
|
|
|
|
|
2997
|
|
|
|
|
|
|
->ElementAtOrDefault(0) # First element |
|
2998
|
|
|
|
|
|
|
->ElementAtOrDefault(99) # undef if out of range |
|
2999
|
|
|
|
|
|
|
|
|
3000
|
|
|
|
|
|
|
=back |
|
3001
|
|
|
|
|
|
|
|
|
3002
|
|
|
|
|
|
|
=head2 Aggregation Methods |
|
3003
|
|
|
|
|
|
|
|
|
3004
|
|
|
|
|
|
|
All aggregation methods are B<terminal operations> - they consume the |
|
3005
|
|
|
|
|
|
|
entire sequence and return a scalar value. |
|
3006
|
|
|
|
|
|
|
|
|
3007
|
|
|
|
|
|
|
=over 4 |
|
3008
|
|
|
|
|
|
|
|
|
3009
|
|
|
|
|
|
|
=item B<Count([$predicate])> |
|
3010
|
|
|
|
|
|
|
|
|
3011
|
|
|
|
|
|
|
Count the number of elements. |
|
3012
|
|
|
|
|
|
|
|
|
3013
|
|
|
|
|
|
|
B<Parameters:> |
|
3014
|
|
|
|
|
|
|
|
|
3015
|
|
|
|
|
|
|
=over 4 |
|
3016
|
|
|
|
|
|
|
|
|
3017
|
|
|
|
|
|
|
=item * C<$predicate> - (Optional) Code reference to filter elements |
|
3018
|
|
|
|
|
|
|
|
|
3019
|
|
|
|
|
|
|
=back |
|
3020
|
|
|
|
|
|
|
|
|
3021
|
|
|
|
|
|
|
B<Returns:> Integer count |
|
3022
|
|
|
|
|
|
|
|
|
3023
|
|
|
|
|
|
|
B<Examples:> |
|
3024
|
|
|
|
|
|
|
|
|
3025
|
|
|
|
|
|
|
# Count all |
|
3026
|
|
|
|
|
|
|
->Count() # 1000 |
|
3027
|
|
|
|
|
|
|
|
|
3028
|
|
|
|
|
|
|
# Count with condition |
|
3029
|
|
|
|
|
|
|
->Count(sub { $_[0]{status} >= 400 }) # 42 |
|
3030
|
|
|
|
|
|
|
|
|
3031
|
|
|
|
|
|
|
# Equivalent to |
|
3032
|
|
|
|
|
|
|
->Where(sub { $_[0]{status} >= 400 })->Count() |
|
3033
|
|
|
|
|
|
|
|
|
3034
|
|
|
|
|
|
|
B<Performance:> O(n) - must iterate entire sequence |
|
3035
|
|
|
|
|
|
|
|
|
3036
|
|
|
|
|
|
|
=item B<Sum([$selector])> |
|
3037
|
|
|
|
|
|
|
|
|
3038
|
|
|
|
|
|
|
Calculate sum of numeric values. |
|
3039
|
|
|
|
|
|
|
|
|
3040
|
|
|
|
|
|
|
B<Parameters:> |
|
3041
|
|
|
|
|
|
|
|
|
3042
|
|
|
|
|
|
|
=over 4 |
|
3043
|
|
|
|
|
|
|
|
|
3044
|
|
|
|
|
|
|
=item * C<$selector> - (Optional) Code reference to extract value. |
|
3045
|
|
|
|
|
|
|
Default: identity function |
|
3046
|
|
|
|
|
|
|
|
|
3047
|
|
|
|
|
|
|
=back |
|
3048
|
|
|
|
|
|
|
|
|
3049
|
|
|
|
|
|
|
B<Returns:> Numeric sum |
|
3050
|
|
|
|
|
|
|
|
|
3051
|
|
|
|
|
|
|
B<Examples:> |
|
3052
|
|
|
|
|
|
|
|
|
3053
|
|
|
|
|
|
|
# Sum of values |
|
3054
|
|
|
|
|
|
|
LTSV::LINQ->From([1, 2, 3, 4, 5])->Sum() # 15 |
|
3055
|
|
|
|
|
|
|
|
|
3056
|
|
|
|
|
|
|
# Sum of field |
|
3057
|
|
|
|
|
|
|
->Sum(sub { $_[0]{bytes} }) |
|
3058
|
|
|
|
|
|
|
|
|
3059
|
|
|
|
|
|
|
# Sum with transformation |
|
3060
|
|
|
|
|
|
|
->Sum(sub { $_[0]{price} * $_[0]{quantity} }) |
|
3061
|
|
|
|
|
|
|
|
|
3062
|
|
|
|
|
|
|
B<Note:> Non-numeric values may produce warnings. Use numeric context. |
|
3063
|
|
|
|
|
|
|
|
|
3064
|
|
|
|
|
|
|
B<Empty sequence:> Returns C<0>. |
|
3065
|
|
|
|
|
|
|
|
|
3066
|
|
|
|
|
|
|
=item B<Min([$selector])> |
|
3067
|
|
|
|
|
|
|
|
|
3068
|
|
|
|
|
|
|
Find minimum value. |
|
3069
|
|
|
|
|
|
|
|
|
3070
|
|
|
|
|
|
|
B<Parameters:> |
|
3071
|
|
|
|
|
|
|
|
|
3072
|
|
|
|
|
|
|
=over 4 |
|
3073
|
|
|
|
|
|
|
|
|
3074
|
|
|
|
|
|
|
=item * C<$selector> - (Optional) Code reference to extract value |
|
3075
|
|
|
|
|
|
|
|
|
3076
|
|
|
|
|
|
|
=back |
|
3077
|
|
|
|
|
|
|
|
|
3078
|
|
|
|
|
|
|
B<Returns:> Minimum value, or C<undef> if sequence is empty. |
|
3079
|
|
|
|
|
|
|
|
|
3080
|
|
|
|
|
|
|
B<Examples:> |
|
3081
|
|
|
|
|
|
|
|
|
3082
|
|
|
|
|
|
|
# Minimum of values |
|
3083
|
|
|
|
|
|
|
->Min() |
|
3084
|
|
|
|
|
|
|
|
|
3085
|
|
|
|
|
|
|
# Minimum of field |
|
3086
|
|
|
|
|
|
|
->Min(sub { $_[0]{response_time} }) |
|
3087
|
|
|
|
|
|
|
|
|
3088
|
|
|
|
|
|
|
# Oldest timestamp |
|
3089
|
|
|
|
|
|
|
->Min(sub { $_[0]{timestamp} }) |
|
3090
|
|
|
|
|
|
|
|
|
3091
|
|
|
|
|
|
|
=item B<Max([$selector])> |
|
3092
|
|
|
|
|
|
|
|
|
3093
|
|
|
|
|
|
|
Find maximum value. |
|
3094
|
|
|
|
|
|
|
|
|
3095
|
|
|
|
|
|
|
B<Parameters:> |
|
3096
|
|
|
|
|
|
|
|
|
3097
|
|
|
|
|
|
|
=over 4 |
|
3098
|
|
|
|
|
|
|
|
|
3099
|
|
|
|
|
|
|
=item * C<$selector> - (Optional) Code reference to extract value |
|
3100
|
|
|
|
|
|
|
|
|
3101
|
|
|
|
|
|
|
=back |
|
3102
|
|
|
|
|
|
|
|
|
3103
|
|
|
|
|
|
|
B<Returns:> Maximum value, or C<undef> if sequence is empty. |
|
3104
|
|
|
|
|
|
|
|
|
3105
|
|
|
|
|
|
|
B<Examples:> |
|
3106
|
|
|
|
|
|
|
|
|
3107
|
|
|
|
|
|
|
# Maximum of values |
|
3108
|
|
|
|
|
|
|
->Max() |
|
3109
|
|
|
|
|
|
|
|
|
3110
|
|
|
|
|
|
|
# Maximum of field |
|
3111
|
|
|
|
|
|
|
->Max(sub { $_[0]{bytes} }) |
|
3112
|
|
|
|
|
|
|
|
|
3113
|
|
|
|
|
|
|
# Latest timestamp |
|
3114
|
|
|
|
|
|
|
->Max(sub { $_[0]{timestamp} }) |
|
3115
|
|
|
|
|
|
|
|
|
3116
|
|
|
|
|
|
|
=item B<Average([$selector])> |
|
3117
|
|
|
|
|
|
|
|
|
3118
|
|
|
|
|
|
|
Calculate arithmetic mean. |
|
3119
|
|
|
|
|
|
|
|
|
3120
|
|
|
|
|
|
|
B<Parameters:> |
|
3121
|
|
|
|
|
|
|
|
|
3122
|
|
|
|
|
|
|
=over 4 |
|
3123
|
|
|
|
|
|
|
|
|
3124
|
|
|
|
|
|
|
=item * C<$selector> - (Optional) Code reference to extract value |
|
3125
|
|
|
|
|
|
|
|
|
3126
|
|
|
|
|
|
|
=back |
|
3127
|
|
|
|
|
|
|
|
|
3128
|
|
|
|
|
|
|
B<Returns:> Numeric average (floating point) |
|
3129
|
|
|
|
|
|
|
|
|
3130
|
|
|
|
|
|
|
B<Examples:> |
|
3131
|
|
|
|
|
|
|
|
|
3132
|
|
|
|
|
|
|
# Average of values |
|
3133
|
|
|
|
|
|
|
LTSV::LINQ->From([1, 2, 3, 4, 5])->Average() # 3 |
|
3134
|
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
# Average of field |
|
3136
|
|
|
|
|
|
|
->Average(sub { $_[0]{bytes} }) |
|
3137
|
|
|
|
|
|
|
|
|
3138
|
|
|
|
|
|
|
# Average response time |
|
3139
|
|
|
|
|
|
|
->Average(sub { $_[0]{response_time} }) |
|
3140
|
|
|
|
|
|
|
|
|
3141
|
|
|
|
|
|
|
B<Empty sequence:> Dies with "Sequence contains no elements". |
|
3142
|
|
|
|
|
|
|
Unlike C<Sum> (returns 0) and C<Min>/C<Max> (return C<undef>), C<Average> |
|
3143
|
|
|
|
|
|
|
throws on an empty sequence. Use C<AverageOrDefault> to avoid the exception. |
|
3144
|
|
|
|
|
|
|
|
|
3145
|
|
|
|
|
|
|
B<Note:> Returns floating point. Use C<int()> for integer result. |
|
3146
|
|
|
|
|
|
|
|
|
3147
|
|
|
|
|
|
|
=item B<AverageOrDefault([$selector])> |
|
3148
|
|
|
|
|
|
|
|
|
3149
|
|
|
|
|
|
|
Calculate arithmetic mean, or return undef if sequence is empty. |
|
3150
|
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
B<Parameters:> |
|
3152
|
|
|
|
|
|
|
|
|
3153
|
|
|
|
|
|
|
=over 4 |
|
3154
|
|
|
|
|
|
|
|
|
3155
|
|
|
|
|
|
|
=item * C<$selector> - (Optional) Code reference to extract value |
|
3156
|
|
|
|
|
|
|
|
|
3157
|
|
|
|
|
|
|
=back |
|
3158
|
|
|
|
|
|
|
|
|
3159
|
|
|
|
|
|
|
B<Returns:> Numeric average (floating point), or undef if empty |
|
3160
|
|
|
|
|
|
|
|
|
3161
|
|
|
|
|
|
|
B<Examples:> |
|
3162
|
|
|
|
|
|
|
|
|
3163
|
|
|
|
|
|
|
# Safe average - returns undef for empty sequence |
|
3164
|
|
|
|
|
|
|
my @empty = (); |
|
3165
|
|
|
|
|
|
|
my $avg = LTSV::LINQ->From(\@empty)->AverageOrDefault(); # undef |
|
3166
|
|
|
|
|
|
|
|
|
3167
|
|
|
|
|
|
|
# With data |
|
3168
|
|
|
|
|
|
|
LTSV::LINQ->From([1, 2, 3])->AverageOrDefault(); # 2 |
|
3169
|
|
|
|
|
|
|
|
|
3170
|
|
|
|
|
|
|
# With selector |
|
3171
|
|
|
|
|
|
|
->AverageOrDefault(sub { $_[0]{value} }) |
|
3172
|
|
|
|
|
|
|
|
|
3173
|
|
|
|
|
|
|
B<Note:> Unlike Average(), this method never throws an exception. |
|
3174
|
|
|
|
|
|
|
|
|
3175
|
|
|
|
|
|
|
=item B<Aggregate([$seed,] $func [, $result_selector])> |
|
3176
|
|
|
|
|
|
|
|
|
3177
|
|
|
|
|
|
|
Apply an accumulator function over a sequence. |
|
3178
|
|
|
|
|
|
|
|
|
3179
|
|
|
|
|
|
|
B<Signatures:> |
|
3180
|
|
|
|
|
|
|
|
|
3181
|
|
|
|
|
|
|
=over 4 |
|
3182
|
|
|
|
|
|
|
|
|
3183
|
|
|
|
|
|
|
=item * C<Aggregate($func)> - Use first element as seed |
|
3184
|
|
|
|
|
|
|
|
|
3185
|
|
|
|
|
|
|
=item * C<Aggregate($seed, $func)> - Explicit seed value |
|
3186
|
|
|
|
|
|
|
|
|
3187
|
|
|
|
|
|
|
=item * C<Aggregate($seed, $func, $result_selector)> - Transform result |
|
3188
|
|
|
|
|
|
|
|
|
3189
|
|
|
|
|
|
|
=back |
|
3190
|
|
|
|
|
|
|
|
|
3191
|
|
|
|
|
|
|
B<Parameters:> |
|
3192
|
|
|
|
|
|
|
|
|
3193
|
|
|
|
|
|
|
=over 4 |
|
3194
|
|
|
|
|
|
|
|
|
3195
|
|
|
|
|
|
|
=item * C<$seed> - Initial accumulator value (optional for first signature) |
|
3196
|
|
|
|
|
|
|
|
|
3197
|
|
|
|
|
|
|
=item * C<$func> - Code reference: ($accumulator, $element) -> $new_accumulator |
|
3198
|
|
|
|
|
|
|
|
|
3199
|
|
|
|
|
|
|
=item * C<$result_selector> - (Optional) Transform final result |
|
3200
|
|
|
|
|
|
|
|
|
3201
|
|
|
|
|
|
|
=back |
|
3202
|
|
|
|
|
|
|
|
|
3203
|
|
|
|
|
|
|
B<Returns:> Accumulated value |
|
3204
|
|
|
|
|
|
|
|
|
3205
|
|
|
|
|
|
|
B<Examples:> |
|
3206
|
|
|
|
|
|
|
|
|
3207
|
|
|
|
|
|
|
# Sum (without seed) |
|
3208
|
|
|
|
|
|
|
LTSV::LINQ->From([1,2,3,4])->Aggregate(sub { $_[0] + $_[1] }) # 10 |
|
3209
|
|
|
|
|
|
|
|
|
3210
|
|
|
|
|
|
|
# Product (with seed) |
|
3211
|
|
|
|
|
|
|
LTSV::LINQ->From([2,3,4])->Aggregate(1, sub { $_[0] * $_[1] }) # 24 |
|
3212
|
|
|
|
|
|
|
|
|
3213
|
|
|
|
|
|
|
# Concatenate strings |
|
3214
|
|
|
|
|
|
|
LTSV::LINQ->From(['a','b','c']) |
|
3215
|
|
|
|
|
|
|
->Aggregate('', sub { $_[0] ? "$_[0],$_[1]" : $_[1] }) # 'a,b,c' |
|
3216
|
|
|
|
|
|
|
|
|
3217
|
|
|
|
|
|
|
# With result selector |
|
3218
|
|
|
|
|
|
|
LTSV::LINQ->From([1,2,3]) |
|
3219
|
|
|
|
|
|
|
->Aggregate(0, |
|
3220
|
|
|
|
|
|
|
sub { $_[0] + $_[1] }, # accumulate |
|
3221
|
|
|
|
|
|
|
sub { "Sum: $_[0]" }) # transform result |
|
3222
|
|
|
|
|
|
|
# "Sum: 6" |
|
3223
|
|
|
|
|
|
|
|
|
3224
|
|
|
|
|
|
|
# Build complex structure |
|
3225
|
|
|
|
|
|
|
->Aggregate([], sub { |
|
3226
|
|
|
|
|
|
|
my($list, $item) = @_; |
|
3227
|
|
|
|
|
|
|
push @$list, uc($item); |
|
3228
|
|
|
|
|
|
|
return $list; |
|
3229
|
|
|
|
|
|
|
}) |
|
3230
|
|
|
|
|
|
|
|
|
3231
|
|
|
|
|
|
|
B<.NET LINQ Compatibility:> Supports all three .NET signatures. |
|
3232
|
|
|
|
|
|
|
|
|
3233
|
|
|
|
|
|
|
=back |
|
3234
|
|
|
|
|
|
|
|
|
3235
|
|
|
|
|
|
|
=head2 Conversion Methods |
|
3236
|
|
|
|
|
|
|
|
|
3237
|
|
|
|
|
|
|
=over 4 |
|
3238
|
|
|
|
|
|
|
|
|
3239
|
|
|
|
|
|
|
=item B<ToArray()> |
|
3240
|
|
|
|
|
|
|
|
|
3241
|
|
|
|
|
|
|
Convert to array. |
|
3242
|
|
|
|
|
|
|
|
|
3243
|
|
|
|
|
|
|
my @array = $query->ToArray(); |
|
3244
|
|
|
|
|
|
|
|
|
3245
|
|
|
|
|
|
|
=item B<ToList()> |
|
3246
|
|
|
|
|
|
|
|
|
3247
|
|
|
|
|
|
|
Convert to array reference. |
|
3248
|
|
|
|
|
|
|
|
|
3249
|
|
|
|
|
|
|
my $arrayref = $query->ToList(); |
|
3250
|
|
|
|
|
|
|
|
|
3251
|
|
|
|
|
|
|
=item B<ToDictionary($key_selector [, $value_selector])> |
|
3252
|
|
|
|
|
|
|
|
|
3253
|
|
|
|
|
|
|
Convert sequence to hash reference with unique keys. |
|
3254
|
|
|
|
|
|
|
|
|
3255
|
|
|
|
|
|
|
B<Parameters:> |
|
3256
|
|
|
|
|
|
|
|
|
3257
|
|
|
|
|
|
|
=over 4 |
|
3258
|
|
|
|
|
|
|
|
|
3259
|
|
|
|
|
|
|
=item * C<$key_selector> - Function to extract key from element |
|
3260
|
|
|
|
|
|
|
|
|
3261
|
|
|
|
|
|
|
=item * C<$value_selector> - (Optional) Function to extract value, defaults to element itself |
|
3262
|
|
|
|
|
|
|
|
|
3263
|
|
|
|
|
|
|
=back |
|
3264
|
|
|
|
|
|
|
|
|
3265
|
|
|
|
|
|
|
B<Returns:> Hash reference |
|
3266
|
|
|
|
|
|
|
|
|
3267
|
|
|
|
|
|
|
B<Examples:> |
|
3268
|
|
|
|
|
|
|
|
|
3269
|
|
|
|
|
|
|
# ID to name mapping |
|
3270
|
|
|
|
|
|
|
my $users = LTSV::LINQ->From([ |
|
3271
|
|
|
|
|
|
|
{id => 1, name => 'Alice'}, |
|
3272
|
|
|
|
|
|
|
{id => 2, name => 'Bob'} |
|
3273
|
|
|
|
|
|
|
]); |
|
3274
|
|
|
|
|
|
|
|
|
3275
|
|
|
|
|
|
|
my $dict = $users->ToDictionary( |
|
3276
|
|
|
|
|
|
|
sub { $_[0]{id} }, |
|
3277
|
|
|
|
|
|
|
sub { $_[0]{name} } |
|
3278
|
|
|
|
|
|
|
); |
|
3279
|
|
|
|
|
|
|
# {1 => 'Alice', 2 => 'Bob'} |
|
3280
|
|
|
|
|
|
|
|
|
3281
|
|
|
|
|
|
|
# Without value selector (stores entire element) |
|
3282
|
|
|
|
|
|
|
my $dict = $users->ToDictionary(sub { $_[0]{id} }); |
|
3283
|
|
|
|
|
|
|
# {1 => {id => 1, name => 'Alice'}, 2 => {id => 2, name => 'Bob'}} |
|
3284
|
|
|
|
|
|
|
|
|
3285
|
|
|
|
|
|
|
# Quick lookup table |
|
3286
|
|
|
|
|
|
|
my $status_codes = LTSV::LINQ->FromLTSV('access.log') |
|
3287
|
|
|
|
|
|
|
->Select(sub { $_[0]{status} }) |
|
3288
|
|
|
|
|
|
|
->Distinct() |
|
3289
|
|
|
|
|
|
|
->ToDictionary(sub { $_[0] }, sub { 1 }); |
|
3290
|
|
|
|
|
|
|
|
|
3291
|
|
|
|
|
|
|
B<Note:> If duplicate keys exist, later values overwrite earlier ones. |
|
3292
|
|
|
|
|
|
|
|
|
3293
|
|
|
|
|
|
|
B<.NET LINQ Compatibility:> .NET's C<ToDictionary> throws C<ArgumentException> |
|
3294
|
|
|
|
|
|
|
on duplicate keys. This module silently overwrites with the later value, |
|
3295
|
|
|
|
|
|
|
following Perl hash semantics. Use C<ToLookup> if you need to preserve all |
|
3296
|
|
|
|
|
|
|
values for each key. |
|
3297
|
|
|
|
|
|
|
|
|
3298
|
|
|
|
|
|
|
=item B<ToLookup($key_selector [, $value_selector])> |
|
3299
|
|
|
|
|
|
|
|
|
3300
|
|
|
|
|
|
|
Convert sequence to hash reference with grouped values (multi-value dictionary). |
|
3301
|
|
|
|
|
|
|
|
|
3302
|
|
|
|
|
|
|
B<Parameters:> |
|
3303
|
|
|
|
|
|
|
|
|
3304
|
|
|
|
|
|
|
=over 4 |
|
3305
|
|
|
|
|
|
|
|
|
3306
|
|
|
|
|
|
|
=item * C<$key_selector> - Function to extract key from element |
|
3307
|
|
|
|
|
|
|
|
|
3308
|
|
|
|
|
|
|
=item * C<$value_selector> - (Optional) Function to extract value, defaults to element itself |
|
3309
|
|
|
|
|
|
|
|
|
3310
|
|
|
|
|
|
|
=back |
|
3311
|
|
|
|
|
|
|
|
|
3312
|
|
|
|
|
|
|
B<Returns:> Hash reference where values are array references |
|
3313
|
|
|
|
|
|
|
|
|
3314
|
|
|
|
|
|
|
B<Examples:> |
|
3315
|
|
|
|
|
|
|
|
|
3316
|
|
|
|
|
|
|
# Group orders by user ID |
|
3317
|
|
|
|
|
|
|
my $orders = LTSV::LINQ->From([ |
|
3318
|
|
|
|
|
|
|
{user_id => 1, product => 'Book'}, |
|
3319
|
|
|
|
|
|
|
{user_id => 1, product => 'Pen'}, |
|
3320
|
|
|
|
|
|
|
{user_id => 2, product => 'Notebook'} |
|
3321
|
|
|
|
|
|
|
]); |
|
3322
|
|
|
|
|
|
|
|
|
3323
|
|
|
|
|
|
|
my $lookup = $orders->ToLookup( |
|
3324
|
|
|
|
|
|
|
sub { $_[0]{user_id} }, |
|
3325
|
|
|
|
|
|
|
sub { $_[0]{product} } |
|
3326
|
|
|
|
|
|
|
); |
|
3327
|
|
|
|
|
|
|
# { |
|
3328
|
|
|
|
|
|
|
# 1 => ['Book', 'Pen'], |
|
3329
|
|
|
|
|
|
|
# 2 => ['Notebook'] |
|
3330
|
|
|
|
|
|
|
# } |
|
3331
|
|
|
|
|
|
|
|
|
3332
|
|
|
|
|
|
|
# Group LTSV by status code |
|
3333
|
|
|
|
|
|
|
my $by_status = LTSV::LINQ->FromLTSV('access.log') |
|
3334
|
|
|
|
|
|
|
->ToLookup(sub { $_[0]{status} }); |
|
3335
|
|
|
|
|
|
|
# { |
|
3336
|
|
|
|
|
|
|
# '200' => [{...}, {...}, ...], |
|
3337
|
|
|
|
|
|
|
# '404' => [{...}, ...], |
|
3338
|
|
|
|
|
|
|
# '500' => [{...}] |
|
3339
|
|
|
|
|
|
|
# } |
|
3340
|
|
|
|
|
|
|
|
|
3341
|
|
|
|
|
|
|
B<Note:> Unlike ToDictionary, this preserves all values for each key. |
|
3342
|
|
|
|
|
|
|
|
|
3343
|
|
|
|
|
|
|
=item B<DefaultIfEmpty([$default_value])> |
|
3344
|
|
|
|
|
|
|
|
|
3345
|
|
|
|
|
|
|
Return default value if sequence is empty, otherwise return the sequence. |
|
3346
|
|
|
|
|
|
|
|
|
3347
|
|
|
|
|
|
|
B<Parameters:> |
|
3348
|
|
|
|
|
|
|
|
|
3349
|
|
|
|
|
|
|
=over 4 |
|
3350
|
|
|
|
|
|
|
|
|
3351
|
|
|
|
|
|
|
=item * C<$default_value> - (Optional) Default value, defaults to undef |
|
3352
|
|
|
|
|
|
|
|
|
3353
|
|
|
|
|
|
|
=back |
|
3354
|
|
|
|
|
|
|
|
|
3355
|
|
|
|
|
|
|
B<Returns:> New query with default value if empty (lazy) |
|
3356
|
|
|
|
|
|
|
|
|
3357
|
|
|
|
|
|
|
B<Examples:> |
|
3358
|
|
|
|
|
|
|
|
|
3359
|
|
|
|
|
|
|
# Return 0 if empty |
|
3360
|
|
|
|
|
|
|
->DefaultIfEmpty(0)->ToArray() # (0) if empty, or original data |
|
3361
|
|
|
|
|
|
|
|
|
3362
|
|
|
|
|
|
|
# With undef default |
|
3363
|
|
|
|
|
|
|
->DefaultIfEmpty()->First() # undef if empty |
|
3364
|
|
|
|
|
|
|
|
|
3365
|
|
|
|
|
|
|
# Useful for left joins |
|
3366
|
|
|
|
|
|
|
->Where(condition)->DefaultIfEmpty({id => 0, name => 'None'}) |
|
3367
|
|
|
|
|
|
|
|
|
3368
|
|
|
|
|
|
|
B<Note:> This is useful for ensuring a sequence always has at least |
|
3369
|
|
|
|
|
|
|
one element. |
|
3370
|
|
|
|
|
|
|
|
|
3371
|
|
|
|
|
|
|
=item B<ToLTSV($filename)> |
|
3372
|
|
|
|
|
|
|
|
|
3373
|
|
|
|
|
|
|
Write to LTSV file. |
|
3374
|
|
|
|
|
|
|
|
|
3375
|
|
|
|
|
|
|
$query->ToLTSV("output.ltsv"); |
|
3376
|
|
|
|
|
|
|
|
|
3377
|
|
|
|
|
|
|
=back |
|
3378
|
|
|
|
|
|
|
|
|
3379
|
|
|
|
|
|
|
=head2 Utility Methods |
|
3380
|
|
|
|
|
|
|
|
|
3381
|
|
|
|
|
|
|
=over 4 |
|
3382
|
|
|
|
|
|
|
|
|
3383
|
|
|
|
|
|
|
=item B<ForEach($action)> |
|
3384
|
|
|
|
|
|
|
|
|
3385
|
|
|
|
|
|
|
Execute action for each element. |
|
3386
|
|
|
|
|
|
|
|
|
3387
|
|
|
|
|
|
|
$query->ForEach(sub { print $_[0]{url}, "\n" }); |
|
3388
|
|
|
|
|
|
|
|
|
3389
|
|
|
|
|
|
|
=back |
|
3390
|
|
|
|
|
|
|
|
|
3391
|
|
|
|
|
|
|
=head1 EXAMPLES |
|
3392
|
|
|
|
|
|
|
|
|
3393
|
|
|
|
|
|
|
=head2 Basic Filtering |
|
3394
|
|
|
|
|
|
|
|
|
3395
|
|
|
|
|
|
|
use LTSV::LINQ; |
|
3396
|
|
|
|
|
|
|
|
|
3397
|
|
|
|
|
|
|
# DSL syntax |
|
3398
|
|
|
|
|
|
|
my @successful = LTSV::LINQ->FromLTSV("access.log") |
|
3399
|
|
|
|
|
|
|
->Where(status => '200') |
|
3400
|
|
|
|
|
|
|
->ToArray(); |
|
3401
|
|
|
|
|
|
|
|
|
3402
|
|
|
|
|
|
|
# Code reference |
|
3403
|
|
|
|
|
|
|
my @errors = LTSV::LINQ->FromLTSV("access.log") |
|
3404
|
|
|
|
|
|
|
->Where(sub { $_[0]{status} >= 400 }) |
|
3405
|
|
|
|
|
|
|
->ToArray(); |
|
3406
|
|
|
|
|
|
|
|
|
3407
|
|
|
|
|
|
|
=head2 Aggregation |
|
3408
|
|
|
|
|
|
|
|
|
3409
|
|
|
|
|
|
|
# Count errors |
|
3410
|
|
|
|
|
|
|
my $error_count = LTSV::LINQ->FromLTSV("access.log") |
|
3411
|
|
|
|
|
|
|
->Where(sub { $_[0]{status} >= 400 }) |
|
3412
|
|
|
|
|
|
|
->Count(); |
|
3413
|
|
|
|
|
|
|
|
|
3414
|
|
|
|
|
|
|
# Average bytes for successful requests |
|
3415
|
|
|
|
|
|
|
my $avg_bytes = LTSV::LINQ->FromLTSV("access.log") |
|
3416
|
|
|
|
|
|
|
->Where(status => '200') |
|
3417
|
|
|
|
|
|
|
->Average(sub { $_[0]{bytes} }); |
|
3418
|
|
|
|
|
|
|
|
|
3419
|
|
|
|
|
|
|
print "Average bytes: $avg_bytes\n"; |
|
3420
|
|
|
|
|
|
|
|
|
3421
|
|
|
|
|
|
|
=head2 Grouping and Ordering |
|
3422
|
|
|
|
|
|
|
|
|
3423
|
|
|
|
|
|
|
# Top 10 URLs by request count |
|
3424
|
|
|
|
|
|
|
my @top_urls = LTSV::LINQ->FromLTSV("access.log") |
|
3425
|
|
|
|
|
|
|
->Where(sub { $_[0]{status} eq '200' }) |
|
3426
|
|
|
|
|
|
|
->GroupBy(sub { $_[0]{url} }) |
|
3427
|
|
|
|
|
|
|
->Select(sub { |
|
3428
|
|
|
|
|
|
|
my $g = shift; |
|
3429
|
|
|
|
|
|
|
return { |
|
3430
|
|
|
|
|
|
|
URL => $g->{Key}, |
|
3431
|
|
|
|
|
|
|
Count => scalar(@{$g->{Elements}}), |
|
3432
|
|
|
|
|
|
|
TotalBytes => LTSV::LINQ->From($g->{Elements}) |
|
3433
|
|
|
|
|
|
|
->Sum(sub { $_[0]{bytes} }) |
|
3434
|
|
|
|
|
|
|
}; |
|
3435
|
|
|
|
|
|
|
}) |
|
3436
|
|
|
|
|
|
|
->OrderByDescending(sub { $_[0]{Count} }) |
|
3437
|
|
|
|
|
|
|
->Take(10) |
|
3438
|
|
|
|
|
|
|
->ToArray(); |
|
3439
|
|
|
|
|
|
|
|
|
3440
|
|
|
|
|
|
|
for my $stat (@top_urls) { |
|
3441
|
|
|
|
|
|
|
printf "%5d requests - %s (%d bytes)\n", |
|
3442
|
|
|
|
|
|
|
$stat->{Count}, $stat->{URL}, $stat->{TotalBytes}; |
|
3443
|
|
|
|
|
|
|
} |
|
3444
|
|
|
|
|
|
|
|
|
3445
|
|
|
|
|
|
|
=head2 Complex Query Chain |
|
3446
|
|
|
|
|
|
|
|
|
3447
|
|
|
|
|
|
|
# Multi-step analysis |
|
3448
|
|
|
|
|
|
|
my @result = LTSV::LINQ->FromLTSV("access.log") |
|
3449
|
|
|
|
|
|
|
->Where(status => '200') # Filter successful |
|
3450
|
|
|
|
|
|
|
->Select(sub { $_[0]{bytes} }) # Extract bytes |
|
3451
|
|
|
|
|
|
|
->Where(sub { $_[0] > 1000 }) # Large responses only |
|
3452
|
|
|
|
|
|
|
->OrderByDescending(sub { $_[0] }) # Sort descending |
|
3453
|
|
|
|
|
|
|
->Take(100) # Top 100 |
|
3454
|
|
|
|
|
|
|
->ToArray(); |
|
3455
|
|
|
|
|
|
|
|
|
3456
|
|
|
|
|
|
|
print "Largest 100 successful responses:\n"; |
|
3457
|
|
|
|
|
|
|
print " ", join(", ", @result), "\n"; |
|
3458
|
|
|
|
|
|
|
|
|
3459
|
|
|
|
|
|
|
=head2 Lazy Processing of Large Files |
|
3460
|
|
|
|
|
|
|
|
|
3461
|
|
|
|
|
|
|
# Process huge file with constant memory |
|
3462
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV("huge.log") |
|
3463
|
|
|
|
|
|
|
->Where(sub { $_[0]{level} eq 'ERROR' }) |
|
3464
|
|
|
|
|
|
|
->ForEach(sub { |
|
3465
|
|
|
|
|
|
|
my $rec = shift; |
|
3466
|
|
|
|
|
|
|
print "ERROR at $rec->{time}: $rec->{message}\n"; |
|
3467
|
|
|
|
|
|
|
}); |
|
3468
|
|
|
|
|
|
|
|
|
3469
|
|
|
|
|
|
|
=head2 Quantifiers |
|
3470
|
|
|
|
|
|
|
|
|
3471
|
|
|
|
|
|
|
# Check if all requests are successful |
|
3472
|
|
|
|
|
|
|
my $all_ok = LTSV::LINQ->FromLTSV("access.log") |
|
3473
|
|
|
|
|
|
|
->All(sub { $_[0]{status} < 400 }); |
|
3474
|
|
|
|
|
|
|
|
|
3475
|
|
|
|
|
|
|
print $all_ok ? "All OK\n" : "Some errors\n"; |
|
3476
|
|
|
|
|
|
|
|
|
3477
|
|
|
|
|
|
|
# Check if any errors exist |
|
3478
|
|
|
|
|
|
|
my $has_errors = LTSV::LINQ->FromLTSV("access.log") |
|
3479
|
|
|
|
|
|
|
->Any(sub { $_[0]{status} >= 500 }); |
|
3480
|
|
|
|
|
|
|
|
|
3481
|
|
|
|
|
|
|
print "Server errors detected\n" if $has_errors; |
|
3482
|
|
|
|
|
|
|
|
|
3483
|
|
|
|
|
|
|
=head2 Data Transformation |
|
3484
|
|
|
|
|
|
|
|
|
3485
|
|
|
|
|
|
|
# Read LTSV, transform, write back |
|
3486
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV("input.ltsv") |
|
3487
|
|
|
|
|
|
|
->Select(sub { |
|
3488
|
|
|
|
|
|
|
my $rec = shift; |
|
3489
|
|
|
|
|
|
|
return { |
|
3490
|
|
|
|
|
|
|
%$rec, |
|
3491
|
|
|
|
|
|
|
processed => 1, |
|
3492
|
|
|
|
|
|
|
timestamp => time(), |
|
3493
|
|
|
|
|
|
|
}; |
|
3494
|
|
|
|
|
|
|
}) |
|
3495
|
|
|
|
|
|
|
->ToLTSV("output.ltsv"); |
|
3496
|
|
|
|
|
|
|
|
|
3497
|
|
|
|
|
|
|
=head2 Working with Arrays |
|
3498
|
|
|
|
|
|
|
|
|
3499
|
|
|
|
|
|
|
# Query in-memory data |
|
3500
|
|
|
|
|
|
|
my @data = ( |
|
3501
|
|
|
|
|
|
|
{name => 'Alice', age => 30, city => 'Tokyo'}, |
|
3502
|
|
|
|
|
|
|
{name => 'Bob', age => 25, city => 'Osaka'}, |
|
3503
|
|
|
|
|
|
|
{name => 'Carol', age => 35, city => 'Tokyo'}, |
|
3504
|
|
|
|
|
|
|
); |
|
3505
|
|
|
|
|
|
|
|
|
3506
|
|
|
|
|
|
|
my @tokyo_residents = LTSV::LINQ->From(\@data) |
|
3507
|
|
|
|
|
|
|
->Where(city => 'Tokyo') |
|
3508
|
|
|
|
|
|
|
->OrderBy(sub { $_[0]{age} }) |
|
3509
|
|
|
|
|
|
|
->ToArray(); |
|
3510
|
|
|
|
|
|
|
|
|
3511
|
|
|
|
|
|
|
=head1 FEATURES |
|
3512
|
|
|
|
|
|
|
|
|
3513
|
|
|
|
|
|
|
=head2 Lazy Evaluation |
|
3514
|
|
|
|
|
|
|
|
|
3515
|
|
|
|
|
|
|
All query operations use lazy evaluation via iterators. Data is |
|
3516
|
|
|
|
|
|
|
processed on-demand, not all at once. |
|
3517
|
|
|
|
|
|
|
|
|
3518
|
|
|
|
|
|
|
# Only reads 10 records from file |
|
3519
|
|
|
|
|
|
|
my @top10 = LTSV::LINQ->FromLTSV("huge.log") |
|
3520
|
|
|
|
|
|
|
->Take(10) |
|
3521
|
|
|
|
|
|
|
->ToArray(); |
|
3522
|
|
|
|
|
|
|
|
|
3523
|
|
|
|
|
|
|
=head2 Method Chaining |
|
3524
|
|
|
|
|
|
|
|
|
3525
|
|
|
|
|
|
|
All methods (except terminal operations like ToArray) return a new |
|
3526
|
|
|
|
|
|
|
query object, enabling fluent method chaining. |
|
3527
|
|
|
|
|
|
|
|
|
3528
|
|
|
|
|
|
|
->Where(...)->Select(...)->OrderBy(...)->Take(10) |
|
3529
|
|
|
|
|
|
|
|
|
3530
|
|
|
|
|
|
|
=head2 DSL Syntax |
|
3531
|
|
|
|
|
|
|
|
|
3532
|
|
|
|
|
|
|
Simple key-value filtering without code references. |
|
3533
|
|
|
|
|
|
|
|
|
3534
|
|
|
|
|
|
|
# Readable and concise |
|
3535
|
|
|
|
|
|
|
->Where(status => '200', method => 'GET') |
|
3536
|
|
|
|
|
|
|
|
|
3537
|
|
|
|
|
|
|
# Instead of |
|
3538
|
|
|
|
|
|
|
->Where(sub { $_[0]{status} eq '200' && $_[0]{method} eq 'GET' }) |
|
3539
|
|
|
|
|
|
|
|
|
3540
|
|
|
|
|
|
|
=head1 ARCHITECTURE |
|
3541
|
|
|
|
|
|
|
|
|
3542
|
|
|
|
|
|
|
=head2 Iterator-Based Design |
|
3543
|
|
|
|
|
|
|
|
|
3544
|
|
|
|
|
|
|
LTSV::LINQ uses an iterator-based architecture for lazy evaluation. |
|
3545
|
|
|
|
|
|
|
|
|
3546
|
|
|
|
|
|
|
B |
|
3547
|
|
|
|
|
|
|
|
|
3548
|
|
|
|
|
|
|
Each query operation returns a new query object wrapping an iterator |
|
3549
|
|
|
|
|
|
|
(a code reference that produces one element per call). |
|
3550
|
|
|
|
|
|
|
|
|
3551
|
|
|
|
|
|
|
my $iter = sub { |
|
3552
|
|
|
|
|
|
|
# Read next element |
|
3553
|
|
|
|
|
|
|
# Apply transformation |
|
3554
|
|
|
|
|
|
|
# Return element or undef |
|
3555
|
|
|
|
|
|
|
}; |
|
3556
|
|
|
|
|
|
|
|
|
3557
|
|
|
|
|
|
|
my $query = LTSV::LINQ->new($iter); |
|
3558
|
|
|
|
|
|
|
|
|
3559
|
|
|
|
|
|
|
B |
|
3560
|
|
|
|
|
|
|
|
|
3561
|
|
|
|
|
|
|
=over 4 |
|
3562
|
|
|
|
|
|
|
|
|
3563
|
|
|
|
|
|
|
=item * B<Memory efficiency> - O(1) memory for most operations |
|
3564
|
|
|
|
|
|
|
|
|
3565
|
|
|
|
|
|
|
=item * B<Lazy evaluation> - Elements computed on-demand |
|
3566
|
|
|
|
|
|
|
|
|
3567
|
|
|
|
|
|
|
=item * B<Composability> - Iterators chain naturally |
|
3568
|
|
|
|
|
|
|
|
|
3569
|
|
|
|
|
|
|
=item * B<Early termination> - Stop processing when done |
|
3570
|
|
|
|
|
|
|
|
|
3571
|
|
|
|
|
|
|
=back |
|
3572
|
|
|
|
|
|
|
|
|
3573
|
|
|
|
|
|
|
=head2 Method Categories |
|
3574
|
|
|
|
|
|
|
|
|
3575
|
|
|
|
|
|
|
The table below shows, for every method, whether it is lazy or eager, |
|
3576
|
|
|
|
|
|
|
and what it returns. Knowing this prevents surprises about memory use |
|
3577
|
|
|
|
|
|
|
and iterator consumption. |
|
3578
|
|
|
|
|
|
|
|
|
3579
|
|
|
|
|
|
|
Method Category Evaluation Returns |
|
3580
|
|
|
|
|
|
|
------ -------- ---------- ------- |
|
3581
|
|
|
|
|
|
|
From Source Lazy (factory) Query |
|
3582
|
|
|
|
|
|
|
FromLTSV Source Lazy (factory) Query |
|
3583
|
|
|
|
|
|
|
Range Source Lazy Query |
|
3584
|
|
|
|
|
|
|
Empty Source Lazy Query |
|
3585
|
|
|
|
|
|
|
Repeat Source Lazy Query |
|
3586
|
|
|
|
|
|
|
Where Filter Lazy Query |
|
3587
|
|
|
|
|
|
|
Select Projection Lazy Query |
|
3588
|
|
|
|
|
|
|
SelectMany Projection Lazy Query |
|
3589
|
|
|
|
|
|
|
Concat Concatenation Lazy Query |
|
3590
|
|
|
|
|
|
|
Zip Concatenation Lazy Query |
|
3591
|
|
|
|
|
|
|
Take Partitioning Lazy Query |
|
3592
|
|
|
|
|
|
|
Skip Partitioning Lazy Query |
|
3593
|
|
|
|
|
|
|
TakeWhile Partitioning Lazy Query |
|
3594
|
|
|
|
|
|
|
SkipWhile Partitioning Lazy Query |
|
3595
|
|
|
|
|
|
|
Distinct Set Operation Lazy (1st seq) Query |
|
3596
|
|
|
|
|
|
|
DefaultIfEmpty Conversion Lazy Query |
|
3597
|
|
|
|
|
|
|
OrderBy Ordering Eager (full) Query |
|
3598
|
|
|
|
|
|
|
OrderByDescending Ordering Eager (full) Query |
|
3599
|
|
|
|
|
|
|
OrderByStr Ordering Eager (full) Query |
|
3600
|
|
|
|
|
|
|
OrderByStrDescending Ordering Eager (full) Query |
|
3601
|
|
|
|
|
|
|
OrderByNum Ordering Eager (full) Query |
|
3602
|
|
|
|
|
|
|
OrderByNumDescending Ordering Eager (full) Query |
|
3603
|
|
|
|
|
|
|
Reverse Ordering Eager (full) Query |
|
3604
|
|
|
|
|
|
|
GroupBy Grouping Eager (full) Query |
|
3605
|
|
|
|
|
|
|
Union Set Operation Eager (2nd seq) Query |
|
3606
|
|
|
|
|
|
|
Intersect Set Operation Eager (2nd seq) Query |
|
3607
|
|
|
|
|
|
|
Except Set Operation Eager (2nd seq) Query |
|
3608
|
|
|
|
|
|
|
Join Join Eager (inner seq) Query |
|
3609
|
|
|
|
|
|
|
GroupJoin Join Eager (inner seq) Query |
|
3610
|
|
|
|
|
|
|
All Quantifier Lazy (early exit) Boolean |
|
3611
|
|
|
|
|
|
|
Any Quantifier Lazy (early exit) Boolean |
|
3612
|
|
|
|
|
|
|
Contains Quantifier Lazy (early exit) Boolean |
|
3613
|
|
|
|
|
|
|
SequenceEqual Comparison Lazy (early exit) Boolean |
|
3614
|
|
|
|
|
|
|
First Element Access Lazy (early exit) Element |
|
3615
|
|
|
|
|
|
|
FirstOrDefault Element Access Lazy (early exit) Element |
|
3616
|
|
|
|
|
|
|
Last Element Access Eager (full) Element |
|
3617
|
|
|
|
|
|
|
LastOrDefault Element Access Eager (full) Element |
|
3618
|
|
|
|
|
|
|
Single Element Access Lazy (stops at 2) Element |
|
3619
|
|
|
|
|
|
|
SingleOrDefault Element Access Lazy (stops at 2) Element |
|
3620
|
|
|
|
|
|
|
ElementAt Element Access Lazy (early exit) Element |
|
3621
|
|
|
|
|
|
|
ElementAtOrDefault Element Access Lazy (early exit) Element |
|
3622
|
|
|
|
|
|
|
Count Aggregation Eager (full) Integer |
|
3623
|
|
|
|
|
|
|
Sum Aggregation Eager (full) Number |
|
3624
|
|
|
|
|
|
|
Min Aggregation Eager (full) Number |
|
3625
|
|
|
|
|
|
|
Max Aggregation Eager (full) Number |
|
3626
|
|
|
|
|
|
|
Average Aggregation Eager (full) Number |
|
3627
|
|
|
|
|
|
|
AverageOrDefault Aggregation Eager (full) Number or undef |
|
3628
|
|
|
|
|
|
|
Aggregate Aggregation Eager (full) Scalar |
|
3629
|
|
|
|
|
|
|
ToArray Conversion Eager (full) Array |
|
3630
|
|
|
|
|
|
|
ToList Conversion Eager (full) ArrayRef |
|
3631
|
|
|
|
|
|
|
ToDictionary Conversion Eager (full) HashRef |
|
3632
|
|
|
|
|
|
|
ToLookup Conversion Eager (full) HashRef |
|
3633
|
|
|
|
|
|
|
ToLTSV Conversion Eager (full) (file written) |
|
3634
|
|
|
|
|
|
|
ForEach Utility Eager (full) (void) |
|
3635
|
|
|
|
|
|
|
|
|
3636
|
|
|
|
|
|
|
B |
|
3637
|
|
|
|
|
|
|
|
|
3638
|
|
|
|
|
|
|
=over 4 |
|
3639
|
|
|
|
|
|
|
|
|
3640
|
|
|
|
|
|
|
=item * B<Lazy> - returns a new Query immediately; no data is read yet. |
|
3641
|
|
|
|
|
|
|
|
|
3642
|
|
|
|
|
|
|
=item * B<Lazy (early exit)> - reads only as many elements as needed, then stops. |
|
3643
|
|
|
|
|
|
|
|
|
3644
|
|
|
|
|
|
|
=item * B<Lazy (stops at 2)> - reads until it finds a second match, then stops. |
|
3645
|
|
|
|
|
|
|
|
|
3646
|
|
|
|
|
|
|
=item * B<Eager (full)> - must read the entire input sequence before returning. |
|
3647
|
|
|
|
|
|
|
|
|
3648
|
|
|
|
|
|
|
=item * B<Eager (2nd seq)> / B<Eager (inner seq)> - the indicated sequence is read |
|
3649
|
|
|
|
|
|
|
in full up front; the other sequence remains lazy. |
|
3650
|
|
|
|
|
|
|
|
|
3651
|
|
|
|
|
|
|
=back |
|
3652
|
|
|
|
|
|
|
|
|
3653
|
|
|
|
|
|
|
B |
|
3654
|
|
|
|
|
|
|
|
|
3655
|
|
|
|
|
|
|
=over 4 |
|
3656
|
|
|
|
|
|
|
|
|
3657
|
|
|
|
|
|
|
=item * Chain lazy operations freely - no cost until a terminal is called. |
|
3658
|
|
|
|
|
|
|
|
|
3659
|
|
|
|
|
|
|
=item * Each terminal operation exhausts the iterator; to reuse data, call |
|
3660
|
|
|
|
|
|
|
C<ToArray> first and rebuild with C<From>. |
|
3661
|
|
|
|
|
|
|
|
|
3662
|
|
|
|
|
|
|
=item * For very large files, avoid eager operations (C<OrderBy>, C<GroupBy>, |
|
3663
|
|
|
|
|
|
|
C<ToArray>, etc.) unless the data fits in memory, or pre-filter with C<Where> |
|
3664
|
|
|
|
|
|
|
to reduce the working set first. |
|
3665
|
|
|
|
|
|
|
|
|
3666
|
|
|
|
|
|
|
=back |
|
3667
|
|
|
|
|
|
|
|
|
3668
|
|
|
|
|
|
|
=head2 Query Execution Flow |
|
3669
|
|
|
|
|
|
|
|
|
3670
|
|
|
|
|
|
|
# Build query (lazy - no execution yet) |
|
3671
|
|
|
|
|
|
|
my $query = LTSV::LINQ->FromLTSV("access.log") |
|
3672
|
|
|
|
|
|
|
->Where(status => '200') # Lazy |
|
3673
|
|
|
|
|
|
|
->Select(sub { $_[0]{url} }) # Lazy |
|
3674
|
|
|
|
|
|
|
->Distinct(); # Lazy |
|
3675
|
|
|
|
|
|
|
|
|
3676
|
|
|
|
|
|
|
# Execute query (terminal operation) |
|
3677
|
|
|
|
|
|
|
my @results = $query->ToArray(); # Now executes entire chain |
|
3678
|
|
|
|
|
|
|
|
|
3679
|
|
|
|
|
|
|
B |
|
3680
|
|
|
|
|
|
|
|
|
3681
|
|
|
|
|
|
|
1. FromLTSV opens file and creates iterator |
|
3682
|
|
|
|
|
|
|
2. Where wraps iterator with filter |
|
3683
|
|
|
|
|
|
|
3. Select wraps with transformation |
|
3684
|
|
|
|
|
|
|
4. Distinct wraps with deduplication |
|
3685
|
|
|
|
|
|
|
5. ToArray pulls elements through chain |
|
3686
|
|
|
|
|
|
|
|
|
3687
|
|
|
|
|
|
|
Each element flows through the entire chain before the next element |
|
3688
|
|
|
|
|
|
|
is read. |
|
3689
|
|
|
|
|
|
|
|
|
3690
|
|
|
|
|
|
|
=head2 Memory Characteristics |
|
3691
|
|
|
|
|
|
|
|
|
3692
|
|
|
|
|
|
|
B |
|
3693
|
|
|
|
|
|
|
|
|
3694
|
|
|
|
|
|
|
These hold at most one element in memory at a time: |
|
3695
|
|
|
|
|
|
|
|
|
3696
|
|
|
|
|
|
|
=over 4 |
|
3697
|
|
|
|
|
|
|
|
|
3698
|
|
|
|
|
|
|
=item * Where, Select, SelectMany, Concat, Zip |
|
3699
|
|
|
|
|
|
|
|
|
3700
|
|
|
|
|
|
|
=item * Take, Skip, TakeWhile, SkipWhile |
|
3701
|
|
|
|
|
|
|
|
|
3702
|
|
|
|
|
|
|
=item * DefaultIfEmpty |
|
3703
|
|
|
|
|
|
|
|
|
3704
|
|
|
|
|
|
|
=item * ForEach, Count, Sum, Min, Max, Average, AverageOrDefault |
|
3705
|
|
|
|
|
|
|
|
|
3706
|
|
|
|
|
|
|
=item * First, FirstOrDefault, Any, All, Contains |
|
3707
|
|
|
|
|
|
|
|
|
3708
|
|
|
|
|
|
|
=item * Single, SingleOrDefault, ElementAt, ElementAtOrDefault |
|
3709
|
|
|
|
|
|
|
|
|
3710
|
|
|
|
|
|
|
=back |
|
3711
|
|
|
|
|
|
|
|
|
3712
|
|
|
|
|
|
|
B |
|
3713
|
|
|
|
|
|
|
|
|
3714
|
|
|
|
|
|
|
=over 4 |
|
3715
|
|
|
|
|
|
|
|
|
3716
|
|
|
|
|
|
|
=item * Distinct - hash grows with the number of distinct keys seen |
|
3717
|
|
|
|
|
|
|
|
|
3718
|
|
|
|
|
|
|
=back |
|
3719
|
|
|
|
|
|
|
|
|
3720
|
|
|
|
|
|
|
B |
|
3721
|
|
|
|
|
|
|
|
|
3722
|
|
|
|
|
|
|
The following are partially eager: one sequence is buffered in full, |
|
3723
|
|
|
|
|
|
|
the other is streamed: |
|
3724
|
|
|
|
|
|
|
|
|
3725
|
|
|
|
|
|
|
=over 4 |
|
3726
|
|
|
|
|
|
|
|
|
3727
|
|
|
|
|
|
|
=item * Union, Intersect, Except - second sequence is fully loaded |
|
3728
|
|
|
|
|
|
|
|
|
3729
|
|
|
|
|
|
|
=item * Join, GroupJoin - inner sequence is fully loaded |
|
3730
|
|
|
|
|
|
|
|
|
3731
|
|
|
|
|
|
|
=back |
|
3732
|
|
|
|
|
|
|
|
|
3733
|
|
|
|
|
|
|
B |
|
3734
|
|
|
|
|
|
|
|
|
3735
|
|
|
|
|
|
|
=over 4 |
|
3736
|
|
|
|
|
|
|
|
|
3737
|
|
|
|
|
|
|
=item * ToArray, ToList, ToDictionary, ToLookup, ToLTSV (O(n)) |
|
3738
|
|
|
|
|
|
|
|
|
3739
|
|
|
|
|
|
|
=item * OrderBy, OrderByDescending and Str/Num variants, Reverse (O(n)) |
|
3740
|
|
|
|
|
|
|
|
|
3741
|
|
|
|
|
|
|
=item * GroupBy (O(n)) |
|
3742
|
|
|
|
|
|
|
|
|
3743
|
|
|
|
|
|
|
=item * Last, LastOrDefault (O(n)) |
|
3744
|
|
|
|
|
|
|
|
|
3745
|
|
|
|
|
|
|
=item * Aggregate (O(n), O(1) intermediate accumulator) |
|
3746
|
|
|
|
|
|
|
|
|
3747
|
|
|
|
|
|
|
=back |
|
3748
|
|
|
|
|
|
|
|
|
3749
|
|
|
|
|
|
|
=head1 PERFORMANCE |
|
3750
|
|
|
|
|
|
|
|
|
3751
|
|
|
|
|
|
|
=head2 Memory Efficiency |
|
3752
|
|
|
|
|
|
|
|
|
3753
|
|
|
|
|
|
|
Lazy evaluation means memory usage is O(1) for most operations, |
|
3754
|
|
|
|
|
|
|
regardless of input size. |
|
3755
|
|
|
|
|
|
|
|
|
3756
|
|
|
|
|
|
|
# Processes 1GB file with constant memory |
|
3757
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV("1gb.log") |
|
3758
|
|
|
|
|
|
|
->Where(status => '500') |
|
3759
|
|
|
|
|
|
|
->ForEach(sub { print $_[0]{url}, "\n" }); |
|
3760
|
|
|
|
|
|
|
|
|
3761
|
|
|
|
|
|
|
=head2 Terminal Operations |
|
3762
|
|
|
|
|
|
|
|
|
3763
|
|
|
|
|
|
|
These operations materialize the entire result set: |
|
3764
|
|
|
|
|
|
|
|
|
3765
|
|
|
|
|
|
|
=over 4 |
|
3766
|
|
|
|
|
|
|
|
|
3767
|
|
|
|
|
|
|
=item * ToArray, ToList |
|
3768
|
|
|
|
|
|
|
|
|
3769
|
|
|
|
|
|
|
=item * OrderBy, OrderByDescending, Reverse |
|
3770
|
|
|
|
|
|
|
|
|
3771
|
|
|
|
|
|
|
=item * GroupBy |
|
3772
|
|
|
|
|
|
|
|
|
3773
|
|
|
|
|
|
|
=item * Last |
|
3774
|
|
|
|
|
|
|
|
|
3775
|
|
|
|
|
|
|
=back |
|
3776
|
|
|
|
|
|
|
|
|
3777
|
|
|
|
|
|
|
For large datasets, use these operations carefully. |
|
3778
|
|
|
|
|
|
|
|
|
3779
|
|
|
|
|
|
|
=head2 Optimization Tips |
|
3780
|
|
|
|
|
|
|
|
|
3781
|
|
|
|
|
|
|
=over 4 |
|
3782
|
|
|
|
|
|
|
|
|
3783
|
|
|
|
|
|
|
=item * Filter early: Place Where clauses first |
|
3784
|
|
|
|
|
|
|
|
|
3785
|
|
|
|
|
|
|
# Good: Filter before expensive operations |
|
3786
|
|
|
|
|
|
|
->Where(status => '200')->OrderBy(...)->Take(10) |
|
3787
|
|
|
|
|
|
|
|
|
3788
|
|
|
|
|
|
|
# Bad: Order all data, then filter |
|
3789
|
|
|
|
|
|
|
->OrderBy(...)->Where(status => '200')->Take(10) |
|
3790
|
|
|
|
|
|
|
|
|
3791
|
|
|
|
|
|
|
=item * Limit early: Use Take to reduce processing |
|
3792
|
|
|
|
|
|
|
|
|
3793
|
|
|
|
|
|
|
# Process only what you need |
|
3794
|
|
|
|
|
|
|
->Take(1000)->GroupBy(...) |
|
3795
|
|
|
|
|
|
|
|
|
3796
|
|
|
|
|
|
|
=item * Avoid repeated ToArray: Reuse results |
|
3797
|
|
|
|
|
|
|
|
|
3798
|
|
|
|
|
|
|
# Bad: Calls ToArray twice (the second call finds the iterator already exhausted) |
|
3799
|
|
|
|
|
|
|
my $count = scalar($query->ToArray()); |
|
3800
|
|
|
|
|
|
|
my @items = $query->ToArray(); |
|
3801
|
|
|
|
|
|
|
|
|
3802
|
|
|
|
|
|
|
# Good: Call once, reuse |
|
3803
|
|
|
|
|
|
|
my @items = $query->ToArray(); |
|
3804
|
|
|
|
|
|
|
my $count = scalar(@items); |
|
3805
|
|
|
|
|
|
|
|
|
3806
|
|
|
|
|
|
|
=back |
|
3807
|
|
|
|
|
|
|
|
|
3808
|
|
|
|
|
|
|
=head1 COMPATIBILITY |
|
3809
|
|
|
|
|
|
|
|
|
3810
|
|
|
|
|
|
|
=head2 Perl Version Support |
|
3811
|
|
|
|
|
|
|
|
|
3812
|
|
|
|
|
|
|
This module is compatible with B<Perl 5.005_03 and later>. |
|
3813
|
|
|
|
|
|
|
|
|
3814
|
|
|
|
|
|
|
Tested on: |
|
3815
|
|
|
|
|
|
|
|
|
3816
|
|
|
|
|
|
|
=over 4 |
|
3817
|
|
|
|
|
|
|
|
|
3818
|
|
|
|
|
|
|
=item * Perl 5.005_03 (released 1999) |
|
3819
|
|
|
|
|
|
|
|
|
3820
|
|
|
|
|
|
|
=item * Perl 5.6.x |
|
3821
|
|
|
|
|
|
|
|
|
3822
|
|
|
|
|
|
|
=item * Perl 5.8.x |
|
3823
|
|
|
|
|
|
|
|
|
3824
|
|
|
|
|
|
|
=item * Perl 5.10.x - 5.42.x |
|
3825
|
|
|
|
|
|
|
|
|
3826
|
|
|
|
|
|
|
=back |
|
3827
|
|
|
|
|
|
|
|
|
3828
|
|
|
|
|
|
|
=head2 Compatibility Policy |
|
3829
|
|
|
|
|
|
|
|
|
3830
|
|
|
|
|
|
|
B |
|
3831
|
|
|
|
|
|
|
|
|
3832
|
|
|
|
|
|
|
This module maintains compatibility with Perl 5.005_03 through careful |
|
3833
|
|
|
|
|
|
|
coding practices: |
|
3834
|
|
|
|
|
|
|
|
|
3835
|
|
|
|
|
|
|
=over 4 |
|
3836
|
|
|
|
|
|
|
|
|
3837
|
|
|
|
|
|
|
=item * No use of features introduced after 5.005 |
|
3838
|
|
|
|
|
|
|
|
|
3839
|
|
|
|
|
|
|
=item * C |
|
3840
|
|
|
|
|
|
|
|
|
3841
|
|
|
|
|
|
|
=item * C<our> keyword avoided (5.6+ feature) |
|
3842
|
|
|
|
|
|
|
|
|
3843
|
|
|
|
|
|
|
=item * Three-argument C<open> used on Perl 5.6 and later (two-argument form retained for 5.005_03) |
|
3844
|
|
|
|
|
|
|
|
|
3845
|
|
|
|
|
|
|
=item * No Unicode features required |
|
3846
|
|
|
|
|
|
|
|
|
3847
|
|
|
|
|
|
|
=item * No module dependencies beyond core |
|
3848
|
|
|
|
|
|
|
|
|
3849
|
|
|
|
|
|
|
=back |
|
3850
|
|
|
|
|
|
|
|
|
3851
|
|
|
|
|
|
|
B |
|
3852
|
|
|
|
|
|
|
|
|
3853
|
|
|
|
|
|
|
This module adheres to the B<Perl 5.005_03 specification>, which was the |
|
3854
|
|
|
|
|
|
|
final version of JPerl (Japanese Perl). This is not about using the old |
|
3855
|
|
|
|
|
|
|
interpreter, but about maintaining the B<simple, original Perl programming model> |
|
3856
|
|
|
|
|
|
|
that made Perl enjoyable. |
|
3857
|
|
|
|
|
|
|
|
|
3858
|
|
|
|
|
|
|
B |
|
3859
|
|
|
|
|
|
|
|
|
3860
|
|
|
|
|
|
|
Some people think the strength of modern times is the ability to use |
|
3861
|
|
|
|
|
|
|
modern technology. That thinking is insufficient. The strength of modern |
|
3862
|
|
|
|
|
|
|
times is the ability to use B<all> technology up to the present day. |
|
3863
|
|
|
|
|
|
|
|
|
3864
|
|
|
|
|
|
|
By adhering to the Perl 5.005_03 specification, we gain access to the |
|
3865
|
|
|
|
|
|
|
entire history of Perl--from 5.005_03 to 5.42 and beyond--rather than |
|
3866
|
|
|
|
|
|
|
limiting ourselves to only the latest versions. |
|
3867
|
|
|
|
|
|
|
|
|
3868
|
|
|
|
|
|
|
Key reasons: |
|
3869
|
|
|
|
|
|
|
|
|
3870
|
|
|
|
|
|
|
=over 4 |
|
3871
|
|
|
|
|
|
|
|
|
3872
|
|
|
|
|
|
|
=item * B - The original Perl approach keeps programming fun and easy |
|
3873
|
|
|
|
|
|
|
|
|
3874
|
|
|
|
|
|
|
Perl 5.6 and later introduced character encoding complexity that made |
|
3875
|
|
|
|
|
|
|
programming harder. The confusion around character handling contributed |
|
3876
|
|
|
|
|
|
|
to Perl's decline. By staying with the 5.005_03 specification, we maintain |
|
3877
|
|
|
|
|
|
|
the simplicity that made Perl "rakuda" (camel) -> "raku" (easy/fun). |
|
3878
|
|
|
|
|
|
|
|
|
3879
|
|
|
|
|
|
|
=item * B - Preserves the last JPerl version |
|
3880
|
|
|
|
|
|
|
|
|
3881
|
|
|
|
|
|
|
Perl 5.005_03 was the final version of JPerl, which handled Japanese text |
|
3882
|
|
|
|
|
|
|
naturally. Later versions abandoned this approach for Unicode, adding |
|
3883
|
|
|
|
|
|
|
unnecessary complexity for many use cases. |
|
3884
|
|
|
|
|
|
|
|
|
3885
|
|
|
|
|
|
|
=item * B - Runs on ANY Perl version |
|
3886
|
|
|
|
|
|
|
|
|
3887
|
|
|
|
|
|
|
Code written to the 5.005_03 specification runs on B<all> Perl versions |
|
3888
|
|
|
|
|
|
|
from 5.005_03 through 5.42 and beyond. This maximizes compatibility across |
|
3889
|
|
|
|
|
|
|
more than two decades of Perl releases. |
|
3890
|
|
|
|
|
|
|
|
|
3891
|
|
|
|
|
|
|
=item * B - Real-world enterprise needs |
|
3892
|
|
|
|
|
|
|
|
|
3893
|
|
|
|
|
|
|
Many production systems, embedded environments, and enterprise deployments |
|
3894
|
|
|
|
|
|
|
still run Perl 5.005, 5.6, or 5.8. This module provides modern query |
|
3895
|
|
|
|
|
|
|
capabilities without requiring upgrades. |
|
3896
|
|
|
|
|
|
|
|
|
3897
|
|
|
|
|
|
|
=item * B - Programming should be enjoyable |
|
3898
|
|
|
|
|
|
|
|
|
3899
|
|
|
|
|
|
|
As readers of the "Camel Book" (Programming Perl) know, Perl was designed |
|
3900
|
|
|
|
|
|
|
to make programming enjoyable. The 5.005_03 specification preserves this |
|
3901
|
|
|
|
|
|
|
original vision. |
|
3902
|
|
|
|
|
|
|
|
|
3903
|
|
|
|
|
|
|
=back |
|
3904
|
|
|
|
|
|
|
|
|
3905
|
|
|
|
|
|
|
B |
|
3906
|
|
|
|
|
|
|
|
|
3907
|
|
|
|
|
|
|
All modules under the ina CPAN account (including mb, Jacode, UTF8-R2, |
|
3908
|
|
|
|
|
|
|
mb-JSON, and this module) follow this principle: Write to the Perl 5.005_03 |
|
3909
|
|
|
|
|
|
|
specification, test on all versions, maintain programming joy. |
|
3910
|
|
|
|
|
|
|
|
|
3911
|
|
|
|
|
|
|
This is not nostalgia--it's a commitment to: |
|
3912
|
|
|
|
|
|
|
|
|
3913
|
|
|
|
|
|
|
=over 4 |
|
3914
|
|
|
|
|
|
|
|
|
3915
|
|
|
|
|
|
|
=item * Simple, maintainable code |
|
3916
|
|
|
|
|
|
|
|
|
3917
|
|
|
|
|
|
|
=item * Maximum compatibility |
|
3918
|
|
|
|
|
|
|
|
|
3919
|
|
|
|
|
|
|
=item * The original Perl philosophy |
|
3920
|
|
|
|
|
|
|
|
|
3921
|
|
|
|
|
|
|
=item * Making programming "raku" (easy and fun) |
|
3922
|
|
|
|
|
|
|
|
|
3923
|
|
|
|
|
|
|
=back |
|
3924
|
|
|
|
|
|
|
|
|
3925
|
|
|
|
|
|
|
B |
|
3926
|
|
|
|
|
|
|
|
|
3927
|
|
|
|
|
|
|
This module uses C<pmake.bat> instead of traditional make, since Perl 5.005_03 |
|
3928
|
|
|
|
|
|
|
on Microsoft Windows lacks make. All tests pass on Perl 5.005_03 through |
|
3929
|
|
|
|
|
|
|
modern versions. |
|
3930
|
|
|
|
|
|
|
|
|
3931
|
|
|
|
|
|
|
=head2 .NET LINQ Compatibility |
|
3932
|
|
|
|
|
|
|
|
|
3933
|
|
|
|
|
|
|
This section documents where LTSV::LINQ's behaviour matches .NET LINQ |
|
3934
|
|
|
|
|
|
|
exactly, where it intentionally differs, and where it cannot differ due |
|
3935
|
|
|
|
|
|
|
to Perl's type system. |
|
3936
|
|
|
|
|
|
|
|
|
3937
|
|
|
|
|
|
|
B<Exact matches with .NET LINQ:> |
|
3938
|
|
|
|
|
|
|
|
|
3939
|
|
|
|
|
|
|
=over 4 |
|
3940
|
|
|
|
|
|
|
|
|
3941
|
|
|
|
|
|
|
=item * C<Single> - throws when sequence is empty or has more than one element |
|
3942
|
|
|
|
|
|
|
|
|
3943
|
|
|
|
|
|
|
=item * C<First>, C<Last> - throw when sequence is empty or no element matches |
|
3944
|
|
|
|
|
|
|
|
|
3945
|
|
|
|
|
|
|
=item * C<Aggregate($seed, $func)> and C<Aggregate($seed, $func, $result_selector)> |
|
3946
|
|
|
|
|
|
|
- matching 2- and 3-argument forms |
|
3947
|
|
|
|
|
|
|
|
|
3948
|
|
|
|
|
|
|
=item * C<GroupBy> - groups are returned in insertion order (first-seen key order) |
|
3949
|
|
|
|
|
|
|
|
|
3950
|
|
|
|
|
|
|
=item * C<GroupJoin> - every outer element appears even with zero inner matches |
|
3951
|
|
|
|
|
|
|
|
|
3952
|
|
|
|
|
|
|
=item * C<Join> - inner join semantics; unmatched outer elements are dropped |
|
3953
|
|
|
|
|
|
|
|
|
3954
|
|
|
|
|
|
|
=item * C / C / C - partially eager (second/inner |
|
3955
|
|
|
|
|
|
|
sequence buffered up front), matching .NET's hash-join approach |
|
3956
|
|
|
|
|
|
|
|
|
3957
|
|
|
|
|
|
|
=item * C, C, C, C - identical semantics |
|
3958
|
|
|
|
|
|
|
|
|
3959
|
|
|
|
|
|
|
=item * C / C with early exit |
|
3960
|
|
|
|
|
|
|
|
|
3961
|
|
|
|
|
|
|
=back |
|
3962
|
|
|
|
|
|
|
|
|
3963
|
|
|
|
|
|
|
B<Intentional differences from .NET LINQ:> |
|
3964
|
|
|
|
|
|
|
|
|
3965
|
|
|
|
|
|
|
=over 4 |
|
3966
|
|
|
|
|
|
|
|
|
3967
|
|
|
|
|
|
|
=item * C<SingleOrDefault> with multiple elements |
|
3968
|
|
|
|
|
|
|
|
|
3969
|
|
|
|
|
|
|
.NET throws C<InvalidOperationException> when the sequence contains more |
|
3970
|
|
|
|
|
|
|
than one element. LTSV::LINQ returns C<undef> instead. This makes it |
|
3971
|
|
|
|
|
|
|
more natural in Perl code that checks return values with C<defined>. |
|
3972
|
|
|
|
|
|
|
|
|
3973
|
|
|
|
|
|
|
If you require strict .NET behaviour (exception on multiple elements), |
|
3974
|
|
|
|
|
|
|
use C<Single> inside an C<eval>: |
|
3975
|
|
|
|
|
|
|
|
|
3976
|
|
|
|
|
|
|
my $val = eval { $query->Single() }; |
|
3977
|
|
|
|
|
|
|
# $val is undef and $@ is set if empty or multiple |
|
3978
|
|
|
|
|
|
|
|
|
3979
|
|
|
|
|
|
|
=item * C<DefaultIfEmpty(undef)> |
|
3980
|
|
|
|
|
|
|
|
|
3981
|
|
|
|
|
|
|
.NET's C<DefaultIfEmpty> can return a sequence containing C<null> |
|
3982
|
|
|
|
|
|
|
(the reference-type default). LTSV::LINQ cannot: the iterator protocol |
|
3983
|
|
|
|
|
|
|
uses C<undef> to signal end-of-sequence, so a default value of C<undef> |
|
3984
|
|
|
|
|
|
|
is indistinguishable from EOF and is silently lost. |
|
3985
|
|
|
|
|
|
|
|
|
3986
|
|
|
|
|
|
|
# .NET: seq.DefaultIfEmpty() produces one null element |
|
3987
|
|
|
|
|
|
|
# Perl: |
|
3988
|
|
|
|
|
|
|
LTSV::LINQ->From([])->DefaultIfEmpty(undef)->ToArray() # () - empty! |
|
3989
|
|
|
|
|
|
|
LTSV::LINQ->From([])->DefaultIfEmpty(0)->ToArray() # (0) - works |
|
3990
|
|
|
|
|
|
|
|
|
3991
|
|
|
|
|
|
|
Use a sentinel value (C<0>, C<''>, C<{}>) and handle it explicitly. |
|
3992
|
|
|
|
|
|
|
|
|
3993
|
|
|
|
|
|
|
=item * C<OrderBy> smart comparison |
|
3994
|
|
|
|
|
|
|
|
|
3995
|
|
|
|
|
|
|
.NET's C<OrderBy> is strongly typed: the key type determines the |
|
3996
|
|
|
|
|
|
|
comparison. In Perl there is no static type, so LTSV::LINQ's C<OrderBy> |
|
3997
|
|
|
|
|
|
|
uses a heuristic: if both keys look like numbers, C<E<lt>=E<gt>> is used; |
|
3998
|
|
|
|
|
|
|
otherwise C<cmp>. For explicit control, use C<OrderByStr> (always C<cmp>) |
|
3999
|
|
|
|
|
|
|
or C<OrderByNum> (always C<E<lt>=E<gt>>). |
|
4000
|
|
|
|
|
|
|
|
|
4001
|
|
|
|
|
|
|
=item * EqualityComparer / IComparer |
|
4002
|
|
|
|
|
|
|
|
|
4003
|
|
|
|
|
|
|
.NET LINQ accepts C<IEqualityComparer> and C<IComparer> interface objects |
|
4004
|
|
|
|
|
|
|
for custom equality and ordering. LTSV::LINQ uses code references (C) |
|
4005
|
|
|
|
|
|
|
that extract a I<key> from each element. This is equivalent in power but |
|
4006
|
|
|
|
|
|
|
different in calling convention: the sub receives one element and returns a |
|
4007
|
|
|
|
|
|
|
key, rather than receiving two elements and returning a comparison result. |
|
4008
|
|
|
|
|
|
|
|
|
4009
|
|
|
|
|
|
|
=item * C on typed sequences |
|
4010
|
|
|
|
|
|
|
|
|
4011
|
|
|
|
|
|
|
.NET's C is type-checked. LTSV::LINQ accepts any two sequences |
|
4012
|
|
|
|
|
|
|
regardless of element type. |
|
4013
|
|
|
|
|
|
|
|
|
4014
|
|
|
|
|
|
|
=item * No query expression syntax |
|
4015
|
|
|
|
|
|
|
|
|
4016
|
|
|
|
|
|
|
.NET's C syntax compiles to LINQ |
|
4017
|
|
|
|
|
|
|
method calls. Perl has no equivalent; use method chaining directly. |
|
4018
|
|
|
|
|
|
|
|
|
4019
|
|
|
|
|
|
|
=back |
|
4020
|
|
|
|
|
|
|
|
|
4021
|
|
|
|
|
|
|
=head2 Pure Perl Implementation |
|
4022
|
|
|
|
|
|
|
|
|
4023
|
|
|
|
|
|
|
B |
|
4024
|
|
|
|
|
|
|
|
|
4025
|
|
|
|
|
|
|
This module is implemented in Pure Perl with no XS (C extensions). |
|
4026
|
|
|
|
|
|
|
Benefits: |
|
4027
|
|
|
|
|
|
|
|
|
4028
|
|
|
|
|
|
|
=over 4 |
|
4029
|
|
|
|
|
|
|
|
|
4030
|
|
|
|
|
|
|
=item * Works on any Perl installation |
|
4031
|
|
|
|
|
|
|
|
|
4032
|
|
|
|
|
|
|
=item * No C compiler required |
|
4033
|
|
|
|
|
|
|
|
|
4034
|
|
|
|
|
|
|
=item * Easy installation in restricted environments |
|
4035
|
|
|
|
|
|
|
|
|
4036
|
|
|
|
|
|
|
=item * Consistent behavior across platforms |
|
4037
|
|
|
|
|
|
|
|
|
4038
|
|
|
|
|
|
|
=item * Simpler debugging and maintenance |
|
4039
|
|
|
|
|
|
|
|
|
4040
|
|
|
|
|
|
|
=back |
|
4041
|
|
|
|
|
|
|
|
|
4042
|
|
|
|
|
|
|
=head2 Core Module Dependencies |
|
4043
|
|
|
|
|
|
|
|
|
4044
|
|
|
|
|
|
|
B This module uses only Perl core features available since 5.005. |
|
4045
|
|
|
|
|
|
|
|
|
4046
|
|
|
|
|
|
|
No CPAN dependencies required. |
|
4047
|
|
|
|
|
|
|
|
|
4048
|
|
|
|
|
|
|
=head1 DIAGNOSTICS |
|
4049
|
|
|
|
|
|
|
|
|
4050
|
|
|
|
|
|
|
=head2 Error Messages |
|
4051
|
|
|
|
|
|
|
|
|
4052
|
|
|
|
|
|
|
This module may throw the following exceptions: |
|
4053
|
|
|
|
|
|
|
|
|
4054
|
|
|
|
|
|
|
=over 4 |
|
4055
|
|
|
|
|
|
|
|
|
4056
|
|
|
|
|
|
|
=item C |
|
4057
|
|
|
|
|
|
|
|
|
4058
|
|
|
|
|
|
|
Thrown by From() when the argument is not an array reference. |
|
4059
|
|
|
|
|
|
|
|
|
4060
|
|
|
|
|
|
|
Example: |
|
4061
|
|
|
|
|
|
|
|
|
4062
|
|
|
|
|
|
|
LTSV::LINQ->From("string"); # Dies |
|
4063
|
|
|
|
|
|
|
LTSV::LINQ->From([1, 2, 3]); # OK |
|
4064
|
|
|
|
|
|
|
|
|
4065
|
|
|
|
|
|
|
=item C |
|
4066
|
|
|
|
|
|
|
|
|
4067
|
|
|
|
|
|
|
Thrown by SelectMany() when the selector function returns anything |
|
4068
|
|
|
|
|
|
|
other than an ARRAY reference. Wrap the return value in C<[...]>: |
|
4069
|
|
|
|
|
|
|
|
|
4070
|
|
|
|
|
|
|
# Wrong - hashref causes die |
|
4071
|
|
|
|
|
|
|
->SelectMany(sub { {key => 'val'} }) |
|
4072
|
|
|
|
|
|
|
|
|
4073
|
|
|
|
|
|
|
# Correct - arrayref |
|
4074
|
|
|
|
|
|
|
->SelectMany(sub { [{key => 'val'}] }) |
|
4075
|
|
|
|
|
|
|
|
|
4076
|
|
|
|
|
|
|
# Correct - empty array for "no results" case |
|
4077
|
|
|
|
|
|
|
->SelectMany(sub { [] }) |
|
4078
|
|
|
|
|
|
|
|
|
4079
|
|
|
|
|
|
|
=item C |
|
4080
|
|
|
|
|
|
|
|
|
4081
|
|
|
|
|
|
|
Thrown by First(), Last(), or Average() when called on an empty sequence. |
|
4082
|
|
|
|
|
|
|
|
|
4083
|
|
|
|
|
|
|
Methods that throw this error: |
|
4084
|
|
|
|
|
|
|
|
|
4085
|
|
|
|
|
|
|
=over 4 |
|
4086
|
|
|
|
|
|
|
|
|
4087
|
|
|
|
|
|
|
=item * First() |
|
4088
|
|
|
|
|
|
|
|
|
4089
|
|
|
|
|
|
|
=item * Last() |
|
4090
|
|
|
|
|
|
|
|
|
4091
|
|
|
|
|
|
|
=item * Average() |
|
4092
|
|
|
|
|
|
|
|
|
4093
|
|
|
|
|
|
|
=back |
|
4094
|
|
|
|
|
|
|
|
|
4095
|
|
|
|
|
|
|
To avoid this error, use the OrDefault variants: |
|
4096
|
|
|
|
|
|
|
|
|
4097
|
|
|
|
|
|
|
=over 4 |
|
4098
|
|
|
|
|
|
|
|
|
4099
|
|
|
|
|
|
|
=item * FirstOrDefault() - returns undef instead of dying |
|
4100
|
|
|
|
|
|
|
|
|
4101
|
|
|
|
|
|
|
=item * LastOrDefault() - returns undef instead of dying |
|
4102
|
|
|
|
|
|
|
|
|
4103
|
|
|
|
|
|
|
=item * AverageOrDefault() - returns undef instead of dying |
|
4104
|
|
|
|
|
|
|
|
|
4105
|
|
|
|
|
|
|
=back |
|
4106
|
|
|
|
|
|
|
|
|
4107
|
|
|
|
|
|
|
Example: |
|
4108
|
|
|
|
|
|
|
|
|
4109
|
|
|
|
|
|
|
my @empty = (); |
|
4110
|
|
|
|
|
|
|
LTSV::LINQ->From(\@empty)->First(); # Dies |
|
4111
|
|
|
|
|
|
|
LTSV::LINQ->From(\@empty)->FirstOrDefault(); # Returns undef |
|
4112
|
|
|
|
|
|
|
|
|
4113
|
|
|
|
|
|
|
=item C |
|
4114
|
|
|
|
|
|
|
|
|
4115
|
|
|
|
|
|
|
Thrown by First() or Last() with a predicate when no element matches. |
|
4116
|
|
|
|
|
|
|
|
|
4117
|
|
|
|
|
|
|
Example: |
|
4118
|
|
|
|
|
|
|
|
|
4119
|
|
|
|
|
|
|
my @data = (1, 2, 3); |
|
4120
|
|
|
|
|
|
|
LTSV::LINQ->From(\@data)->First(sub { $_[0] > 10 }); # Dies |
|
4121
|
|
|
|
|
|
|
LTSV::LINQ->From(\@data)->FirstOrDefault(sub { $_[0] > 10 }); # Returns undef |
|
4122
|
|
|
|
|
|
|
|
|
4123
|
|
|
|
|
|
|
=item C |
|
4124
|
|
|
|
|
|
|
|
|
4125
|
|
|
|
|
|
|
File I/O error when FromLTSV() cannot open the specified file. |
|
4126
|
|
|
|
|
|
|
|
|
4127
|
|
|
|
|
|
|
Common causes: |
|
4128
|
|
|
|
|
|
|
|
|
4129
|
|
|
|
|
|
|
=over 4 |
|
4130
|
|
|
|
|
|
|
|
|
4131
|
|
|
|
|
|
|
=item * File does not exist |
|
4132
|
|
|
|
|
|
|
|
|
4133
|
|
|
|
|
|
|
=item * Insufficient permissions |
|
4134
|
|
|
|
|
|
|
|
|
4135
|
|
|
|
|
|
|
=item * Invalid path |
|
4136
|
|
|
|
|
|
|
|
|
4137
|
|
|
|
|
|
|
=back |
|
4138
|
|
|
|
|
|
|
|
|
4139
|
|
|
|
|
|
|
Example: |
|
4140
|
|
|
|
|
|
|
|
|
4141
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV("/nonexistent/file.ltsv"); # Dies with this error |
|
4142
|
|
|
|
|
|
|
|
|
4143
|
|
|
|
|
|
|
=back |
|
4144
|
|
|
|
|
|
|
|
|
4145
|
|
|
|
|
|
|
=head2 Methods That May Throw Exceptions |
|
4146
|
|
|
|
|
|
|
|
|
4147
|
|
|
|
|
|
|
=over 4 |
|
4148
|
|
|
|
|
|
|
|
|
4149
|
|
|
|
|
|
|
=item B |
|
4150
|
|
|
|
|
|
|
|
|
4151
|
|
|
|
|
|
|
Dies if argument is not an array reference. |
|
4152
|
|
|
|
|
|
|
|
|
4153
|
|
|
|
|
|
|
=item B |
|
4154
|
|
|
|
|
|
|
|
|
4155
|
|
|
|
|
|
|
Dies if file cannot be opened. |
|
4156
|
|
|
|
|
|
|
|
|
4157
|
|
|
|
|
|
|
B The file handle is held open until the iterator is fully |
|
4158
|
|
|
|
|
|
|
consumed. Partially consumed queries keep their file handles open. |
|
4159
|
|
|
|
|
|
|
See C in L for details. |
|
4160
|
|
|
|
|
|
|
|
|
4161
|
|
|
|
|
|
|
=item B |
|
4162
|
|
|
|
|
|
|
|
|
4163
|
|
|
|
|
|
|
Dies if sequence is empty or no element matches predicate. |
|
4164
|
|
|
|
|
|
|
|
|
4165
|
|
|
|
|
|
|
Safe alternative: FirstOrDefault() |
|
4166
|
|
|
|
|
|
|
|
|
4167
|
|
|
|
|
|
|
=item B |
|
4168
|
|
|
|
|
|
|
|
|
4169
|
|
|
|
|
|
|
Dies if sequence is empty or no element matches predicate. |
|
4170
|
|
|
|
|
|
|
|
|
4171
|
|
|
|
|
|
|
Safe alternative: LastOrDefault() |
|
4172
|
|
|
|
|
|
|
|
|
4173
|
|
|
|
|
|
|
=item B |
|
4174
|
|
|
|
|
|
|
|
|
4175
|
|
|
|
|
|
|
Dies if sequence is empty. |
|
4176
|
|
|
|
|
|
|
|
|
4177
|
|
|
|
|
|
|
Safe alternative: AverageOrDefault() |
|
4178
|
|
|
|
|
|
|
|
|
4179
|
|
|
|
|
|
|
=back |
|
4180
|
|
|
|
|
|
|
|
|
4181
|
|
|
|
|
|
|
=head2 Safe Alternatives |
|
4182
|
|
|
|
|
|
|
|
|
4183
|
|
|
|
|
|
|
For methods that may throw exceptions, use the OrDefault variants: |
|
4184
|
|
|
|
|
|
|
|
|
4185
|
|
|
|
|
|
|
First() -> FirstOrDefault() (returns undef) |
|
4186
|
|
|
|
|
|
|
Last() -> LastOrDefault() (returns undef) |
|
4187
|
|
|
|
|
|
|
Average() -> AverageOrDefault() (returns undef) |
|
4188
|
|
|
|
|
|
|
|
|
4189
|
|
|
|
|
|
|
Example: |
|
4190
|
|
|
|
|
|
|
|
|
4191
|
|
|
|
|
|
|
# Unsafe - may die |
|
4192
|
|
|
|
|
|
|
my $first = LTSV::LINQ->From(\@data)->First(); |
|
4193
|
|
|
|
|
|
|
|
|
4194
|
|
|
|
|
|
|
# Safe - returns undef if empty |
|
4195
|
|
|
|
|
|
|
my $first = LTSV::LINQ->From(\@data)->FirstOrDefault(); |
|
4196
|
|
|
|
|
|
|
if (defined $first) { |
|
4197
|
|
|
|
|
|
|
# Process $first |
|
4198
|
|
|
|
|
|
|
} |
|
4199
|
|
|
|
|
|
|
|
|
4200
|
|
|
|
|
|
|
=head2 Exception Format and Stack Traces |
|
4201
|
|
|
|
|
|
|
|
|
4202
|
|
|
|
|
|
|
All exceptions thrown by this module are plain strings produced by |
|
4203
|
|
|
|
|
|
|
C<die>. Because no trailing newline is appended, Perl |
|
4204
|
|
|
|
|
|
|
automatically appends the source location: |
|
4205
|
|
|
|
|
|
|
|
|
4206
|
|
|
|
|
|
|
Sequence contains no elements at lib/LTSV/LINQ.pm line 764. |
|
4207
|
|
|
|
|
|
|
|
|
4208
|
|
|
|
|
|
|
This is intentional: the location helps when diagnosing unexpected |
|
4209
|
|
|
|
|
|
|
failures during development. |
|
4210
|
|
|
|
|
|
|
|
|
4211
|
|
|
|
|
|
|
When catching exceptions with C<eval>, the full string including the |
|
4212
|
|
|
|
|
|
|
location suffix is available in C<$@>. Use a prefix match if you want |
|
4213
|
|
|
|
|
|
|
to test only the message text: |
|
4214
|
|
|
|
|
|
|
|
|
4215
|
|
|
|
|
|
|
eval { LTSV::LINQ->From([])->First() }; |
|
4216
|
|
|
|
|
|
|
if ($@ =~ /^Sequence contains no elements/) { |
|
4217
|
|
|
|
|
|
|
# handle empty sequence |
|
4218
|
|
|
|
|
|
|
} |
|
4219
|
|
|
|
|
|
|
|
|
4220
|
|
|
|
|
|
|
If you prefer exceptions without the location suffix, wrap the call |
|
4221
|
|
|
|
|
|
|
in a thin eval and re-die with a newline: |
|
4222
|
|
|
|
|
|
|
|
|
4223
|
|
|
|
|
|
|
eval { $result = $query->First() }; |
|
4224
|
|
|
|
|
|
|
if ($@) { (my $msg = $@) =~ s/ at \S+ line \d+\.\n\z//; die "$msg\n" } # strip " at ... line N" from the message |
|
4225
|
|
|
|
|
|
|
|
|
4226
|
|
|
|
|
|
|
=head1 FAQ |
|
4227
|
|
|
|
|
|
|
|
|
4228
|
|
|
|
|
|
|
=head2 General Questions |
|
4229
|
|
|
|
|
|
|
|
|
4230
|
|
|
|
|
|
|
=over 4 |
|
4231
|
|
|
|
|
|
|
|
|
4232
|
|
|
|
|
|
|
=item B |
|
4233
|
|
|
|
|
|
|
|
|
4234
|
|
|
|
|
|
|
A: LINQ provides: |
|
4235
|
|
|
|
|
|
|
|
|
4236
|
|
|
|
|
|
|
=over 4 |
|
4237
|
|
|
|
|
|
|
|
|
4238
|
|
|
|
|
|
|
=item * Method chaining (more Perl-like) |
|
4239
|
|
|
|
|
|
|
|
|
4240
|
|
|
|
|
|
|
=item * Type safety through code |
|
4241
|
|
|
|
|
|
|
|
|
4242
|
|
|
|
|
|
|
=item * No string parsing required |
|
4243
|
|
|
|
|
|
|
|
|
4244
|
|
|
|
|
|
|
=item * Composable queries |
|
4245
|
|
|
|
|
|
|
|
|
4246
|
|
|
|
|
|
|
=back |
|
4247
|
|
|
|
|
|
|
|
|
4248
|
|
|
|
|
|
|
=item B |
|
4249
|
|
|
|
|
|
|
|
|
4250
|
|
|
|
|
|
|
A: No. Query objects use iterators that can only be consumed once. |
|
4251
|
|
|
|
|
|
|
|
|
4252
|
|
|
|
|
|
|
# Wrong - iterator consumed by first ToArray |
|
4253
|
|
|
|
|
|
|
my $query = LTSV::LINQ->FromLTSV("file.ltsv"); |
|
4254
|
|
|
|
|
|
|
my @first = $query->ToArray(); # OK |
|
4255
|
|
|
|
|
|
|
my @second = $query->ToArray(); # Empty! Iterator exhausted |
|
4256
|
|
|
|
|
|
|
|
|
4257
|
|
|
|
|
|
|
# Right - create new query for each use |
|
4258
|
|
|
|
|
|
|
my $query1 = LTSV::LINQ->FromLTSV("file.ltsv"); |
|
4259
|
|
|
|
|
|
|
my @first = $query1->ToArray(); |
|
4260
|
|
|
|
|
|
|
|
|
4261
|
|
|
|
|
|
|
my $query2 = LTSV::LINQ->FromLTSV("file.ltsv"); |
|
4262
|
|
|
|
|
|
|
my @second = $query2->ToArray(); |
|
4263
|
|
|
|
|
|
|
|
|
4264
|
|
|
|
|
|
|
=item B |
|
4265
|
|
|
|
|
|
|
|
|
4266
|
|
|
|
|
|
|
A: Use code reference form with C<||>: |
|
4267
|
|
|
|
|
|
|
|
|
4268
|
|
|
|
|
|
|
# OR condition requires code reference |
|
4269
|
|
|
|
|
|
|
->Where(sub { |
|
4270
|
|
|
|
|
|
|
$_[0]{status} == 200 || $_[0]{status} == 304 |
|
4271
|
|
|
|
|
|
|
}) |
|
4272
|
|
|
|
|
|
|
|
|
4273
|
|
|
|
|
|
|
# DSL only supports AND |
|
4274
|
|
|
|
|
|
|
->Where(status => '200') # Single condition only |
|
4275
|
|
|
|
|
|
|
|
|
4276
|
|
|
|
|
|
|
=item B |
|
4277
|
|
|
|
|
|
|
|
|
4278
|
|
|
|
|
|
|
A: Some operations require multiple passes: |
|
4279
|
|
|
|
|
|
|
|
|
4280
|
|
|
|
|
|
|
# Wrong - a query object cannot be traversed twice |
|
4281
|
|
|
|
|
|
|
my $avg = $query->Average(...); # Pass 1: Calculate |
|
4282
|
|
|
|
|
|
|
my @all = $query->ToArray(); # Pass 2: returns nothing - iterator already exhausted |
|
4283
|
|
|
|
|
|
|
|
|
4284
|
|
|
|
|
|
|
# Save result instead |
|
4285
|
|
|
|
|
|
|
my @all = $query->ToArray(); |
|
4286
|
|
|
|
|
|
|
my $avg = LTSV::LINQ->From(\@all)->Average(...); |
|
4287
|
|
|
|
|
|
|
|
|
4288
|
|
|
|
|
|
|
=back |
|
4289
|
|
|
|
|
|
|
|
|
4290
|
|
|
|
|
|
|
=head2 Performance Questions |
|
4291
|
|
|
|
|
|
|
|
|
4292
|
|
|
|
|
|
|
=over 4 |
|
4293
|
|
|
|
|
|
|
|
|
4294
|
|
|
|
|
|
|
=item B |
|
4295
|
|
|
|
|
|
|
|
|
4296
|
|
|
|
|
|
|
A: Use lazy operations and avoid materializing: |
|
4297
|
|
|
|
|
|
|
|
|
4298
|
|
|
|
|
|
|
# Good - constant memory |
|
4299
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV("huge.log") |
|
4300
|
|
|
|
|
|
|
->Where(status => '500') |
|
4301
|
|
|
|
|
|
|
->ForEach(sub { print $_[0]{message}, "\n" }); |
|
4302
|
|
|
|
|
|
|
|
|
4303
|
|
|
|
|
|
|
# Bad - loads everything into memory |
|
4304
|
|
|
|
|
|
|
my @all = LTSV::LINQ->FromLTSV("huge.log")->ToArray(); |
|
4305
|
|
|
|
|
|
|
|
|
4306
|
|
|
|
|
|
|
=item B |
|
4307
|
|
|
|
|
|
|
|
|
4308
|
|
|
|
|
|
|
A: OrderBy must load all elements into memory to sort them. |
|
4309
|
|
|
|
|
|
|
|
|
4310
|
|
|
|
|
|
|
# Slow on 1GB file - loads everything |
|
4311
|
|
|
|
|
|
|
->OrderBy(sub { $_[0]{timestamp} })->Take(10) |
|
4312
|
|
|
|
|
|
|
|
|
4313
|
|
|
|
|
|
|
# Faster - filter before sorting (if possible) |
|
4314
|
|
|
|
|
|
|
->Where(status => '500')->OrderBy(...)->Take(10) |
|
4315
|
|
|
|
|
|
|
|
|
4316
|
|
|
|
|
|
|
=item B |
|
4317
|
|
|
|
|
|
|
|
|
4318
|
|
|
|
|
|
|
A: Use ForEach or streaming terminal operations: |
|
4319
|
|
|
|
|
|
|
|
|
4320
|
|
|
|
|
|
|
# Process 100GB file with 1KB memory |
|
4321
|
|
|
|
|
|
|
my $error_count = 0; |
|
4322
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV("100gb.log") |
|
4323
|
|
|
|
|
|
|
->Where(sub { $_[0]{level} eq 'ERROR' }) |
|
4324
|
|
|
|
|
|
|
->ForEach(sub { $error_count++ }); |
|
4325
|
|
|
|
|
|
|
|
|
4326
|
|
|
|
|
|
|
print "Errors: $error_count\n"; |
|
4327
|
|
|
|
|
|
|
|
|
4328
|
|
|
|
|
|
|
=back |
|
4329
|
|
|
|
|
|
|
|
|
4330
|
|
|
|
|
|
|
=head2 DSL Questions |
|
4331
|
|
|
|
|
|
|
|
|
4332
|
|
|
|
|
|
|
=over 4 |
|
4333
|
|
|
|
|
|
|
|
|
4334
|
|
|
|
|
|
|
=item B |
|
4335
|
|
|
|
|
|
|
|
|
4336
|
|
|
|
|
|
|
A: No. DSL uses string equality (C<eq>). Use code reference for numeric: |
|
4337
|
|
|
|
|
|
|
|
|
4338
|
|
|
|
|
|
|
# DSL - string comparison |
|
4339
|
|
|
|
|
|
|
->Where(status => '200') # $_[0]{status} eq '200' |
|
4340
|
|
|
|
|
|
|
|
|
4341
|
|
|
|
|
|
|
# Code ref - numeric comparison |
|
4342
|
|
|
|
|
|
|
->Where(sub { $_[0]{status} == 200 }) |
|
4343
|
|
|
|
|
|
|
->Where(sub { $_[0]{bytes} > 1000 }) |
|
4344
|
|
|
|
|
|
|
|
|
4345
|
|
|
|
|
|
|
=item B |
|
4346
|
|
|
|
|
|
|
|
|
4347
|
|
|
|
|
|
|
A: DSL doesn't support it. Use code reference: |
|
4348
|
|
|
|
|
|
|
|
|
4349
|
|
|
|
|
|
|
# Case-insensitive requires code reference |
|
4350
|
|
|
|
|
|
|
->Where(sub { lc($_[0]{method}) eq 'get' }) |
|
4351
|
|
|
|
|
|
|
|
|
4352
|
|
|
|
|
|
|
=item B |
|
4353
|
|
|
|
|
|
|
|
|
4354
|
|
|
|
|
|
|
A: No. Use code reference: |
|
4355
|
|
|
|
|
|
|
|
|
4356
|
|
|
|
|
|
|
# Regex requires code reference |
|
4357
|
|
|
|
|
|
|
->Where(sub { $_[0]{url} =~ m{^/api/} }) |
|
4358
|
|
|
|
|
|
|
|
|
4359
|
|
|
|
|
|
|
=back |
|
4360
|
|
|
|
|
|
|
|
|
4361
|
|
|
|
|
|
|
=head2 Compatibility Questions |
|
4362
|
|
|
|
|
|
|
|
|
4363
|
|
|
|
|
|
|
=over 4 |
|
4364
|
|
|
|
|
|
|
|
|
4365
|
|
|
|
|
|
|
=item B |
|
4366
|
|
|
|
|
|
|
|
|
4367
|
|
|
|
|
|
|
A: Yes. Tested on Perl 5.005_03 through 5.40+. |
|
4368
|
|
|
|
|
|
|
|
|
4369
|
|
|
|
|
|
|
=item B |
|
4370
|
|
|
|
|
|
|
|
|
4371
|
|
|
|
|
|
|
A: No. Pure Perl with no dependencies beyond core. |
|
4372
|
|
|
|
|
|
|
|
|
4373
|
|
|
|
|
|
|
=item B |
|
4374
|
|
|
|
|
|
|
|
|
4375
|
|
|
|
|
|
|
A: Yes. Pure Perl works on all platforms. |
|
4376
|
|
|
|
|
|
|
|
|
4377
|
|
|
|
|
|
|
=item B |
|
4378
|
|
|
|
|
|
|
|
|
4379
|
|
|
|
|
|
|
A: Many production systems cannot upgrade. This module provides |
|
4380
|
|
|
|
|
|
|
modern query capabilities without requiring upgrades. |
|
4381
|
|
|
|
|
|
|
|
|
4382
|
|
|
|
|
|
|
=back |
|
4383
|
|
|
|
|
|
|
|
|
4384
|
|
|
|
|
|
|
=head1 COOKBOOK |
|
4385
|
|
|
|
|
|
|
|
|
4386
|
|
|
|
|
|
|
=head2 Common Patterns |
|
4387
|
|
|
|
|
|
|
|
|
4388
|
|
|
|
|
|
|
=over 4 |
|
4389
|
|
|
|
|
|
|
|
|
4390
|
|
|
|
|
|
|
=item B |
|
4391
|
|
|
|
|
|
|
|
|
4392
|
|
|
|
|
|
|
->OrderByDescending(sub { $_[0]{score} }) |
|
4393
|
|
|
|
|
|
|
->Take(10) |
|
4394
|
|
|
|
|
|
|
->ToArray() |
|
4395
|
|
|
|
|
|
|
|
|
4396
|
|
|
|
|
|
|
=item B |
|
4397
|
|
|
|
|
|
|
|
|
4398
|
|
|
|
|
|
|
->GroupBy(sub { $_[0]{category} }) |
|
4399
|
|
|
|
|
|
|
->Select(sub { |
|
4400
|
|
|
|
|
|
|
{ |
|
4401
|
|
|
|
|
|
|
Category => $_[0]{Key}, |
|
4402
|
|
|
|
|
|
|
Count => scalar(@{$_[0]{Elements}}) |
|
4403
|
|
|
|
|
|
|
} |
|
4404
|
|
|
|
|
|
|
}) |
|
4405
|
|
|
|
|
|
|
->ToArray() |
|
4406
|
|
|
|
|
|
|
|
|
4407
|
|
|
|
|
|
|
=item B |
|
4408
|
|
|
|
|
|
|
|
|
4409
|
|
|
|
|
|
|
my $total = 0; |
|
4410
|
|
|
|
|
|
|
->Select(sub { |
|
4411
|
|
|
|
|
|
|
$total += $_[0]{amount}; |
|
4412
|
|
|
|
|
|
|
{ %{$_[0]}, running_total => $total } |
|
4413
|
|
|
|
|
|
|
}) |
|
4414
|
|
|
|
|
|
|
|
|
4415
|
|
|
|
|
|
|
=item B |
|
4416
|
|
|
|
|
|
|
|
|
4417
|
|
|
|
|
|
|
# Page 3, size 20 |
|
4418
|
|
|
|
|
|
|
->Skip(40)->Take(20)->ToArray() |
|
4419
|
|
|
|
|
|
|
|
|
4420
|
|
|
|
|
|
|
=item B |
|
4421
|
|
|
|
|
|
|
|
|
4422
|
|
|
|
|
|
|
->Select(sub { $_[0]{category} }) |
|
4423
|
|
|
|
|
|
|
->Distinct() |
|
4424
|
|
|
|
|
|
|
->ToArray() |
|
4425
|
|
|
|
|
|
|
|
|
4426
|
|
|
|
|
|
|
=item B |
|
4427
|
|
|
|
|
|
|
|
|
4428
|
|
|
|
|
|
|
Note: A query object can only be consumed once. To compute multiple |
|
4429
|
|
|
|
|
|
|
aggregations over the same source, materialise it first with C<ToArray>. |
|
4430
|
|
|
|
|
|
|
|
|
4431
|
|
|
|
|
|
|
my @all = LTSV::LINQ->FromLTSV("access.log")->ToArray(); |
|
4432
|
|
|
|
|
|
|
|
|
4433
|
|
|
|
|
|
|
my $success_avg = LTSV::LINQ->From(\@all) |
|
4434
|
|
|
|
|
|
|
->Where(status => '200') |
|
4435
|
|
|
|
|
|
|
->Average(sub { $_[0]{response_time} }); |
|
4436
|
|
|
|
|
|
|
|
|
4437
|
|
|
|
|
|
|
my $error_avg = LTSV::LINQ->From(\@all) |
|
4438
|
|
|
|
|
|
|
->Where(sub { $_[0]{status} >= 400 }) |
|
4439
|
|
|
|
|
|
|
->Average(sub { $_[0]{response_time} }); |
|
4440
|
|
|
|
|
|
|
|
|
4441
|
|
|
|
|
|
|
=item B |
|
4442
|
|
|
|
|
|
|
|
|
4443
|
|
|
|
|
|
|
A query object wraps a single-pass iterator. Once consumed, it is |
|
4444
|
|
|
|
|
|
|
exhausted and subsequent terminal operations return empty results or die. |
|
4445
|
|
|
|
|
|
|
|
|
4446
|
|
|
|
|
|
|
# WRONG - $q is exhausted after the first Count() |
|
4447
|
|
|
|
|
|
|
my $q = LTSV::LINQ->FromLTSV("access.log")->Where(status => '200'); |
|
4448
|
|
|
|
|
|
|
my $n = $q->Count(); # OK |
|
4449
|
|
|
|
|
|
|
my $first = $q->First(); # WRONG: iterator already at EOF |
|
4450
|
|
|
|
|
|
|
|
|
4451
|
|
|
|
|
|
|
# RIGHT - snapshot into array, then query as many times as needed |
|
4452
|
|
|
|
|
|
|
my @rows = LTSV::LINQ->FromLTSV("access.log")->Where(status => '200')->ToArray(); |
|
4453
|
|
|
|
|
|
|
my $n = LTSV::LINQ->From(\@rows)->Count(); |
|
4454
|
|
|
|
|
|
|
my $first = LTSV::LINQ->From(\@rows)->First(); |
|
4455
|
|
|
|
|
|
|
|
|
4456
|
|
|
|
|
|
|
The snapshot approach is also the correct pattern for any multi-pass |
|
4457
|
|
|
|
|
|
|
computation such as computing both average and standard deviation, |
|
4458
|
|
|
|
|
|
|
comparing the same sequence against two different filters, or iterating |
|
4459
|
|
|
|
|
|
|
once to validate and once to transform. |
|
4460
|
|
|
|
|
|
|
|
|
4461
|
|
|
|
|
|
|
=item B |
|
4462
|
|
|
|
|
|
|
|
|
4463
|
|
|
|
|
|
|
For files too large to fit in memory, keep the chain fully lazy by |
|
4464
|
|
|
|
|
|
|
ensuring only one terminal operation is performed per pass: |
|
4465
|
|
|
|
|
|
|
|
|
4466
|
|
|
|
|
|
|
# One pass - pick only what you need |
|
4467
|
|
|
|
|
|
|
my @slow = LTSV::LINQ->FromLTSV("access.log") |
|
4468
|
|
|
|
|
|
|
->Where(sub { $_[0]{response_time} > 1000 }) |
|
4469
|
|
|
|
|
|
|
->OrderByNum(sub { $_[0]{response_time} }) |
|
4470
|
|
|
|
|
|
|
->Take(20) |
|
4471
|
|
|
|
|
|
|
->ToArray(); |
|
4472
|
|
|
|
|
|
|
|
|
4473
|
|
|
|
|
|
|
# Never do two passes on the same FromLTSV object - |
|
4474
|
|
|
|
|
|
|
# open the file again for a second pass: |
|
4475
|
|
|
|
|
|
|
my $count = LTSV::LINQ->FromLTSV("access.log")->Count(); |
|
4476
|
|
|
|
|
|
|
my $sum = LTSV::LINQ->FromLTSV("access.log") |
|
4477
|
|
|
|
|
|
|
->Sum(sub { $_[0]{bytes} }); |
|
4478
|
|
|
|
|
|
|
|
|
4479
|
|
|
|
|
|
|
=back |
|
4480
|
|
|
|
|
|
|
|
|
4481
|
|
|
|
|
|
|
=head1 DESIGN PHILOSOPHY |
|
4482
|
|
|
|
|
|
|
|
|
4483
|
|
|
|
|
|
|
=head2 Historical Compatibility: Perl 5.005_03 |
|
4484
|
|
|
|
|
|
|
|
|
4485
|
|
|
|
|
|
|
This module maintains compatibility with Perl 5.005_03 (released 1999-03-28), |
|
4486
|
|
|
|
|
|
|
following the B<Universal Consensus 1998>. |
|
4487
|
|
|
|
|
|
|
|
|
4488
|
|
|
|
|
|
|
B |
|
4489
|
|
|
|
|
|
|
|
|
4490
|
|
|
|
|
|
|
=over 4 |
|
4491
|
|
|
|
|
|
|
|
|
4492
|
|
|
|
|
|
|
=item * B |
|
4493
|
|
|
|
|
|
|
|
|
4494
|
|
|
|
|
|
|
Code written in 1998-era Perl should still run in 2026 and beyond. |
|
4495
|
|
|
|
|
|
|
This demonstrates Perl's commitment to backwards compatibility. |
|
4496
|
|
|
|
|
|
|
|
|
4497
|
|
|
|
|
|
|
=item * B |
|
4498
|
|
|
|
|
|
|
|
|
4499
|
|
|
|
|
|
|
Some production systems, embedded devices, and enterprise environments |
|
4500
|
|
|
|
|
|
|
cannot easily upgrade Perl. Maintaining compatibility ensures this module |
|
4501
|
|
|
|
|
|
|
remains useful in those contexts. |
|
4502
|
|
|
|
|
|
|
|
|
4503
|
|
|
|
|
|
|
=item * B |
|
4504
|
|
|
|
|
|
|
|
|
4505
|
|
|
|
|
|
|
By avoiding modern Perl features, this module has zero non-core dependencies. |
|
4506
|
|
|
|
|
|
|
It works with only the Perl core that has existed since 1999. |
|
4507
|
|
|
|
|
|
|
|
|
4508
|
|
|
|
|
|
|
=back |
|
4509
|
|
|
|
|
|
|
|
|
4510
|
|
|
|
|
|
|
B |
|
4511
|
|
|
|
|
|
|
|
|
4512
|
|
|
|
|
|
|
=over 4 |
|
4513
|
|
|
|
|
|
|
|
|
4514
|
|
|
|
|
|
|
=item * No C<our> keyword - uses package variables |
|
4515
|
|
|
|
|
|
|
|
|
4516
|
|
|
|
|
|
|
=item * No C<warnings> pragma - uses C<local $^W=1> |
|
4517
|
|
|
|
|
|
|
|
|
4518
|
|
|
|
|
|
|
=item * No C |
|
4519
|
|
|
|
|
|
|
|
|
4520
|
|
|
|
|
|
|
=item * All features implemented with Perl 5.005-era constructs |
|
4521
|
|
|
|
|
|
|
|
|
4522
|
|
|
|
|
|
|
=back |
|
4523
|
|
|
|
|
|
|
|
|
4524
|
|
|
|
|
|
|
The code comment C<# use 5.008001; # Lancaster Consensus 2013 for toolchains> |
|
4525
|
|
|
|
|
|
|
marks where modern code would typically start. We intentionally stay below |
|
4526
|
|
|
|
|
|
|
this line. |
|
4527
|
|
|
|
|
|
|
|
|
4528
|
|
|
|
|
|
|
=head2 US-ASCII Only Policy |
|
4529
|
|
|
|
|
|
|
|
|
4530
|
|
|
|
|
|
|
All source code is strictly US-ASCII (bytes 0x00-0x7F). No UTF-8, no |
|
4531
|
|
|
|
|
|
|
extended characters. |
|
4532
|
|
|
|
|
|
|
|
|
4533
|
|
|
|
|
|
|
B |
|
4534
|
|
|
|
|
|
|
|
|
4535
|
|
|
|
|
|
|
=over 4 |
|
4536
|
|
|
|
|
|
|
|
|
4537
|
|
|
|
|
|
|
=item * B |
|
4538
|
|
|
|
|
|
|
|
|
4539
|
|
|
|
|
|
|
US-ASCII works everywhere - ancient terminals, modern IDEs, web browsers, |
|
4540
|
|
|
|
|
|
|
email systems. No encoding issues, ever. |
|
4541
|
|
|
|
|
|
|
|
|
4542
|
|
|
|
|
|
|
=item * B |
|
4543
|
|
|
|
|
|
|
|
|
4544
|
|
|
|
|
|
|
The code behaves identically regardless of system locale settings. |
|
4545
|
|
|
|
|
|
|
|
|
4546
|
|
|
|
|
|
|
=item * B |
|
4547
|
|
|
|
|
|
|
|
|
4548
|
|
|
|
|
|
|
Source code (ASCII) vs. data (any encoding). The module processes LTSV |
|
4549
|
|
|
|
|
|
|
data in any encoding, but its own code remains pure ASCII. |
|
4550
|
|
|
|
|
|
|
|
|
4551
|
|
|
|
|
|
|
=back |
|
4552
|
|
|
|
|
|
|
|
|
4553
|
|
|
|
|
|
|
This policy is verified by C. |
|
4554
|
|
|
|
|
|
|
|
|
4555
|
|
|
|
|
|
|
=head2 The C<$VERSION = $VERSION> Idiom |
|
4556
|
|
|
|
|
|
|
|
|
4557
|
|
|
|
|
|
|
You may notice: |
|
4558
|
|
|
|
|
|
|
|
|
4559
|
|
|
|
|
|
|
$VERSION = '1.05'; |
|
4560
|
|
|
|
|
|
|
$VERSION = $VERSION; |
|
4561
|
|
|
|
|
|
|
|
|
4562
|
|
|
|
|
|
|
This is B<intentional>, not a typo. Under C<use strict> with warnings enabled, a package variable used |
|
4563
|
|
|
|
|
|
|
only once triggers a warning. The self-assignment ensures C<$VERSION> |
|
4564
|
|
|
|
|
|
|
appears twice, silencing the warning without requiring C<our> (which |
|
4565
|
|
|
|
|
|
|
doesn't exist in Perl 5.005). |
|
4566
|
|
|
|
|
|
|
|
|
4567
|
|
|
|
|
|
|
This is a well-known idiom from the pre-C<our> era. |
|
4568
|
|
|
|
|
|
|
|
|
4569
|
|
|
|
|
|
|
=head2 Design Principles |
|
4570
|
|
|
|
|
|
|
|
|
4571
|
|
|
|
|
|
|
=over 4 |
|
4572
|
|
|
|
|
|
|
|
|
4573
|
|
|
|
|
|
|
=item * B |
|
4574
|
|
|
|
|
|
|
|
|
4575
|
|
|
|
|
|
|
Operations return query objects, not arrays. Data is processed on-demand |
|
4576
|
|
|
|
|
|
|
when terminal operations (C, C, etc.) are called. |
|
4577
|
|
|
|
|
|
|
|
|
4578
|
|
|
|
|
|
|
=item * B |
|
4579
|
|
|
|
|
|
|
|
|
4580
|
|
|
|
|
|
|
All query operations return new query objects, enabling fluent syntax: |
|
4581
|
|
|
|
|
|
|
|
|
4582
|
|
|
|
|
|
|
$query->Where(...)->Select(...)->OrderBy(...)->ToArray() |
|
4583
|
|
|
|
|
|
|
|
|
4584
|
|
|
|
|
|
|
=item * B |
|
4585
|
|
|
|
|
|
|
|
|
4586
|
|
|
|
|
|
|
Query operations never modify the source data. They create new lazy |
|
4587
|
|
|
|
|
|
|
iterators. |
|
4588
|
|
|
|
|
|
|
|
|
4589
|
|
|
|
|
|
|
=item * B |
|
4590
|
|
|
|
|
|
|
|
|
4591
|
|
|
|
|
|
|
We follow LINQ's method names and semantics, but use Perl idioms for |
|
4592
|
|
|
|
|
|
|
implementation (closures for iterators, hash refs for records). |
|
4593
|
|
|
|
|
|
|
|
|
4594
|
|
|
|
|
|
|
=item * B |
|
4595
|
|
|
|
|
|
|
|
|
4596
|
|
|
|
|
|
|
This module has zero non-core dependencies. It works with only the Perl |
|
4597
|
|
|
|
|
|
|
core that has existed since 1999. Even C<warnings> is optional (stubbed |
|
4598
|
|
|
|
|
|
|
for Perl E<lt> 5.6). This ensures installation succeeds on minimal Perl |
|
4599
|
|
|
|
|
|
|
installations, avoids dependency chain vulnerabilities, and provides |
|
4600
|
|
|
|
|
|
|
permanence - the code will work decades into the future. |
|
4601
|
|
|
|
|
|
|
|
|
4602
|
|
|
|
|
|
|
=back |
|
4603
|
|
|
|
|
|
|
|
|
4604
|
|
|
|
|
|
|
=head1 LIMITATIONS AND KNOWN ISSUES |
|
4605
|
|
|
|
|
|
|
|
|
4606
|
|
|
|
|
|
|
=head2 Current Limitations |
|
4607
|
|
|
|
|
|
|
|
|
4608
|
|
|
|
|
|
|
=over 4 |
|
4609
|
|
|
|
|
|
|
|
|
4610
|
|
|
|
|
|
|
=item * B |
|
4611
|
|
|
|
|
|
|
|
|
4612
|
|
|
|
|
|
|
Query objects can only be consumed once. The iterator is exhausted |
|
4613
|
|
|
|
|
|
|
after terminal operations. |
|
4614
|
|
|
|
|
|
|
|
|
4615
|
|
|
|
|
|
|
Workaround: Create new query object or save ToArray() result. |
|
4616
|
|
|
|
|
|
|
|
|
4617
|
|
|
|
|
|
|
=item * B |
|
4618
|
|
|
|
|
|
|
|
|
4619
|
|
|
|
|
|
|
Due to iterator-based design, undef cannot be distinguished from end-of-sequence. |
|
4620
|
|
|
|
|
|
|
Sequences containing undef values may not work correctly with all operations. |
|
4621
|
|
|
|
|
|
|
|
|
4622
|
|
|
|
|
|
|
This is not a practical limitation for LTSV data (which uses hash references), |
|
4623
|
|
|
|
|
|
|
but affects operations on plain arrays containing undef. |
|
4624
|
|
|
|
|
|
|
|
|
4625
|
|
|
|
|
|
|
# Works fine (LTSV data - hash references) |
|
4626
|
|
|
|
|
|
|
LTSV::LINQ->FromLTSV("file.ltsv")->Contains({status => '200'}) |
|
4627
|
|
|
|
|
|
|
|
|
4628
|
|
|
|
|
|
|
# Limitation (plain array with undef) |
|
4629
|
|
|
|
|
|
|
LTSV::LINQ->From([1, undef, 3])->Contains(undef) # May not work |
|
4630
|
|
|
|
|
|
|
|
|
4631
|
|
|
|
|
|
|
=item * B |
|
4632
|
|
|
|
|
|
|
|
|
4633
|
|
|
|
|
|
|
All operations execute sequentially in a single thread. |
|
4634
|
|
|
|
|
|
|
|
|
4635
|
|
|
|
|
|
|
=item * B |
|
4636
|
|
|
|
|
|
|
|
|
4637
|
|
|
|
|
|
|
All filtering requires full scan. No index optimization. |
|
4638
|
|
|
|
|
|
|
|
|
4639
|
|
|
|
|
|
|
=item * B |
|
4640
|
|
|
|
|
|
|
|
|
4641
|
|
|
|
|
|
|
Distinct with custom comparer uses stringified keys. May not work |
|
4642
|
|
|
|
|
|
|
correctly for complex objects. |
|
4643
|
|
|
|
|
|
|
|
|
4644
|
|
|
|
|
|
|
=item * B |
|
4645
|
|
|
|
|
|
|
|
|
4646
|
|
|
|
|
|
|
Because the iterator protocol uses C<undef> to signal end-of-sequence, |
|
4647
|
|
|
|
|
|
|
C<DefaultIfEmpty(undef)> cannot reliably deliver its C<undef> default |
|
4648
|
|
|
|
|
|
|
to downstream operations. |
|
4649
|
|
|
|
|
|
|
|
|
4650
|
|
|
|
|
|
|
# Works correctly (non-undef default) |
|
4651
|
|
|
|
|
|
|
LTSV::LINQ->From([])->DefaultIfEmpty(0)->ToArray() # (0) |
|
4652
|
|
|
|
|
|
|
LTSV::LINQ->From([])->DefaultIfEmpty({})->ToArray() # ({}) |
|
4653
|
|
|
|
|
|
|
|
|
4654
|
|
|
|
|
|
|
# Does NOT work (undef default is indistinguishable from EOF) |
|
4655
|
|
|
|
|
|
|
LTSV::LINQ->From([])->DefaultIfEmpty(undef)->ToArray() # () - empty! |
|
4656
|
|
|
|
|
|
|
|
|
4657
|
|
|
|
|
|
|
Workaround: Use a sentinel value such as C<0>, C<''>, or C<{}> instead |
|
4658
|
|
|
|
|
|
|
of C<undef>, and treat it as "no element" after the fact. |
|
4659
|
|
|
|
|
|
|
|
|
4660
|
|
|
|
|
|
|
=back |
|
4661
|
|
|
|
|
|
|
|
|
4662
|
|
|
|
|
|
|
=head2 Not Implemented |
|
4663
|
|
|
|
|
|
|
|
|
4664
|
|
|
|
|
|
|
The following LINQ methods from the .NET standard library are intentionally |
|
4665
|
|
|
|
|
|
|
not implemented in LTSV::LINQ. This section explains the design rationale |
|
4666
|
|
|
|
|
|
|
for each omission. |
|
4667
|
|
|
|
|
|
|
|
|
4668
|
|
|
|
|
|
|
=head3 Parallel LINQ (PLINQ) Methods |
|
4669
|
|
|
|
|
|
|
|
|
4670
|
|
|
|
|
|
|
The following methods belong to B, the .NET |
|
4671
|
|
|
|
|
|
|
parallel-execution extension to LINQ introduced in .NET 4.0. They exist |
|
4672
|
|
|
|
|
|
|
to distribute query execution across multiple CPU cores using the .NET |
|
4673
|
|
|
|
|
|
|
Thread Pool and Task Parallel Library. |
|
4674
|
|
|
|
|
|
|
|
|
4675
|
|
|
|
|
|
|
Perl does not have native shared-memory multithreading that maps onto |
|
4676
|
|
|
|
|
|
|
this execution model. Perl threads (C) copy the interpreter |
|
4677
|
|
|
|
|
|
|
state and communicate through shared variables, making them unsuitable |
|
4678
|
|
|
|
|
|
|
for the fine-grained, automatic work-stealing parallelism that PLINQ |
|
4679
|
|
|
|
|
|
|
provides. LTSV::LINQ's iterator-based design assumes a single sequential |
|
4680
|
|
|
|
|
|
|
execution context; introducing PLINQ semantics would require a completely |
|
4681
|
|
|
|
|
|
|
different architecture and would add heavy dependencies. |
|
4682
|
|
|
|
|
|
|
|
|
4683
|
|
|
|
|
|
|
Furthermore, the primary use case for LTSV::LINQ -- parsing and querying |
|
4684
|
|
|
|
|
|
|
LTSV log files -- is typically I/O-bound rather than CPU-bound. |
|
4685
|
|
|
|
|
|
|
Parallelizing I/O over a single file provides little benefit and |
|
4686
|
|
|
|
|
|
|
considerable complexity. |
|
4687
|
|
|
|
|
|
|
|
|
4688
|
|
|
|
|
|
|
For these reasons, the entire PLINQ surface is omitted: |
|
4689
|
|
|
|
|
|
|
|
|
4690
|
|
|
|
|
|
|
=over 4 |
|
4691
|
|
|
|
|
|
|
|
|
4692
|
|
|
|
|
|
|
=item * B<AsParallel> |
|
4693
|
|
|
|
|
|
|
|
|
4694
|
|
|
|
|
|
|
Entry point for PLINQ. Converts an C<< IEnumerable<T> >> into a |
|
4695
|
|
|
|
|
|
|
C<< ParallelQuery<T> >> that the .NET runtime executes in parallel using |
|
4696
|
|
|
|
|
|
|
multiple threads. Not applicable: Perl lacks the runtime infrastructure. |
|
4697
|
|
|
|
|
|
|
|
|
4698
|
|
|
|
|
|
|
=item * B<AsSequential> |
|
4699
|
|
|
|
|
|
|
|
|
4700
|
|
|
|
|
|
|
Converts a C<< ParallelQuery<T> >> back to a sequential C<< IEnumerable<T> >>, |
|
4701
|
|
|
|
|
|
|
forcing subsequent operators to run on a single thread. Since |
|
4702
|
|
|
|
|
|
|
C<AsParallel> is not implemented, C<AsSequential> has no counterpart |
|
4703
|
|
|
|
|
|
|
to convert from. |
|
4704
|
|
|
|
|
|
|
|
|
4705
|
|
|
|
|
|
|
=item * B<AsOrdered> |
|
4706
|
|
|
|
|
|
|
|
|
4707
|
|
|
|
|
|
|
Instructs PLINQ to preserve the source order in the output even during |
|
4708
|
|
|
|
|
|
|
parallel execution. This is a hint to the PLINQ scheduler; it does not |
|
4709
|
|
|
|
|
|
|
exist outside of PLINQ. Not applicable. |
|
4710
|
|
|
|
|
|
|
|
|
4711
|
|
|
|
|
|
|
=item * B<AsUnordered> |
|
4712
|
|
|
|
|
|
|
|
|
4713
|
|
|
|
|
|
|
Instructs PLINQ that output order does not need to match source order, |
|
4714
|
|
|
|
|
|
|
potentially allowing more efficient parallel execution. Not applicable. |
|
4715
|
|
|
|
|
|
|
|
|
4716
|
|
|
|
|
|
|
=item * B<ForAll> |
|
4717
|
|
|
|
|
|
|
|
|
4718
|
|
|
|
|
|
|
PLINQ terminal operator that applies an action to each element in |
|
4719
|
|
|
|
|
|
|
parallel, without collecting results. It is the parallel equivalent of |
|
4720
|
|
|
|
|
|
|
C<foreach>. LTSV::LINQ provides C<ForEach> for sequential iteration. |
|
4721
|
|
|
|
|
|
|
A parallel C<ForAll> is not applicable. |
|
4722
|
|
|
|
|
|
|
|
|
4723
|
|
|
|
|
|
|
=item * B<WithCancellation> |
|
4724
|
|
|
|
|
|
|
|
|
4725
|
|
|
|
|
|
|
Attaches a .NET C<CancellationToken> to a C<< ParallelQuery<T> >>, allowing |
|
4726
|
|
|
|
|
|
|
cooperative cancellation of a running parallel query. Cancellation tokens |
|
4727
|
|
|
|
|
|
|
are a .NET threading primitive. Not applicable. |
|
4728
|
|
|
|
|
|
|
|
|
4729
|
|
|
|
|
|
|
=item * B<WithDegreeOfParallelism> |
|
4730
|
|
|
|
|
|
|
|
|
4731
|
|
|
|
|
|
|
Sets the maximum number of concurrent tasks that PLINQ may use. A |
|
4732
|
|
|
|
|
|
|
tuning knob for the PLINQ scheduler. Not applicable. |
|
4733
|
|
|
|
|
|
|
|
|
4734
|
|
|
|
|
|
|
=item * B<WithExecutionMode> |
|
4735
|
|
|
|
|
|
|
|
|
4736
|
|
|
|
|
|
|
Controls whether PLINQ may choose sequential execution for efficiency |
|
4737
|
|
|
|
|
|
|
(C<Default>) or is forced to parallelize (C<ForceParallelism>). Not |
|
4738
|
|
|
|
|
|
|
applicable. |
|
4739
|
|
|
|
|
|
|
|
|
4740
|
|
|
|
|
|
|
=item * B<WithMergeOptions> |
|
4741
|
|
|
|
|
|
|
|
|
4742
|
|
|
|
|
|
|
Controls how PLINQ merges results from parallel partitions back into the |
|
4743
|
|
|
|
|
|
|
output stream (buffered, auto-buffered, or not-buffered). Not applicable. |
|
4744
|
|
|
|
|
|
|
|
|
4745
|
|
|
|
|
|
|
=back |
|
4746
|
|
|
|
|
|
|
|
|
4747
|
|
|
|
|
|
|
=head3 .NET Type System Methods |
|
4748
|
|
|
|
|
|
|
|
|
4749
|
|
|
|
|
|
|
The following methods are specific to .NET's static type system. They |
|
4750
|
|
|
|
|
|
|
exist to work with .NET generics and interface hierarchies, which have |
|
4751
|
|
|
|
|
|
|
no Perl equivalent. |
|
4752
|
|
|
|
|
|
|
|
|
4753
|
|
|
|
|
|
|
=over 4 |
|
4754
|
|
|
|
|
|
|
|
|
4755
|
|
|
|
|
|
|
=item * B<Cast> |
|
4756
|
|
|
|
|
|
|
|
|
4757
|
|
|
|
|
|
|
Casts each element of a non-generic C<IEnumerable> to a specified type |
|
4758
|
|
|
|
|
|
|
C<T>, returning C<< IEnumerable<T> >>. In .NET, C<< Cast<T> >> is needed when |
|
4759
|
|
|
|
|
|
|
working with legacy APIs that return C<IEnumerable> (without a type |
|
4760
|
|
|
|
|
|
|
parameter) and you need to treat the elements as a specific type. |
|
4761
|
|
|
|
|
|
|
|
|
4762
|
|
|
|
|
|
|
Perl is dynamically typed. Every Perl value already holds type |
|
4763
|
|
|
|
|
|
|
information at runtime (scalar, reference, blessed object), and Perl |
|
4764
|
|
|
|
|
|
|
does not have a concept of a "non-generic enumerable" that needs to be |
|
4765
|
|
|
|
|
|
|
explicitly cast before it can be queried. There is no meaningful |
|
4766
|
|
|
|
|
|
|
operation to implement. |
|
4767
|
|
|
|
|
|
|
|
|
4768
|
|
|
|
|
|
|
=item * B<OfType> |
|
4769
|
|
|
|
|
|
|
|
|
4770
|
|
|
|
|
|
|
Filters elements of a non-generic C<IEnumerable>, returning only those |
|
4771
|
|
|
|
|
|
|
that can be successfully cast to a specified type C<T>. Like C<Cast>, |
|
4772
|
|
|
|
|
|
|
it exists to bridge generic and non-generic .NET APIs. |
|
4773
|
|
|
|
|
|
|
|
|
4774
|
|
|
|
|
|
|
In LTSV::LINQ, all records from C<FromLTSV> are hash references. |
|
4775
|
|
|
|
|
|
|
Records from C<From> are whatever the caller puts in the array. |
|
4776
|
|
|
|
|
|
|
Perl's C<ref>, C<isa>, or a C<Where> predicate can |
|
4777
|
|
|
|
|
|
|
perform any type-based filtering the caller needs. A dedicated |
|
4778
|
|
|
|
|
|
|
C<OfType> adds no expressiveness. |
|
4779
|
|
|
|
|
|
|
|
|
4780
|
|
|
|
|
|
|
# Perl equivalent of OfType for blessed objects of class "Foo": |
|
4781
|
|
|
|
|
|
|
$query->Where(sub { ref($_[0]) && $_[0]->isa('Foo') }) |
|
4782
|
|
|
|
|
|
|
|
|
4783
|
|
|
|
|
|
|
=back |
|
4784
|
|
|
|
|
|
|
|
|
4785
|
|
|
|
|
|
|
=head3 64-bit and Large-Count Methods |
|
4786
|
|
|
|
|
|
|
|
|
4787
|
|
|
|
|
|
|
=over 4 |
|
4788
|
|
|
|
|
|
|
|
|
4789
|
|
|
|
|
|
|
=item * B<LongCount> |
|
4790
|
|
|
|
|
|
|
|
|
4791
|
|
|
|
|
|
|
Returns the number of elements as a 64-bit integer (C<long> in .NET). |
|
4792
|
|
|
|
|
|
|
On 32-bit .NET platforms, a sequence can theoretically contain more than |
|
4793
|
|
|
|
|
|
|
C<2**31 - 1> (~2 billion) elements, which would overflow C<int>; hence |
|
4794
|
|
|
|
|
|
|
the need for C<LongCount>. |
|
4795
|
|
|
|
|
|
|
|
|
4796
|
|
|
|
|
|
|
In Perl, integers are represented as native signed integers or floating- |
|
4797
|
|
|
|
|
|
|
point doubles (C<NV>). On 64-bit Perl (which is universal in practice |
|
4798
|
|
|
|
|
|
|
today), the native integer type is 64 bits, so C<Count> already handles |
|
4799
|
|
|
|
|
|
|
any realistic sequence length. On 32-bit Perl, the floating-point C<NV> |
|
4800
|
|
|
|
|
|
|
provides 53 bits of integer precision (~9 quadrillion), far exceeding |
|
4801
|
|
|
|
|
|
|
any in-memory sequence. There is no semantic gap between C<Count> and |
|
4802
|
|
|
|
|
|
|
C<LongCount> in Perl. |
|
4803
|
|
|
|
|
|
|
|
|
4804
|
|
|
|
|
|
|
=back |
|
4805
|
|
|
|
|
|
|
|
|
4806
|
|
|
|
|
|
|
=head3 IEnumerable Conversion Method |
|
4807
|
|
|
|
|
|
|
|
|
4808
|
|
|
|
|
|
|
=over 4 |
|
4809
|
|
|
|
|
|
|
|
|
4810
|
|
|
|
|
|
|
=item * B<AsEnumerable> |
|
4811
|
|
|
|
|
|
|
|
|
4812
|
|
|
|
|
|
|
In .NET, C<< AsEnumerable<T> >> is used to force evaluation of a query as |
|
4813
|
|
|
|
|
|
|
C<< IEnumerable<T> >> rather than, for example, C<< IQueryable<T> >> (which |
|
4814
|
|
|
|
|
|
|
might be translated to SQL). It is a type-cast at the interface level, |
|
4815
|
|
|
|
|
|
|
not a data transformation. |
|
4816
|
|
|
|
|
|
|
|
|
4817
|
|
|
|
|
|
|
LTSV::LINQ has only one query type: C<LTSV::LINQ>. There is no |
|
4818
|
|
|
|
|
|
|
C<IQueryable> counterpart that would benefit from being downgraded to |
|
4819
|
|
|
|
|
|
|
C<IEnumerable>. The method has no meaningful semantics to implement. |
|
4820
|
|
|
|
|
|
|
|
|
4821
|
|
|
|
|
|
|
=back |
|
4822
|
|
|
|
|
|
|
|
|
4823
|
|
|
|
|
|
|
=head1 BUGS |
|
4824
|
|
|
|
|
|
|
|
|
4825
|
|
|
|
|
|
|
Please report any bugs or feature requests to: |
|
4826
|
|
|
|
|
|
|
|
|
4827
|
|
|
|
|
|
|
=over 4 |
|
4828
|
|
|
|
|
|
|
|
|
4829
|
|
|
|
|
|
|
=item * Email: C<ina@cpan.org> |
|
4830
|
|
|
|
|
|
|
|
|
4831
|
|
|
|
|
|
|
=back |
|
4832
|
|
|
|
|
|
|
|
|
4833
|
|
|
|
|
|
|
=head1 SUPPORT |
|
4834
|
|
|
|
|
|
|
|
|
4835
|
|
|
|
|
|
|
=head2 Documentation |
|
4836
|
|
|
|
|
|
|
|
|
4837
|
|
|
|
|
|
|
Full documentation is available via: |
|
4838
|
|
|
|
|
|
|
|
|
4839
|
|
|
|
|
|
|
perldoc LTSV::LINQ |
|
4840
|
|
|
|
|
|
|
|
|
4841
|
|
|
|
|
|
|
=head2 CPAN |
|
4842
|
|
|
|
|
|
|
|
|
4843
|
|
|
|
|
|
|
https://metacpan.org/pod/LTSV::LINQ |
|
4844
|
|
|
|
|
|
|
|
|
4845
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
4846
|
|
|
|
|
|
|
|
|
4847
|
|
|
|
|
|
|
=over 4 |
|
4848
|
|
|
|
|
|
|
|
|
4849
|
|
|
|
|
|
|
=item * LTSV specification |
|
4850
|
|
|
|
|
|
|
|
|
4851
|
|
|
|
|
|
|
http://ltsv.org/ |
|
4852
|
|
|
|
|
|
|
|
|
4853
|
|
|
|
|
|
|
=item * Microsoft LINQ documentation |
|
4854
|
|
|
|
|
|
|
|
|
4855
|
|
|
|
|
|
|
https://learn.microsoft.com/en-us/dotnet/csharp/linq/ |
|
4856
|
|
|
|
|
|
|
|
|
4857
|
|
|
|
|
|
|
=back |
|
4858
|
|
|
|
|
|
|
|
|
4859
|
|
|
|
|
|
|
=head1 AUTHOR |
|
4860
|
|
|
|
|
|
|
|
|
4861
|
|
|
|
|
|
|
INABA Hitoshi E<lt>ina@cpan.orgE<gt> |
|
4862
|
|
|
|
|
|
|
|
|
4863
|
|
|
|
|
|
|
=head2 Contributors |
|
4864
|
|
|
|
|
|
|
|
|
4865
|
|
|
|
|
|
|
Contributions are welcome! See file: CONTRIBUTING. |
|
4866
|
|
|
|
|
|
|
|
|
4867
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
|
4868
|
|
|
|
|
|
|
|
|
4869
|
|
|
|
|
|
|
=head2 LINQ Technology |
|
4870
|
|
|
|
|
|
|
|
|
4871
|
|
|
|
|
|
|
This module is inspired by LINQ (Language Integrated Query), which was |
|
4872
|
|
|
|
|
|
|
developed by Microsoft Corporation for the .NET Framework. |
|
4873
|
|
|
|
|
|
|
|
|
4874
|
|
|
|
|
|
|
LINQ(R) is a registered trademark of Microsoft Corporation. |
|
4875
|
|
|
|
|
|
|
|
|
4876
|
|
|
|
|
|
|
We are grateful to Microsoft for pioneering the LINQ technology and |
|
4877
|
|
|
|
|
|
|
making it a widely recognized programming pattern. The elegance and |
|
4878
|
|
|
|
|
|
|
power of LINQ has influenced query interfaces across many programming |
|
4879
|
|
|
|
|
|
|
languages, and this module brings that same capability to LTSV data |
|
4880
|
|
|
|
|
|
|
processing in Perl. |
|
4881
|
|
|
|
|
|
|
|
|
4882
|
|
|
|
|
|
|
This module is not affiliated with, endorsed by, or sponsored by |
|
4883
|
|
|
|
|
|
|
Microsoft Corporation. |
|
4884
|
|
|
|
|
|
|
|
|
4885
|
|
|
|
|
|
|
=head2 References |
|
4886
|
|
|
|
|
|
|
|
|
4887
|
|
|
|
|
|
|
This module was inspired by: |
|
4888
|
|
|
|
|
|
|
|
|
4889
|
|
|
|
|
|
|
=over 4 |
|
4890
|
|
|
|
|
|
|
|
|
4891
|
|
|
|
|
|
|
=item * Microsoft LINQ (Language Integrated Query) |
|
4892
|
|
|
|
|
|
|
|
|
4893
|
|
|
|
|
|
|
L<https://learn.microsoft.com/en-us/dotnet/csharp/linq/> |
|
4894
|
|
|
|
|
|
|
|
|
4895
|
|
|
|
|
|
|
=item * LTSV specification |
|
4896
|
|
|
|
|
|
|
|
|
4897
|
|
|
|
|
|
|
L<http://ltsv.org/> |
|
4898
|
|
|
|
|
|
|
|
|
4899
|
|
|
|
|
|
|
=back |
|
4900
|
|
|
|
|
|
|
|
|
4901
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
4902
|
|
|
|
|
|
|
|
|
4903
|
|
|
|
|
|
|
Copyright (c) 2026 INABA Hitoshi |
|
4904
|
|
|
|
|
|
|
|
|
4905
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
|
4906
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
|
4907
|
|
|
|
|
|
|
|
|
4908
|
|
|
|
|
|
|
=head2 License Details |
|
4909
|
|
|
|
|
|
|
|
|
4910
|
|
|
|
|
|
|
This module is released under the same license as Perl itself: |
|
4911
|
|
|
|
|
|
|
|
|
4912
|
|
|
|
|
|
|
=over 4 |
|
4913
|
|
|
|
|
|
|
|
|
4914
|
|
|
|
|
|
|
=item * Artistic License 1.0 |
|
4915
|
|
|
|
|
|
|
|
|
4916
|
|
|
|
|
|
|
L<http://dev.perl.org/licenses/artistic.html> |
|
4917
|
|
|
|
|
|
|
|
|
4918
|
|
|
|
|
|
|
=item * GNU General Public License version 1 or later |
|
4919
|
|
|
|
|
|
|
|
|
4920
|
|
|
|
|
|
|
L<http://www.gnu.org/licenses/gpl-1.0.html> |
|
4921
|
|
|
|
|
|
|
|
|
4922
|
|
|
|
|
|
|
=back |
|
4923
|
|
|
|
|
|
|
|
|
4924
|
|
|
|
|
|
|
You may choose either license. |
|
4925
|
|
|
|
|
|
|
|
|
4926
|
|
|
|
|
|
|
=head1 DISCLAIMER OF WARRANTY |
|
4927
|
|
|
|
|
|
|
|
|
4928
|
|
|
|
|
|
|
BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY |
|
4929
|
|
|
|
|
|
|
FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT |
|
4930
|
|
|
|
|
|
|
WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER |
|
4931
|
|
|
|
|
|
|
PARTIES PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, |
|
4932
|
|
|
|
|
|
|
EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
4933
|
|
|
|
|
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
|
4934
|
|
|
|
|
|
|
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS |
|
4935
|
|
|
|
|
|
|
WITH YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF |
|
4936
|
|
|
|
|
|
|
ALL NECESSARY SERVICING, REPAIR, OR CORRECTION. |
|
4937
|
|
|
|
|
|
|
|
|
4938
|
|
|
|
|
|
|
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING |
|
4939
|
|
|
|
|
|
|
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR |
|
4940
|
|
|
|
|
|
|
REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENSE, BE LIABLE |
|
4941
|
|
|
|
|
|
|
TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR |
|
4942
|
|
|
|
|
|
|
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE |
|
4943
|
|
|
|
|
|
|
SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING |
|
4944
|
|
|
|
|
|
|
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A |
|
4945
|
|
|
|
|
|
|
FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF |
|
4946
|
|
|
|
|
|
|
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF |
|
4947
|
|
|
|
|
|
|
SUCH DAMAGES. |
|
4948
|
|
|
|
|
|
|
|
|
4949
|
|
|
|
|
|
|
=cut |