File Coverage

lib/LTSV/LINQ.pm
Criterion Covered Total %
statement 651 663 98.1
branch 228 266 85.7
condition 47 75 62.6
subroutine 130 131 99.2
pod 56 56 100.0
total 1112 1191 93.3


line stmt bran cond sub pod time code
1             package LTSV::LINQ;
2             ######################################################################
3             #
4             # LTSV::LINQ - LINQ-style query interface for LTSV files
5             #
6             # https://metacpan.org/dist/LTSV-LINQ
7             #
8             # Copyright (c) 2026 INABA Hitoshi
9             ######################################################################
10             #
11             # Compatible : Perl 5.005_03 and later
12             # Platform : Windows and UNIX/Linux
13             #
14             ######################################################################
15              
16 14     14   328192 use 5.00503; # Universal Consensus 1998 for primetools
  14         55  
17             # Perl 5.005_03 compatibility for historical toolchains
18             # use 5.008001; # Lancaster Consensus 2013 for toolchains
19              
20 14     14   75 use strict;
  14         102  
  14         1248  
21 14 50 33 14   416 BEGIN { if ($] < 5.006 && !defined(&warnings::import)) { $INC{'warnings.pm'} = 'stub'; eval 'package warnings; sub import {}' } }
  0         0  
  0         0  
22 14     14   90 use warnings; local $^W = 1;
  14         33  
  14         1173  
23 14 50   14   485 BEGIN { pop @INC if $INC[-1] eq '.' }
24              
25 14     14   137 use vars qw($VERSION $_fh_seq);
  14         25  
  14         2396  
26             $VERSION = '1.09';
27             $VERSION = $VERSION;
28             # $VERSION self-assignment suppresses "used only once" warning under strict.
29             $_fh_seq = 0;
30             $_fh_seq = $_fh_seq;
31             # $_fh_seq self-assignment suppresses "used only once" warning under strict.
32              
33             ###############################################################################
34             # Internal file-handle helper
35             ###############################################################################
36              
37             # _open_fh - open a file for reading ('<') or writing ('>') and return
38             # the glob name string. Works on Perl 5.005_03 and all later versions.
39             #
40             # Always uses a unique numbered package glob (LTSV::LINQ::FH::H) so
41             # that concurrent iterators each get their own IO slot.
42             #
43             # $raw: if true, binmode is called (raw bytes; for LTSV).
44             # Pass 0 for CSV interop where OS-level \r\n->\n conversion is desired.
45             sub _open_fh {
46 27     27   50 my($mode, $file, $raw) = @_;
47 27         37 $_fh_seq++;
48 27         27 my $seq = $_fh_seq;
49 27         58 my $fhn = "LTSV::LINQ::FH::H${seq}";
50 27 100       60 my $arg = ($mode eq '>') ? ">$file" : "< $file";
51 14 50   14   98 { no strict 'refs'; open($fhn, $arg) or die "Cannot open '$file': $!" }
  14         45  
  14         1372  
  27         28  
  27         1347  
52 14 50   14   97 if ($raw) { no strict 'refs'; binmode(*{$fhn}) }
  14         30  
  14         5853  
  27         82  
  27         31  
  27         100  
53 27         59 return $fhn;
54             }
55              
56             ###############################################################################
57             # Constructor and Iterator Infrastructure
58             ###############################################################################
59              
60             sub new {
61 507     507 1 799 my($class, $iterator) = @_;
62 507         3505 return bless { iterator => $iterator }, $class;
63             }
64              
65             sub iterator {
66 651     651 1 772 my $self = $_[0];
67             # If this object was created by _from_snapshot, _factory provides
68             # a fresh iterator closure each time iterator() is called.
69 651 100       1302 if (exists $self->{_factory}) {
70 147         208 return $self->{_factory}->();
71             }
72 504         804 return $self->{iterator};
73             }
74              
75             ###############################################################################
76             # Data Source Methods
77             ###############################################################################
78              
79             # From - create query from array
80             sub From {
81 320     320 1 1517421 my($class, $source) = @_;
82              
83 320 50       740 if (ref($source) eq 'ARRAY') {
84 320         443 my $i = 0;
85             return $class->new(sub {
86 1200 100   1200   2302 return undef if $i >= scalar(@$source);
87 908         1638 return $source->[$i++];
88 320         1238 });
89             }
90              
91 0         0 die "From() requires ARRAY reference";
92             }
93              
94             # FromLTSV - read from LTSV file
95             sub FromLTSV {
96 17     17 1 300004 my($class, $file) = @_;
97              
98 17         36 my $fhn = _open_fh('<', $file, 1);
99              
100             return $class->new(sub {
101 59     59   72 while (1) {
102 14     14   97 no strict 'refs';
  14         70  
  14         961  
103 60         64 my $line = readline(*{$fhn});
  60         461  
104 60 100       141 unless (defined $line) {
105 14     14   87 no strict 'refs'; close($fhn);
  14         69  
  14         94916  
  17         180  
106 17         43 return undef;
107             }
108 43         91 chomp $line;
109 43         74 $line =~ s/\r\z//; # Remove CR for CRLF files on any platform
110 43 100       68 next unless length $line;
111              
112             my %record = map {
113 42 50       95 /\A(.+?):(.*)\z/ ? ($1, $2) : ()
  107         547  
114             } split /\t/, $line;
115              
116 42 50       255 return { %record } if %record;
117             }
118 17         111 });
119             }
120              
121             # Range - generate sequence of integers
122             sub Range {
123 4     4 1 167482 my($class, $start, $count) = @_;
124              
125 4         7 my $current = $start;
126 4         15 my $remaining = $count;
127              
128             return $class->new(sub {
129 37 100   37   62 return undef if $remaining <= 0;
130 33         35 $remaining--;
131 33         48 return $current++;
132 4         26 });
133             }
134              
135             # Empty - return empty sequence
136             sub Empty {
137 3     3 1 228935 my($class) = @_;
138              
139             return $class->new(sub {
140 4     4   12 return undef;
141 3         20 });
142             }
143              
144             # Repeat - repeat element specified number of times
145             sub Repeat {
146 4     4 1 77 my($class, $element, $count) = @_;
147              
148 4         8 my $remaining = $count;
149              
150             return $class->new(sub {
151 22 100   22   50 return undef if $remaining <= 0;
152 18         24 $remaining--;
153 18         35 return $element;
154 4         19 });
155             }
156              
157             ###############################################################################
158             # Filtering Methods
159             ###############################################################################
160              
161             # Where - filter elements
162             sub Where {
163 27     27 1 82 my($self, @args) = @_;
164 27         68 my $iter = $self->iterator;
165 27         58 my $class = ref($self);
166              
167             # Support both code reference and DSL form
168 27         36 my $cond;
169 27 100 66     138 if (@args == 1 && ref($args[0]) eq 'CODE') {
170 17         32 $cond = $args[0];
171             }
172             else {
173             # DSL form: Where(key => value, ...)
174 10         20 my %match = @args;
175             $cond = sub {
176 37     37   33 my $row = shift;
177 37         53 for my $k (keys %match) {
178 39 50       60 return 0 unless defined $row->{$k};
179 39 100       86 return 0 unless $row->{$k} eq $match{$k};
180             }
181 18         46 return 1;
182 10         25 };
183             }
184              
185             return $class->new(sub {
186 91     91   108 while (1) {
187 145         264 my $item = $iter->();
188 145 100       265 return undef unless defined $item;
189 119 100       176 return $item if $cond->($item);
190             }
191 27         1134 });
192             }
193              
194             ###############################################################################
195             # Projection Methods
196             ###############################################################################
197              
198             # Select - transform elements
199             sub Select {
200 29     29 1 85 my($self, $selector) = @_;
201 29         93 my $iter = $self->iterator;
202 29         67 my $class = ref($self);
203              
204             return $class->new(sub {
205 151     151   233 my $item = $iter->();
206 151 100       325 return undef unless defined $item;
207 123         246 return $selector->($item);
208 29         163 });
209             }
210              
211             # SelectMany - flatten sequences
212             sub SelectMany {
213 5     5 1 10 my($self, $selector) = @_;
214 5         21 my $iter = $self->iterator;
215 5         11 my $class = ref($self);
216              
217 5         9 my @buffer;
218              
219             return $class->new(sub {
220 12     12   17 while (1) {
221 21 100       43 if (@buffer) {
222 7         16 return shift @buffer;
223             }
224              
225 14         21 my $item = $iter->();
226 14 100       33 return undef unless defined $item;
227              
228 11         26 my $result = $selector->($item);
229 11 100       47 unless (ref($result) eq 'ARRAY') {
230 2         29 die "SelectMany: selector must return an ARRAY reference";
231             }
232 9         19 @buffer = @$result;
233             }
234 5         20 });
235             }
236              
237             # Concat - concatenate two sequences
238             sub Concat {
239 11     11 1 41 my($self, $second) = @_;
240 11         20 my $class = ref($self);
241              
242 11         22 my $first_iter = $self->iterator;
243 11         16 my $second_iter;
244 11         16 my $first_done = 0;
245              
246             return $class->new(sub {
247 49 100   49   91 if (!$first_done) {
248 31         42 my $item = $first_iter->();
249 31 100       61 if (defined $item) {
250 20         37 return $item;
251             }
252 11         14 $first_done = 1;
253 11         24 $second_iter = $second->iterator;
254             }
255              
256 29 50       58 return $second_iter ? $second_iter->() : undef;
257 11         53 });
258             }
259              
260             # Zip - combine two sequences element-wise
261             sub Zip {
262 5     5 1 13 my($self, $second, $result_selector) = @_;
263              
264 5         13 my $iter1 = $self->iterator;
265 5         14 my $iter2 = $second->iterator;
266 5         12 my $class = ref($self);
267              
268             return $class->new(sub {
269 17     17   34 my $item1 = $iter1->();
270 17         30 my $item2 = $iter2->();
271              
272             # Return undef if either sequence ends
273 17 100 100     71 return undef unless defined($item1) && defined($item2);
274              
275 12         29 return $result_selector->($item1, $item2);
276 5         24 });
277             }
278              
279             ###############################################################################
280             # Partitioning Methods
281             ###############################################################################
282              
283             # Take - take first N elements
284             sub Take {
285 7     7 1 28 my($self, $count) = @_;
286 7         15 my $iter = $self->iterator;
287 7         13 my $class = ref($self);
288 7         16 my $taken = 0;
289              
290             return $class->new(sub {
291 26 100   26   1105 return undef if $taken >= $count;
292 19         30 my $item = $iter->();
293 19 50       47 return undef unless defined $item;
294 19         21 $taken++;
295 19         40 return $item;
296 7         33 });
297             }
298              
299             # Skip - skip first N elements
300             sub Skip {
301 3     3 1 8 my($self, $count) = @_;
302 3         12 my $iter = $self->iterator;
303 3         7 my $class = ref($self);
304 3         6 my $skipped = 0;
305              
306             return $class->new(sub {
307 9     9   17 while ($skipped < $count) {
308 6         9 my $item = $iter->();
309 6 50       13 return undef unless defined $item;
310 6         13 $skipped++;
311             }
312 9         12 return $iter->();
313 3         20 });
314             }
315              
316             # TakeWhile - take while condition is true
317             sub TakeWhile {
318 2     2 1 4 my($self, $predicate) = @_;
319 2         4 my $iter = $self->iterator;
320 2         10 my $class = ref($self);
321 2         2 my $done = 0;
322              
323             return $class->new(sub {
324 7 50   7   9 return undef if $done;
325 7         10 my $item = $iter->();
326 7 50       11 return undef unless defined $item;
327              
328 7 100       17 if ($predicate->($item)) {
329 5         15 return $item;
330             }
331             else {
332 2         761 $done = 1;
333 2         10 return undef;
334             }
335 2         35 });
336             }
337              
338             # SkipWhile - skip elements while predicate is true
339             sub SkipWhile {
340 4     4 1 5 my($self, $predicate) = @_;
341 4         5 my $iter = $self->iterator;
342 4         4 my $class = ref($self);
343 4         4 my $skipping = 1;
344              
345             return $class->new(sub {
346 12     12   12 while (1) {
347 19         28 my $item = $iter->();
348 19 100       22 return undef unless defined $item;
349              
350 16 100       17 if ($skipping) {
351 10 100       11 if (!$predicate->($item)) {
352 3         6 $skipping = 0;
353 3         4 return $item;
354             }
355             }
356             else {
357 6         7 return $item;
358             }
359             }
360 4         10 });
361             }
362              
363             ###############################################################################
364             # Ordering Methods
365             ###############################################################################
366              
367             # OrderBy - sort ascending (smart: numeric when both keys look numeric)
368             sub OrderBy {
369 21     21 1 45 my($self, $key_selector) = @_;
370 21         47 my @items = $self->ToArray();
371 21         153 return LTSV::LINQ::Ordered->_new_ordered(
372             [ @items ],
373             [{ sel => $key_selector, dir => 1, type => 'smart' }]
374             );
375             }
376              
377             # OrderByDescending - sort descending (smart comparison)
378             sub OrderByDescending {
379 4     4 1 8 my($self, $key_selector) = @_;
380 4         13 my @items = $self->ToArray();
381 4         69 return LTSV::LINQ::Ordered->_new_ordered(
382             [ @items ],
383             [{ sel => $key_selector, dir => -1, type => 'smart' }]
384             );
385             }
386              
387             # OrderByStr - sort ascending by string comparison
388             sub OrderByStr {
389 23     23 1 39 my($self, $key_selector) = @_;
390 23         51 my @items = $self->ToArray();
391 23         126 return LTSV::LINQ::Ordered->_new_ordered(
392             [ @items ],
393             [{ sel => $key_selector, dir => 1, type => 'str' }]
394             );
395             }
396              
397             # OrderByStrDescending - sort descending by string comparison
398             sub OrderByStrDescending {
399 5     5 1 7 my($self, $key_selector) = @_;
400 5         7 my @items = $self->ToArray();
401 5         16 return LTSV::LINQ::Ordered->_new_ordered(
402             [ @items ],
403             [{ sel => $key_selector, dir => -1, type => 'str' }]
404             );
405             }
406              
407             # OrderByNum - sort ascending by numeric comparison
408             sub OrderByNum {
409 15     15 1 28 my($self, $key_selector) = @_;
410 15         45 my @items = $self->ToArray();
411 15         69 return LTSV::LINQ::Ordered->_new_ordered(
412             [ @items ],
413             [{ sel => $key_selector, dir => 1, type => 'num' }]
414             );
415             }
416              
417             # OrderByNumDescending - sort descending by numeric comparison
418             sub OrderByNumDescending {
419 5     5 1 754 my($self, $key_selector) = @_;
420 5         12 my @items = $self->ToArray();
421 5         22 return LTSV::LINQ::Ordered->_new_ordered(
422             [ @items ],
423             [{ sel => $key_selector, dir => -1, type => 'num' }]
424             );
425             }
426              
427             # Reverse - reverse order
428             sub Reverse {
429 1     1 1 2 my($self) = @_;
430 1         2 my @items = reverse $self->ToArray();
431 1         2 my $class = ref($self);
432 1         3 return $class->From([ @items ]);
433             }
434              
435             ###############################################################################
436             # Grouping Methods
437             ###############################################################################
438              
439             # GroupBy - group elements by key
440             sub GroupBy {
441 5     5 1 11 my($self, $key_selector, $element_selector) = @_;
442 5   33 21   25 $element_selector ||= sub { $_[0] };
  21         38  
443              
444 5         18 my %groups;
445             my @key_order;
446              
447             $self->ForEach(sub {
448 21     21   31 my $item = shift;
449 21         30 my $key = $key_selector->($item);
450 21 50       73 $key = '' unless defined $key;
451 21 100       47 unless (exists $groups{$key}) {
452 13         18 push @key_order, $key;
453             }
454 21         22 push @{$groups{$key}}, $element_selector->($item);
  21         30  
455 5         31 });
456              
457 5         16 my @result;
458 5         9 for my $key (@key_order) {
459             push @result, {
460             Key => $key,
461 13         28 Elements => $groups{$key},
462             };
463             }
464              
465 5         8 my $class = ref($self);
466 5         12 return $class->From([ @result ]);
467             }
468              
469             ###############################################################################
470             # Set Operations
471             ###############################################################################
472              
473             # Distinct - remove duplicates
474             sub Distinct {
475 12     12 1 22 my($self, $key_selector) = @_;
476 12         20 my $iter = $self->iterator;
477 12         47 my $class = ref($self);
478 12         38 my %seen;
479              
480             return $class->new(sub {
481 47     47   81 while (1) {
482 62         83 my $item = $iter->();
483 62 100       105 return undef unless defined $item;
484              
485 50 100       93 my $key = $key_selector ? $key_selector->($item) : _make_key($item);
486 50 50       87 $key = '' unless defined $key;
487              
488 50 100       109 unless ($seen{$key}++) {
489 35         69 return $item;
490             }
491             }
492 12         44 });
493             }
494              
495             # Internal helper for set operations - make key from item
496             sub _make_key {
497 99     99   158 my($item) = @_;
498              
499 99 50       223 return '' unless defined $item;
500              
501 99 100       201 if (ref($item) eq 'HASH') {
    100          
502             # Hash to stable key
503 9         14 my @pairs = ();
504 9         25 for my $k (sort keys %$item) {
505 14 50       28 my $v = defined($item->{$k}) ? $item->{$k} : '';
506 14         26 push @pairs, "$k\x1F$v"; # \x1F = Unit Separator
507             }
508 9         27 return join("\x1E", @pairs); # \x1E = Record Separator
509             }
510             elsif (ref($item) eq 'ARRAY') {
511             # Array to key
512 3 50       5 return join("\x1E", map { defined($_) ? $_ : '' } @$item);
  6         12  
513             }
514             else {
515             # Scalar
516 87         152 return $item;
517             }
518             }
519              
520             # _from_snapshot - internal helper for GroupJoin.
521             # Returns a LTSV::LINQ object backed by a plain array that can be iterated
522             # multiple times within a single result_selector call.
523             # Each LINQ terminal method (Count, Sum, ToArray, etc.) calls iterator()
524             # to get a fresh iterator. We achieve re-iterability by overriding the
525             # iterator() method so it always creates a new closure over the same array.
526             sub _from_snapshot {
527 71     71   76 my($class_or_self, $aref) = @_;
528              
529 71   33     131 my $class = ref($class_or_self) || $class_or_self;
530              
531             # Build a sentinel sub that, when called, returns a brand-new
532             # index-based iterator every time.
533             my $iter_factory = sub {
534 145     145   155 my $i = 0;
535             return sub {
536 139 100       212 return undef if $i >= scalar(@$aref);
537 74         111 return $aref->[$i++];
538 145         334 };
539 71         115 };
540              
541             # The object stores the factory in place of a plain iterator.
542             # The iterator() accessor returns the result of calling the factory,
543             # so every consumer gets its own fresh iterator starting at index 0.
544 71         78 my $obj = bless {
545             iterator => $iter_factory->(),
546             _factory => $iter_factory,
547             }, $class;
548              
549 71         97 return $obj;
550             }
551              
552             # Union - set union with distinct
553             sub Union {
554 5     5 1 13 my($self, $second, $key_selector) = @_;
555              
556 5         16 return $self->Concat($second)->Distinct($key_selector);
557             }
558              
559             # Intersect - set intersection
560             sub Intersect {
561 5     5 1 12 my($self, $second, $key_selector) = @_;
562              
563             # Build hash of second sequence
564 5         10 my %second_set = ();
565             $second->ForEach(sub {
566 13     13   23 my $item = shift;
567 13 50       31 my $key = $key_selector ? $key_selector->($item) : _make_key($item);
568 13         38 $second_set{$key} = $item;
569 5         23 });
570              
571 5         26 my $class = ref($self);
572 5         13 my $iter = $self->iterator;
573 5         10 my %seen = ();
574              
575             return $class->new(sub {
576 11     11   21 while (defined(my $item = $iter->())) {
577 14 50       35 my $key = $key_selector ? $key_selector->($item) : _make_key($item);
578              
579 14 100       53 next if $seen{$key}++; # Skip duplicates
580 12 100       38 return $item if exists $second_set{$key};
581             }
582 5         11 return undef;
583 5         26 });
584             }
585              
586             # Except - set difference
587             sub Except {
588 5     5 1 11 my($self, $second, $key_selector) = @_;
589              
590             # Build hash of second sequence
591 5         10 my %second_set = ();
592             $second->ForEach(sub {
593 11     11   17 my $item = shift;
594 11 50       31 my $key = $key_selector ? $key_selector->($item) : _make_key($item);
595 11         32 $second_set{$key} = 1;
596 5         23 });
597              
598 5         23 my $class = ref($self);
599 5         11 my $iter = $self->iterator;
600 5         11 my %seen = ();
601              
602             return $class->new(sub {
603 13     13   26 while (defined(my $item = $iter->())) {
604 17 50       38 my $key = $key_selector ? $key_selector->($item) : _make_key($item);
605              
606 17 100       51 next if $seen{$key}++; # Skip duplicates
607 15 100       43 return $item unless exists $second_set{$key};
608             }
609 5         12 return undef;
610 5         24 });
611             }
612              
613             # Join - correlates elements of two sequences
614             sub Join {
615 8     8 1 21 my($self, $inner, $outer_key_selector, $inner_key_selector, $result_selector) = @_;
616              
617             # Build hash table from inner sequence
618 8         18 my %inner_hash = ();
619             $inner->ForEach(sub {
620 16     16   64 my $item = shift;
621 16         50 my $key = $inner_key_selector->($item);
622 16 50       90 $key = _make_key($key) if ref($key);
623 16         18 push @{$inner_hash{$key}}, $item;
  16         68  
624 8         39 });
625              
626             # Process outer sequence with lazy evaluation
627 8         46 my $class = ref($self);
628 8         22 my $iter = $self->iterator;
629 8         15 my @buffer = ();
630              
631             return $class->new(sub {
632 22     22   25 while (1) {
633             # Return from buffer if available
634 38 100       145 return shift @buffer if @buffer;
635              
636             # Get next outer element
637 24         38 my $outer_item = $iter->();
638 24 100       56 return undef unless defined $outer_item;
639              
640             # Find matching inner elements
641 16         32 my $key = $outer_key_selector->($outer_item);
642 16 50       54 $key = _make_key($key) if ref($key);
643              
644 16 100       37 if (exists $inner_hash{$key}) {
645 12         1194 for my $inner_item (@{$inner_hash{$key}}) {
  12         33  
646 14         57 push @buffer, $result_selector->($outer_item, $inner_item);
647             }
648             }
649             # If no match, continue to next outer element
650             }
651 8         41 });
652             }
653              
654             # GroupJoin - group join (LEFT OUTER JOIN-like operation)
655             sub GroupJoin {
656 30     30 1 41 my($self, $inner, $outer_key_selector, $inner_key_selector, $result_selector) = @_;
657 30         34 my $class = ref($self);
658 30         36 my $outer_iter = $self->iterator;
659              
660             # 1. Build lookup table from inner sequence.
661             # Group all inner items by their keys for efficient lookup.
662             # The inner sequence is fully materialized into memory here.
663 30         32 my %inner_lookup = ();
664             $inner->ForEach(sub {
665 81     81   76 my $item = shift;
666 81         92 my $key = $inner_key_selector->($item);
667 81 50       187 $key = _make_key($key) if ref($key);
668 81 100       96 $key = '' unless defined $key;
669 81         63 push @{$inner_lookup{$key}}, $item;
  81         133  
670 30         75 });
671              
672             # 2. Return lazy iterator over outer sequence
673             return $class->new(sub {
674 100     100   117 my $outer_item = $outer_iter->();
675 100 100       147 return undef unless defined $outer_item;
676              
677             # Get key from outer item
678 71         86 my $key = $outer_key_selector->($outer_item);
679 71 50       162 $key = _make_key($key) if ref($key);
680 71 100       80 $key = '' unless defined $key;
681              
682             # Get matching inner items (empty array ref if no matches)
683 71 100       109 my $matched_inners = exists $inner_lookup{$key} ? $inner_lookup{$key} : [];
684              
685             # Snapshot the matched items into a plain array.
686             # We create a LTSV::LINQ object whose iterator sub always reads
687             # from a fresh index variable, so the group can be traversed
688             # multiple times inside result_selector (e.g. Count() then Sum()).
689 71         84 my @snapshot = @$matched_inners;
690 71         107 my $inner_group = $class->_from_snapshot([ @snapshot ]);
691              
692 71         114 return $result_selector->($outer_item, $inner_group);
693 30         119 });
694             }
695              
696             ###############################################################################
697             # Quantifier Methods
698             ###############################################################################
699              
700             # All - test if all elements satisfy condition
701             sub All {
702 4     4 1 10 my($self, $predicate) = @_;
703 4         14 my $iter = $self->iterator;
704              
705 4         8 while (defined(my $item = $iter->())) {
706 8 100       16 return 0 unless $predicate->($item);
707             }
708 3         7 return 1;
709             }
710              
711             # Any - test if any element satisfies condition
712             sub Any {
713 19     19 1 43 my($self, $predicate) = @_;
714 19         24 my $iter = $self->iterator;
715              
716 19 100       40 if ($predicate) {
717 6         11 while (defined(my $item = $iter->())) {
718 9 100       13 return 1 if $predicate->($item);
719             }
720 2         10 return 0;
721             }
722             else {
723 13         16 my $item = $iter->();
724 13 100       32 return defined($item) ? 1 : 0;
725             }
726             }
727              
728             # Contains - check if sequence contains element
729             sub Contains {
730 5     5 1 9 my($self, $value, $comparer) = @_;
731              
732 5 100       7 if ($comparer) {
733 1     1   4 return $self->Any(sub { $comparer->($_[0], $value) });
  1         3  
734             }
735             else {
736             return $self->Any(sub {
737 6     6   7 my $item = $_[0];
738 6   33     50 return (!defined($item) && !defined($value)) ||
739             (defined($item) && defined($value) && $item eq $value);
740 4         12 });
741             }
742             }
743              
744             # SequenceEqual - compare two sequences for equality
745             sub SequenceEqual {
746 5     5 1 13 my($self, $second, $comparer) = @_;
747             $comparer ||= sub {
748 8     8   17 my($a, $b) = @_;
749 8   33     108 return (!defined($a) && !defined($b)) ||
750             (defined($a) && defined($b) && $a eq $b);
751 5   66     29 };
752              
753 5         13 my $iter1 = $self->iterator;
754 5         12 my $iter2 = $second->iterator;
755              
756 5         9 while (1) {
757 14         35 my $item1 = $iter1->();
758 14         25 my $item2 = $iter2->();
759              
760             # Both ended - equal
761 14 100 100     52 return 1 if !defined($item1) && !defined($item2);
762              
763             # One ended - not equal
764 11 100 66     42 return 0 if !defined($item1) || !defined($item2);
765              
766             # Compare items
767 10 100       20 return 0 unless $comparer->($item1, $item2);
768             }
769             }
770              
771             ###############################################################################
772             # Element Access Methods
773             ###############################################################################
774              
# First - return the first element, or the first one accepted by $predicate
#
#   $predicate - optional code ref called as $predicate->($item)
#
# Dies with "Sequence contains no elements" when no predicate is given and
# the iterator yields nothing, and with "No element satisfies the condition"
# when a predicate is given but never returns true.
sub First {
    my($self, $predicate) = @_;
    my $next = $self->iterator;

    unless ($predicate) {
        my $head = $next->();
        die "Sequence contains no elements" unless defined $head;
        return $head;
    }

    while (defined(my $candidate = $next->())) {
        return $candidate if $predicate->($candidate);
    }
    die "No element satisfies the condition";
}
792              
# FirstOrDefault - return the first (matching) element, or a default
#
# Call forms:
#   FirstOrDefault()                      - first element, or undef
#   FirstOrDefault($code_ref)             - first element accepted by the
#                                           predicate, or undef
#   FirstOrDefault($non_code)             - first element, or $non_code
#   FirstOrDefault($predicate, $default)  - first accepted element, or $default
#
# The sequence is walked directly instead of wrapping First() in eval.
# That way only "nothing found" produces the default: an exception thrown
# by the predicate or by the underlying iterator propagates to the caller
# instead of being silently turned into the default, and the caller's $@
# is left alone.
sub FirstOrDefault {
    my $self = shift;
    my($predicate, $default);

    if (@_ >= 2) {
        # Two arguments: ($predicate, $default)
        ($predicate, $default) = @_;
    }
    elsif (@_ == 1) {
        # One argument: distinguish CODE (predicate) vs non-CODE (default)
        if (ref($_[0]) eq 'CODE') {
            $predicate = $_[0];
        }
        else {
            $default = $_[0];
        }
    }

    my $iter = $self->iterator;
    while (defined(my $item = $iter->())) {
        return $item if !$predicate || $predicate->($item);
    }

    # Sequence was empty, or no element satisfied the predicate.
    return $default;
}
815              
# Last - return the last element, or the last one accepted by $predicate
#
#   $predicate - optional code ref called as $predicate->($item)
#
# The whole sequence is materialised with ToArray() first; with a predicate
# the items are then tested from the end towards the start.  Dies with
# "Sequence contains no elements" (no predicate, nothing to return) or
# "No element satisfies the condition" (predicate never true).
sub Last {
    my($self, $predicate) = @_;
    my @items = $self->ToArray();

    unless ($predicate) {
        die "Sequence contains no elements" unless @items;
        return $items[-1];
    }

    foreach my $i (reverse 0 .. $#items) {
        return $items[$i] if $predicate->($items[$i]);
    }
    die "No element satisfies the condition";
}
832              
# LastOrDefault - return the last (matching) element, or a default
#
# Call forms:
#   LastOrDefault()                      - last element, or undef
#   LastOrDefault($code_ref)             - last element accepted by the
#                                          predicate, or undef
#   LastOrDefault($non_code)             - last element, or $non_code
#   LastOrDefault($predicate, $default)  - last accepted element, or $default
#
# The sequence is materialised with ToArray() and scanned from the end.
sub LastOrDefault {
    my($self, @args) = @_;
    my($predicate, $default);

    if (@args >= 2) {
        ($predicate, $default) = @args;
    }
    elsif (@args == 1) {
        # A lone code ref is a predicate; anything else is the default.
        if (ref($args[0]) eq 'CODE') {
            $predicate = $args[0];
        }
        else {
            $default = $args[0];
        }
    }

    my @items = $self->ToArray();

    unless ($predicate) {
        return @items ? $items[-1] : $default;
    }

    foreach my $i (reverse 0 .. $#items) {
        return $items[$i] if $predicate->($items[$i]);
    }
    return $default;
}
864              
# Single - return the one and only (matching) element
#
#   $predicate - optional code ref; when given, only items for which it
#                returns true are considered
#
# Dies with "Sequence contains more than one element" as soon as a second
# candidate is seen, and with "Sequence contains no elements" when there
# was no candidate at all.
sub Single {
    my($self, $predicate) = @_;
    my $next = $self->iterator;
    my @matches;

    while (defined(my $item = $next->())) {
        if ($predicate) {
            next unless $predicate->($item);
        }

        push @matches, $item;
        die "Sequence contains more than one element" if @matches > 1;
    }

    die "Sequence contains no elements" unless @matches;
    return $matches[0];
}
885              
# SingleOrDefault - return the only (matching) element, or undef
#
#   $predicate - optional code ref; when given, only items for which it
#                returns true are considered
#
# Returns undef both when there is no candidate and when there is more than
# one; iteration stops as soon as a second candidate is seen.
sub SingleOrDefault {
    my($self, $predicate) = @_;
    my $next = $self->iterator;
    my @matches;

    while (defined(my $item = $next->())) {
        next if $predicate && !$predicate->($item);

        push @matches, $item;
        return undef if @matches > 1;    # More than one element
    }

    return @matches == 1 ? $matches[0] : undef;
}
905              
# ElementAt - return the element at a zero-based position
#
#   $index - position to fetch; compared numerically
#
# Dies with "Index must be non-negative" for a negative index and with
# "Index out of range" when the sequence ends before that position.
sub ElementAt {
    my($self, $index) = @_;
    die "Index must be non-negative" if $index < 0;

    my $next     = $self->iterator;
    my $position = -1;

    while (defined(my $item = $next->())) {
        $position++;
        next unless $position == $index;
        return $item;
    }

    die "Index out of range";
}
921              
# ElementAtOrDefault - return the element at a zero-based position, or undef
#
#   $index - position to fetch; compared numerically
#
# Returns undef for a negative index and when the sequence ends before the
# requested position.
sub ElementAtOrDefault {
    my($self, $index) = @_;
    return undef if $index < 0;

    my $next     = $self->iterator;
    my $position = -1;

    while (defined(my $item = $next->())) {
        $position++;
        next unless $position == $index;
        return $item;
    }

    return undef;
}
937              
938             ###############################################################################
939             # Aggregation Methods
940             ###############################################################################
941              
# Count - number of elements, optionally only those passing $predicate
#
# With a predicate the work is delegated to Where($predicate)->Count().
# Otherwise the iterator is drained and the number of defined values it
# produced is returned.
sub Count {
    my($self, $predicate) = @_;

    return $self->Where($predicate)->Count() if $predicate;

    my $total = 0;
    my $next  = $self->iterator;
    while (defined $next->()) {
        $total++;
    }
    return $total;
}
955              
# Sum - numeric total of the sequence
#
#   $selector - optional code ref mapping an element to the number to add;
#               defaults to the element itself
#
# Returns 0 when ForEach never invokes the callback.
sub Sum {
    my($self, $selector) = @_;
    $selector = sub { $_[0] } unless $selector;

    my $total = 0;
    my $accumulate = sub {
        $total += $selector->($_[0]);
    };
    $self->ForEach($accumulate);

    return $total;
}
967              
# Min - smallest value in the sequence (numeric comparison with <)
#
#   $selector - optional code ref mapping an element to the value compared;
#               defaults to the element itself
#
# Returns undef when ForEach never invokes the callback.
sub Min {
    my($self, $selector) = @_;
    $selector = sub { $_[0] } unless $selector;

    my $lowest;
    $self->ForEach(sub {
        my $candidate = $selector->($_[0]);
        if (!defined $lowest) {
            # Nothing recorded yet: take the first value as it is.
            $lowest = $candidate;
        }
        elsif ($candidate < $lowest) {
            $lowest = $candidate;
        }
    });

    return $lowest;
}
980              
# Max - largest value in the sequence (numeric comparison with >)
#
#   $selector - optional code ref mapping an element to the value compared;
#               defaults to the element itself
#
# Returns undef when ForEach never invokes the callback.
sub Max {
    my($self, $selector) = @_;
    $selector = sub { $_[0] } unless $selector;

    my $highest;
    $self->ForEach(sub {
        my $candidate = $selector->($_[0]);
        if (!defined $highest) {
            # Nothing recorded yet: take the first value as it is.
            $highest = $candidate;
        }
        elsif ($candidate > $highest) {
            $highest = $candidate;
        }
    });

    return $highest;
}
993              
# Average - arithmetic mean of the sequence
#
#   $selector - optional code ref mapping an element to the number averaged;
#               defaults to the element itself
#
# Dies with "Sequence contains no elements" when nothing was visited.
sub Average {
    my($self, $selector) = @_;
    $selector = sub { $_[0] } unless $selector;

    my($total, $visited) = (0, 0);
    $self->ForEach(sub {
        $total += $selector->($_[0]);
        $visited++;
    });

    die "Sequence contains no elements" if $visited == 0;
    return $total / $visited;
}
1009              
# AverageOrDefault - arithmetic mean of the sequence, or undef when empty
#
#   $selector - optional code ref mapping an element to the number averaged;
#               defaults to the element itself
#
# Same computation as Average, but returns undef instead of dying when
# nothing was visited.
sub AverageOrDefault {
    my($self, $selector) = @_;
    $selector = sub { $_[0] } unless $selector;

    my($total, $visited) = (0, 0);
    $self->ForEach(sub {
        $total += $selector->($_[0]);
        $visited++;
    });

    return undef if $visited == 0;
    return $total / $visited;
}
1025              
# Aggregate - fold the sequence into a single value
#
# Call forms:
#   Aggregate($func)                          - first element is the seed
#   Aggregate($seed, $func)                   - explicit seed
#   Aggregate($seed, $func, $result_selector) - explicit seed, and the final
#                                               accumulator is passed through
#                                               $result_selector
#
# $func is called as $func->($accumulator, $item) and its return value
# becomes the new accumulator.  Dies with "Invalid number of arguments for
# Aggregate" for any other argument count, and with "Sequence contains no
# elements" when the one-argument form is used and the iterator yields
# nothing.
sub Aggregate {
    my($self, @args) = @_;

    my $argc = scalar @args;
    die "Invalid number of arguments for Aggregate" if $argc < 1 || $argc > 3;

    my($acc, $func, $result_selector);

    if ($argc == 1) {
        ($func) = @args;

        # The first element seeds the fold; the rest are folded into it.
        my $next = $self->iterator;
        $acc = $next->();
        die "Sequence contains no elements" unless defined $acc;

        while (defined(my $item = $next->())) {
            $acc = $func->($acc, $item);
        }
    }
    else {
        # Two- and three-argument forms differ only in the result selector,
        # which stays undef when just two arguments were passed.
        ($acc, $func, $result_selector) = @args;
        $self->ForEach(sub {
            $acc = $func->($acc, $_[0]);
        });
    }

    return $result_selector ? $result_selector->($acc) : $acc;
}
1064              
1065             ###############################################################################
1066             # Conversion Methods
1067             ###############################################################################
1068              
# ToArray - drain the iterator and return its elements as a list
#
# Iteration stops at the first undef produced by the iterator.
sub ToArray {
    my($self) = @_;
    my $next = $self->iterator;

    my @collected;
    my $item;
    push @collected, $item while defined($item = $next->());

    return @collected;
}
1080              
# ToList - like ToArray, but returns a reference to a fresh array
sub ToList {
    my($self) = @_;
    my @items = $self->ToArray();
    return \@items;
}
1086              
# ToDictionary - build a hash reference keyed by $key_selector
#
#   $key_selector   - code ref mapping an element to its hash key
#   $value_selector - optional code ref mapping an element to the stored
#                     value; defaults to the element itself
#
# An undef key is stored under the empty string.  When several elements
# produce the same key, the value of the last one wins.
sub ToDictionary {
    my($self, $key_selector, $value_selector) = @_;
    $value_selector = sub { $_[0] } unless $value_selector;

    my %dictionary;
    $self->ForEach(sub {
        my($item) = @_;
        my $key   = $key_selector->($item);
        my $value = $value_selector->($item);

        # Plain assignment: a repeated key overwrites the earlier entry.
        $dictionary{ defined($key) ? $key : '' } = $value;
    });

    return \%dictionary;
}
1110              
# ToLookup - build a hash reference of array references, grouped by key
#
#   $key_selector   - code ref mapping an element to its group key
#   $value_selector - optional code ref mapping an element to the value
#                     appended to the group; defaults to the element itself
#
# An undef key is grouped under the empty string.  Values keep the order in
# which ForEach visited them.
sub ToLookup {
    my($self, $key_selector, $value_selector) = @_;
    $value_selector = sub { $_[0] } unless $value_selector;

    my %lookup;
    $self->ForEach(sub {
        my($item) = @_;
        my $key   = $key_selector->($item);
        my $value = $value_selector->($item);

        $key = '' unless defined $key;
        push @{ $lookup{$key} }, $value;
    });

    return \%lookup;
}
1133              
# DefaultIfEmpty - yield $default_value once if the source yields nothing
#
#   $default_value - value to emit for an empty source (undef when omitted)
#
# Returns a new object of the same class as $self, built with
# $class->new($code_ref).  The wrapped iterator passes source elements
# through; if the source reports its end (undef) before producing any
# element, the default is returned exactly once, and undef afterwards.
sub DefaultIfEmpty {
    my($self, $default_value) = @_;

    my $class  = ref($self);
    my $source = $self->iterator;

    # True until either a real element has been seen or the default has
    # been handed out; after that the default must never be emitted.
    my $default_pending = 1;

    return $class->new(sub {
        my $item = $source->();
        if (defined $item) {
            $default_pending = 0;
            return $item;
        }

        # Source is exhausted.
        return undef unless $default_pending;

        $default_pending = 0;
        return $default_value;
    });
}
1164              
# ToLTSV - write every record of the sequence as one LTSV line
#
#   $file - output target, passed unchanged to _open_fh('>', $file, 1)
#   %opts - label_order => [...] : labels to emit first, in the given order
#           headers     => [...] : alias for label_order (CSV-LINQ interop),
#                                  used only when label_order is not defined
#
# Each record is dereferenced as a hash.  Without label_order the labels are
# written in sorted order.  With label_order, the listed labels that exist
# in the record come first, followed by the remaining labels sorted.
# Undefined values are written as empty strings, and tab/CR/LF inside a
# value are replaced by a space so they cannot break the record structure.
#
# Returns 1 on success.  Dies when a print or the final close fails, since
# buffered write errors may only surface at close time.  The handle is
# closed before any exception raised during iteration is re-thrown.
sub ToLTSV {
    my($self, $file, %opts) = @_;

    # label_order => [...] specifies output label order.
    # headers => [...] is an alias for label_order (CSV-LINQ interop).
    my $label_order = defined $opts{label_order} ? $opts{label_order}
                    : defined $opts{headers}     ? $opts{headers}
                    :                              undef;

    # Membership table for label_order; identical for every record, so it
    # is built once here rather than once per record.
    my %in_order;
    if (defined $label_order) {
        %in_order = map { $_ => 1 } @{$label_order};
    }

    my $fhn = _open_fh('>', $file, 1);

    my $completed = eval {
        $self->ForEach(sub {
            my $record = shift;

            my @keys;
            if (defined $label_order) {
                # Specified labels first (skipping those absent from the
                # record), then the remaining keys in sorted order.
                my @extra = sort grep { !$in_order{$_} } keys %$record;
                @keys = ((grep { exists $record->{$_} } @{$label_order}), @extra);
            }
            else {
                @keys = sort keys %$record;
            }

            # LTSV spec: tab separates fields, newline terminates the record.
            my $line = join("\t", map {
                my $v = defined($record->{$_}) ? $record->{$_} : '';
                $v =~ s/[\t\n\r]/ /g;
                "$_:$v"
            } @keys);

            no strict 'refs';
            print {*{$fhn}} $line, "\n"
                or die "ToLTSV: write failed: $!";
        });
        1;
    };
    my $error = $@;    # capture immediately, before anything can reset $@

    # Always close, even after a failure, so the handle is not leaked.
    my $closed;
    { no strict 'refs'; $closed = close($fhn); }
    my $close_error = "$!";

    die $error unless $completed;
    die "ToLTSV: close failed: $close_error" unless $closed;
    return 1;
}
1203              
1204             ###############################################################################
1205             # Utility Methods
1206             ###############################################################################
1207              
# ForEach - call $action->($item) for every element, in iteration order
#
# Stops at the first undef produced by the iterator.  Returns nothing
# (bare return).
sub ForEach {
    my($self, $action) = @_;
    my $next = $self->iterator;

    for (;;) {
        my $item = $next->();
        last unless defined $item;
        $action->($item);
    }

    return;
}
1218              
1219             1;
1220              
1221             ######################################################################
1222             #
1223             # LTSV::LINQ::Ordered - Ordered query supporting ThenBy/ThenByDescending
1224             #
1225             # Returned by OrderBy* methods. Inherits all LTSV::LINQ methods via @ISA.
1226             # ThenBy* methods are only available on this class, mirroring the way
1227             # .NET LINQ's IOrderedEnumerable exposes ThenBy/ThenByDescending while
1228             # plain IEnumerable does not.
1229             #
1230             # Stability guarantee: every sort uses a Schwartzian-Transform-style
1231             # decorated array that appends the original element index as a final
1232             # tie-breaker. This makes the multi-key sort completely stable on all
1233             # Perl versions including 5.005_03, where built-in sort stability is not
1234             # guaranteed.
1235             ######################################################################
1236              
1237             package LTSV::LINQ::Ordered;
1238              
1239             # 5.005_03-compatible inheritance (no 'use parent', no 'our')
1240             @LTSV::LINQ::Ordered::ISA = ('LTSV::LINQ');
1241              
# _new_ordered($items_aref, $specs_aref) - internal constructor
#
# $specs_aref is an arrayref of sort-spec hashrefs:
#   { sel  => $code_ref,           # key selector: ($item) -> $key
#     dir  => 1 or -1,             # 1 = ascending, -1 = descending
#     type => 'smart'|'str'|'num'  # comparison family
#   }
#
# The object stores a _factory code ref rather than a ready-made iterator.
# Every call of the factory sorts the items again and returns a fresh
# iterator over the result, which is what makes an ordered sequence
# re-iterable (e.g. in GroupJoin result selectors).  Objects created by
# Take/Where/Select through ref($self)->new(sub{...}) carry an {iterator}
# field and no _factory, so they are unaffected.
sub _new_ordered {
    my($class, $items, $specs) = @_;

    my $factory = sub {
        my @sorted = _perform_sort($items, $specs);
        my $cursor = 0;
        return sub {
            return undef if $cursor >= scalar(@sorted);
            return $sorted[$cursor++];
        };
    };

    my %object = (
        _items   => $items,
        _specs   => $specs,
        _factory => $factory,
    );
    return bless \%object, $class;
}
1266              
# _perform_sort($items_aref, $specs_aref) - core stable multi-key sort
#
# Decorated-array (Schwartzian Transform) technique:
#   1. Build [ orig_index, [key1, key2, ..., keyN], item ] per element
#   2. Sort by key1..keyN in sequence; original index as final tie-breaker
#   3. Strip decoration and return plain item list
#
# Each spec is a hashref { sel => code ref, dir => 1 or -1, type => ... };
# keys are normalised by _extract_key and compared by _compare_keys.
# The original-index tie-breaker guarantees stability on every Perl version.
#
# Returns the sorted items as a list.
sub _perform_sort {
    my($items, $specs) = @_;

    # Step 1: decorate
    my @decorated;
    for my $idx (0 .. $#{$items}) {
        my $item = $items->[$idx];
        my @keys;
        for my $spec (@{$specs}) {
            # Force scalar context: a selector that returns an empty list
            # (bare "return;") must yield an undef key.  In list context it
            # would vanish from the argument list and the type string would
            # be taken as the value.
            my $raw = scalar($spec->{sel}->($item));
            push @keys, _extract_key($raw, $spec->{type});
        }
        push @decorated, [$idx, \@keys, $item];
    }

    # Step 2: sort by each key in turn; first non-zero comparison decides
    my @sorted_dec = sort {
        my $order = 0;
        for my $i (0 .. $#{$specs}) {
            my $cmp = _compare_keys($a->[1][$i], $b->[1][$i], $specs->[$i]{type});
            $cmp = -$cmp if $specs->[$i]{dir} < 0;    # descending key
            next if $cmp == 0;
            $order = $cmp;
            last;
        }
        # All keys tied: fall back to original position (stability).
        $order != 0 ? $order : ($a->[0] <=> $b->[0]);
    } @decorated;

    # Step 3: undecorate
    return map { $_->[2] } @sorted_dec;
}
1300              
# _extract_key($raw_value, $type) - normalise one sort key
#
# An undef value is first replaced by the empty string.  Then, by $type:
#   'num'   - returns $val + 0, or 0 for the empty string
#   'str'   - returns the stringified value
#   other   - "smart": returns a two-element arrayref [flag, value]
#               [0, $numeric_val] when the whitespace-trimmed value looks
#                                 like a decimal number (optional sign,
#                                 optional fraction, optional exponent)
#               [1, $string_val ] otherwise (original, untrimmed string)
sub _extract_key {
    my($val, $type) = @_;
    $val = '' unless defined $val;

    if ($type eq 'num') {
        # Force numeric; the empty string (including former undef) is 0.
        return length($val) ? $val + 0 : 0;
    }

    return "$val" if $type eq 'str';

    # smart: decide on a trimmed copy so surrounding blanks do not matter
    my $trimmed = $val;
    $trimmed =~ s/^\s+//;
    $trimmed =~ s/\s+$//;

    return [0, $trimmed + 0]
        if $trimmed =~ /^[+-]?(?:\d+\.?\d*|\d*\.\d+)(?:[eE][+-]?\d+)?$/;

    return [1, "$val"];
}
1329              
# _compare_keys($ka, $kb, $type) - compare two extracted keys
#
#   'num' - numeric comparison (<=>) of the two scalars
#   'str' - string comparison (cmp) of the two scalars
#   other - "smart": both keys are [flag, value] arrayrefs, where flag 0
#           marks a numeric value and flag 1 a string value
#
# Returns a negative, zero or positive number, like <=> and cmp.
sub _compare_keys {
    my($ka, $kb, $type) = @_;

    return $ka <=> $kb if $type eq 'num';
    return $ka cmp $kb if $type eq 'str';

    # smart
    my($flag_a, $value_a) = @{$ka};
    my($flag_b, $value_b) = @{$kb};

    my $both_numeric = ($flag_a == 0 && $flag_b == 0);
    my $both_string  = ($flag_a == 1 && $flag_b == 1);

    # mixed: compare the flags, so numeric keys sort before string keys
    return $flag_a <=> $flag_b unless $both_numeric || $both_string;

    return $both_numeric ? ($value_a <=> $value_b)
                         : ($value_a cmp $value_b);
}
1348              
1349             # (No iterator() override needed: _factory in {_items,_specs,_factory} objects
1350             # is handled by LTSV::LINQ::iterator(), which calls _factory->() each time.
1351             # Objects produced by Take/Where/Select etc. via ref($self)->new(sub{...})
1352             # store their closure in {iterator} and do not have _factory, so they use
1353             # the normal non-re-entrant path.)
1354              
# _thenby($key_selector, $dir, $type) - shared implementation for all ThenBy*
#
# Appends one more sort spec { sel, dir, type } to a copy of the receiver's
# spec list and returns a new LTSV::LINQ::Ordered object over the same
# items.  The receiver is not modified, so one ordered query can be
# branched into several independent ones:
#
#   my $by_dept        = From(\@data)->OrderBy(sub { $_[0]{dept} });
#   my $by_dept_name   = $by_dept->ThenBy(sub { $_[0]{name} });
#   my $by_dept_salary = $by_dept->ThenByNum(sub { $_[0]{salary} });
#   # $by_dept_name and $by_dept_salary are independent queries
sub _thenby {
    my($self, $key_selector, $dir, $type) = @_;

    my %spec = (sel => $key_selector, dir => $dir, type => $type);

    # Copy first, then extend: the receiver's own list stays untouched.
    my @specs = @{ $self->{_specs} };
    push @specs, \%spec;

    return LTSV::LINQ::Ordered->_new_ordered($self->{_items}, \@specs);
}
1370              
# ThenBy* - public secondary-key methods.
#
# Each one forwards its key selector to _thenby() together with a fixed
# direction (1 = ascending, -1 = descending) and comparison type
# ('smart', 'str' or 'num'), and returns whatever _thenby() returns.

# ThenBy - ascending secondary key, smart comparison
sub ThenBy {
    my($self, $key_selector) = @_;
    return $self->_thenby($key_selector, 1, 'smart');
}

# ThenByDescending - descending secondary key, smart comparison
sub ThenByDescending {
    my($self, $key_selector) = @_;
    return $self->_thenby($key_selector, -1, 'smart');
}

# ThenByStr - ascending secondary key, string comparison
sub ThenByStr {
    my($self, $key_selector) = @_;
    return $self->_thenby($key_selector, 1, 'str');
}

# ThenByStrDescending - descending secondary key, string comparison
sub ThenByStrDescending {
    my($self, $key_selector) = @_;
    return $self->_thenby($key_selector, -1, 'str');
}

# ThenByNum - ascending secondary key, numeric comparison
sub ThenByNum {
    my($self, $key_selector) = @_;
    return $self->_thenby($key_selector, 1, 'num');
}

# ThenByNumDescending - descending secondary key, numeric comparison
sub ThenByNumDescending {
    my($self, $key_selector) = @_;
    return $self->_thenby($key_selector, -1, 'num');
}
1388              
1389             1;
1390              
1391             =encoding utf-8
1392              
1393             =head1 NAME
1394              
1395             LTSV::LINQ - LINQ-style query interface for LTSV files
1396              
1397             =head1 VERSION
1398              
1399             Version 1.09
1400              
1401             =head1 SYNOPSIS
1402              
1403             use LTSV::LINQ;
1404              
1405             # Read LTSV file and query
1406             my @results = LTSV::LINQ->FromLTSV("access.log")
1407             ->Where(sub { $_[0]{status} eq '200' })
1408             ->Select(sub { $_[0]{url} })
1409             ->Distinct()
1410             ->ToArray();
1411              
1412             # DSL syntax for simple filtering
1413             my @errors = LTSV::LINQ->FromLTSV("access.log")
1414             ->Where(status => '404')
1415             ->ToArray();
1416              
1417             # Grouping and aggregation
1418             my @stats = LTSV::LINQ->FromLTSV("access.log")
1419             ->GroupBy(sub { $_[0]{status} })
1420             ->Select(sub {
1421             my $g = shift;
1422             return {
1423             Status => $g->{Key},
1424             Count => scalar(@{$g->{Elements}})
1425             };
1426             })
1427             ->OrderByDescending(sub { $_[0]{Count} })
1428             ->ToArray();
1429              
1430             =head1 TABLE OF CONTENTS
1431              
1432             =over 4
1433              
1434             =item * L
1435              
1436             =item * L -- eg/ samples and doc/ cheat sheets
1437              
1438             =item * L -- Complete method reference (60 methods)
1439              
1440             =item * L -- 8 practical examples
1441              
1442             =item * L -- Lazy evaluation, method chaining, DSL
1443              
1444             =item * L -- Iterator design, execution flow
1445              
1446             =item * L -- Memory usage, optimization tips
1447              
1448             =item * L -- Perl 5.005+ support, pure Perl
1449              
1450             =item * L -- Error messages
1451              
1452             =item * L -- Common questions and answers
1453              
1454             =item * L -- Common patterns
1455              
1456             =item * L
1457              
1458             =item * L
1459              
1460             =item * L
1461              
1462             =item * L
1463              
1464             =item * L
1465              
1466             =back
1467              
1468             =head1 DESCRIPTION
1469              
1470             LTSV::LINQ provides a LINQ-style query interface for LTSV (Labeled
1471             Tab-Separated Values) files. It offers a fluent, chainable API for
1472             filtering, transforming, and aggregating LTSV data.
1473              
1474             Key features:
1475              
1476             =over 4
1477              
=item * B<Lazy evaluation> - O(1) memory usage for most operations

=item * B<Method chaining> - Fluent, readable query composition

=item * B<DSL syntax> - Simple key-value filtering

=item * B<60 LINQ methods> - Comprehensive query capabilities

=item * B<Pure Perl> - No XS dependencies

=item * B<Perl 5.005_03+> - Works on ancient and modern Perl
1489              
1490             =back
1491              
1492             =head2 What is LTSV?
1493              
1494             LTSV (Labeled Tab-Separated Values) is a text format for structured logs and
1495             data records. Each line consists of tab-separated fields, where each field is
a C<label:value> pair. A single LTSV record occupies exactly one line.
1497              
B<Example:>
1499              
1500             time:2026-02-13T10:00:00 host:192.0.2.1 status:200 url:/index.html bytes:1024
1501              
1502             =head3 LTSV Characteristics
1503              
1504             =over 4
1505              
=item * B<One record per line>
1507              
1508             A complete record is always a single newline-terminated line. This makes
1509             streaming processing trivial: read a line, parse it, process it, discard it.
1510             There is no multi-line quoting problem, no block parser required.
1511              
=item * B<Tab as field separator>
1513              
1514             Fields are separated by a single horizontal tab character (C<0x09>).
1515             The tab is a C0 control character in the ASCII range (C<0x00>-C<0x7F>),
1516             which has an important consequence for multibyte character encodings.
1517              
=item * B<Colon as label-value separator>
1519              
1520             Within each field, the label and value are separated by a single colon
1521             (C<0x3A>, US-ASCII C<:>). This is also a plain ASCII character with the same
1522             multibyte-safety guarantees as the tab.
1523              
1524             =back
1525              
1526             =head3 LTSV Advantages
1527              
1528             =over 4
1529              
=item * B<Multibyte character safety>
1531              
1532             This is perhaps the most important technical advantage of LTSV over formats
1533             such as CSV (comma-delimited) or TSV without labels.
1534              
1535             In many multibyte character encodings used across Asia and beyond, a
1536             single logical character is represented by a sequence of two or more bytes.
1537             The danger in older encodings is that a byte within a multibyte sequence can
1538             coincidentally equal the byte value of an ASCII delimiter, causing a naive
1539             byte-level parser to split the field in the wrong place.
1540              
1541             The following table shows well-known encodings and their byte ranges:
1542              
1543             Encoding First byte range Following byte range
1544             ---------- -------------------- -------------------------------
1545             Big5 0x81-0xFE 0x40-0x7E, 0xA1-0xFE
1546             Big5-HKSCS 0x81-0xFE 0x40-0x7E, 0xA1-0xFE
1547             CP932X 0x81-0x9F, 0xE0-0xFC 0x40-0x7E, 0x80-0xFC
1548             EUC-JP 0x8E-0x8F, 0xA1-0xFE 0xA1-0xFE
1549             GB 18030 0x81-0xFE 0x30-0x39, 0x40-0x7E, 0x80-0xFE
1550             GBK 0x81-0xFE 0x40-0x7E, 0x80-0xFE
1551             Shift_JIS 0x81-0x9F, 0xE0-0xFC 0x40-0x7E, 0x80-0xFC
1552             RFC 2279 0xC2-0xF4 0x80-0xBF
1553             UHC 0x81-0xFE 0x41-0x5A, 0x61-0x7A, 0x81-0xFE
1554             UTF-8 0xC2-0xF4 0x80-0xBF
1555             WTF-8 0xC2-0xF4 0x80-0xBF
1556              
1557             The tab character is C<0x09>. The colon is C<0x3A>. Neither value falls in any
1558             following-byte range listed above: C<0x09> is below all of them, and C<0x3A> sits
1559             between GB 18030's C<0x30>-C<0x39> and the C<0x40> lower bound of the other ranges.
1560             Neither C<0x09> nor C<0x3A> appears anywhere as a first byte either. Therefore:
1561              
1562             TAB (0x09) never appears as a byte within any multibyte character
1563             in Big5, Big5-HKSCS, CP932X, EUC-JP, GB 18030, GBK, Shift_JIS,
1564             RFC 2279, UHC, UTF-8, or WTF-8.
1565             ':' (0x3A) never appears as a byte within any multibyte character
1566             in the same set of encodings.
1567              
1568             This means that LTSV files containing values in B<any> of those encodings
1569             can be parsed correctly by a B<simple byte-level split> on tab and colon,
1570             with no knowledge of the encoding whatsoever. There is no need to decode
1571             the text before parsing, and no risk of a misidentified delimiter.
1572              
1573             By contrast, CSV has encoding problems of a different kind.
1574             The comma (C<0x2C>) and the double-quote (C<0x22>) do B<not> appear as
1575             following bytes in Shift_JIS or Big5, so they are not directly confused with
1576             multibyte character content. However, the backslash (C<0x5C>) B<does>
1577             appear as a valid following byte in both Shift_JIS (following byte range
1578             C<0x40>-C<0x7E> includes C<0x5C>) and Big5 (same range). Many CSV
1579             parsers and the C runtime on Windows use backslash or backslash-like
1580             sequences for escaping, so a naive byte-level search for the escape
1581             character can be misled by a multibyte character whose second byte is
1582             C<0x5C>. Beyond this, CSV's quoting rules are underspecified (RFC 4180
1583             vs. Excel vs. custom dialects differ), which makes writing a correct,
1584             encoding-aware CSV parser considerably harder than parsing LTSV.
1585             LTSV sidesteps all of these issues by choosing delimiters (tab and colon)
1586             that fall outside every following-byte range of every traditional
1587             multibyte encoding listed above.
1588              
1589             UTF-8 is safe for all ASCII delimiters because continuation bytes are
1590             always in the range C<0x80>-C<0xBF>, never overlapping ASCII. But LTSV's
1591             choice of tab and colon also makes it safe for the traditional multibyte
1592             encodings that predate Unicode, which is critical for systems that still
1593             operate on traditional-encoded data.
1594              
1595             =item * B<Self-describing fields>
1596              
1597             Every field carries its own label. A record is human-readable without a
1598             separate schema or header line. Fields can appear in any order, and
1599             optional fields can simply be omitted. Adding a new field to some records
1600             does not break parsers that do not know about it.
1601              
1602             =item * B<Streaming-friendly>
1603              
1604             Because each record is one line, LTSV files can be processed with line-by-line
1605             streaming. Memory usage is proportional to the longest single record, not
1606             the total file size. This is why C<FromLTSV> in this module uses a lazy
1607             iterator rather than loading the whole file.
1608              
1609             =item * B<Works with standard Unix tools>
1610              
1611             Standard Unix text tools (C<grep>, C<sed>, C<awk>, C<sort>, C<cut>) work
1612             naturally on LTSV files. A field can be located with a pattern like
1613             C<grep 'status:500'> without any special parser. This makes ad-hoc
1614             analysis and shell scripting straightforward.
1615              
1616             =item * B<No quoting or escaping>
1617              
1618             CSV requires quoting fields that contain commas or newlines, and the quoting
1619             rules differ between implementations (RFC 4180 vs. Microsoft Excel vs. others).
1620             LTSV has no quoting: tab and colon bytes never occur inside a multibyte character
1621             in any of the supported encodings (by the multibyte-safety argument above), and only the
1622             first colon in a field ends the label, so no escaping mechanism is needed.
1623              
1624             =item * B<Wide adoption for logging>
1625              
1626             LTSV originated in the Japanese web industry as a structured log format for
1627             HTTP access logs. Many web servers (Apache, Nginx) and log aggregation tools
1628             support LTSV output or parsing. The format is particularly popular for
1629             application and infrastructure logging where grep-ability and streaming
1630             analysis matter.
1631              
1632             =back
1633              
1634             For the formal LTSV specification, see L<http://ltsv.org/>.
1635              
1636             =head2 What is LINQ?
1637              
1638             LINQ (Language Integrated Query) is a set of query capabilities introduced
1639             in the .NET Framework 3.5 (C# 3.0, 2007) by Microsoft. It defines a
1640             unified model for querying and transforming data from diverse sources --
1641             in-memory collections, relational databases (LINQ to SQL), XML documents
1642             (LINQ to XML), and more -- using a single, consistent API.
1643              
1644             This module brings LINQ-style querying to Perl, applied specifically to
1645             LTSV data sources.
1646              
1647             =head3 LINQ Characteristics
1648              
1649             =over 4
1650              
1651             =item * B
1652              
1653             LINQ provides a single set of operators that works uniformly across
1654             data sources. Whether the source is an array, a file, or a database,
1655             the same C<Where>, C<Select>, and C<OrderBy> methods apply.
1656             LTSV::LINQ follows this principle: the same methods work on in-memory
1657             arrays (C<From>) and LTSV files (C<FromLTSV>) alike.
1658              
1659             =item * B
1660              
1661             LINQ queries express I<what> to retrieve, not I<how> to retrieve it.
1662             A query like C<< ->Where(sub { $_[0]{status} >= 400 })->Select(...) >>
1663             describes the intent clearly, without explicit loop management.
1664             This reduces cognitive overhead and makes queries easier to read and verify.
1665              
1666             =item * B
1667              
1668             Each LINQ operator takes a sequence and returns a new sequence (or a
1669             scalar result for terminal operators). Because operators are ordinary
1670             method calls that return objects, they compose naturally:
1671              
1672             $query->Where(...)->Select(...)->OrderBy(...)->GroupBy(...)->ToArray()
1673              
1674             Any intermediate result is itself a valid query object, ready for
1675             further transformation or immediate consumption.
1676              
1677             =item * B<Lazy (deferred) evaluation>
1678              
1679             Intermediate operators (C<Where>, C<Select>, C<Take>, etc.) do not
1680             execute immediately. They construct a chain of iterator closures.
1681             Evaluation is deferred until a terminal operator (C<ToArray>, C<Count>,
1682             C<First>, C<Any>, C<ForEach>, etc.) pulls items through the chain.
1683             This means:
1684              
1685             =over 4
1686              
1687             =item - Memory usage is bounded by the window of data in flight, not by the
1688             total data size. A C<< Where->Select->Take(10) >> over a million-line
1689             file reads at most 10 records past the first matching one.
1690              
1691             =item - Short-circuiting is free. C<First> stops at the first match.
1692             C<Any> stops as soon as one match is found.
1693              
1694             =item - Pipelines can be built without executing them, and executed
1695             multiple times by wrapping in a factory (see C<_from_snapshot>).
1696              
1697             =back
1698              
1699             =item * B<Method chaining>
1700              
1701             LINQ's design makes chaining natural. In C# this is supported by
1702             extension methods; in Perl it is supported by returning C<$self>-class
1703             objects from every intermediate operator. The result is readable,
1704             left-to-right query expressions.
1705              
1706             =item * B
1707              
1708             A LINQ query object is a description of a computation, not its result.
1709             You can pass query objects around, inspect them, extend them, and decide
1710             later when to execute them. This separation is valuable in library and
1711             framework code.
1712              
1713             =back
1714              
1715             =head3 LINQ Advantages for LTSV Processing
1716              
1717             =over 4
1718              
1719             =item * B
1720              
1721             LTSV log analysis often involves the same logical steps: filter records
1722             by a condition, extract a field, aggregate. LINQ methods map directly
1723             onto these steps, making the code read like a description of the analysis.
1724              
1725             =item * B
1726              
1727             Web server access logs can be gigabytes in size. LTSV::LINQ's lazy
1728             C<FromLTSV> iterator reads one line at a time. Combined with C<Where>
1729             and C<Take>, only the needed records are ever in memory simultaneously.
1730              
1731             =item * B
1732              
1733             Unlike C# LINQ (which has query comprehension syntax C<from ... where ...
1734             select ...>), LTSV::LINQ works with ordinary Perl method calls and
1735             anonymous subroutines. There is no source filter, no parser extension,
1736             and no dependency on modern Perl features. The same code runs on Perl
1737             5.005_03 and Perl 5.40.
1738              
1739             =item * B
1740              
1741             A C<Where> clause stored in a variable can be applied to multiple
1742             data sources. Query logic can be parameterized and reused across scripts.
1743              
1744             =back
1745              
1746             For the original LINQ documentation, see
1747             L<https://learn.microsoft.com/en-us/dotnet/csharp/linq/>.
1748              
1749             =head1 INCLUDED DOCUMENTATION
1750              
1751             The C<eg/> directory contains sample programs demonstrating LTSV::LINQ features:
1752              
1753             eg/01_ltsv_query.pl LTSV file query: FromLTSV/Where/Select/OrderByNumDescending/
1754             Distinct/ToLookup
1755             eg/02_array_query.pl In-memory array queries, aggregation (Sum/Average/Min/Max),
1756             Any/All, Skip/Take paging, Zip
1757             eg/03_grouping.pl GroupBy, ToLookup, GroupJoin (left outer join),
1758             SelectMany with array-ref selector
1759             eg/04_sorting.pl OrderBy/ThenBy multi-key sort, OrderByNum vs OrderByStr,
1760             Reverse
1761              
1762             The C<doc/> directory contains LTSV::LINQ cheat sheets in 21 languages:
1763              
1764             doc/linq_cheatsheet.EN.txt English
1765             doc/linq_cheatsheet.JA.txt Japanese
1766             doc/linq_cheatsheet.ZH.txt Chinese (Simplified)
1767             doc/linq_cheatsheet.TW.txt Chinese (Traditional)
1768             doc/linq_cheatsheet.KO.txt Korean
1769             doc/linq_cheatsheet.FR.txt French
1770             doc/linq_cheatsheet.ID.txt Indonesian
1771             doc/linq_cheatsheet.VI.txt Vietnamese
1772             doc/linq_cheatsheet.TH.txt Thai
1773             doc/linq_cheatsheet.HI.txt Hindi
1774             doc/linq_cheatsheet.BN.txt Bengali
1775             doc/linq_cheatsheet.TR.txt Turkish
1776             doc/linq_cheatsheet.MY.txt Malay
1777             doc/linq_cheatsheet.TL.txt Filipino
1778             doc/linq_cheatsheet.KM.txt Khmer
1779             doc/linq_cheatsheet.MN.txt Mongolian
1780             doc/linq_cheatsheet.NE.txt Nepali
1781             doc/linq_cheatsheet.SI.txt Sinhala
1782             doc/linq_cheatsheet.UR.txt Urdu
1783             doc/linq_cheatsheet.UZ.txt Uzbek
1784             doc/linq_cheatsheet.BM.txt Burmese
1785              
1786             Each cheat sheet covers: creating queries, filtering, projection, sorting,
1787             paging, grouping, set operations, joins, aggregation, and links to the
1788             official LTSV and LINQ specifications.
1789              
1790             =head1 METHODS
1791              
1792             =head2 Complete Method Reference
1793              
1794             This module implements 60 LINQ-style methods organized into 15 categories:
1795              
1796             =over 4
1797              
1798             =item * B<Data Source>: From, FromLTSV, Range, Empty, Repeat
1799              
1800             =item * B<Filtering>: Where (with DSL)
1801              
1802             =item * B<Projection>: Select, SelectMany
1803              
1804             =item * B<Concatenation>: Concat, Zip
1805              
1806             =item * B<Partitioning>: Take, Skip, TakeWhile, SkipWhile
1807              
1808             =item * B<Ordering>: OrderBy, OrderByDescending, OrderByStr, OrderByStrDescending, OrderByNum, OrderByNumDescending, Reverse, ThenBy, ThenByDescending, ThenByStr, ThenByStrDescending, ThenByNum, ThenByNumDescending
1809              
1810             =item * B<Grouping>: GroupBy
1811              
1812             =item * B<Set Operations>: Distinct, Union, Intersect, Except
1813              
1814             =item * B<Join>: Join, GroupJoin
1815              
1816             =item * B<Quantifiers>: All, Any, Contains
1817              
1818             =item * B<Comparison>: SequenceEqual
1819              
1820             =item * B<Element Access>: First, FirstOrDefault, Last, LastOrDefault, Single, SingleOrDefault, ElementAt, ElementAtOrDefault
1821              
1822             =item * B<Aggregation>: Count, Sum, Min, Max, Average, AverageOrDefault, Aggregate
1823              
1824             =item * B<Conversion>: ToArray, ToList, ToDictionary, ToLookup, ToLTSV, DefaultIfEmpty
1825              
1826             =item * B<Utility>: ForEach
1827              
1828             =back
1829              
1830             B<Method Summary Table:>
1831              
1832             Method Category Lazy? Returns
1833             ===================== ============== ===== ================
1834             From Data Source Yes Query
1835             FromLTSV Data Source Yes Query
1836             Range Data Source Yes Query
1837             Empty Data Source Yes Query
1838             Repeat Data Source Yes Query
1839             Where Filtering Yes Query
1840             Select Projection Yes Query
1841             SelectMany Projection Yes Query
1842             Concat Concatenation Yes Query
1843             Zip Concatenation Yes Query
1844             Take Partitioning Yes Query
1845             Skip Partitioning Yes Query
1846             TakeWhile Partitioning Yes Query
1847             SkipWhile Partitioning Yes Query
1848             OrderBy Ordering No* OrderedQuery
1849             OrderByDescending Ordering No* OrderedQuery
1850             OrderByStr Ordering No* OrderedQuery
1851             OrderByStrDescending Ordering No* OrderedQuery
1852             OrderByNum Ordering No* OrderedQuery
1853             OrderByNumDescending Ordering No* OrderedQuery
1854             Reverse Ordering No* Query
1855             ThenBy Ordering No* OrderedQuery
1856             ThenByDescending Ordering No* OrderedQuery
1857             ThenByStr Ordering No* OrderedQuery
1858             ThenByStrDescending Ordering No* OrderedQuery
1859             ThenByNum Ordering No* OrderedQuery
1860             ThenByNumDescending Ordering No* OrderedQuery
1861             GroupBy Grouping No* Query
1862             Distinct Set Operation Yes Query
1863             Union Set Operation No* Query
1864             Intersect Set Operation No* Query
1865             Except Set Operation No* Query
1866             Join Join No* Query
1867             GroupJoin Join No* Query
1868             All Quantifier No Boolean
1869             Any Quantifier No Boolean
1870             Contains Quantifier No Boolean
1871             SequenceEqual Comparison No Boolean
1872             First Element Access No Element
1873             FirstOrDefault Element Access No Element
1874             Last Element Access No* Element
1875             LastOrDefault Element Access No* Element or undef
1876             Single Element Access No* Element
1877             SingleOrDefault Element Access No* Element or undef
1878             ElementAt Element Access No* Element
1879             ElementAtOrDefault Element Access No* Element or undef
1880             Count Aggregation No Integer
1881             Sum Aggregation No Number
1882             Min Aggregation No Number
1883             Max Aggregation No Number
1884             Average Aggregation No Number
1885             AverageOrDefault Aggregation No Number or undef
1886             Aggregate Aggregation No Any
1887             DefaultIfEmpty Conversion Yes Query
1888             ToArray Conversion No Array
1889             ToList Conversion No ArrayRef
1890             ToDictionary Conversion No HashRef
1891             ToLookup Conversion No HashRef
1892             ToLTSV Conversion No Boolean
1893             ForEach Utility No Void
1894              
1895             * Materializing operation (loads all data into memory)
1896             OrderedQuery = LTSV::LINQ::Ordered (subclass of LTSV::LINQ;
1897             all LTSV::LINQ methods available plus ThenBy* methods)
1898              
1899             =head2 Data Source Methods
1900              
1901             =over 4
1902              
1903             =item B<From(\@array)>
1904              
1905             Create a query from an array.
1906              
1907             my $query = LTSV::LINQ->From([{name => 'Alice'}, {name => 'Bob'}]);
1908              
1909             =item B<FromLTSV($filename)>
1910              
1911             Create a query from an LTSV file.
1912              
1913             my $query = LTSV::LINQ->FromLTSV("access.log");
1914              
1915             B<File handle lifetime:> C<FromLTSV> opens the file immediately and
1916             holds the file handle open until the iterator reaches end-of-file.
1917             If the query is not fully consumed (e.g. you call C<First> or C<Take>
1918             and stop early), the file handle remains open until the query object
1919             is garbage collected.
1920              
1921             This is harmless for a small number of files, but if you open many
1922             LTSV files concurrently without consuming them fully, you may exhaust
1923             the OS file descriptor limit. In such cases, consume the query fully
1924             or use C<ToArray()> to materialise the data and close the file
1925             immediately:
1926              
1927             # File closed as soon as all records are loaded
1928             my @records = LTSV::LINQ->FromLTSV("access.log")->ToArray();
1929              
1930             B<Concurrent iterators:> On Perl 5.006 and later,
1931             each call to C<FromLTSV> uses a distinct lexical file handle, so
1932             multiple iterators may be open simultaneously without interference.
1933             On Perl 5.005_03, a unique numbered package glob is used per call
1934             (C, C, ...) to achieve
1935             the same safety. Using C<Join> or C<Zip> with two C<FromLTSV>
1936             sources is therefore safe on all supported Perl versions.
1937              
1938             =item B<Range($start, $count)>
1939              
1940             Generate a sequence of integers.
1941              
1942             my $query = LTSV::LINQ->Range(1, 10); # 1, 2, ..., 10
1943              
1944             =item B<Empty()>
1945              
1946             Create an empty sequence.
1947              
1948             B<Returns:> Empty LTSV::LINQ query
1949              
1950             B
1951              
1952             my $empty = LTSV::LINQ->Empty();
1953             $empty->Count(); # 0
1954              
1955             # Conditional empty sequence
1956             my $result = $condition ? $query : LTSV::LINQ->Empty();
1957              
1958             B<Note:> Equivalent to C<From([])> but more explicit.
1959              
1960             =item B<Repeat($element, $count)>
1961              
1962             Repeat the same element a specified number of times.
1963              
1964             B
1965              
1966             =over 4
1967              
1968             =item * C<$element> - Element to repeat
1969              
1970             =item * C<$count> - Number of times to repeat
1971              
1972             =back
1973              
1974             B<Returns:> LTSV::LINQ query with repeated elements
1975              
1976             B
1977              
1978             # Repeat scalar
1979             LTSV::LINQ->Repeat('x', 5)->ToArray(); # ('x', 'x', 'x', 'x', 'x')
1980              
1981             # Repeat reference (same reference repeated)
1982             my $item = {id => 1};
1983             LTSV::LINQ->Repeat($item, 3)->ToArray(); # ($item, $item, $item)
1984              
1985             # Generate default values
1986             LTSV::LINQ->Repeat(0, 10)->ToArray(); # (0, 0, 0, ..., 0)
1987              
1988             B<Note:> The element reference is repeated, not cloned.
1989              
1990             =back
1991              
1992             =head2 Filtering Methods
1993              
1994             =over 4
1995              
1996             =item B<Where($predicate)>
1997              
1998             =item B<Where(key =E<gt> value, ...)>
1999              
2000             Filter elements. Accepts either a code reference or DSL form.
2001              
2002             B
2003              
2004             ->Where(sub { $_[0]{status} == 200 })
2005             ->Where(sub { $_[0]{status} >= 400 && $_[0]{bytes} > 1000 })
2006              
2007             The code reference receives each element as C<$_[0]> and should return
2008             true to include the element, false to exclude it.
2009              
2010             B
2011              
2012             The DSL (Domain Specific Language) form provides a concise syntax for
2013             simple equality comparisons. All conditions are combined with AND logic.
2014              
2015             # Single condition
2016             ->Where(status => '200')
2017              
2018             # Multiple conditions (AND)
2019             ->Where(status => '200', method => 'GET')
2020              
2021             # Equivalent to:
2022             ->Where(sub {
2023             $_[0]{status} eq '200' && $_[0]{method} eq 'GET'
2024             })
2025              
2026             B
2027              
2028             =over 4
2029              
2030             =item * Arguments must be an even number of C<key =E<gt> value> pairs
2031              
2032             The DSL form interprets its arguments as a flat list of key-value pairs.
2033             Passing an odd number of arguments produces a Perl warning
2034             (C<Odd number of elements in hash assignment>) and the unpaired key
2035             receives C<undef> as its value, which will never match. Always use
2036             complete pairs:
2037              
2038             ->Where(status => '200') # correct: 1 pair
2039             ->Where(status => '200', method => 'GET') # correct: 2 pairs
2040             ->Where(status => '200', 'method') # wrong: 3 args, Perl warning
2041              
2042             =item * All comparisons are string equality (C<eq>)
2043              
2044             =item * All conditions are combined with AND
2045              
2046             =item * Undefined values are treated as failures
2047              
2048             =item * For numeric or OR logic, use code reference form
2049              
2050             =back
2051              
2052             B
2053              
2054             # DSL: Simple and readable
2055             ->Where(status => '200')
2056             ->Where(user => 'alice', role => 'admin')
2057              
2058             # Code ref: Complex logic
2059             ->Where(sub { $_[0]{status} >= 400 && $_[0]{status} < 500 })
2060             ->Where(sub { $_[0]{user} eq 'alice' || $_[0]{user} eq 'bob' })
2061              
2062             =back
2063              
2064             =head2 Projection Methods
2065              
2066             =over 4
2067              
2068             =item B<Select($selector)>
2069              
2070             Transform each element using the provided selector function.
2071              
2072             The selector receives each element as C<$_[0]> and should return
2073             the transformed value.
2074              
2075             B
2076              
2077             =over 4
2078              
2079             =item * C<$selector> - Code reference that transforms each element
2080              
2081             =back
2082              
2083             B<Returns:> New query with transformed elements (lazy)
2084              
2085             B
2086              
2087             # Extract single field
2088             ->Select(sub { $_[0]{url} })
2089              
2090             # Transform to new structure
2091             ->Select(sub {
2092             {
2093             path => $_[0]{url},
2094             code => $_[0]{status}
2095             }
2096             })
2097              
2098             # Calculate derived values
2099             ->Select(sub { $_[0]{bytes} * 8 }) # bytes to bits
2100              
2101             B<Note:> Select preserves one-to-one mapping. For one-to-many, use
2102             SelectMany.
2103              
2104             =item B<SelectMany($selector)>
2105              
2106             Flatten nested sequences into a single sequence.
2107              
2108             The selector should return an array reference. All arrays are flattened
2109             into a single sequence.
2110              
2111             B
2112              
2113             =over 4
2114              
2115             =item * C<$selector> - Code reference returning array reference
2116              
2117             =back
2118              
2119             B New query with flattened elements (lazy)
2120              
2121             B
2122              
2123             # Flatten array of arrays
2124             my @nested = ([1, 2], [3, 4], [5]);
2125             LTSV::LINQ->From(\@nested)
2126             ->SelectMany(sub { $_[0] })
2127             ->ToArray(); # (1, 2, 3, 4, 5)
2128              
2129             # Expand related records
2130             ->SelectMany(sub {
2131             my $user = shift;
2132             return [ map {
2133             { user => $user->{name}, role => $_ }
2134             } @{$user->{roles}} ];
2135             })
2136              
2137             B
2138              
2139             =over 4
2140              
2141             =item * Flattening nested arrays
2142              
2143             =item * Expanding one-to-many relationships
2144              
2145             =item * Generating multiple outputs per input
2146              
2147             =back
2148              
2149             B<Important:> The selector B<must> return an ARRAY reference. If it returns
2150             any other value (e.g. a hashref or scalar), this method throws an exception:
2151              
2152             die "SelectMany: selector must return an ARRAY reference"
2153              
2154             This matches the behaviour of .NET LINQ's C<SelectMany>, which requires
2155             the selector to return an C<IEnumerable>. Always wrap results in C<[...]>:
2156              
2157             ->SelectMany(sub { [ $_[0]{items} ] }) # correct: arrayref
2158             ->SelectMany(sub { $_[0]{items} }) # dies at runtime unless items is an arrayref
2159              
2160             =back
2161              
2162             =head2 Concatenation Methods
2163              
2164             =over 4
2165              
2166             =item B<Concat($second)>
2167              
2168             Concatenate two sequences into one.
2169              
2170             B
2171              
2172             =over 4
2173              
2174             =item * C<$second> - Second sequence (LTSV::LINQ object)
2175              
2176             =back
2177              
2178             B New query with both sequences concatenated (lazy)
2179              
2180             B
2181              
2182             # Combine two data sources
2183             my $q1 = LTSV::LINQ->From([1, 2, 3]);
2184             my $q2 = LTSV::LINQ->From([4, 5, 6]);
2185             $q1->Concat($q2)->ToArray(); # (1, 2, 3, 4, 5, 6)
2186              
2187             # Merge LTSV files
2188             LTSV::LINQ->FromLTSV("jan.log")
2189             ->Concat(LTSV::LINQ->FromLTSV("feb.log"))
2190             ->Where(status => '500')
2191              
2192             B This operation is lazy - sequences are read on-demand.
2193              
2194             =item B<Zip($second, $result_selector)>
2195              
2196             Combine two sequences element-wise using a result selector function.
2197              
2198             B
2199              
2200             =over 4
2201              
2202             =item * C<$second> - Second sequence (LTSV::LINQ object)
2203              
2204             =item * C<$result_selector> - Function to combine elements: ($first, $second) -> $result
2205              
2206             =back
2207              
2208             B New query with combined elements (lazy)
2209              
2210             B
2211              
2212             # Combine numbers
2213             my $numbers = LTSV::LINQ->From([1, 2, 3]);
2214             my $letters = LTSV::LINQ->From(['a', 'b', 'c']);
2215             $numbers->Zip($letters, sub {
2216             my($num, $letter) = @_;
2217             return "$num-$letter";
2218             })->ToArray(); # ('1-a', '2-b', '3-c')
2219              
2220             # Create key-value pairs
2221             my $keys = LTSV::LINQ->From(['name', 'age', 'city']);
2222             my $values = LTSV::LINQ->From(['Alice', 30, 'NYC']);
2223             $keys->Zip($values, sub {
2224             return {$_[0] => $_[1]};
2225             })->ToArray();
2226              
2227             # Stops at shorter sequence
2228             LTSV::LINQ->From([1, 2, 3, 4])
2229             ->Zip(LTSV::LINQ->From(['a', 'b']), sub { [$_[0], $_[1]] })
2230             ->ToArray(); # ([1, 'a'], [2, 'b'])
2231              
2232             B Iteration stops when either sequence ends.
2233              
2234             =back
2235              
2236             =head2 Partitioning Methods
2237              
2238             =over 4
2239              
2240             =item B<Take($count)>
2241              
2242             Take the first N elements from the sequence.
2243              
2244             B<Parameters:>
2245              
2246             =over 4
2247              
2248             =item * C<$count> - Number of elements to take (integer >= 0)
2249              
2250             =back
2251              
2252             B<Returns:> New query limited to first N elements (lazy)
2253              
2254             B<Examples:>
2255              
2256             # Top 10 results
2257             ->OrderByDescending(sub { $_[0]{score} })
2258             ->Take(10)
2259              
2260             # First record only
2261             ->Take(1)->ToArray()
2262              
2263             # Limit large file processing
2264             LTSV::LINQ->FromLTSV("huge.log")->Take(1000)
2265              
2266             B<Note:> Take(0) returns empty sequence. Negative values treated as 0.
2267              
2268             =item B<Skip($count)>
2269              
2270             Skip the first N elements, return the rest.
2271              
2272             B<Parameters:>
2273              
2274             =over 4
2275              
2276             =item * C<$count> - Number of elements to skip (integer >= 0)
2277              
2278             =back
2279              
2280             B<Returns:> New query skipping first N elements (lazy)
2281              
2282             B<Examples:>
2283              
2284             # Skip header row
2285             ->Skip(1)
2286              
2287             # Pagination: page 3, size 20
2288             ->Skip(40)->Take(20)
2289              
2290             # Skip first batch
2291             ->Skip(1000)->ForEach(sub { ... })
2292              
2293             B<Use Cases:>
2294              
2295             =over 4
2296              
2297             =item * Pagination
2298              
2299             =item * Skipping header rows
2300              
2301             =item * Processing in batches
2302              
2303             =back
2304              
2305             =item B<TakeWhile($predicate)>
2306              
2307             Take elements while the predicate is true. Stops at first false.
2308              
2309             B<Parameters:>
2310              
2311             =over 4
2312              
2313             =item * C<$predicate> - Code reference returning boolean
2314              
2315             =back
2316              
2317             B<Returns:> New query taking elements while predicate holds (lazy)
2318              
2319             B<Examples:>
2320              
2321             # Take while value is small
2322             ->TakeWhile(sub { $_[0]{count} < 100 })
2323              
2324             # Take while timestamp is in range
2325             ->TakeWhile(sub { $_[0]{time} lt '2026-02-01' })
2326              
2327             # Process until error
2328             ->TakeWhile(sub { $_[0]{status} < 400 })
2329              
2330             B<Note:> TakeWhile stops immediately when predicate returns false.
2331             It does NOT filter - it terminates the sequence.
2332              
2333             # Different from Where:
2334             ->TakeWhile(sub { $_[0] < 5 }) # 1,2,3,4 then STOP
2335             ->Where(sub { $_[0] < 5 }) # 1,2,3,4 (checks all)
2336              
2337             =item B<SkipWhile($predicate)>
2338              
2339             Skip elements while the predicate is true. Returns rest after first false.
2340              
2341             B<Parameters:>
2342              
2343             =over 4
2344              
2345             =item * C<$predicate> - Code reference returning boolean
2346              
2347             =back
2348              
2349             B<Returns:> New query skipping initial elements (lazy)
2350              
2351             B<Examples:>
2352              
2353             # Skip header lines
2354             ->SkipWhile(sub { $_[0]{line} =~ /^#/ })
2355              
2356             # Skip while value is small
2357             ->SkipWhile(sub { $_[0]{count} < 100 })
2358              
2359             # Process after certain timestamp
2360             ->SkipWhile(sub { $_[0]{time} lt '2026-02-01' })
2361              
2362             B<Note:> SkipWhile only skips initial elements. Once predicate is
2363             false, all remaining elements are included.
2364              
2365             [1,2,3,4,5,2,1]->SkipWhile(sub { $_[0] < 4 }) # (4,5,2,1)
2366              
2367             =back
2368              
2369             =head2 Ordering Methods
2370              
2371             B<Sort stability:> C<OrderBy*> and C<ThenBy*> use a Schwartzian-Transform
2372             decorated-array technique that appends the original element index as a
2373             final tie-breaker. This guarantees completely stable multi-key sorting on
2374             B<all supported Perl versions (5.005_03 and later)>, where built-in C<sort> stability
2375             is not guaranteed.
2376              
2377             B<Comparison type:> LTSV::LINQ provides three families:
2378              
2379             =over 4
2380              
2381             =item * C<OrderBy> / C<OrderByDescending> / C<ThenBy> / C<ThenByDescending>
2382              
2383             Smart comparison: numeric (C<E<lt>=E<gt>>) when both keys look numeric,
2384             string (C<cmp>) otherwise. Convenient for LTSV data where field values
2385             are always strings but commonly hold numbers.
2386              
2387             =item * C<OrderByStr> / C<OrderByStrDescending> / C<ThenByStr> / C<ThenByStrDescending>
2388              
2389             Unconditional string comparison (C<cmp>). Use when keys must sort
2390             lexicographically regardless of content (e.g. version strings, codes).
2391              
2392             =item * C<OrderByNum> / C<OrderByNumDescending> / C<ThenByNum> / C<ThenByNumDescending>
2393              
2394             Unconditional numeric comparison (C<E<lt>=E<gt>>). Use when keys are
2395             always numeric. Undefined or empty values are treated as C<0>.
2396              
2397             =back
2398              
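2399             B<IOrderedEnumerable:> C<OrderBy*> methods return a C<LTSV::LINQ::Ordered>
2400             object (a subclass of C<LTSV::LINQ>). This mirrors the way .NET LINQ's
2401             C<OrderBy> returns C<< IOrderedEnumerable<T> >>, which exposes C<ThenBy> and
2402             C<ThenByDescending>. All C<LTSV::LINQ> methods (C<Where>, C<Select>,
2403             C<Take>, etc.) are available on the returned object through inheritance.
2404             C<ThenBy*> methods are B<only> available on C<LTSV::LINQ::Ordered> objects,
2405             not on plain C<LTSV::LINQ> objects.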
2406              
2407             B<Non-destructive:> C<ThenBy*> always returns a B<new> C<LTSV::LINQ::Ordered>
2408             object; the original is unchanged. Branching sort chains work correctly:
2409              
2410             my $by_dept = LTSV::LINQ->From(\@data)->OrderBy(sub { $_[0]{dept} });
2411             my $asc = $by_dept->ThenBy(sub { $_[0]{name} });
2412             my $desc = $by_dept->ThenByNum(sub { $_[0]{salary} });
2413             # $asc and $desc are completely independent queries
2414              
2415             =over 4
2416              
2417             =item B<OrderBy($key_selector)>
2418              
2419             Sort in ascending order using smart comparison: if both keys look like
2420             numbers (integers, decimals, negative, or exponential notation), numeric
2421             comparison (C<E<lt>=E<gt>>) is used; otherwise string comparison (C<cmp>)
2422             is used. Returns a C<LTSV::LINQ::Ordered> object.
2423              
2424             ->OrderBy(sub { $_[0]{timestamp} }) # string keys: lexicographic
2425             ->OrderBy(sub { $_[0]{bytes} }) # "1024", "256" -> numeric (256, 1024)
2426              
2427             B<Note:> When you need explicit control over the comparison type, use
2428             C<OrderByStr> (always C<cmp>) or C<OrderByNum> (always C<E<lt>=E<gt>>).
2429              
2430             =item B<OrderByDescending($key_selector)>
2431              
2432             Sort in descending order using the same smart comparison as C<OrderBy>.
2433             Returns a C<LTSV::LINQ::Ordered> object.
2434              
2435             ->OrderByDescending(sub { $_[0]{count} })
2436              
2437             =item B<OrderByStr($key_selector)>
2438              
2439             Sort in ascending order using string comparison (C<cmp>) unconditionally.
2440             Returns a C<LTSV::LINQ::Ordered> object.
2441              
2442             ->OrderByStr(sub { $_[0]{code} }) # "10" lt "9" (lexicographic)
2443              
2444             =item B<OrderByStrDescending($key_selector)>
2445              
2446             Sort in descending order using string comparison (C<cmp>) unconditionally.
2447             Returns a C<LTSV::LINQ::Ordered> object.
2448              
2449             ->OrderByStrDescending(sub { $_[0]{name} })
2450              
2451             =item B<OrderByNum($key_selector)>
2452              
2453             Sort in ascending order using numeric comparison (C<E<lt>=E<gt>>)
2454             unconditionally. Returns a C<LTSV::LINQ::Ordered> object.
2455              
2456             ->OrderByNum(sub { $_[0]{bytes} }) # 9 < 10 (numeric)
2457              
2458             B<Note:> Undefined or empty values are treated as C<0>.
2459              
2460             =item B<OrderByNumDescending($key_selector)>
2461              
2462             Sort in descending order using numeric comparison (C<E<lt>=E<gt>>)
2463             unconditionally. Returns a C<LTSV::LINQ::Ordered> object.
2464              
2465             ->OrderByNumDescending(sub { $_[0]{response_time} })
2466              
2467             =item B<Reverse()>
2468              
2469             Reverse the order.
2470              
2471             ->Reverse()
2472              
2473             =item B<ThenBy($key_selector)>
2474              
2475             Add an ascending secondary sort key using smart comparison. Must be
2476             called on a C<LTSV::LINQ::Ordered> object (i.e., after C<OrderBy*>).
2477             Returns a new C<LTSV::LINQ::Ordered> object; the original is unchanged.
2478              
2479             ->OrderBy(sub { $_[0]{dept} })->ThenBy(sub { $_[0]{name} })
2480              
2481             =item B<ThenByDescending($key_selector)>
2482              
2483             Add a descending secondary sort key using smart comparison.
2484              
2485             ->OrderBy(sub { $_[0]{dept} })->ThenByDescending(sub { $_[0]{salary} })
2486              
2487             =item B<ThenByStr($key_selector)>
2488              
2489             Add an ascending secondary sort key using string comparison (C<cmp>).
2490              
2491             ->OrderByStr(sub { $_[0]{dept} })->ThenByStr(sub { $_[0]{code} })
2492              
2493             =item B<ThenByStrDescending($key_selector)>
2494              
2495             Add a descending secondary sort key using string comparison (C<cmp>).
2496              
2497             ->OrderByStr(sub { $_[0]{dept} })->ThenByStrDescending(sub { $_[0]{name} })
2498              
2499             =item B<ThenByNum($key_selector)>
2500              
2501             Add an ascending secondary sort key using numeric comparison (C<E<lt>=E<gt>>).
2502              
2503             ->OrderByStr(sub { $_[0]{dept} })->ThenByNum(sub { $_[0]{salary} })
2504              
2505             =item B<ThenByNumDescending($key_selector)>
2506              
2507             Add a descending secondary sort key using numeric comparison (C<E<lt>=E<gt>>).
2508             Undefined or empty values are treated as C<0>.
2509              
2510             ->OrderByStr(sub { $_[0]{host} })->ThenByNumDescending(sub { $_[0]{bytes} })
2511              
2512             =back
2513              
2514             =head2 Grouping Methods
2515              
2516             =over 4
2517              
2518             =item B<GroupBy($key_selector [, $element_selector])>
2519              
2520             Group elements by key.
2521              
2522             B<Returns:> New query where each element is a hashref with two fields:
2523              
2524             =over 4
2525              
2526             =item * C<Key> - The group key (string)
2527              
2528             =item * C<Elements> - Array reference of elements in the group
2529              
2530             =back
2531              
2532             B<Note:> This operation is eager - the entire sequence is loaded into memory
2533             immediately. Groups are returned in the order their keys first appear in
2534             the source sequence, matching the behaviour of .NET LINQ's C<GroupBy>.
2535              
2536             B<Examples:>
2537              
2538             # Group access log by status code
2539             my @groups = LTSV::LINQ->FromLTSV('access.log')
2540             ->GroupBy(sub { $_[0]{status} })
2541             ->ToArray();
2542              
2543             for my $g (@groups) {
2544             printf "status=%s count=%d\n", $g->{Key}, scalar @{$g->{Elements}};
2545             }
2546              
2547             # With element selector
2548             ->GroupBy(sub { $_[0]{status} }, sub { $_[0]{path} })
2549              
2550             B<Note:> C<Elements> is a plain array reference, not a LTSV::LINQ object.
2551             To apply further LINQ operations on a group, wrap it with C<From>:
2552              
2553             for my $g (@groups) {
2554             my $total = LTSV::LINQ->From($g->{Elements})
2555             ->Sum(sub { $_[0]{bytes} });
2556             printf "status=%s total_bytes=%d\n", $g->{Key}, $total;
2557             }
2558              
2559             =back
2560              
2561             =head2 Set Operations
2562              
2563             B<Evaluation model:>
2564              
2565             =over 4
2566              
2567             =item * C<Distinct> is fully lazy: elements are tested one by one as the
2568             output sequence is consumed.
2569              
2570             =item * C<Union>, C<Intersect>, C<Except> are B<partially eager>: when
2571             the method is called, the B<second> sequence is consumed in full and
2572             stored in an in-memory hash for O(1) lookup. The B<first> sequence is
2573             then iterated lazily. This matches the behaviour of .NET LINQ, which
2574             also buffers the second (hash-side) sequence up front.
2575              
2576             =back
2577              
2578             =over 4
2579              
2580             =item B<Distinct([$key_selector])>
2581              
2582             Remove duplicate elements.
2583              
2584             B<Parameters:>
2585              
2586             =over 4
2587              
2588             =item * C<$key_selector> - (Optional) Code ref: C<($element) -E<gt> $key>.
2589             Extracts a comparison key from each element. This is a single-argument
2590             function (unlike Perl's C<sort> comparator), and is I<not> a two-argument
2591             comparison function.
2592              
2593             =back
2594              
2595             ->Distinct()
2596             ->Distinct(sub { lc($_[0]) }) # case-insensitive strings
2597             ->Distinct(sub { $_[0]{id} }) # hashref: dedupe by field
2598              
2599             =item B<Union($second [, $key_selector])>
2600              
2601             Produce set union of two sequences (no duplicates).
2602              
2603             B<Parameters:>
2604              
2605             =over 4
2606              
2607             =item * C<$second> - Second sequence (LTSV::LINQ object)
2608              
2609             =item * C<$key_selector> - (Optional) Code ref: C<($element) -E<gt> $key>.
2610             Single-argument key extraction function (not a two-argument sort comparator).
2611              
2612             =back
2613              
2614             B<Returns:> New query with elements from both sequences (distinct)
2615              
2616             B<Evaluation:> B<Partially eager.> The first sequence is iterated lazily;
2617             the second is fully consumed at call time and stored in memory.
2618              
2619             B<Examples:>
2620              
2621             # Simple union
2622             my $q1 = LTSV::LINQ->From([1, 2, 3]);
2623             my $q2 = LTSV::LINQ->From([3, 4, 5]);
2624             $q1->Union($q2)->ToArray(); # (1, 2, 3, 4, 5)
2625              
2626             # Case-insensitive union
2627             ->Union($other, sub { lc($_[0]) })
2628              
2629             B<Note:> Equivalent to Concat()->Distinct(). Automatically removes duplicates.
2630              
2631             =item B<Intersect($second [, $key_selector])>
2632              
2633             Produce set intersection of two sequences.
2634              
2635             B<Parameters:>
2636              
2637             =over 4
2638              
2639             =item * C<$second> - Second sequence (LTSV::LINQ object)
2640              
2641             =item * C<$key_selector> - (Optional) Code ref: C<($element) -E<gt> $key>.
2642             Single-argument key extraction function (not a two-argument sort comparator).
2643              
2644             =back
2645              
2646             B<Returns:> New query with common elements only (distinct)
2647              
2648             B<Evaluation:> B<Partially eager.> The second sequence is fully consumed
2649             at call time and stored in a hash; the first is iterated lazily.
2650              
2651             B<Examples:>
2652              
2653             # Common elements
2654             LTSV::LINQ->From([1, 2, 3])
2655             ->Intersect(LTSV::LINQ->From([2, 3, 4]))
2656             ->ToArray(); # (2, 3)
2657              
2658             # Find users in both lists
2659             $users1->Intersect($users2, sub { $_[0]{id} })
2660              
2661             B<Note:> Only includes elements present in both sequences.
2662              
2663             =item B<Except($second [, $key_selector])>
2664              
2665             Produce set difference (elements in first but not in second).
2666              
2667             B<Parameters:>
2668              
2669             =over 4
2670              
2671             =item * C<$second> - Second sequence (LTSV::LINQ object)
2672              
2673             =item * C<$key_selector> - (Optional) Code ref: C<($element) -E<gt> $key>.
2674             Single-argument key extraction function (not a two-argument sort comparator).
2675              
2676             =back
2677              
2678             B<Returns:> New query with elements only in first sequence (distinct)
2679              
2680             B<Evaluation:> B<Partially eager.> The second sequence is fully consumed
2681             at call time and stored in a hash; the first is iterated lazily.
2682              
2683             B<Examples:>
2684              
2685             # Set difference
2686             LTSV::LINQ->From([1, 2, 3])
2687             ->Except(LTSV::LINQ->From([2, 3, 4]))
2688             ->ToArray(); # (1)
2689              
2690             # Find users in first list but not second
2691             $all_users->Except($inactive_users, sub { $_[0]{id} })
2692              
2693             B<Note:> Returns elements from first sequence not present in second.
2694              
2695             =back
2696              
2697             =head2 Join Operations
2698              
2699             B<Evaluation model:> Both C<Join> and C<GroupJoin> are B<partially eager>:
2700             when the method is called, the B<inner> sequence is consumed in full and
2701             stored in an in-memory lookup table (hash of arrays, keyed by inner key).
2702             The B<outer> sequence is then iterated lazily, producing results on demand.
2703              
2704             This matches the behaviour of .NET LINQ's hash-join implementation.
2705             The memory cost is O(inner size); for very large inner sequences, consider
2706             reversing the join or pre-filtering the inner sequence before passing it.
2707              
2708             =over 4
2709              
2710             =item B<Join($inner, $outer_key_selector, $inner_key_selector, $result_selector)>
2711              
2712             Correlate elements of two sequences based on matching keys (inner join).
2713              
2714             B<Parameters:>
2715              
2716             =over 4
2717              
2718             =item * C<$inner> - Inner sequence (LTSV::LINQ object)
2719              
2720             =item * C<$outer_key_selector> - Function to extract key from outer element
2721              
2722             =item * C<$inner_key_selector> - Function to extract key from inner element
2723              
2724             =item * C<$result_selector> - Function to create result: ($outer_item, $inner_item) -> $result
2725              
2726             =back
2727              
2728             B<Returns:> Query with joined results
2729              
2730             B<Examples:>
2731              
2732             # Join users with their orders
2733             my $users = LTSV::LINQ->From([
2734             {id => 1, name => 'Alice'},
2735             {id => 2, name => 'Bob'}
2736             ]);
2737              
2738             my $orders = LTSV::LINQ->From([
2739             {user_id => 1, product => 'Book'},
2740             {user_id => 1, product => 'Pen'},
2741             {user_id => 2, product => 'Notebook'}
2742             ]);
2743              
2744             $users->Join(
2745             $orders,
2746             sub { $_[0]{id} }, # outer key
2747             sub { $_[0]{user_id} }, # inner key
2748             sub {
2749             my($user, $order) = @_;
2750             return {
2751             name => $user->{name},
2752             product => $order->{product}
2753             };
2754             }
2755             )->ToArray();
2756             # [{name => 'Alice', product => 'Book'},
2757             # {name => 'Alice', product => 'Pen'},
2758             # {name => 'Bob', product => 'Notebook'}]
2759              
2760             # Join LTSV files by request ID
2761             LTSV::LINQ->FromLTSV('access.log')->Join(
2762             LTSV::LINQ->FromLTSV('error.log'),
2763             sub { $_[0]{request_id} },
2764             sub { $_[0]{request_id} },
2765             sub {
2766             my($access, $error) = @_;
2767             return {
2768             url => $access->{url},
2769             error => $error->{message}
2770             };
2771             }
2772             )
2773              
2774             B<Note:> This is an inner join - only matching elements are returned.
2775             The inner sequence is fully loaded into memory.
2776              
2777             =item B<GroupJoin($inner, $outer_key_selector, $inner_key_selector, $result_selector)>
2778              
2779             Correlates elements of two sequences with group join (LEFT OUTER JOIN-like).
2780             Each outer element is matched with a group of inner elements (possibly empty).
2781              
2782             B<Parameters:>
2783              
2784             =over 4
2785              
2786             =item * C<$inner> - Inner sequence (LTSV::LINQ object)
2787              
2788             =item * C<$outer_key_selector> - Function to extract key from outer element
2789              
2790             =item * C<$inner_key_selector> - Function to extract key from inner element
2791              
2792             =item * C<$result_selector> - Function: ($outer_item, $inner_group) -> $result.
2793             The C<$inner_group> is a LTSV::LINQ object containing matched inner elements
2794             (empty sequence if no matches).
2795              
2796             =back
2797              
2798             B<Returns:> New query with one result per outer element (lazy)
2799              
2800             B<Examples:>
2801              
2802             # Order count per user (including users with no orders)
2803             my $users = LTSV::LINQ->From([
2804             {id => 1, name => 'Alice'},
2805             {id => 2, name => 'Bob'},
2806             {id => 3, name => 'Carol'}
2807             ]);
2808              
2809             my $orders = LTSV::LINQ->From([
2810             {user_id => 1, product => 'Book', amount => 10},
2811             {user_id => 1, product => 'Pen', amount => 5},
2812             {user_id => 2, product => 'Notebook', amount => 15}
2813             ]);
2814              
2815             $users->GroupJoin(
2816             $orders,
2817             sub { $_[0]{id} },
2818             sub { $_[0]{user_id} },
2819             sub {
2820             my($user, $orders) = @_;
2821             return {
2822             name => $user->{name},
2823             count => $orders->Count(),
2824             total => $orders->Sum(sub { $_[0]{amount} })
2825             };
2826             }
2827             )->ToArray();
2828             # [
2829             # {name => 'Alice', count => 2, total => 15},
2830             # {name => 'Bob', count => 1, total => 15},
2831             # {name => 'Carol', count => 0, total => 0}, # no orders
2832             # ]
2833              
2834             # Flat list with no-match rows included (LEFT OUTER JOIN, cf. Join for inner join)
2835             $users->GroupJoin(
2836             $orders,
2837             sub { $_[0]{id} },
2838             sub { $_[0]{user_id} },
2839             sub {
2840             my($user, $user_orders) = @_;
2841             my @rows = $user_orders->ToArray();
2842             return @rows
2843             ? [ map { {name => $user->{name}, product => $_->{product}} } @rows ]
2844             : [ {name => $user->{name}, product => 'none'} ];
2845             }
2846             )->SelectMany(sub { $_[0] }) # Flatten the array references
2847             ->ToArray();
2848              
2849             B<Note:> Unlike Join, every outer element appears in the result even when
2850             there are no matching inner elements (LEFT OUTER JOIN semantics).
2851             The inner sequence is fully loaded into memory.
2852              
2853             B<Note:> The C<$inner_group> LTSV::LINQ object is highly flexible.
2854             It is specifically designed to be iterated multiple times within the
2855             result selector (e.g., calling C<Count()> followed by C<Sum()>) because
2856             it generates a fresh iterator for every terminal operation.
2857              
2858             =back
2859              
2860             =head2 Quantifier Methods
2861              
2862             =over 4
2863              
2864             =item B<All($predicate)>
2865              
2866             Test if all elements satisfy condition.
2867              
2868             ->All(sub { $_[0]{status} == 200 })
2869              
2870             =item B<Any([$predicate])>
2871              
2872             Test if any element satisfies condition.
2873              
2874             ->Any(sub { $_[0]{status} >= 400 })
2875             ->Any() # Test if sequence is non-empty
2876              
2877             =item B<Contains($value [, $comparer])>
2878              
2879             Check if sequence contains specified element.
2880              
2881             B<Parameters:>
2882              
2883             =over 4
2884              
2885             =item * C<$value> - Value to search for
2886              
2887             =item * C<$comparer> - (Optional) Custom comparison function
2888              
2889             =back
2890              
2891             B<Returns:> Boolean (1 or 0)
2892              
2893             B<Examples:>
2894              
2895             # Simple search
2896             ->Contains(5) # 1 if found, 0 otherwise
2897              
2898             # Case-insensitive search
2899             ->Contains('foo', sub { lc($_[0]) eq lc($_[1]) })
2900              
2901             # Check for undef
2902             ->Contains(undef)
2903              
2904             =item B<SequenceEqual($second [, $comparer])>
2905              
2906             Determine if two sequences are equal (same elements in same order).
2907              
2908             B<Parameters:>
2909              
2910             =over 4
2911              
2912             =item * C<$second> - Second sequence (LTSV::LINQ object)
2913              
2914             =item * C<$comparer> - (Optional) Comparison function ($a, $b) -> boolean
2915              
2916             =back
2917              
2918             B<Returns:> Boolean (1 if equal, 0 otherwise)
2919              
2920             B<Examples:>
2921              
2922             # Same sequences
2923             LTSV::LINQ->From([1, 2, 3])
2924             ->SequenceEqual(LTSV::LINQ->From([1, 2, 3])) # 1 (true)
2925              
2926             # Different elements
2927             LTSV::LINQ->From([1, 2, 3])
2928             ->SequenceEqual(LTSV::LINQ->From([1, 2, 4])) # 0 (false)
2929              
2930             # Different lengths
2931             LTSV::LINQ->From([1, 2])
2932             ->SequenceEqual(LTSV::LINQ->From([1, 2, 3])) # 0 (false)
2933              
2934             # Case-insensitive comparison
2935             $seq1->SequenceEqual($seq2, sub { lc($_[0]) eq lc($_[1]) })
2936              
2937             B<Note:> Order matters. Both content AND order must match.
2938              
2939             =back
2940              
2941             =head2 Element Access Methods
2942              
2943             =over 4
2944              
2945             =item B<First([$predicate])>
2946              
2947             Get first element. Dies if empty.
2948              
2949             ->First()
2950             ->First(sub { $_[0]{status} == 404 })
2951              
2952             =item B<FirstOrDefault([$predicate [, $default]])>
2953              
2954             Get first element or default value.
2955              
2956             ->FirstOrDefault(undef, {})
2957              
2958             =item B<Last([$predicate])>
2959              
2960             Get last element. Dies if empty.
2961              
2962             ->Last()
2963              
2964             =item B<LastOrDefault([$predicate [, $default]])>
2965              
2966             Get last element or default value. Never throws exceptions.
2967              
2968             B<Parameters:>
2969              
2970             =over 4
2971              
2972             =item * C<$predicate> - (Optional) Condition
2973              
2974             =item * C<$default> - (Optional) Value to return when no element is found.
2975             Defaults to C<undef> when omitted.
2976              
2977             =back
2978              
2979             B<Returns:> Last element or C<$default>
2980              
2981             B<Examples:>
2982              
2983             # Get last element (undef if empty)
2984             ->LastOrDefault()
2985              
2986             # Specify a default value
2987             LTSV::LINQ->From([])->LastOrDefault(undef, 0) # 0
2988              
2989             # With predicate and default
2990             ->LastOrDefault(sub { $_[0] % 2 == 0 }, -1) # Last even, or -1
2991              
2992             =item B<Single([$predicate])>
2993              
2994             Get the only element. Dies if sequence has zero or more than one element.
2995              
2996             B<Parameters:>
2997              
2998             =over 4
2999              
3000             =item * C<$predicate> - (Optional) Condition
3001              
3002             =back
3003              
3004             B<Returns:> Single element
3005              
3006             B<Exceptions:>
3007             - Dies with "Sequence contains no elements" if empty
3008             - Dies with "Sequence contains more than one element" if multiple elements
3009              
3010             B<.NET LINQ Compatibility:> Exception messages match .NET LINQ behavior exactly.
3011              
3012             B<Performance:> Uses lazy evaluation. Stops iterating immediately when
3013             second element is found (does not load entire sequence).
3014              
3015             B<Examples:>
3016              
3017             # Exactly one element
3018             LTSV::LINQ->From([5])->Single() # 5
3019              
3020             # With predicate
3021             ->Single(sub { $_[0] > 10 })
3022              
3023             # Memory-efficient: stops at 2nd element
3024             LTSV::LINQ->FromLTSV("huge.log")->Single(sub { $_[0]{id} eq '999' })
3025              
3026             =item B<SingleOrDefault([$predicate])>
3027              
3028             Get the only element, or undef if zero or multiple elements.
3029              
3030             B<Returns:> Single element or undef (if 0 or 2+ elements)
3031              
3032             B<.NET LINQ Compatibility:> B<Note:> .NET's C<SingleOrDefault> throws
3033             C<InvalidOperationException> when the sequence contains more than one
3034             element. LTSV::LINQ returns C<undef> in that case instead of throwing,
3035             which makes it more convenient for Perl code that checks return values.
3036             If you require the strict .NET behaviour (exception on multiple elements),
3037             use C<Single()> wrapped in C<eval>.
3038              
3039             B<Performance:> Uses lazy evaluation. Memory-efficient.
3040              
3041             B<Examples:>
3042              
3043             LTSV::LINQ->From([5])->SingleOrDefault() # 5
3044             LTSV::LINQ->From([])->SingleOrDefault() # undef (empty)
3045             LTSV::LINQ->From([1,2])->SingleOrDefault() # undef (multiple)
3046              
3047             =item B<ElementAt($index)>
3048              
3049             Get element at specified index. Dies if out of range.
3050              
3051             B<Parameters:>
3052              
3053             =over 4
3054              
3055             =item * C<$index> - Zero-based index
3056              
3057             =back
3058              
3059             B<Returns:> Element at index
3060              
3061             B<Exceptions:> Dies if index is negative or out of range
3062              
3063             B<Performance:> Uses lazy evaluation (iterator-based). Does NOT load
3064             entire sequence into memory. Stops iterating once target index is reached.
3065              
3066             B<Examples:>
3067              
3068             ->ElementAt(0) # First element
3069             ->ElementAt(2) # Third element
3070              
3071             # Memory-efficient for large files
3072             LTSV::LINQ->FromLTSV("huge.log")->ElementAt(10) # Reads only 11 lines
3073              
3074             =item B<ElementAtOrDefault($index)>
3075              
3076             Get element at index, or undef if out of range.
3077              
3078             B<Returns:> Element or undef
3079              
3080             B<Performance:> Uses lazy evaluation (iterator-based). Memory-efficient.
3081              
3082             B<Examples:>
3083              
3084             ->ElementAtOrDefault(0) # First element
3085             ->ElementAtOrDefault(99) # undef if out of range
3086              
3087             =back
3088              
3089             =head2 Aggregation Methods
3090              
3091             All aggregation methods are B<terminal operations> - they consume the
3092             entire sequence and return a scalar value.
3093              
3094             =over 4
3095              
3096             =item B<Count([$predicate])>
3097              
3098             Count the number of elements.
3099              
3100             B<Parameters:>
3101              
3102             =over 4
3103              
3104             =item * C<$predicate> - (Optional) Code reference to filter elements
3105              
3106             =back
3107              
3108             B<Returns:> Integer count
3109              
3110             B<Examples:>
3111              
3112             # Count all
3113             ->Count() # 1000
3114              
3115             # Count with condition
3116             ->Count(sub { $_[0]{status} >= 400 }) # 42
3117              
3118             # Equivalent to
3119             ->Where(sub { $_[0]{status} >= 400 })->Count()
3120              
3121             B<Performance:> O(n) - must iterate entire sequence
3122              
3123             =item B<Sum([$selector])>
3124              
3125             Calculate sum of numeric values.
3126              
3127             B<Parameters:>
3128              
3129             =over 4
3130              
3131             =item * C<$selector> - (Optional) Code reference to extract value.
3132             Default: identity function
3133              
3134             =back
3135              
3136             B<Returns:> Numeric sum
3137              
3138             B<Examples:>
3139              
3140             # Sum of values
3141             LTSV::LINQ->From([1, 2, 3, 4, 5])->Sum() # 15
3142              
3143             # Sum of field
3144             ->Sum(sub { $_[0]{bytes} })
3145              
3146             # Sum with transformation
3147             ->Sum(sub { $_[0]{price} * $_[0]{quantity} })
3148              
3149             B<Note:> Non-numeric values may produce warnings. Use numeric context.
3150              
3151             B<Empty sequence:> Returns C<0>.
3152              
3153             =item B<Min([$selector])>
3154              
3155             Find minimum value.
3156              
3157             B<Parameters:>
3158              
3159             =over 4
3160              
3161             =item * C<$selector> - (Optional) Code reference to extract value
3162              
3163             =back
3164              
3165             B<Returns:> Minimum value, or C<undef> if sequence is empty.
3166              
3167             B<Examples:>
3168              
3169             # Minimum of values
3170             ->Min()
3171              
3172             # Minimum of field
3173             ->Min(sub { $_[0]{response_time} })
3174              
3175             # Oldest timestamp
3176             ->Min(sub { $_[0]{timestamp} })
3177              
3178             =item B<Max([$selector])>
3179              
3180             Find maximum value.
3181              
3182             B<Parameters:>
3183              
3184             =over 4
3185              
3186             =item * C<$selector> - (Optional) Code reference to extract value
3187              
3188             =back
3189              
3190             B<Returns:> Maximum value, or C<undef> if sequence is empty.
3191              
3192             B<Examples:>
3193              
3194             # Maximum of values
3195             ->Max()
3196              
3197             # Maximum of field
3198             ->Max(sub { $_[0]{bytes} })
3199              
3200             # Latest timestamp
3201             ->Max(sub { $_[0]{timestamp} })
3202              
3203             =item B<Average([$selector])>
3204              
3205             Calculate arithmetic mean.
3206              
3207             B<Parameters:>
3208              
3209             =over 4
3210              
3211             =item * C<$selector> - (Optional) Code reference to extract value
3212              
3213             =back
3214              
3215             B<Returns:> Numeric average (floating point)
3216              
3217             B<Examples:>
3218              
3219             # Average of values
3220             LTSV::LINQ->From([1, 2, 3, 4, 5])->Average() # 3
3221              
3222             # Average of field
3223             ->Average(sub { $_[0]{bytes} })
3224              
3225             # Average response time
3226             ->Average(sub { $_[0]{response_time} })
3227              
3228             B<Empty sequence:> Dies with "Sequence contains no elements".
3229             Unlike C<Sum> (returns 0) and C<Min>/C<Max> (return C<undef>), C<Average>
3230             throws on an empty sequence. Use C<AverageOrDefault> to avoid the exception.
3231              
3232             B<Note:> Returns floating point. Use C<int()> for integer result.
3233              
3234             =item B<AverageOrDefault([$selector])>
3235              
3236             Calculate arithmetic mean, or return undef if sequence is empty.
3237              
3238             B<Parameters:>
3239              
3240             =over 4
3241              
3242             =item * C<$selector> - (Optional) Code reference to extract value
3243              
3244             =back
3245              
3246             B<Returns:> Numeric average (floating point), or undef if empty
3247              
3248             B<Examples:>
3249              
3250             # Safe average - returns undef for empty sequence
3251             my @empty = ();
3252             my $avg = LTSV::LINQ->From(\@empty)->AverageOrDefault(); # undef
3253              
3254             # With data
3255             LTSV::LINQ->From([1, 2, 3])->AverageOrDefault(); # 2
3256              
3257             # With selector
3258             ->AverageOrDefault(sub { $_[0]{value} })
3259              
3260             B<Note:> Unlike Average(), this method never throws an exception.
3261              
3262             =item B<Aggregate([$seed,] $func [, $result_selector])>
3263              
3264             Apply an accumulator function over a sequence.
3265              
3266             B<Signatures:>
3267              
3268             =over 4
3269              
3270             =item * C<Aggregate($func)> - Use first element as seed
3271              
3272             =item * C<Aggregate($seed, $func)> - Explicit seed value
3273              
3274             =item * C<Aggregate($seed, $func, $result_selector)> - Transform result
3275              
3276             =back
3277              
3278             B<Parameters:>
3279              
3280             =over 4
3281              
3282             =item * C<$seed> - Initial accumulator value (optional for first signature)
3283              
3284             =item * C<$func> - Code reference: ($accumulator, $element) -> $new_accumulator
3285              
3286             =item * C<$result_selector> - (Optional) Transform final result
3287              
3288             =back
3289              
3290             B<Returns:> Accumulated value
3291              
3292             B<Examples:>
3293              
3294             # Sum (without seed)
3295             LTSV::LINQ->From([1,2,3,4])->Aggregate(sub { $_[0] + $_[1] }) # 10
3296              
3297             # Product (with seed)
3298             LTSV::LINQ->From([2,3,4])->Aggregate(1, sub { $_[0] * $_[1] }) # 24
3299              
3300             # Concatenate strings
3301             LTSV::LINQ->From(['a','b','c'])
3302             ->Aggregate('', sub { $_[0] ? "$_[0],$_[1]" : $_[1] }) # 'a,b,c'
3303              
3304             # With result selector
3305             LTSV::LINQ->From([1,2,3])
3306             ->Aggregate(0,
3307             sub { $_[0] + $_[1] }, # accumulate
3308             sub { "Sum: $_[0]" }) # transform result
3309             # "Sum: 6"
3310              
3311             # Build complex structure
3312             ->Aggregate([], sub {
3313             my($list, $item) = @_;
3314             push @$list, uc($item);
3315             return $list;
3316             })
3317              
3318             B<.NET LINQ Compatibility:> Supports all three .NET signatures.
3319              
3320             =back
3321              
3322             =head2 Conversion Methods
3323              
3324             =over 4
3325              
3326             =item B<ToArray()>
3327              
3328             Convert to array.
3329              
3330             my @array = $query->ToArray();
3331              
3332             =item B<ToList()>
3333              
3334             Convert to array reference.
3335              
3336             my $arrayref = $query->ToList();
3337              
3338             =item B<ToDictionary($key_selector [, $value_selector])>
3339              
3340             Convert sequence to hash reference with unique keys.
3341              
3342             B<Parameters:>
3343              
3344             =over 4
3345              
3346             =item * C<$key_selector> - Function to extract key from element
3347              
3348             =item * C<$value_selector> - (Optional) Function to extract value, defaults to element itself
3349              
3350             =back
3351              
3352             B<Returns:> Hash reference
3353              
3354             B<Examples:>
3355              
3356             # ID to name mapping
3357             my $users = LTSV::LINQ->From([
3358             {id => 1, name => 'Alice'},
3359             {id => 2, name => 'Bob'}
3360             ]);
3361              
3362             my $dict = $users->ToDictionary(
3363             sub { $_[0]{id} },
3364             sub { $_[0]{name} }
3365             );
3366             # {1 => 'Alice', 2 => 'Bob'}
3367              
3368             # Without value selector (stores entire element)
3369             my $dict = $users->ToDictionary(sub { $_[0]{id} });
3370             # {1 => {id => 1, name => 'Alice'}, 2 => {id => 2, name => 'Bob'}}
3371              
3372             # Quick lookup table
3373             my $status_codes = LTSV::LINQ->FromLTSV('access.log')
3374             ->Select(sub { $_[0]{status} })
3375             ->Distinct()
3376             ->ToDictionary(sub { $_[0] }, sub { 1 });
3377              
3378             B<Note:> If duplicate keys exist, later values overwrite earlier ones.
3379              
3380             B<.NET LINQ Compatibility:> .NET's C<ToDictionary> throws C<ArgumentException>
3381             on duplicate keys. This module silently overwrites with the later value,
3382             following Perl hash semantics. Use C<ToLookup> if you need to preserve all
3383             values for each key.
3384              
3385             =item B<ToLookup($key_selector [, $value_selector])>
3386              
3387             Convert sequence to hash reference with grouped values (multi-value dictionary).
3388              
3389             B<Parameters:>
3390              
3391             =over 4
3392              
3393             =item * C<$key_selector> - Function to extract key from element
3394              
3395             =item * C<$value_selector> - (Optional) Function to extract value, defaults to element itself
3396              
3397             =back
3398              
3399             B<Returns:> Hash reference where values are array references
3400              
3401             B<Examples:>
3402              
3403             # Group orders by user ID
3404             my $orders = LTSV::LINQ->From([
3405             {user_id => 1, product => 'Book'},
3406             {user_id => 1, product => 'Pen'},
3407             {user_id => 2, product => 'Notebook'}
3408             ]);
3409              
3410             my $lookup = $orders->ToLookup(
3411             sub { $_[0]{user_id} },
3412             sub { $_[0]{product} }
3413             );
3414             # {
3415             # 1 => ['Book', 'Pen'],
3416             # 2 => ['Notebook']
3417             # }
3418              
3419             # Group LTSV by status code
3420             my $by_status = LTSV::LINQ->FromLTSV('access.log')
3421             ->ToLookup(sub { $_[0]{status} });
3422             # {
3423             # '200' => [{...}, {...}, ...],
3424             # '404' => [{...}, ...],
3425             # '500' => [{...}]
3426             # }
3427              
3428             B<Note:> Unlike ToDictionary, this preserves all values for each key.
3429              
3430             =item B<DefaultIfEmpty([$default_value])>
3431              
3432             Return default value if sequence is empty, otherwise return the sequence.
3433              
3434             B<Parameters:>
3435              
3436             =over 4
3437              
3438             =item * C<$default_value> - (Optional) Default value, defaults to undef
3439              
3440             =back
3441              
3442             B<Returns:> New query with default value if empty (lazy)
3443              
3444             B<Examples:>
3445              
3446             # Return 0 if empty
3447             ->DefaultIfEmpty(0)->ToArray() # (0) if empty, or original data
3448              
3449             # With undef default
3450             ->DefaultIfEmpty()->First() # undef if empty
3451              
3452             # Useful for left joins
3453             ->Where(condition)->DefaultIfEmpty({id => 0, name => 'None'})
3454              
3455             B<Note:> This is useful for ensuring a sequence always has at least
3456             one element.
3457              
3458             =item B<ToLTSV($file [, label_order =E<gt> \@labels] [, headers =E<gt> \@labels])>
3459              
3460             Write to LTSV file.
3461              
3462             B<Parameters:>
3463              
3464             =over 4
3465              
3466             =item * C<$file> - Output file path
3467              
3468             =item * C<label_order =E<gt> [\@labels]> - (Optional) Specify output label order.
3469             Labels listed here appear first in the order given; labels present in the
3470             record but not listed are appended in sorted order.
3471              
3472             =item * C<headers =E<gt> [\@labels]> - Alias for C<label_order> (CSV-LINQ interop).
3473              
3474             =back
3475              
3476             B<Examples:>
3477              
3478             # Default: labels in sorted order
3479             $query->ToLTSV("output.ltsv");
3480              
3481             # Specify label order
3482             $query->ToLTSV("output.ltsv", label_order => [qw(host status bytes url)]);
3483              
3484             # Alias form (CSV-LINQ interoperability)
3485             $query->ToLTSV("output.ltsv", headers => [qw(host status bytes url)]);
3486              
3487             B<Note:> Labels specified in C<label_order>/C<headers> that do not exist in a
3488             given record are silently skipped for that record.
3489              
3490             =back
3491              
3492             =head2 Utility Methods
3493              
3494             =over 4
3495              
3496             =item B<ForEach($action)>
3497              
3498             Execute action for each element.
3499              
3500             $query->ForEach(sub { print $_[0]{url}, "\n" });
3501              
3502             =back
3503              
3504             =head1 EXAMPLES
3505              
3506             =head2 Basic Filtering
3507              
3508             use LTSV::LINQ;
3509              
3510             # DSL syntax
3511             my @successful = LTSV::LINQ->FromLTSV("access.log")
3512             ->Where(status => '200')
3513             ->ToArray();
3514              
3515             # Code reference
3516             my @errors = LTSV::LINQ->FromLTSV("access.log")
3517             ->Where(sub { $_[0]{status} >= 400 })
3518             ->ToArray();
3519              
3520             =head2 Aggregation
3521              
3522             # Count errors
3523             my $error_count = LTSV::LINQ->FromLTSV("access.log")
3524             ->Where(sub { $_[0]{status} >= 400 })
3525             ->Count();
3526              
3527             # Average bytes for successful requests
3528             my $avg_bytes = LTSV::LINQ->FromLTSV("access.log")
3529             ->Where(status => '200')
3530             ->Average(sub { $_[0]{bytes} });
3531              
3532             print "Average bytes: $avg_bytes\n";
3533              
3534             =head2 Grouping and Ordering
3535              
3536             # Top 10 URLs by request count
3537             my @top_urls = LTSV::LINQ->FromLTSV("access.log")
3538             ->Where(sub { $_[0]{status} eq '200' })
3539             ->GroupBy(sub { $_[0]{url} })
3540             ->Select(sub {
3541             my $g = shift;
3542             return {
3543             URL => $g->{Key},
3544             Count => scalar(@{$g->{Elements}}),
3545             TotalBytes => LTSV::LINQ->From($g->{Elements})
3546             ->Sum(sub { $_[0]{bytes} })
3547             };
3548             })
3549             ->OrderByDescending(sub { $_[0]{Count} })
3550             ->Take(10)
3551             ->ToArray();
3552              
3553             for my $stat (@top_urls) {
3554             printf "%5d requests - %s (%d bytes)\n",
3555             $stat->{Count}, $stat->{URL}, $stat->{TotalBytes};
3556             }
3557              
3558             =head2 Complex Query Chain
3559              
3560             # Multi-step analysis
3561             my @result = LTSV::LINQ->FromLTSV("access.log")
3562             ->Where(status => '200') # Filter successful
3563             ->Select(sub { $_[0]{bytes} }) # Extract bytes
3564             ->Where(sub { $_[0] > 1000 }) # Large responses only
3565             ->OrderByDescending(sub { $_[0] }) # Sort descending
3566             ->Take(100) # Top 100
3567             ->ToArray();
3568              
3569             print "Largest 100 successful responses:\n";
3570             print " ", join(", ", @result), "\n";
3571              
3572             =head2 Lazy Processing of Large Files
3573              
3574             # Process huge file with constant memory
3575             LTSV::LINQ->FromLTSV("huge.log")
3576             ->Where(sub { $_[0]{level} eq 'ERROR' })
3577             ->ForEach(sub {
3578             my $rec = shift;
3579             print "ERROR at $rec->{time}: $rec->{message}\n";
3580             });
3581              
3582             =head2 Quantifiers
3583              
3584             # Check if all requests are successful
3585             my $all_ok = LTSV::LINQ->FromLTSV("access.log")
3586             ->All(sub { $_[0]{status} < 400 });
3587              
3588             print $all_ok ? "All OK\n" : "Some errors\n";
3589              
3590             # Check if any errors exist
3591             my $has_errors = LTSV::LINQ->FromLTSV("access.log")
3592             ->Any(sub { $_[0]{status} >= 500 });
3593              
3594             print "Server errors detected\n" if $has_errors;
3595              
3596             =head2 Data Transformation
3597              
3598             # Read LTSV, transform, write back
3599             LTSV::LINQ->FromLTSV("input.ltsv")
3600             ->Select(sub {
3601             my $rec = shift;
3602             return {
3603             %$rec,
3604             processed => 1,
3605             timestamp => time(),
3606             };
3607             })
3608             ->ToLTSV("output.ltsv");
3609              
3610             =head2 Working with Arrays
3611              
3612             # Query in-memory data
3613             my @data = (
3614             {name => 'Alice', age => 30, city => 'Tokyo'},
3615             {name => 'Bob', age => 25, city => 'Osaka'},
3616             {name => 'Carol', age => 35, city => 'Tokyo'},
3617             );
3618              
3619             my @tokyo_residents = LTSV::LINQ->From(\@data)
3620             ->Where(city => 'Tokyo')
3621             ->OrderBy(sub { $_[0]{age} })
3622             ->ToArray();
3623              
3624             =head1 FEATURES
3625              
3626             =head2 Lazy Evaluation
3627              
3628             All query operations use lazy evaluation via iterators. Data is
3629             processed on-demand, not all at once.
3630              
3631             # Only reads 10 records from file
3632             my @top10 = LTSV::LINQ->FromLTSV("huge.log")
3633             ->Take(10)
3634             ->ToArray();
3635              
3636             =head2 Method Chaining
3637              
3638             All methods (except terminal operations like ToArray) return a new
3639             query object, enabling fluent method chaining.
3640              
3641             ->Where(...)->Select(...)->OrderBy(...)->Take(10)
3642              
3643             =head2 DSL Syntax
3644              
3645             Simple key-value filtering without code references.
3646              
3647             # Readable and concise
3648             ->Where(status => '200', method => 'GET')
3649              
3650             # Instead of
3651             ->Where(sub { $_[0]{status} eq '200' && $_[0]{method} eq 'GET' })
3652              
3653             =head1 ARCHITECTURE
3654              
3655             =head2 Iterator-Based Design
3656              
3657             LTSV::LINQ uses an iterator-based architecture for lazy evaluation.
3658              
3659             B<Core Concept:>
3660              
3661             Each query operation returns a new query object wrapping an iterator
3662             (a code reference that produces one element per call).
3663              
3664             my $iter = sub {
3665             # Read next element
3666             # Apply transformation
3667             # Return element or undef
3668             };
3669              
3670             my $query = LTSV::LINQ->new($iter);
3671              
3672             B<Benefits:>
3673              
3674             =over 4
3675              
3676             =item * B<Memory Efficiency> - O(1) memory for most operations
3677              
3678             =item * B<Lazy Evaluation> - Elements computed on-demand
3679              
3680             =item * B<Composability> - Iterators chain naturally
3681              
3682             =item * B<Early Termination> - Stop processing when done
3683              
3684             =back
3685              
3686             =head2 Method Categories
3687              
3688             The table below shows, for every method, whether it is lazy or eager,
3689             and what it returns. Knowing this prevents surprises about memory use
3690             and iterator consumption.
3691              
3692             Method Category Evaluation Returns
3693             ------ -------- ---------- -------
3694             From Source Lazy (factory) Query
3695             FromLTSV Source Lazy (factory) Query
3696             Range Source Lazy Query
3697             Empty Source Lazy Query
3698             Repeat Source Lazy Query
3699             Where Filter Lazy Query
3700             Select Projection Lazy Query
3701             SelectMany Projection Lazy Query
3702             Concat Concatenation Lazy Query
3703             Zip Concatenation Lazy Query
3704             Take Partitioning Lazy Query
3705             Skip Partitioning Lazy Query
3706             TakeWhile Partitioning Lazy Query
3707             SkipWhile Partitioning Lazy Query
3708             Distinct Set Operation Lazy (1st seq) Query
3709             DefaultIfEmpty Conversion Lazy Query
3710             OrderBy Ordering Eager (full) Query
3711             OrderByDescending Ordering Eager (full) Query
3712             OrderByStr Ordering Eager (full) Query
3713             OrderByStrDescending Ordering Eager (full) Query
3714             OrderByNum Ordering Eager (full) Query
3715             OrderByNumDescending Ordering Eager (full) Query
3716             Reverse Ordering Eager (full) Query
3717             GroupBy Grouping Eager (full) Query
3718             Union Set Operation Eager (2nd seq) Query
3719             Intersect Set Operation Eager (2nd seq) Query
3720             Except Set Operation Eager (2nd seq) Query
3721             Join Join Eager (inner seq) Query
3722             GroupJoin Join Eager (inner seq) Query
3723             All Quantifier Lazy (early exit) Boolean
3724             Any Quantifier Lazy (early exit) Boolean
3725             Contains Quantifier Lazy (early exit) Boolean
3726             SequenceEqual Comparison Lazy (early exit) Boolean
3727             First Element Access Lazy (early exit) Element
3728             FirstOrDefault Element Access Lazy (early exit) Element
3729             Last Element Access Eager (full) Element
3730             LastOrDefault Element Access Eager (full) Element
3731             Single Element Access Lazy (stops at 2) Element
3732             SingleOrDefault Element Access Lazy (stops at 2) Element
3733             ElementAt Element Access Lazy (early exit) Element
3734             ElementAtOrDefault Element Access Lazy (early exit) Element
3735             Count Aggregation Eager (full) Integer
3736             Sum Aggregation Eager (full) Number
3737             Min Aggregation Eager (full) Number
3738             Max Aggregation Eager (full) Number
3739             Average Aggregation Eager (full) Number
3740             AverageOrDefault Aggregation Eager (full) Number or undef
3741             Aggregate Aggregation Eager (full) Scalar
3742             ToArray Conversion Eager (full) Array
3743             ToList Conversion Eager (full) ArrayRef
3744             ToDictionary Conversion Eager (full) HashRef
3745             ToLookup Conversion Eager (full) HashRef
3746             ToLTSV Conversion Eager (full) (file written)
3747             ForEach Utility Eager (full) (void)
3748              
3749             B<Evaluation column legend:>
3750              
3751             =over 4
3752              
3753             =item * B<Lazy> - returns a new Query immediately; no data is read yet.
3754              
3755             =item * B<Lazy (early exit)> - reads only as many elements as needed, then stops.
3756              
3757             =item * B<Lazy (stops at 2)> - reads until it finds a second match, then stops.
3758              
3759             =item * B<Eager (full)> - must read the entire input sequence before returning.
3760              
3761             =item * B<Eager (2nd seq) / Eager (inner seq)> - the indicated sequence is read
3762             in full up front; the other sequence remains lazy.
3763              
3764             =back
3765              
3766             B<Practical guidelines:>
3767              
3768             =over 4
3769              
3770             =item * Chain lazy operations freely - no cost until a terminal is called.
3771              
3772             =item * Each terminal operation exhausts the iterator; to reuse data, call
3773             C<ToArray> first and rebuild with C<From>.
3774              
3775             =item * For very large files, avoid eager operations (C<OrderBy>, C<GroupBy>,
3776             C<Reverse>, etc.) unless the data fits in memory, or pre-filter with C<Where>
3777             to reduce the working set first.
3778              
3779             =back
3780              
3781             =head2 Query Execution Flow
3782              
3783             # Build query (lazy - no execution yet)
3784             my $query = LTSV::LINQ->FromLTSV("access.log")
3785             ->Where(status => '200') # Lazy
3786             ->Select(sub { $_[0]{url} }) # Lazy
3787             ->Distinct(); # Lazy
3788              
3789             # Execute query (terminal operation)
3790             my @results = $query->ToArray(); # Now executes entire chain
3791              
3792             B
3793              
3794             1. FromLTSV opens file and creates iterator
3795             2. Where wraps iterator with filter
3796             3. Select wraps with transformation
3797             4. Distinct wraps with deduplication
3798             5. ToArray pulls elements through chain
3799              
3800             Each element flows through the entire chain before the next element
3801             is read.
3802              
3803             =head2 Memory Characteristics
3804              
3805             B<O(1) memory (streaming):>
3806              
3807             These hold at most one element in memory at a time:
3808              
3809             =over 4
3810              
3811             =item * Where, Select, SelectMany, Concat, Zip
3812              
3813             =item * Take, Skip, TakeWhile, SkipWhile
3814              
3815             =item * DefaultIfEmpty
3816              
3817             =item * ForEach, Count, Sum, Min, Max, Average, AverageOrDefault
3818              
3819             =item * First, FirstOrDefault, Any, All, Contains
3820              
3821             =item * Single, SingleOrDefault, ElementAt, ElementAtOrDefault
3822              
3823             =back
3824              
3825             B<Memory proportional to distinct keys:>
3826              
3827             =over 4
3828              
3829             =item * Distinct - hash grows with the number of distinct keys seen
3830              
3831             =back
3832              
3833             B<Partially eager (one sequence buffered):>
3834              
3835             The following are partially eager: one sequence is buffered in full,
3836             the other is streamed:
3837              
3838             =over 4
3839              
3840             =item * Union, Intersect, Except - second sequence is fully loaded
3841              
3842             =item * Join, GroupJoin - inner sequence is fully loaded
3843              
3844             =back
3845              
3846             B<Fully eager (O(n) memory):>
3847              
3848             =over 4
3849              
3850             =item * ToArray, ToList, ToDictionary, ToLookup, ToLTSV (O(n))
3851              
3852             =item * OrderBy, OrderByDescending and Str/Num variants, Reverse (O(n))
3853              
3854             =item * GroupBy (O(n))
3855              
3856             =item * Last, LastOrDefault (O(n))
3857              
3858             =item * Aggregate (O(n), O(1) intermediate accumulator)
3859              
3860             =back
3861              
3862             =head1 PERFORMANCE
3863              
3864             =head2 Memory Efficiency
3865              
3866             Lazy evaluation means memory usage is O(1) for most operations,
3867             regardless of input size.
3868              
3869             # Processes 1GB file with constant memory
3870             LTSV::LINQ->FromLTSV("1gb.log")
3871             ->Where(status => '500')
3872             ->ForEach(sub { print $_[0]{url}, "\n" });
3873              
3874             =head2 Terminal Operations
3875              
3876             These operations materialize the entire result set:
3877              
3878             =over 4
3879              
3880             =item * ToArray, ToList
3881              
3882             =item * OrderBy, OrderByDescending, Reverse
3883              
3884             =item * GroupBy
3885              
3886             =item * Last
3887              
3888             =back
3889              
3890             For large datasets, use these operations carefully.
3891              
3892             =head2 Optimization Tips
3893              
3894             =over 4
3895              
3896             =item * Filter early: Place Where clauses first
3897              
3898             # Good: Filter before expensive operations
3899             ->Where(status => '200')->OrderBy(...)->Take(10)
3900              
3901             # Bad: Order all data, then filter
3902             ->OrderBy(...)->Where(status => '200')->Take(10)
3903              
3904             =item * Limit early: Use Take to reduce processing
3905              
3906             # Process only what you need
3907             ->Take(1000)->GroupBy(...)
3908              
3909             =item * Avoid repeated ToArray: Reuse results
3910              
3911             # Bad: Calls ToArray twice
3912             my $count = scalar($query->ToArray());
3913             my @items = $query->ToArray();
3914              
3915             # Good: Call once, reuse
3916             my @items = $query->ToArray();
3917             my $count = scalar(@items);
3918              
3919             =back
3920              
3921             =head1 COMPATIBILITY
3922              
3923             =head2 Perl Version Support
3924              
3925             This module is compatible with B<Perl 5.005_03 and later>.
3926              
3927             Tested on:
3928              
3929             =over 4
3930              
3931             =item * Perl 5.005_03 (released 1999)
3932              
3933             =item * Perl 5.6.x
3934              
3935             =item * Perl 5.8.x
3936              
3937             =item * Perl 5.10.x - 5.42.x
3938              
3939             =back
3940              
3941             =head2 Compatibility Policy
3942              
3943             B
3944              
3945             This module maintains compatibility with Perl 5.005_03 through careful
3946             coding practices:
3947              
3948             =over 4
3949              
3950             =item * No use of features introduced after 5.005
3951              
3952             =item * C<warnings> compatibility shim for pre-5.6
3953              
3954             =item * C<our> keyword avoided (5.6+ feature)
3955              
3956             =item * Three-argument C<open> used on Perl 5.6 and later (two-argument form retained for 5.005_03)
3957              
3958             =item * No Unicode features required
3959              
3960             =item * No module dependencies beyond core
3961              
3962             =back
3963              
3964             B
3965              
3966             This module adheres to the B<Perl 5.005_03 specification>, which was the
3967             final version of JPerl (Japanese Perl). This is not about using the old
3968             interpreter, but about maintaining the B<simplicity>
3969             that made Perl enjoyable.
3970              
3971             B
3972              
3973             Some people think the strength of modern times is the ability to use
3974             modern technology. That thinking is insufficient. The strength of modern
3975             times is the ability to use B<all> technology up to the present day.
3976              
3977             By adhering to the Perl 5.005_03 specification, we gain access to the
3978             entire history of Perl--from 5.005_03 to 5.42 and beyond--rather than
3979             limiting ourselves to only the latest versions.
3980              
3981             Key reasons:
3982              
3983             =over 4
3984              
3985             =item * B - The original Perl approach keeps programming fun and easy
3986              
3987             Perl 5.6 and later introduced character encoding complexity that made
3988             programming harder. The confusion around character handling contributed
3989             to Perl's decline. By staying with the 5.005_03 specification, we maintain
3990             the simplicity that made Perl "rakuda" (camel) -> "raku" (easy/fun).
3991              
3992             =item * B - Preserves the last JPerl version
3993              
3994             Perl 5.005_03 was the final version of JPerl, which handled Japanese text
3995             naturally. Later versions abandoned this approach for Unicode, adding
3996             unnecessary complexity for many use cases.
3997              
3998             =item * B - Runs on ANY Perl version
3999              
4000             Code written to the 5.005_03 specification runs on B<all> Perl versions
4001             from 5.005_03 through 5.42 and beyond. This maximizes compatibility across
4002             more than two decades of Perl releases.
4003              
4004             =item * B - Real-world enterprise needs
4005              
4006             Many production systems, embedded environments, and enterprise deployments
4007             still run Perl 5.005, 5.6, or 5.8. This module provides modern query
4008             capabilities without requiring upgrades.
4009              
4010             =item * B - Programming should be enjoyable
4011              
4012             As readers of the "Camel Book" (Programming Perl) know, Perl was designed
4013             to make programming enjoyable. The 5.005_03 specification preserves this
4014             original vision.
4015              
4016             =back
4017              
4018             B
4019              
4020             All modules under the ina CPAN account (including mb, Jacode, UTF8-R2,
4021             mb-JSON, and this module) follow this principle: Write to the Perl 5.005_03
4022             specification, test on all versions, maintain programming joy.
4023              
4024             This is not nostalgia--it's a commitment to:
4025              
4026             =over 4
4027              
4028             =item * Simple, maintainable code
4029              
4030             =item * Maximum compatibility
4031              
4032             =item * The original Perl philosophy
4033              
4034             =item * Making programming "raku" (easy and fun)
4035              
4036             =back
4037              
4038             B
4039              
4040             This module uses C<pmake.bat> instead of traditional make, since Perl 5.005_03
4041             on Microsoft Windows lacks make. All tests pass on Perl 5.005_03 through
4042             modern versions.
4043              
4044             =head2 .NET LINQ Compatibility
4045              
4046             This section documents where LTSV::LINQ's behaviour matches .NET LINQ
4047             exactly, where it intentionally differs, and where it cannot differ due
4048             to Perl's type system.
4049              
4050             B
4051              
4052             =over 4
4053              
4054             =item * C<Single> - throws when sequence is empty or has more than one element
4055              
4056             =item * C<First>, C<Last> - throw when sequence is empty or no element matches
4057              
4058             =item * C and C
4059             - matching 2- and 3-argument forms
4060              
4061             =item * C<GroupBy> - groups are returned in insertion order (first-seen key order)
4062              
4063             =item * C<GroupJoin> - every outer element appears even with zero inner matches
4064              
4065             =item * C<Join> - inner join semantics; unmatched outer elements are dropped
4066              
4067             =item * C / C / C - partially eager (second/inner
4068             sequence buffered up front), matching .NET's hash-join approach
4069              
4070             =item * C, C, C, C - identical semantics
4071              
4072             =item * C / C with early exit
4073              
4074             =back
4075              
4076             B
4077              
4078             =over 4
4079              
4080             =item * C<SingleOrDefault>
4081              
4082             .NET throws C<InvalidOperationException> when the sequence contains more
4083             than one element. LTSV::LINQ returns C<undef> instead. This makes it
4084             more natural in Perl code that checks return values with C<defined>.
4085              
4086             If you require strict .NET behaviour (exception on multiple elements),
4087             use C<Single> inside an C<eval>:
4088              
4089             my $val = eval { $query->Single() };
4090             # $val is undef and $@ is set if empty or multiple
4091              
4092             =item * C<DefaultIfEmpty(undef)>
4093              
4094             .NET's C<DefaultIfEmpty> can return a sequence containing C<null>
4095             (the reference-type default). LTSV::LINQ cannot: the iterator protocol
4096             uses C<undef> to signal end-of-sequence, so a default value of C<undef>
4097             is indistinguishable from EOF and is silently lost.
4098              
4099             # .NET: seq.DefaultIfEmpty() produces one null element
4100             # Perl:
4101             LTSV::LINQ->From([])->DefaultIfEmpty(undef)->ToArray() # () - empty!
4102             LTSV::LINQ->From([])->DefaultIfEmpty(0)->ToArray() # (0) - works
4103              
4104             Use a sentinel value (C<0>, C<''>, C<{}>) and handle it explicitly.
4105              
4106             =item * C<OrderBy> smart comparison
4107              
4108             .NET's C<OrderBy> is strongly typed: the key type determines the
4109             comparison. In Perl there is no static type, so LTSV::LINQ's C<OrderBy>
4110             uses a heuristic: if both keys look like numbers, C<E<lt>=E<gt>> is used;
4111             otherwise C<cmp>. For explicit control, use C<OrderByStr> (always C<cmp>)
4112             or C<OrderByNum> (always C<E<lt>=E<gt>>).
4113              
4114             =item * EqualityComparer / IComparer
4115              
4116             .NET LINQ accepts C<IEqualityComparer> and C<IComparer> interface objects
4117             for custom equality and ordering. LTSV::LINQ uses code references (C<sub>)
4118             that extract a I<key> from each element. This is equivalent in power but
4119             different in calling convention: the sub receives one element and returns a
4120             key, rather than receiving two elements and returning a comparison result.
4121              
4122             =item * C on typed sequences
4123              
4124             .NET's C is type-checked. LTSV::LINQ accepts any two sequences
4125             regardless of element type.
4126              
4127             =item * No query expression syntax
4128              
4129             .NET's C syntax compiles to LINQ
4130             method calls. Perl has no equivalent; use method chaining directly.
4131              
4132             =back
4133              
4134             =head2 Pure Perl Implementation
4135              
4136             B
4137              
4138             This module is implemented in Pure Perl with no XS (C extensions).
4139             Benefits:
4140              
4141             =over 4
4142              
4143             =item * Works on any Perl installation
4144              
4145             =item * No C compiler required
4146              
4147             =item * Easy installation in restricted environments
4148              
4149             =item * Consistent behavior across platforms
4150              
4151             =item * Simpler debugging and maintenance
4152              
4153             =back
4154              
4155             =head2 Core Module Dependencies
4156              
4157             B This module uses only Perl core features available since 5.005.
4158              
4159             No CPAN dependencies required.
4160              
4161             =head1 DIAGNOSTICS
4162              
4163             =head2 Error Messages
4164              
4165             This module may throw the following exceptions:
4166              
4167             =over 4
4168              
4169             =item C
4170              
4171             Thrown by From() when the argument is not an array reference.
4172              
4173             Example:
4174              
4175             LTSV::LINQ->From("string"); # Dies
4176             LTSV::LINQ->From([1, 2, 3]); # OK
4177              
4178             =item C
4179              
4180             Thrown by SelectMany() when the selector function returns anything
4181             other than an ARRAY reference. Wrap the return value in C<[...]>:
4182              
4183             # Wrong - hashref causes die
4184             ->SelectMany(sub { {key => 'val'} })
4185              
4186             # Correct - arrayref
4187             ->SelectMany(sub { [{key => 'val'}] })
4188              
4189             # Correct - empty array for "no results" case
4190             ->SelectMany(sub { [] })
4191              
4192             =item C<Sequence contains no elements>
4193              
4194             Thrown by First(), Last(), or Average() when called on an empty sequence.
4195              
4196             Methods that throw this error:
4197              
4198             =over 4
4199              
4200             =item * First()
4201              
4202             =item * Last()
4203              
4204             =item * Average()
4205              
4206             =back
4207              
4208             To avoid this error, use the OrDefault variants:
4209              
4210             =over 4
4211              
4212             =item * FirstOrDefault() - returns undef instead of dying
4213              
4214             =item * LastOrDefault() - returns undef instead of dying
4215              
4216             =item * AverageOrDefault() - returns undef instead of dying
4217              
4218             =back
4219              
4220             Example:
4221              
4222             my @empty = ();
4223             LTSV::LINQ->From(\@empty)->First(); # Dies
4224             LTSV::LINQ->From(\@empty)->FirstOrDefault(); # Returns undef
4225              
4226             =item C
4227              
4228             Thrown by First() or Last() with a predicate when no element matches.
4229              
4230             Example:
4231              
4232             my @data = (1, 2, 3);
4233             LTSV::LINQ->From(\@data)->First(sub { $_[0] > 10 }); # Dies
4234             LTSV::LINQ->From(\@data)->FirstOrDefault(sub { $_[0] > 10 }); # Returns undef
4235              
4236             =item C
4237              
4238             Thrown by Single() when the sequence (or matching elements) contains
4239             more than one element. Use First() if multiple matches are acceptable.
4240              
4241             Example:
4242              
4243             my @data = (1, 2, 3);
4244             LTSV::LINQ->From(\@data)->Single(); # Dies (3 elements)
4245             LTSV::LINQ->From(\@data)->Single(sub { $_[0]>1 });# Dies (2 match)
4246             LTSV::LINQ->From(\@data)->Single(sub { $_[0]==1 });# OK (1 match)
4247              
4248             =item C
4249              
4250             Thrown by ElementAt() when the supplied index is less than zero.
4251              
4252             =item C
4253              
4254             Thrown by ElementAt() when the supplied index is equal to or greater
4255             than the number of elements in the sequence. Use ElementAtOrDefault()
4256             to avoid this error.
4257              
4258             Example:
4259              
4260             my @data = (10, 20, 30);
4261             LTSV::LINQ->From(\@data)->ElementAt(2); # OK: returns 30
4262             LTSV::LINQ->From(\@data)->ElementAt(5); # Dies: index out of range
4263             LTSV::LINQ->From(\@data)->ElementAt(-1); # Dies: must be non-negative
4264             LTSV::LINQ->From(\@data)->ElementAtOrDefault(5); # Returns undef
4265              
4266             =item C
4267              
4268             Thrown by Aggregate() when called with a number of arguments other
4269             than 2 (seed + function).
4270              
4271             Example:
4272              
4273             # Correct usage
4274             LTSV::LINQ->From([1,2,3])->Aggregate(0, sub { $_[0] + $_[1] });
4275              
4276             # Incorrect: single argument -- dies
4277             LTSV::LINQ->From([1,2,3])->Aggregate(sub { $_[0] + $_[1] });
4278              
4279             =item C<Cannot open 'E<lt>filenameE<gt>': E<lt>reasonE<gt>>
4280              
4281             File I/O error when FromLTSV() cannot open the specified file.
4282              
4283             Common causes:
4284              
4285             =over 4
4286              
4287             =item * File does not exist
4288              
4289             =item * Insufficient permissions
4290              
4291             =item * Invalid path
4292              
4293             =back
4294              
4295             Example:
4296              
4297             LTSV::LINQ->FromLTSV("/nonexistent/file.ltsv"); # Dies with this error
4298              
4299             =back
4300              
4301             =head2 Methods That May Throw Exceptions
4302              
4303             =over 4
4304              
4305             =item B
4306              
4307             Dies if argument is not an array reference.
4308              
4309             =item B
4310              
4311             Dies if file cannot be opened.
4312              
4313             B The file handle is held open until the iterator is fully
4314             consumed. Partially consumed queries keep their file handles open.
4315             See C in L for details.
4316              
4317             =item B
4318              
4319             Dies if sequence is empty or no element matches predicate.
4320              
4321             Safe alternative: FirstOrDefault()
4322              
4323             =item B
4324              
4325             Dies if sequence is empty or no element matches predicate.
4326              
4327             Safe alternative: LastOrDefault()
4328              
4329             =item B
4330              
4331             Dies if sequence is empty.
4332              
4333             Safe alternative: AverageOrDefault()
4334              
4335             =item B
4336              
4337             Dies with C if the sequence
4338             contains more than one matching element. Also dies with
4339             C if no element matches.
4340              
4341             =item B
4342              
4343             Dies with C if C<$index> is less than 0.
4344             Dies with C if C<$index> is beyond the end of
4345             the sequence.
4346              
4347             =item B
4348              
4349             Dies with C if called
4350             with an argument count other than 2 (seed + function).
4351              
4352             =back
4353              
4354             =head2 Safe Alternatives
4355              
4356             For methods that may throw exceptions, use the OrDefault variants:
4357              
4358             First() -> FirstOrDefault() (returns undef)
4359             Last() -> LastOrDefault() (returns undef)
4360             Average() -> AverageOrDefault() (returns undef)
4361              
4362             Example:
4363              
4364             # Unsafe - may die
4365             my $first = LTSV::LINQ->From(\@data)->First();
4366              
4367             # Safe - returns undef if empty
4368             my $first = LTSV::LINQ->From(\@data)->FirstOrDefault();
4369             if (defined $first) {
4370             # Process $first
4371             }
4372              
4373             =head2 Exception Format and Stack Traces
4374              
4375             All exceptions thrown by this module are plain strings produced by
4376             C<die "message">. Because no trailing newline is appended, Perl
4377             automatically appends the source location:
4378              
4379             Sequence contains no elements at lib/LTSV/LINQ.pm line 764.
4380              
4381             This is intentional: the location helps when diagnosing unexpected
4382             failures during development.
4383              
4384             When catching exceptions with C<eval>, the full string including the
4385             location suffix is available in C<$@>. Use a prefix match if you want
4386             to test only the message text:
4387              
4388             eval { LTSV::LINQ->From([])->First() };
4389             if ($@ =~ /^Sequence contains no elements/) {
4390             # handle empty sequence
4391             }
4392              
4393             If you prefer exceptions without the location suffix, wrap the call
4394             in a thin eval, strip the suffix from C<$@>, and re-die with a newline:
4395              
4396             eval { $result = $query->First() };
4397             if ($@) { (my $msg = $@) =~ s/ at .+? line \d+\.?\n?\z//; die "$msg\n" } # strips " at ... line N."
4398              
4399             =head1 FAQ
4400              
4401             =head2 General Questions
4402              
4403             =over 4
4404              
4405             =item B
4406              
4407             A: LINQ provides:
4408              
4409             =over 4
4410              
4411             =item * Method chaining (more Perl-like)
4412              
4413             =item * Type safety through code
4414              
4415             =item * No string parsing required
4416              
4417             =item * Composable queries
4418              
4419             =back
4420              
4421             =item B
4422              
4423             A: No. Query objects use iterators that can only be consumed once.
4424              
4425             # Wrong - iterator consumed by first ToArray
4426             my $query = LTSV::LINQ->FromLTSV("file.ltsv");
4427             my @first = $query->ToArray(); # OK
4428             my @second = $query->ToArray(); # Empty! Iterator exhausted
4429              
4430             # Right - create new query for each use
4431             my $query1 = LTSV::LINQ->FromLTSV("file.ltsv");
4432             my @first = $query1->ToArray();
4433              
4434             my $query2 = LTSV::LINQ->FromLTSV("file.ltsv");
4435             my @second = $query2->ToArray();
4436              
4437             =item B
4438              
4439             A: Use code reference form with C<||>:
4440              
4441             # OR condition requires code reference
4442             ->Where(sub {
4443             $_[0]{status} == 200 || $_[0]{status} == 304
4444             })
4445              
4446             # DSL only supports AND
4447             ->Where(status => '200') # Single condition only
4448              
4449             =item B
4450              
4451             A: Each terminal operation needs its own pass, but a query object can only be consumed once:
4452              
4453             # Wrong - the second terminal operation sees an exhausted iterator
4454             my $avg = $query->Average(...); # Pass 1: consumes the iterator
4455             my @all = $query->ToArray(); # Empty! The iterator is NOT reset
4456              
4457             # Save result instead
4458             my @all = $query->ToArray();
4459             my $avg = LTSV::LINQ->From(\@all)->Average(...);
4460              
4461             =back
4462              
4463             =head2 Performance Questions
4464              
4465             =over 4
4466              
4467             =item B
4468              
4469             A: Use lazy operations and avoid materializing:
4470              
4471             # Good - constant memory
4472             LTSV::LINQ->FromLTSV("huge.log")
4473             ->Where(status => '500')
4474             ->ForEach(sub { print $_[0]{message}, "\n" });
4475              
4476             # Bad - loads everything into memory
4477             my @all = LTSV::LINQ->FromLTSV("huge.log")->ToArray();
4478              
4479             =item B
4480              
4481             A: OrderBy must load all elements into memory to sort them.
4482              
4483             # Slow on 1GB file - loads everything
4484             ->OrderBy(sub { $_[0]{timestamp} })->Take(10)
4485              
4486             # Faster - limit before sorting (if possible)
4487             ->Where(status => '500')->OrderBy(...)->Take(10)
4488              
4489             =item B
4490              
4491             A: Use ForEach or streaming terminal operations:
4492              
4493             # Process 100GB file with 1KB memory
4494             my $error_count = 0;
4495             LTSV::LINQ->FromLTSV("100gb.log")
4496             ->Where(sub { $_[0]{level} eq 'ERROR' })
4497             ->ForEach(sub { $error_count++ });
4498              
4499             print "Errors: $error_count\n";
4500              
4501             =back
4502              
4503             =head2 DSL Questions
4504              
4505             =over 4
4506              
4507             =item B
4508              
4509             A: No. DSL uses string equality (C<eq>). Use code reference for numeric:
4510              
4511             # DSL - string comparison
4512             ->Where(status => '200') # $_[0]{status} eq '200'
4513              
4514             # Code ref - numeric comparison
4515             ->Where(sub { $_[0]{status} == 200 })
4516             ->Where(sub { $_[0]{bytes} > 1000 })
4517              
4518             =item B
4519              
4520             A: DSL doesn't support it. Use code reference:
4521              
4522             # Case-insensitive requires code reference
4523             ->Where(sub { lc($_[0]{method}) eq 'get' })
4524              
4525             =item B
4526              
4527             A: No. Use code reference:
4528              
4529             # Regex requires code reference
4530             ->Where(sub { $_[0]{url} =~ m{^/api/} })
4531              
4532             =back
4533              
4534             =head2 Compatibility Questions
4535              
4536             =over 4
4537              
4538             =item B
4539              
4540             A: Yes. Tested on Perl 5.005_03 through 5.42.
4541              
4542             =item B
4543              
4544             A: No. Pure Perl with no dependencies beyond core.
4545              
4546             =item B
4547              
4548             A: Yes. Pure Perl works on all platforms.
4549              
4550             =item B
4551              
4552             A: Many production systems cannot upgrade. This module provides
4553             modern query capabilities without requiring upgrades.
4554              
4555             =back
4556              
4557             =head1 COOKBOOK
4558              
4559             =head2 Common Patterns
4560              
4561             =over 4
4562              
4563             =item B
4564              
4565             ->OrderByDescending(sub { $_[0]{score} })
4566             ->Take(10)
4567             ->ToArray()
4568              
4569             =item B
4570              
4571             ->GroupBy(sub { $_[0]{category} })
4572             ->Select(sub {
4573             {
4574             Category => $_[0]{Key},
4575             Count => scalar(@{$_[0]{Elements}})
4576             }
4577             })
4578             ->ToArray()
4579              
4580             =item B
4581              
4582             my $total = 0;
4583             ->Select(sub {
4584             $total += $_[0]{amount};
4585             { %{$_[0]}, running_total => $total }
4586             })
4587              
4588             =item B
4589              
4590             # Page 3, size 20
4591             ->Skip(40)->Take(20)->ToArray()
4592              
4593             =item B
4594              
4595             ->Select(sub { $_[0]{category} })
4596             ->Distinct()
4597             ->ToArray()
4598              
4599             =item B
4600              
4601             Note: A query object can only be consumed once. To compute multiple
4602             aggregations over the same source, materialise it first with C<ToArray>.
4603              
4604             my @all = LTSV::LINQ->FromLTSV("access.log")->ToArray();
4605              
4606             my $success_avg = LTSV::LINQ->From(\@all)
4607             ->Where(status => '200')
4608             ->Average(sub { $_[0]{response_time} });
4609              
4610             my $error_avg = LTSV::LINQ->From(\@all)
4611             ->Where(sub { $_[0]{status} >= 400 })
4612             ->Average(sub { $_[0]{response_time} });
4613              
4614             =item B
4615              
4616             A query object wraps a single-pass iterator. Once consumed, it is
4617             exhausted and subsequent terminal operations return empty results or die.
4618              
4619             # WRONG - $q is exhausted after the first Count()
4620             my $q = LTSV::LINQ->FromLTSV("access.log")->Where(status => '200');
4621             my $n = $q->Count(); # OK
4622             my $first = $q->First(); # WRONG: iterator already at EOF
4623              
4624             # RIGHT - snapshot into array, then query as many times as needed
4625             my @rows = LTSV::LINQ->FromLTSV("access.log")->Where(status => '200')->ToArray();
4626             my $n = LTSV::LINQ->From(\@rows)->Count();
4627             my $first = LTSV::LINQ->From(\@rows)->First();
4628              
4629             The snapshot approach is also the correct pattern for any multi-pass
4630             computation such as computing both average and standard deviation,
4631             comparing the same sequence against two different filters, or iterating
4632             once to validate and once to transform.
4633              
4634             =item B
4635              
4636             For files too large to fit in memory, keep the chain fully lazy by
4637             ensuring only one terminal operation is performed per pass:
4638              
4639             # One pass - pick only what you need
4640             my @slow = LTSV::LINQ->FromLTSV("access.log")
4641             ->Where(sub { $_[0]{response_time} > 1000 })
4642             ->OrderByNum(sub { $_[0]{response_time} })
4643             ->Take(20)
4644             ->ToArray();
4645              
4646             # Never do two passes on the same FromLTSV object -
4647             # open the file again for a second pass:
4648             my $count = LTSV::LINQ->FromLTSV("access.log")->Count();
4649             my $sum = LTSV::LINQ->FromLTSV("access.log")
4650             ->Sum(sub { $_[0]{bytes} });
4651              
4652             =back
4653              
4654             =head1 DESIGN PHILOSOPHY
4655              
4656             =head2 Historical Compatibility: Perl 5.005_03
4657              
4658             This module maintains compatibility with Perl 5.005_03 (released 1999-03-28),
4659             following the B<Universal Consensus 1998>.
4660              
4661             B
4662              
4663             =over 4
4664              
4665             =item * B
4666              
4667             Code written in 1998-era Perl should still run in 2026 and beyond.
4668             This demonstrates Perl's commitment to backwards compatibility.
4669              
4670             =item * B
4671              
4672             Some production systems, embedded devices, and enterprise environments
4673             cannot easily upgrade Perl. Maintaining compatibility ensures this module
4674             remains useful in those contexts.
4675              
4676             =item * B
4677              
4678             By avoiding modern Perl features, this module has zero non-core dependencies.
4679             It works with only the Perl core that has existed since 1999.
4680              
4681             =back
4682              
4683             B
4684              
4685             =over 4
4686              
4687             =item * No C<our> keyword - uses package variables
4688              
4689             =item * No reliance on the C<warnings> pragma - a stub is installed before 5.6 and C<local $^W = 1> is used
4690              
4691             =item * No C improvements from 5.6+
4692              
4693             =item * All features implemented with Perl 5.005-era constructs
4694              
4695             =back
4696              
4697             The code comment C<# use 5.008001; # Lancaster Consensus 2013 for toolchains>
4698             marks where modern code would typically start. We intentionally stay below
4699             this line.
4700              
4701             =head2 US-ASCII Only Policy
4702              
4703             All source code is strictly US-ASCII (bytes 0x00-0x7F). No UTF-8, no
4704             extended characters.
4705              
4706             B
4707              
4708             =over 4
4709              
4710             =item * B
4711              
4712             US-ASCII works everywhere - ancient terminals, modern IDEs, web browsers,
4713             email systems. No encoding issues, ever.
4714              
4715             =item * B<Locale Independence>
4716              
4717             The code behaves identically regardless of system locale settings.
4718              
4719             =item * B<Separation of Code and Data>
4720              
4721             Source code (ASCII) vs. data (any encoding). The module processes LTSV
4722             data in any encoding, but its own code remains pure ASCII.
4723              
4724             =back
4725              
4726             This policy is verified by C.
4727              
4728             =head2 The C<$VERSION = $VERSION> Idiom
4729              
4730             You may notice:
4731              
4732             $VERSION = '1.09';
4733             $VERSION = $VERSION;
4734              
4735             This is B<intentional>, not a typo. Under C<use strict>, a variable used
4736             only once triggers a warning. The self-assignment ensures C<$VERSION>
4737             appears twice, silencing the warning without requiring C<our> (which
4738             doesn't exist in Perl 5.005).
4739              
4740             This is a well-known idiom from the pre-C<our> era.
4741              
4742             =head2 Design Principles
4743              
4744             =over 4
4745              
4746             =item * B<Lazy Evaluation>
4747              
4748             Operations return query objects, not arrays. Data is processed on-demand
4749             when terminal operations (C<ToArray>, C<Count>, etc.) are called.
4750              
4751             =item * B<Method Chaining>
4752              
4753             All query operations return new query objects, enabling fluent syntax:
4754              
4755             $query->Where(...)->Select(...)->OrderBy(...)->ToArray()
4756              
4757             =item * B<No Side Effects>
4758              
4759             Query operations never modify the source data. They create new lazy
4760             iterators.
4761              
4762             =item * B<LINQ Semantics, Perl Idioms>
4763              
4764             We follow LINQ's method names and semantics, but use Perl idioms for
4765             implementation (closures for iterators, hash refs for records).
4766              
4767             =item * B<Zero Dependencies>
4768              
4769             This module has zero non-core dependencies. It works with only the Perl
4770             core that has existed since 1999. Even C<warnings> is optional (stubbed
4771             for Perl E<lt> 5.6). This ensures installation succeeds on minimal Perl
4772             installations, avoids dependency chain vulnerabilities, and provides
4773             permanence - the code will work decades into the future.
4774              
4775             =back
4776              
4777             =head1 LIMITATIONS AND KNOWN ISSUES
4778              
4779             =head2 Current Limitations
4780              
4781             =over 4
4782              
4783             =item * B<Single-Use Iterators>
4784              
4785             Query objects can only be consumed once. The iterator is exhausted
4786             after terminal operations.
4787              
4788             Workaround: Create new query object or save ToArray() result.
4789              
4790             =item * B<Undef Values in Sequences>
4791              
4792             Due to iterator-based design, undef cannot be distinguished from end-of-sequence.
4793             Sequences containing undef values may not work correctly with all operations.
4794              
4795             This is not a practical limitation for LTSV data (which uses hash references),
4796             but affects operations on plain arrays containing undef.
4797              
4798             # Works fine (LTSV data - hash references)
4799             LTSV::LINQ->FromLTSV("file.ltsv")->Contains({status => '200'})
4800              
4801             # Limitation (plain array with undef)
4802             LTSV::LINQ->From([1, undef, 3])->Contains(undef) # May not work
4803              
4804             =item * B<No Parallel Execution>
4805              
4806             All operations execute sequentially in a single thread.
4807              
4808             =item * B<No Index Support>
4809              
4810             All filtering requires full scan. No index optimization.
4811              
4812             =item * B<Distinct with Complex Keys>
4813              
4814             Distinct with custom comparer uses stringified keys. May not work
4815             correctly for complex objects.
4816              
4817             =item * B<DefaultIfEmpty(undef)>
4818              
4819             Because the iterator protocol uses C<undef> to signal end-of-sequence,
4820             C<DefaultIfEmpty(undef)> cannot reliably deliver its C<undef> default
4821             to downstream operations.
4822              
4823             # Works correctly (non-undef default)
4824             LTSV::LINQ->From([])->DefaultIfEmpty(0)->ToArray() # (0)
4825             LTSV::LINQ->From([])->DefaultIfEmpty({})->ToArray() # ({})
4826              
4827             # Does NOT work (undef default is indistinguishable from EOF)
4828             LTSV::LINQ->From([])->DefaultIfEmpty(undef)->ToArray() # () - empty!
4829              
4830             Workaround: Use a sentinel value such as C<0>, C<''>, or C<{}> instead
4831             of C<undef>, and treat it as "no element" after the fact.
4832              
4833             =back
4834              
4835             =head2 Not Implemented
4836              
4837             The following LINQ methods from the .NET standard library are intentionally
4838             not implemented in LTSV::LINQ. This section explains the design rationale
4839             for each omission.
4840              
4841             =head3 Parallel LINQ (PLINQ) Methods
4842              
4843             The following methods belong to B<Parallel LINQ (PLINQ)>, the .NET
4844             parallel-execution extension to LINQ introduced in .NET 4.0. They exist
4845             to distribute query execution across multiple CPU cores using the .NET
4846             Thread Pool and Task Parallel Library.
4847              
4848             Perl does not have native shared-memory multithreading that maps onto
4849             this execution model. Perl threads (C<threads>) copy the interpreter
4850             state and communicate through shared variables, making them unsuitable
4851             for the fine-grained, automatic work-stealing parallelism that PLINQ
4852             provides. LTSV::LINQ's iterator-based design assumes a single sequential
4853             execution context; introducing PLINQ semantics would require a completely
4854             different architecture and would add heavy dependencies.
4855              
4856             Furthermore, the primary use case for LTSV::LINQ -- parsing and querying
4857             LTSV log files -- is typically I/O-bound rather than CPU-bound.
4858             Parallelizing I/O over a single file provides little benefit and
4859             considerable complexity.
4860              
4861             For these reasons, the entire PLINQ surface is omitted:
4862              
4863             =over 4
4864              
4865             =item * B<AsParallel>
4866              
4867             Entry point for PLINQ. Converts an C<IEnumerableE<lt>TE<gt>> into a
4868             C<ParallelQueryE<lt>TE<gt>> that the .NET runtime executes in parallel using
4869             multiple threads. Not applicable: Perl lacks the runtime infrastructure.
4870              
4871             =item * B<AsSequential>
4872              
4873             Converts a C<ParallelQueryE<lt>TE<gt>> back to a sequential C<IEnumerableE<lt>TE<gt>>,
4874             forcing subsequent operators to run on a single thread. Since
4875             C<AsParallel> is not implemented, C<AsSequential> has no counterpart
4876             to convert from.
4877              
4878             =item * B<AsOrdered>
4879              
4880             Instructs PLINQ to preserve the source order in the output even during
4881             parallel execution. This is a hint to the PLINQ scheduler; it does not
4882             exist outside of PLINQ. Not applicable.
4883              
4884             =item * B<AsUnordered>
4885              
4886             Instructs PLINQ that output order does not need to match source order,
4887             potentially allowing more efficient parallel execution. Not applicable.
4888              
4889             =item * B<ForAll>
4890              
4891             PLINQ terminal operator that applies an action to each element in
4892             parallel, without collecting results. It is the parallel equivalent of
4893             C<foreach>. LTSV::LINQ provides C<ForEach> for sequential iteration.
4894             A parallel C<ForAll> is not applicable.
4895              
4896             =item * B<WithCancellation>
4897              
4898             Attaches a .NET C<CancellationToken> to a C<ParallelQueryE<lt>TE<gt>>, allowing
4899             cooperative cancellation of a running parallel query. Cancellation tokens
4900             are a .NET threading primitive. Not applicable.
4901              
4902             =item * B<WithDegreeOfParallelism>
4903              
4904             Sets the maximum number of concurrent tasks that PLINQ may use. A
4905             tuning knob for the PLINQ scheduler. Not applicable.
4906              
4907             =item * B<WithExecutionMode>
4908              
4909             Controls whether PLINQ may choose sequential execution for efficiency
4910             (C<Default>) or is forced to parallelize (C<ForceParallelism>). Not
4911             applicable.
4912              
4913             =item * B<WithMergeOptions>
4914              
4915             Controls how PLINQ merges results from parallel partitions back into the
4916             output stream (buffered, auto-buffered, or not-buffered). Not applicable.
4917              
4918             =back
4919              
4920             =head3 .NET Type System Methods
4921              
4922             The following methods are specific to .NET's static type system. They
4923             exist to work with .NET generics and interface hierarchies, which have
4924             no Perl equivalent.
4925              
4926             =over 4
4927              
4928             =item * B<Cast>
4929              
4930             Casts each element of a non-generic C<IEnumerable> to a specified type
4931             C<T>, returning C<IEnumerableE<lt>TE<gt>>. In .NET, C<CastE<lt>TE<gt>> is needed when
4932             working with legacy APIs that return C<IEnumerable> (without a type
4933             parameter) and you need to treat the elements as a specific type.
4934              
4935             Perl is dynamically typed. Every Perl value already holds type
4936             information at runtime (scalar, reference, blessed object), and Perl
4937             does not have a concept of a "non-generic enumerable" that needs to be
4938             explicitly cast before it can be queried. There is no meaningful
4939             operation to implement.
4940              
4941             =item * B<OfType>
4942              
4943             Filters elements of a non-generic C<IEnumerable>, returning only those
4944             that can be successfully cast to a specified type C<T>. Like C<Cast>,
4945             it exists to bridge generic and non-generic .NET APIs.
4946              
4947             In LTSV::LINQ, all records from C<FromLTSV> are hash references.
4948             Records from C<From> are whatever the caller puts in the array.
4949             Perl's C<ref()>, C<isa()>, or a C<Where> predicate can
4950             perform any type-based filtering the caller needs. A dedicated
4951             C<OfType> adds no expressiveness.
4952              
4953             # Perl equivalent of OfType for blessed objects of class "Foo":
4954             $query->Where(sub { ref($_[0]) && $_[0]->isa('Foo') })
4955              
4956             =back
4957              
4958             =head3 64-bit and Large-Count Methods
4959              
4960             =over 4
4961              
4962             =item * B<LongCount>
4963              
4964             Returns the number of elements as a 64-bit integer (C<long> in .NET).
4965             On 32-bit .NET platforms, a sequence can theoretically contain more than
4966             C<2**31 - 1> (~2 billion) elements, which would overflow C<int>; hence
4967             the need for C<LongCount>.
4968              
4969             In Perl, integers are represented as native signed integers or floating-
4970             point doubles (C<NV>). On 64-bit Perl (which is universal in practice
4971             today), the native integer type is 64 bits, so C<Count> already handles
4972             any realistic sequence length. On 32-bit Perl, the floating-point C<NV>
4973             provides 53 bits of integer precision (~9 quadrillion), far exceeding
4974             any in-memory sequence. There is no semantic gap between C<Count> and
4975             C<LongCount> in Perl.
4976              
4977             =back
4978              
4979             =head3 IEnumerable Conversion Method
4980              
4981             =over 4
4982              
4983             =item * B<AsEnumerable>
4984              
4985             In .NET, C<AsEnumerableE<lt>TE<gt>> is used to force evaluation of a query as
4986             C<IEnumerableE<lt>TE<gt>> rather than, for example, C<IQueryableE<lt>TE<gt>> (which
4987             might be translated to SQL). It is a type-cast at the interface level,
4988             not a data transformation.
4989              
4990             LTSV::LINQ has only one query type: C<LTSV::LINQ>. There is no
4991             C<IQueryable> counterpart that would benefit from being downgraded to
4992             C<IEnumerable>. The method has no meaningful semantics to implement.
4993              
4994             =back
4995              
4996             =head1 BUGS
4997              
4998             Please report any bugs or feature requests to:
4999              
5000             =over 4
5001              
5002             =item * Email: C<ina@cpan.org>
5003              
5004             =back
5005              
5006             =head1 SUPPORT
5007              
5008             =head2 Documentation
5009              
5010             Full documentation is available via:
5011              
5012             perldoc LTSV::LINQ
5013              
5014             =head2 CPAN
5015              
5016             https://metacpan.org/pod/LTSV::LINQ
5017              
5018             =head1 SEE ALSO
5019              
5020             =over 4
5021              
5022             =item * LTSV specification
5023              
5024             http://ltsv.org/
5025              
5026             =item * L<JSON::LINQ> - LINQ-style query interface for JSON/JSONL files
5027              
5028             =item * L<CSV::LINQ> - LINQ-style query interface for CSV files
5029              
5030             =item * Microsoft LINQ documentation
5031              
5032             https://learn.microsoft.com/en-us/dotnet/csharp/linq/
5033              
5034             =back
5035              
5036             =head1 AUTHOR
5037              
5038             INABA Hitoshi E<lt>ina@cpan.orgE<gt>
5039              
5040             =head2 Contributors
5041              
5042             Contributions are welcome! See file: CONTRIBUTING.
5043              
5044             =head1 ACKNOWLEDGEMENTS
5045              
5046             =head2 LINQ Technology
5047              
5048             This module is inspired by LINQ (Language Integrated Query), which was
5049             developed by Microsoft Corporation for the .NET Framework.
5050              
5051             LINQ(R) is a registered trademark of Microsoft Corporation.
5052              
5053             We are grateful to Microsoft for pioneering the LINQ technology and
5054             making it a widely recognized programming pattern. The elegance and
5055             power of LINQ has influenced query interfaces across many programming
5056             languages, and this module brings that same capability to LTSV data
5057             processing in Perl.
5058              
5059             This module is not affiliated with, endorsed by, or sponsored by
5060             Microsoft Corporation.
5061              
5062             =head2 References
5063              
5064             This module was inspired by:
5065              
5066             =over 4
5067              
5068             =item * Microsoft LINQ (Language Integrated Query)
5069              
5070             L<https://learn.microsoft.com/en-us/dotnet/csharp/linq/>
5071              
5072             =item * LTSV specification
5073              
5074             L<http://ltsv.org/>
5075              
5076             =back
5077              
5078             =head1 COPYRIGHT AND LICENSE
5079              
5080             Copyright (c) 2026 INABA Hitoshi
5081              
5082             This library is free software; you can redistribute it and/or modify
5083             it under the same terms as Perl itself.
5084              
5085             =head2 License Details
5086              
5087             This module is released under the same license as Perl itself:
5088              
5089             =over 4
5090              
5091             =item * Artistic License 1.0
5092              
5093             L<perlartistic>
5094              
5095             =item * GNU General Public License version 1 or later
5096              
5097             L<perlgpl>
5098              
5099             =back
5100              
5101             You may choose either license.
5102              
5103             =head1 DISCLAIMER OF WARRANTY
5104              
5105             BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
5106             FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT
5107             WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER
5108             PARTIES PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND,
5109             EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
5110             WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
5111             THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS
5112             WITH YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF
5113             ALL NECESSARY SERVICING, REPAIR, OR CORRECTION.
5114              
5115             IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
5116             WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
5117             REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENSE, BE LIABLE
5118             TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR
5119             CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
5120             SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
5121             RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
5122             FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
5123             SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
5124             SUCH DAMAGES.
5125              
5126             =cut