File Coverage

lib/LTSV/LINQ.pm
Criterion Covered Total %
statement 607 625 97.1
branch 220 264 83.3
condition 46 72 63.8
subroutine 122 123 99.1
pod 56 56 100.0
total 1051 1140 92.1


line stmt bran cond sub pod time code
1             package LTSV::LINQ;
2             ######################################################################
3             #
4             # LTSV::LINQ - LINQ-style query interface for LTSV files
5             #
6             # https://metacpan.org/dist/LTSV-LINQ
7             #
8             # Copyright (c) 2026 INABA Hitoshi
9             ######################################################################
10              
11 12     12   93050 use 5.00503; # Universal Consensus 1998 for primetools
  12         73  
12             # Perl 5.005_03 compatibility for historical toolchains
13             # use 5.008001; # Lancaster Consensus 2013 for toolchains
14              
15             $VERSION = '1.05';
16             $VERSION = $VERSION;
17             # VERSION policy: avoid `our` for 5.005_03 compatibility.
18             # Self-assignment prevents "used only once" warning under `use strict`.
19              
20 12 50   12   425 BEGIN { pop @INC if $INC[-1] eq '.' } # CVE-2016-1238: Important unsafe module load path flaw
21 12     12   72 use strict;
  12         86  
  12         888  
22 12 50   12   364 BEGIN { if ($] < 5.006) { $INC{'warnings.pm'} = 'stub'; eval 'package warnings; sub import {}' } } use warnings; local $^W=1;
  0     12   0  
  0         0  
  12         68  
  12         20  
  12         116553  
23             # warnings.pm compatibility: stub with import() for Perl < 5.6
24              
25             #---------------------------------------------------------------------
26             # Constructor and Iterator Infrastructure
27             #---------------------------------------------------------------------
28              
29             sub new {
30 487     487 1 715 my($class, $iterator) = @_;
31 487         6609 return bless { iterator => $iterator }, $class;
32             }
33              
34             sub iterator {
35 625     625 1 732 my $self = $_[0];
36             # If this object was created by _from_snapshot, _factory provides
37             # a fresh iterator closure each time iterator() is called.
38 625 100       1094 if (exists $self->{_factory}) {
39 141         197 return $self->{_factory}->();
40             }
41 484         660 return $self->{iterator};
42             }
43              
44             #---------------------------------------------------------------------
45             # Data Source Methods
46             #---------------------------------------------------------------------
47              
48             # From - create query from array
49             sub From {
50 316     316 1 1606012 my($class, $source) = @_;
51              
52 316 50       680 if (ref($source) eq 'ARRAY') {
53 316         350 my $i = 0;
54             return $class->new(sub {
55 1191 100   1191   2073 return undef if $i >= scalar(@$source);
56 903         1382 return $source->[$i++];
57 316         1312 });
58             }
59              
60 0         0 die "From() requires ARRAY reference";
61             }
62              
63             # FromLTSV - read from LTSV file
64             sub FromLTSV {
65 6     6 1 122933 my($class, $file) = @_;
66              
67 6         8 my $fh;
68 6 50       14 if ($] >= 5.006) {
69             # Avoid "Too many arguments for open at" error when running with Perl 5.005_03
70 6 50       393 eval q{ open($fh, '<', $file) } or die "Cannot open '$file': $!";
71             }
72             else {
73 0         0 $fh = \do { local *_ };
  0         0  
74 0 0       0 open($fh, "< $file") or die "Cannot open '$file': $!";
75             }
76 6         26 binmode $fh; # Treat as raw bytes; handles all multibyte encodings
77             # and prevents \r\n -> \n translation on Windows
78              
79             return $class->new(sub {
80 20     20   161 while (my $line = <$fh>) {
81 15         17 chomp $line;
82 15         33 $line =~ s/\r\z//; # Remove CR for CRLF files on any platform
83 15 100       20 next unless length $line;
84              
85             my %record = map {
86 14 50       52 /\A(.+?):(.*)\z/ ? ($1, $2) : ()
  48         156  
87             } split /\t/, $line;
88              
89 14 50       59 return \%record if %record;
90             }
91 6         67 close $fh;
92 6         16 return undef;
93 6         38 });
94             }
95              
96             # Range - generate sequence of integers
97             sub Range {
98 4     4 1 181691 my($class, $start, $count) = @_;
99              
100 4         8 my $current = $start;
101 4         6 my $remaining = $count;
102              
103             return $class->new(sub {
104 37 100   37   76 return undef if $remaining <= 0;
105 33         28 $remaining--;
106 33         33 return $current++;
107 4         26 });
108             }
109              
110             # Empty - return empty sequence
111             sub Empty {
112 3     3 1 175658 my($class) = @_;
113              
114             return $class->new(sub {
115 4     4   13 return undef;
116 3         21 });
117             }
118              
119             # Repeat - repeat element specified number of times
120             sub Repeat {
121 4     4 1 152 my($class, $element, $count) = @_;
122              
123 4         7 my $remaining = $count;
124              
125             return $class->new(sub {
126 22 100   22   44 return undef if $remaining <= 0;
127 18         22 $remaining--;
128 18         30 return $element;
129 4         22 });
130             }
131              
132             #---------------------------------------------------------------------
133             # Filtering Methods
134             #---------------------------------------------------------------------
135              
136             # Where - filter elements
137             sub Where {
138 26     26 1 82 my($self, @args) = @_;
139 26         55 my $iter = $self->iterator;
140 26         45 my $class = ref($self);
141              
142             # Support both code reference and DSL form
143 26         34 my $cond;
144 26 100 66     128 if (@args == 1 && ref($args[0]) eq 'CODE') {
145 17         27 $cond = $args[0];
146             }
147             else {
148             # DSL form: Where(key => value, ...)
149 9         21 my %match = @args;
150             $cond = sub {
151 34     34   40 my $row = shift;
152 34         57 for my $k (keys %match) {
153 36 50       64 return 0 unless defined $row->{$k};
154 36 100       99 return 0 unless $row->{$k} eq $match{$k};
155             }
156 16         49 return 1;
157 9         28 };
158             }
159              
160             return $class->new(sub {
161 88     88   119 while (1) {
162 141         233 my $item = $iter->();
163 141 100       242 return undef unless defined $item;
164 116 100       162 return $item if $cond->($item);
165             }
166 26         162 });
167             }
168              
169             #---------------------------------------------------------------------
170             # Projection Methods
171             #---------------------------------------------------------------------
172              
173             # Select - transform elements
174             sub Select {
175 29     29 1 83 my($self, $selector) = @_;
176 29         82 my $iter = $self->iterator;
177 29         52 my $class = ref($self);
178              
179             return $class->new(sub {
180 151     151   167 my $item = $iter->();
181 151 100       279 return undef unless defined $item;
182 123         171 return $selector->($item);
183 29         134 });
184             }
185              
186             # SelectMany - flatten sequences
187             sub SelectMany {
188 5     5 1 4 my($self, $selector) = @_;
189 5         5 my $iter = $self->iterator;
190 5         4 my $class = ref($self);
191              
192 5         4 my @buffer;
193              
194             return $class->new(sub {
195 12     12   8 while (1) {
196 21 100       23 if (@buffer) {
197 7         10 return shift @buffer;
198             }
199              
200 14         15 my $item = $iter->();
201 14 100       19 return undef unless defined $item;
202              
203 11         14 my $result = $selector->($item);
204 11 100       24 unless (ref($result) eq 'ARRAY') {
205 2         15 die "SelectMany: selector must return an ARRAY reference";
206             }
207 9         12 @buffer = @$result;
208             }
209 5         11 });
210             }
211              
212             # Concat - concatenate two sequences
213             sub Concat {
214 10     10 1 33 my($self, $second) = @_;
215 10         17 my $class = ref($self);
216              
217 10         19 my $first_iter = $self->iterator;
218 10         14 my $second_iter;
219 10         16 my $first_done = 0;
220              
221             return $class->new(sub {
222 44 100   44   82 if (!$first_done) {
223 28         42 my $item = $first_iter->();
224 28 100       68 if (defined $item) {
225 18         39 return $item;
226             }
227 10         11 $first_done = 1;
228 10         31 $second_iter = $second->iterator;
229             }
230              
231 26 50       56 return $second_iter ? $second_iter->() : undef;
232 10         43 });
233             }
234              
235             # Zip - combine two sequences element-wise
236             sub Zip {
237 5     5 1 12 my($self, $second, $result_selector) = @_;
238              
239 5         16 my $iter1 = $self->iterator;
240 5         10 my $iter2 = $second->iterator;
241 5         9 my $class = ref($self);
242              
243             return $class->new(sub {
244 17     17   26 my $item1 = $iter1->();
245 17         30 my $item2 = $iter2->();
246              
247             # Return undef if either sequence ends
248 17 100 100     63 return undef unless defined($item1) && defined($item2);
249              
250 12         29 return $result_selector->($item1, $item2);
251 5         22 });
252             }
253              
254             #---------------------------------------------------------------------
255             # Partitioning Methods
256             #---------------------------------------------------------------------
257              
258             # Take - take first N elements
259             sub Take {
260 7     7 1 14 my($self, $count) = @_;
261 7         14 my $iter = $self->iterator;
262 7         12 my $class = ref($self);
263 7         14 my $taken = 0;
264              
265             return $class->new(sub {
266 26 100   26   46 return undef if $taken >= $count;
267 19         26 my $item = $iter->();
268 19 50       39 return undef unless defined $item;
269 19         18 $taken++;
270 19         29 return $item;
271 7         41 });
272             }
273              
274             # Skip - skip first N elements
275             sub Skip {
276 3     3 1 13 my($self, $count) = @_;
277 3         7 my $iter = $self->iterator;
278 3         6 my $class = ref($self);
279 3         4 my $skipped = 0;
280              
281             return $class->new(sub {
282 9     9   19 while ($skipped < $count) {
283 6         7 my $item = $iter->();
284 6 50       9 return undef unless defined $item;
285 6         25 $skipped++;
286             }
287 9         13 return $iter->();
288 3         12 });
289             }
290              
291             # TakeWhile - take while condition is true
292             sub TakeWhile {
293 2     2 1 4 my($self, $predicate) = @_;
294 2         7 my $iter = $self->iterator;
295 2         5 my $class = ref($self);
296 2         4 my $done = 0;
297              
298             return $class->new(sub {
299 7 50   7   16 return undef if $done;
300 7         33 my $item = $iter->();
301 7 50       21 return undef unless defined $item;
302              
303 7 100       16 if ($predicate->($item)) {
304 5         24 return $item;
305             }
306             else {
307 2         7 $done = 1;
308 2         6 return undef;
309             }
310 2         10 });
311             }
312              
313             # SkipWhile - skip elements while predicate is true
314             sub SkipWhile {
315 4     4 1 9 my($self, $predicate) = @_;
316 4         7 my $iter = $self->iterator;
317 4         7 my $class = ref($self);
318 4         4 my $skipping = 1;
319              
320             return $class->new(sub {
321 12     12   15 while (1) {
322 19         47 my $item = $iter->();
323 19 100       37 return undef unless defined $item;
324              
325 16 100       25 if ($skipping) {
326 10 100       18 if (!$predicate->($item)) {
327 3         11 $skipping = 0;
328 3         7 return $item;
329             }
330             }
331             else {
332 6         13 return $item;
333             }
334             }
335 4         27 });
336             }
337              
338             #---------------------------------------------------------------------
339             # Ordering Methods
340             #---------------------------------------------------------------------
341              
342             # OrderBy - sort ascending (smart: numeric when both keys look numeric)
343             sub OrderBy {
344 21     21 1 30 my($self, $key_selector) = @_;
345 21         50 my @items = $self->ToArray();
346 21         122 return LTSV::LINQ::Ordered->_new_ordered(
347             \@items,
348             [{ sel => $key_selector, dir => 1, type => 'smart' }]
349             );
350             }
351              
352             # OrderByDescending - sort descending (smart comparison)
353             sub OrderByDescending {
354 4     4 1 9 my($self, $key_selector) = @_;
355 4         17 my @items = $self->ToArray();
356 4         29 return LTSV::LINQ::Ordered->_new_ordered(
357             \@items,
358             [{ sel => $key_selector, dir => -1, type => 'smart' }]
359             );
360             }
361              
362             # OrderByStr - sort ascending by string comparison
363             sub OrderByStr {
364 21     21 1 31 my($self, $key_selector) = @_;
365 21         1162 my @items = $self->ToArray();
366 21         77 return LTSV::LINQ::Ordered->_new_ordered(
367             \@items,
368             [{ sel => $key_selector, dir => 1, type => 'str' }]
369             );
370             }
371              
372             # OrderByStrDescending - sort descending by string comparison
373             sub OrderByStrDescending {
374 5     5 1 6 my($self, $key_selector) = @_;
375 5         9 my @items = $self->ToArray();
376 5         29 return LTSV::LINQ::Ordered->_new_ordered(
377             \@items,
378             [{ sel => $key_selector, dir => -1, type => 'str' }]
379             );
380             }
381              
382             # OrderByNum - sort ascending by numeric comparison
383             sub OrderByNum {
384 14     14 1 15 my($self, $key_selector) = @_;
385 14         19 my @items = $self->ToArray();
386 14         35 return LTSV::LINQ::Ordered->_new_ordered(
387             \@items,
388             [{ sel => $key_selector, dir => 1, type => 'num' }]
389             );
390             }
391              
392             # OrderByNumDescending - sort descending by numeric comparison
393             sub OrderByNumDescending {
394 5     5 1 6 my($self, $key_selector) = @_;
395 5         10 my @items = $self->ToArray();
396 5         12 return LTSV::LINQ::Ordered->_new_ordered(
397             \@items,
398             [{ sel => $key_selector, dir => -1, type => 'num' }]
399             );
400             }
401              
402             # Reverse - reverse order
403             sub Reverse {
404 1     1 1 3 my($self) = @_;
405 1         4 my @items = reverse $self->ToArray();
406 1         3 my $class = ref($self);
407 1         4 return $class->From(\@items);
408             }
409              
410             #---------------------------------------------------------------------
411             # Grouping Methods
412             #---------------------------------------------------------------------
413              
414             # GroupBy - group elements by key
415             sub GroupBy {
416 5     5 1 14 my($self, $key_selector, $element_selector) = @_;
417 5   33 21   31 $element_selector ||= sub { $_[0] };
  21         58  
418              
419 5         10 my %groups;
420             my @key_order;
421              
422             $self->ForEach(sub {
423 21     21   27 my $item = shift;
424 21         53 my $key = $key_selector->($item);
425 21 50       85 $key = '' unless defined $key;
426 21 100       43 unless (exists $groups{$key}) {
427 13         19 push @key_order, $key;
428             }
429 21         53 push @{$groups{$key}}, $element_selector->($item);
  21         91  
430 5         30 });
431              
432 5         27 my @result;
433 5         11 for my $key (@key_order) {
434             push @result, {
435             Key => $key,
436 13         43 Elements => $groups{$key},
437             };
438             }
439              
440 5         11 my $class = ref($self);
441 5         14 return $class->From(\@result);
442             }
443              
444             #---------------------------------------------------------------------
445             # Set Operations
446             #---------------------------------------------------------------------
447              
448             # Distinct - remove duplicates
449             sub Distinct {
450 12     12 1 26 my($self, $key_selector) = @_;
451 12         22 my $iter = $self->iterator;
452 12         20 my $class = ref($self);
453 12         18 my %seen;
454              
455             return $class->new(sub {
456 47     47   56 while (1) {
457 62         98 my $item = $iter->();
458 62 100       129 return undef unless defined $item;
459              
460 50 100       128 my $key = $key_selector ? $key_selector->($item) : _make_key($item);
461 50 50       107 $key = '' unless defined $key;
462              
463 50 100       141 unless ($seen{$key}++) {
464 35         84 return $item;
465             }
466             }
467 12         75 });
468             }
469              
470             # Internal helper for set operations - make key from item
471             sub _make_key {
472 99     99   149 my($item) = @_;
473              
474 99 50       169 return '' unless defined $item;
475              
476 99 100       242 if (ref($item) eq 'HASH') {
    100          
477             # Hash to stable key
478 9         15 my @pairs = ();
479 9         30 for my $k (sort keys %$item) {
480 14 50       35 my $v = defined($item->{$k}) ? $item->{$k} : '';
481 14         38 push @pairs, "$k\x1F$v"; # \x1F = Unit Separator
482             }
483 9         34 return join("\x1E", @pairs); # \x1E = Record Separator
484             }
485             elsif (ref($item) eq 'ARRAY') {
486             # Array to key
487 3 50       7 return join("\x1E", map { defined($_) ? $_ : '' } @$item);
  6         19  
488             }
489             else {
490             # Scalar
491 87         139 return $item;
492             }
493             }
494              
495             # _from_snapshot - internal helper for GroupJoin.
496             # Returns a LTSV::LINQ object backed by a plain array that can be iterated
497             # multiple times within a single result_selector call.
498             # Each LINQ terminal method (Count, Sum, ToArray, etc.) calls iterator()
499             # to get a fresh iterator. We achieve re-iterability by overriding the
500             # iterator() method so it always creates a new closure over the same array.
501             sub _from_snapshot {
502 68     68   61 my($class_or_self, $aref) = @_;
503              
504 68   33     102 my $class = ref($class_or_self) || $class_or_self;
505              
506             # Build a sentinel sub that, when called, returns a brand-new
507             # index-based iterator every time.
508             my $iter_factory = sub {
509 139     139   83 my $i = 0;
510             return sub {
511 133 100       179 return undef if $i >= scalar(@$aref);
512 71         84 return $aref->[$i++];
513 139         252 };
514 68         102 };
515              
516             # The object stores the factory in place of a plain iterator.
517             # The iterator() accessor returns the result of calling the factory,
518             # so every consumer gets its own fresh iterator starting at index 0.
519 68         61 my $obj = bless {
520             iterator => $iter_factory->(),
521             _factory => $iter_factory,
522             }, $class;
523              
524 68         72 return $obj;
525             }
526              
527             # Union - set union with distinct
528             sub Union {
529 5     5 1 11 my($self, $second, $key_selector) = @_;
530              
531 5         17 return $self->Concat($second)->Distinct($key_selector);
532             }
533              
534             # Intersect - set intersection
535             sub Intersect {
536 5     5 1 14 my($self, $second, $key_selector) = @_;
537              
538             # Build hash of second sequence
539 5         10 my %second_set = ();
540             $second->ForEach(sub {
541 13     13   21 my $item = shift;
542 13 50       27 my $key = $key_selector ? $key_selector->($item) : _make_key($item);
543 13         39 $second_set{$key} = $item;
544 5         24 });
545              
546 5         24 my $class = ref($self);
547 5         12 my $iter = $self->iterator;
548 5         11 my %seen = ();
549              
550             return $class->new(sub {
551 11     11   21 while (defined(my $item = $iter->())) {
552 14 50       31 my $key = $key_selector ? $key_selector->($item) : _make_key($item);
553              
554 14 100       47 next if $seen{$key}++; # Skip duplicates
555 12 100       38 return $item if exists $second_set{$key};
556             }
557 5         10 return undef;
558 5         26 });
559             }
560              
561             # Except - set difference
562             sub Except {
563 5     5 1 11 my($self, $second, $key_selector) = @_;
564              
565             # Build hash of second sequence
566 5         9 my %second_set = ();
567             $second->ForEach(sub {
568 11     11   17 my $item = shift;
569 11 50       27 my $key = $key_selector ? $key_selector->($item) : _make_key($item);
570 11         30 $second_set{$key} = 1;
571 5         41 });
572              
573 5         22 my $class = ref($self);
574 5         11 my $iter = $self->iterator;
575 5         9 my %seen = ();
576              
577             return $class->new(sub {
578 13     13   22 while (defined(my $item = $iter->())) {
579 17 50       34 my $key = $key_selector ? $key_selector->($item) : _make_key($item);
580              
581 17 100       51 next if $seen{$key}++; # Skip duplicates
582 15 100       43 return $item unless exists $second_set{$key};
583             }
584 5         10 return undef;
585 5         26 });
586             }
587              
588             # Join - correlates elements of two sequences
589             sub Join {
590 6     6 1 16 my($self, $inner, $outer_key_selector, $inner_key_selector, $result_selector) = @_;
591              
592             # Build hash table from inner sequence
593 6         11 my %inner_hash = ();
594             $inner->ForEach(sub {
595 12     12   17 my $item = shift;
596 12         49 my $key = $inner_key_selector->($item);
597 12 50       44 $key = _make_key($key) if ref($key);
598 12         14 push @{$inner_hash{$key}}, $item;
  12         42  
599 6         32 });
600              
601             # Process outer sequence with lazy evaluation
602 6         29 my $class = ref($self);
603 6         14 my $iter = $self->iterator;
604 6         9 my @buffer = ();
605              
606             return $class->new(sub {
607 15     15   18 while (1) {
608             # Return from buffer if available
609 24 100       75 return shift @buffer if @buffer;
610              
611             # Get next outer element
612 15         21 my $outer_item = $iter->();
613 15 100       33 return undef unless defined $outer_item;
614              
615             # Find matching inner elements
616 9         32 my $key = $outer_key_selector->($outer_item);
617 9 50       30 $key = _make_key($key) if ref($key);
618              
619 9 100       24 if (exists $inner_hash{$key}) {
620 7         7 for my $inner_item (@{$inner_hash{$key}}) {
  7         16  
621 9         23 push @buffer, $result_selector->($outer_item, $inner_item);
622             }
623             }
624             # If no match, continue to next outer element
625             }
626 6         32 });
627             }
628              
629             # GroupJoin - group join (LEFT OUTER JOIN-like operation)
630             sub GroupJoin {
631 29     29 1 43 my($self, $inner, $outer_key_selector, $inner_key_selector, $result_selector) = @_;
632 29         30 my $class = ref($self);
633 29         31 my $outer_iter = $self->iterator;
634              
635             # 1. Build lookup table from inner sequence.
636             # Group all inner items by their keys for efficient lookup.
637             # The inner sequence is fully materialized into memory here.
638 29         25 my %inner_lookup = ();
639             $inner->ForEach(sub {
640 78     78   48 my $item = shift;
641 78         78 my $key = $inner_key_selector->($item);
642 78 50       149 $key = _make_key($key) if ref($key);
643 78 100       75 $key = '' unless defined $key;
644 78         45 push @{$inner_lookup{$key}}, $item;
  78         113  
645 29         80 });
646              
647             # 2. Return lazy iterator over outer sequence
648             return $class->new(sub {
649 96     96   86 my $outer_item = $outer_iter->();
650 96 100       114 return undef unless defined $outer_item;
651              
652             # Get key from outer item
653 68         69 my $key = $outer_key_selector->($outer_item);
654 68 50       136 $key = _make_key($key) if ref($key);
655 68 100       75 $key = '' unless defined $key;
656              
657             # Get matching inner items (empty array ref if no matches)
658 68 100       81 my $matched_inners = exists $inner_lookup{$key} ? $inner_lookup{$key} : [];
659              
660             # Snapshot the matched items into a plain array.
661             # We create a LTSV::LINQ object whose iterator sub always reads
662             # from a fresh index variable, so the group can be traversed
663             # multiple times inside result_selector (e.g. Count() then Sum()).
664 68         65 my @snapshot = @$matched_inners;
665 68         74 my $inner_group = $class->_from_snapshot(\@snapshot);
666              
667 68         70 return $result_selector->($outer_item, $inner_group);
668 29         96 });
669             }
670              
671             #---------------------------------------------------------------------
672             # Quantifier Methods
673             #---------------------------------------------------------------------
674              
675             # All - test if all elements satisfy condition
676             sub All {
677 4     4 1 7 my($self, $predicate) = @_;
678 4         6 my $iter = $self->iterator;
679              
680 4         5 while (defined(my $item = $iter->())) {
681 8 100       8 return 0 unless $predicate->($item);
682             }
683 3         5 return 1;
684             }
685              
686             # Any - test if any element satisfies condition
687             sub Any {
688 19     19 1 42 my($self, $predicate) = @_;
689 19         26 my $iter = $self->iterator;
690              
691 19 100       28 if ($predicate) {
692 6         13 while (defined(my $item = $iter->())) {
693 9 100       36 return 1 if $predicate->($item);
694             }
695 2         13 return 0;
696             }
697             else {
698 13         13 my $item = $iter->();
699 13 100       36 return defined($item) ? 1 : 0;
700             }
701             }
702              
703             # Contains - check if sequence contains element
704             sub Contains {
705 5     5 1 12 my($self, $value, $comparer) = @_;
706              
707 5 100       13 if ($comparer) {
708 1     1   5 return $self->Any(sub { $comparer->($_[0], $value) });
  1         4  
709             }
710             else {
711             return $self->Any(sub {
712 6     6   10 my $item = $_[0];
713 6   33     61 return (!defined($item) && !defined($value)) ||
714             (defined($item) && defined($value) && $item eq $value);
715 4         16 });
716             }
717             }
718              
719             # SequenceEqual - compare two sequences for equality
720             sub SequenceEqual {
721 5     5 1 10 my($self, $second, $comparer) = @_;
722             $comparer ||= sub {
723 8     8   14 my($a, $b) = @_;
724 8   33     67 return (!defined($a) && !defined($b)) ||
725             (defined($a) && defined($b) && $a eq $b);
726 5   66     66 };
727              
728 5         15 my $iter1 = $self->iterator;
729 5         10 my $iter2 = $second->iterator;
730              
731 5         7 while (1) {
732 14         32 my $item1 = $iter1->();
733 14         23 my $item2 = $iter2->();
734              
735             # Both ended - equal
736 14 100 100     48 return 1 if !defined($item1) && !defined($item2);
737              
738             # One ended - not equal
739 11 100 66     45 return 0 if !defined($item1) || !defined($item2);
740              
741             # Compare items
742 10 100       18 return 0 unless $comparer->($item1, $item2);
743             }
744             }
745              
746             #---------------------------------------------------------------------
747             # Element Access Methods
748             #---------------------------------------------------------------------
749              
750             # First - get first element
751             sub First {
752 8     8 1 31 my($self, $predicate) = @_;
753 8         15 my $iter = $self->iterator;
754              
755 8 100       21 if ($predicate) {
756 3         4 while (defined(my $item = $iter->())) {
757 9 100       12 return $item if $predicate->($item);
758             }
759 1         11 die "No element satisfies the condition";
760             }
761             else {
762 5         12 my $item = $iter->();
763 5 100       23 return $item if defined $item;
764 1         8 die "Sequence contains no elements";
765             }
766             }
767              
768             # FirstOrDefault - get first element or default
769             sub FirstOrDefault {
770 4     4 1 9 my $self = shift;
771 4         7 my($predicate, $default);
772              
773 4 100       19 if (@_ >= 2) {
    50          
774             # Two arguments: ($predicate, $default)
775 2         3 ($predicate, $default) = @_;
776             }
777             elsif (@_ == 1) {
778             # One argument: distinguish CODE (predicate) vs non-CODE (default)
779 2 50       5 if (ref($_[0]) eq 'CODE') {
780 0         0 $predicate = $_[0];
781             }
782             else {
783 2         3 $default = $_[0];
784             }
785             }
786              
787 4         7 my $result = eval { $self->First($predicate) };
  4         8  
788 4 100       16 return $@ ? $default : $result;
789             }
790              
791             # Last - get last element
792             sub Last {
793 3     3 1 9 my($self, $predicate) = @_;
794 3         7 my @items = $self->ToArray();
795              
796 3 50       10 if ($predicate) {
797 0         0 for (my $i = $#items; $i >= 0; $i--) {
798 0 0       0 return $items[$i] if $predicate->($items[$i]);
799             }
800 0         0 die "No element satisfies the condition";
801             }
802             else {
803 3 50       12 die "Sequence contains no elements" unless @items;
804 3         10 return $items[-1];
805             }
806             }
807              
808             # LastOrDefault - return last element or default
809             sub LastOrDefault {
810 9     9 1 41 my $self = shift;
811 9         14 my($predicate, $default);
812              
813 9 100       31 if (@_ >= 2) {
    100          
814             # Two arguments: ($predicate, $default)
815 2         6 ($predicate, $default) = @_;
816             }
817             elsif (@_ == 1) {
818             # One argument: distinguish CODE (predicate) vs non-CODE (default)
819 3 100       8 if (ref($_[0]) eq 'CODE') {
820 2         3 $predicate = $_[0];
821             }
822             else {
823 1         2 $default = $_[0];
824             }
825             }
826              
827 9         22 my @items = $self->ToArray();
828              
829 9 100       15 if ($predicate) {
830 3         12 for (my $i = $#items; $i >= 0; $i--) {
831 7 100       49 return $items[$i] if $predicate->($items[$i]);
832             }
833 2         12 return $default;
834             }
835             else {
836 6 100       24 return @items ? $items[-1] : $default;
837             }
838             }
839              
840             # Single - return the only element
841             sub Single {
842 5     5 1 10 my($self, $predicate) = @_;
843 5         11 my $iter = $self->iterator;
844 5         8 my $found;
845 5         8 my $count = 0;
846              
847 5         10 while (defined(my $item = $iter->())) {
848 8 100 100     24 next if $predicate && !$predicate->($item);
849              
850 6         18 $count++;
851 6 100       13 if ($count > 1) {
852 2         19 die "Sequence contains more than one element";
853             }
854 4         8 $found = $item;
855             }
856              
857 3 100       18 die "Sequence contains no elements" if $count == 0;
858 2         4 return $found;
859             }
860              
861             # SingleOrDefault - return the only element or undef
862             sub SingleOrDefault {
863 4     4 1 9 my($self, $predicate) = @_;
864 4         9 my $iter = $self->iterator;
865 4         6 my $found;
866 4         7 my $count = 0;
867              
868 4         7 while (defined(my $item = $iter->())) {
869 6 100 100     18 next if $predicate && !$predicate->($item);
870              
871 4         8 $count++;
872 4 100       9 if ($count > 1) {
873 1         3 return undef; # More than one element
874             }
875 3         6 $found = $item;
876             }
877              
878 3 100       37 return $count == 1 ? $found : undef;
879             }
880              
881             # ElementAt - return element at specified index
882             sub ElementAt {
883 4     4 1 8 my($self, $index) = @_;
884 4 100       43 die "Index must be non-negative" if $index < 0;
885              
886 3         6 my $iter = $self->iterator;
887 3         4 my $current = 0;
888              
889 3         7 while (defined(my $item = $iter->())) {
890 5 100       14 return $item if $current == $index;
891 3         24 $current++;
892             }
893              
894 1         10 die "Index out of range";
895             }
896              
897             # ElementAtOrDefault - return element at index or undef
898             sub ElementAtOrDefault {
899 3     3 1 5 my($self, $index) = @_;
900 3 100       8 return undef if $index < 0;
901              
902 2         6 my $iter = $self->iterator;
903 2         15 my $current = 0;
904              
905 2         7 while (defined(my $item = $iter->())) {
906 4 100       11 return $item if $current == $index;
907 3         4 $current++;
908             }
909              
910 1         4 return undef;
911             }
912              
913             #---------------------------------------------------------------------
914             # Aggregation Methods
915             #---------------------------------------------------------------------
916              
917             # Count - count elements
918             sub Count {
919 45     45 1 145 my($self, $predicate) = @_;
920              
921 45 50       87 if ($predicate) {
922 0         0 return $self->Where($predicate)->Count();
923             }
924              
925 45         37 my $count = 0;
926 45         68 my $iter = $self->iterator;
927 45         50 $count++ while defined $iter->();
928 45         204 return $count;
929             }
930              
931             # Sum - calculate sum
932             sub Sum {
933 9     9 1 21 my($self, $selector) = @_;
934 9   66 10   30 $selector ||= sub { $_[0] };
  10         40  
935              
936 9         13 my $sum = 0;
937             $self->ForEach(sub {
938 19     19   1167 $sum += $selector->(shift);
939 9         1027 });
940 9         42 return $sum;
941             }
942              
943             # Min - find minimum
944             sub Min {
945 1     1 1 2 my($self, $selector) = @_;
946 1   33 5   5 $selector ||= sub { $_[0] };
  5         5  
947              
948 1         2 my $min;
949             $self->ForEach(sub {
950 5     5   5 my $val = $selector->(shift);
951 5 100 100     13 $min = $val if !defined($min) || $val < $min;
952 1         3 });
953 1         3 return $min;
954             }
955              
956             # Max - find maximum
957             sub Max {
958 1     1 1 2 my($self, $selector) = @_;
959 1   33 5   7 $selector ||= sub { $_[0] };
  5         4  
960              
961 1         1 my $max;
962             $self->ForEach(sub {
963 5     5   6 my $val = $selector->(shift);
964 5 100 100     10 $max = $val if !defined($max) || $val > $max;
965 1         4 });
966 1         4 return $max;
967             }
968              
969             # Average - calculate average
970             sub Average {
971 1     1 1 2 my($self, $selector) = @_;
972 1   33 3   4 $selector ||= sub { $_[0] };
  3         1  
973              
974 1         1 my $sum = 0;
975 1         1 my $count = 0;
976             $self->ForEach(sub {
977 3     3   4 $sum += $selector->(shift);
978 3         9 $count++;
979 1         4 });
980              
981 1 50       4 die "Sequence contains no elements" if $count == 0;
982 1         2 return $sum / $count;
983             }
984              
985             # AverageOrDefault - calculate average or return undef if empty
986             sub AverageOrDefault {
987 2     2 1 12 my($self, $selector) = @_;
988 2   33 3   9 $selector ||= sub { $_[0] };
  3         5  
989              
990 2         3 my $sum = 0;
991 2         3 my $count = 0;
992             $self->ForEach(sub {
993 3     3   4 $sum += $selector->(shift);
994 3         4 $count++;
995 2         7 });
996              
997 2 100       8 return undef if $count == 0;
998 1         5 return $sum / $count;
999             }
1000              
1001             # Aggregate - apply accumulator function over sequence
1002             sub Aggregate {
1003 7     7 1 22 my($self, @args) = @_;
1004              
1005 7         10 my($seed, $func, $result_selector);
1006              
1007 7 100       47 if (@args == 1) {
    100          
    50          
1008             # Aggregate($func) - use first element as seed
1009 2         4 $func = $args[0];
1010 2         5 my $iter = $self->iterator;
1011 2         5 $seed = $iter->();
1012 2 100       18 die "Sequence contains no elements" unless defined $seed;
1013              
1014             # Continue with rest of elements
1015 1         5 while (defined(my $item = $iter->())) {
1016 3         8 $seed = $func->($seed, $item);
1017             }
1018             }
1019             elsif (@args == 2) {
1020             # Aggregate($seed, $func)
1021 4         16 ($seed, $func) = @args;
1022             $self->ForEach(sub {
1023 11     11   24 $seed = $func->($seed, shift);
1024 4         58 });
1025             }
1026             elsif (@args == 3) {
1027             # Aggregate($seed, $func, $result_selector)
1028 1         2 ($seed, $func, $result_selector) = @args;
1029             $self->ForEach(sub {
1030 3     3   6 $seed = $func->($seed, shift);
1031 1         25 });
1032             }
1033             else {
1034 0         0 die "Invalid number of arguments for Aggregate";
1035             }
1036              
1037 6 100       32 return $result_selector ? $result_selector->($seed) : $seed;
1038             }
1039              
1040             #---------------------------------------------------------------------
1041             # Conversion Methods
1042             #---------------------------------------------------------------------
1043              
1044             # ToArray - convert to array
1045             sub ToArray {
1046 264     264 1 451 my($self) = @_;
1047 264         248 my @result;
1048 264         365 my $iter = $self->iterator;
1049              
1050 264         386 while (defined(my $item = $iter->())) {
1051 848         1762 push @result, $item;
1052             }
1053 262         804 return @result;
1054             }
1055              
1056             # ToList - convert to array reference
1057             sub ToList {
1058 0     0 1 0 my($self) = @_;
1059 0         0 return [$self->ToArray()];
1060             }
1061              
1062             # ToDictionary - convert sequence to hash reference
1063             sub ToDictionary {
1064 5     5 1 11 my($self, $key_selector, $value_selector) = @_;
1065              
1066             # Default value selector returns the element itself
1067 5   66 2   40 $value_selector ||= sub { $_[0] };
  2         5  
1068              
1069 5         12 my %dictionary = ();
1070              
1071             $self->ForEach(sub {
1072 11     11   18 my $item = shift;
1073 11         20 my $key = $key_selector->($item);
1074 11         35 my $value = $value_selector->($item);
1075              
1076             # Convert undef key to empty string
1077 11 50       35 $key = '' unless defined $key;
1078              
1079             # Later values overwrite earlier ones (Perl hash behavior)
1080 11         32 $dictionary{$key} = $value;
1081 5         24 });
1082              
1083 5         24 return \%dictionary;
1084             }
1085              
1086             # ToLookup - convert sequence to hash of arrays
1087             sub ToLookup {
1088 5     5 1 11 my($self, $key_selector, $value_selector) = @_;
1089              
1090             # Default value selector returns the element itself
1091 5   66 5   23 $value_selector ||= sub { $_[0] };
  5         8  
1092              
1093 5         8 my %lookup = ();
1094              
1095             $self->ForEach(sub {
1096 9     9   12 my $item = shift;
1097 9         18 my $key = $key_selector->($item);
1098 9         29 my $value = $value_selector->($item);
1099              
1100             # Convert undef key to empty string
1101 9 50       26 $key = '' unless defined $key;
1102              
1103 9         12 push @{$lookup{$key}}, $value;
  9         31  
1104 5         23 });
1105              
1106 5         29 return \%lookup;
1107             }
1108              
1109             # DefaultIfEmpty - return default value if empty
1110             sub DefaultIfEmpty {
1111 6     6 1 11 my($self, $default_value) = @_;
1112             # default_value defaults to undef
1113 6         14 my $has_default_arg = @_ > 1;
1114 6 100       19 if (!$has_default_arg) {
1115 1         2 $default_value = undef;
1116             }
1117              
1118 6         9 my $class = ref($self);
1119 6         1093 my $iter = $self->iterator;
1120 6         9 my $has_elements = 0;
1121 6         9 my $returned_default = 0;
1122              
1123             return $class->new(sub {
1124 11     11   22 my $item = $iter->();
1125 11 100       24 if (defined $item) {
1126 2         11 $has_elements = 1;
1127 2         5 return $item;
1128             }
1129              
1130             # EOF reached
1131 9 100 100     38 if (!$has_elements && !$returned_default) {
1132 5         8 $returned_default = 1;
1133 5         41 return $default_value;
1134             }
1135              
1136 4         12 return undef;
1137 6         31 });
1138             }
1139              
1140             # ToLTSV - write to LTSV file
1141             sub ToLTSV {
1142 6     6 1 7 my($self, $filename) = @_;
1143              
1144 6         11 my $fh;
1145 6 50       7 if ($] >= 5.006) {
1146             # Avoid "Too many arguments for open at" error when running with Perl 5.005_03
1147 6 50       334 eval q{ open($fh, '>', $filename) } or die "Cannot open '$filename': $!";
1148             }
1149             else {
1150 0         0 $fh = \do { local *_ };
  0         0  
1151 0 0       0 open($fh, "> $filename") or die "Cannot open '$filename': $!";
1152             }
1153 6         26 binmode $fh; # Write raw bytes; prevents \r\n translation on Windows
1154             # and is consistent with FromLTSV
1155              
1156             $self->ForEach(sub {
1157 7     7   8 my $record = shift;
1158             # LTSV spec: tab is the field separator; newline terminates the record.
1159             # Sanitize values to prevent structural corruption of the output file.
1160             my $line = join("\t", map {
1161 7 100       20 my $v = defined($record->{$_}) ? $record->{$_} : '';
  10         18  
1162 10         22 $v =~ s/[\t\n\r]/ /g;
1163 10         27 "$_:$v"
1164             } sort keys %$record);
1165 7         37 print $fh $line, "\n";
1166 6         34 });
1167              
1168 6         307 close $fh;
1169 6         25 return 1;
1170             }
1171              
1172             #---------------------------------------------------------------------
1173             # Utility Methods
1174             #---------------------------------------------------------------------
1175              
1176             # ForEach - execute action for each element
1177             sub ForEach {
1178 85     85 1 156 my($self, $action) = @_;
1179 85         122 my $iter = $self->iterator;
1180              
1181 85         120 while (defined(my $item = $iter->())) {
1182 211         269 $action->($item);
1183             }
1184 85         132 return;
1185             }
1186              
1187             1;
1188              
1189             ######################################################################
1190             #
1191             # LTSV::LINQ::Ordered - Ordered query supporting ThenBy/ThenByDescending
1192             #
1193             # Returned by OrderBy* methods. Inherits all LTSV::LINQ methods via @ISA.
1194             # ThenBy* methods are only available on this class, mirroring the way
1195             # .NET LINQ's IOrderedEnumerable exposes ThenBy/ThenByDescending while
1196             # plain IEnumerable does not.
1197             #
1198             # Stability guarantee: every sort uses a Schwartzian-Transform-style
1199             # decorated array that appends the original element index as a final
1200             # tie-breaker. This makes the multi-key sort completely stable on all
1201             # Perl versions including 5.005_03, where built-in sort stability is not
1202             # guaranteed.
1203             ######################################################################
1204              
1205             package LTSV::LINQ::Ordered;
1206              
1207             # 5.005_03-compatible inheritance (no 'use parent', no 'our')
1208             @LTSV::LINQ::Ordered::ISA = ('LTSV::LINQ');
1209              
1210             # _new_ordered($items_aref, $specs_aref) - internal constructor
1211             #
1212             # $specs_aref is an arrayref of sort-spec hashrefs:
1213             # { sel => $code_ref, # key selector: ($item) -> $key
1214             # dir => 1 or -1, # 1 = ascending, -1 = descending
1215             # type => 'smart'|'str'|'num' # comparison family
1216             # }
1217             sub _new_ordered {
1218 97     97   116 my($class, $items, $specs) = @_;
1219             # Use _factory so that iterator() returns a fresh sorted iterator on
1220             # each call (enables re-iteration, e.g. in GroupJoin result selectors).
1221             # Methods like Take/Where/Select that call ref($self)->new(sub{...})
1222             # will create a plain object with an {iterator} field (no _factory),
1223             # so they are unaffected by this override.
1224             return bless {
1225             _items => $items,
1226             _specs => $specs,
1227             _factory => sub {
1228 70     70   129 my @sorted = _perform_sort($items, $specs);
1229 70         73 my $i = 0;
1230 70 100       248 return sub { $i < scalar(@sorted) ? $sorted[$i++] : undef };
  350         552  
1231             },
1232 97         647 }, $class;
1233             }
1234              
1235             # _perform_sort($items_aref, $specs_aref) - core stable multi-key sort
1236             #
1237             # Decorated-array (Schwartzian Transform) technique:
1238             # 1. Build [ orig_index, [key1, key2, ..., keyN], item ] per element
1239             # 2. Sort by key1..keyN in sequence; original index as final tie-breaker
1240             # 3. Strip decoration and return plain item list
1241             #
1242             # The original-index tie-breaker guarantees stability on every Perl version.
1243             sub _perform_sort {
1244 70     70   84 my($items, $specs) = @_;
1245              
1246             # Step 1: decorate
1247             my @decorated = map {
1248 309         230 my $idx = $_;
1249 309         255 my $item = $items->[$idx];
1250 309         235 my @keys = map { _extract_key($_->{sel}->($item), $_->{type}) } @{$specs};
  464         581  
  309         277  
1251 309         501 [$idx, \@keys, $item]
1252 70         118 } 0 .. $#{$items};
  70         134  
1253              
1254             # Step 2: sort
1255             my @sorted_dec = sort {
1256 70         206 my $r = 0;
  457         470  
1257 457         362 for my $i (0 .. $#{$specs}) {
  457         539  
1258 563         687 my $cmp = _compare_keys($a->[1][$i], $b->[1][$i], $specs->[$i]{type});
1259 563 100       701 if ($specs->[$i]{dir} < 0) { $cmp = -$cmp }
  88         71  
1260 563 100       675 if ($cmp != 0) { $r = $cmp; last }
  438         305  
  438         387  
1261             }
1262 457 100       632 $r != 0 ? $r : ($a->[0] <=> $b->[0]);
1263             } @decorated;
1264              
1265             # Step 3: undecorate
1266 70         83 return map { $_->[2] } @sorted_dec;
  309         501  
1267             }
1268              
1269             # _extract_key($raw_value, $type) - normalise one sort key
1270             #
1271             # Returns a scalar (num/str) or a two-element arrayref [flag, value]
1272             # for 'smart' type:
1273             # [0, $numeric_val] - key is numeric
1274             # [1, $string_val ] - key is string
1275             sub _extract_key {
1276 464     464   999 my($val, $type) = @_;
1277 464 100       536 $val = '' unless defined $val;
1278 464 100       590 if ($type eq 'num') {
    100          
1279             # Force numeric; undef/empty/non-numeric treated as 0
1280 113 100 66     421 return defined($val) && length($val) ? $val + 0 : 0;
1281             }
1282             elsif ($type eq 'str') {
1283 155         233 return "$val";
1284             }
1285             else {
1286             # smart: detect whether value looks like a number
1287 196         208 my $t = $val;
1288 196         491 $t =~ s/^\s+|\s+$//g;
1289 196 100       400 if ($t =~ /^[+-]?(?:\d+\.?\d*|\d*\.\d+)(?:[eE][+-]?\d+)?$/) {
1290 45         154 return [0, $t + 0];
1291             }
1292             else {
1293 151         304 return [1, "$val"];
1294             }
1295             }
1296             }
1297              
1298             # _compare_keys($ka, $kb, $type) - compare two extracted keys
1299             sub _compare_keys {
1300 563     563   588 my($ka, $kb, $type) = @_;
1301 563 100       708 if ($type eq 'num') {
    100          
1302 124         131 return $ka <=> $kb;
1303             }
1304             elsif ($type eq 'str') {
1305 178         189 return $ka cmp $kb;
1306             }
1307             else {
1308             # smart: both are [flag, value] arrayrefs
1309 261         286 my $fa = $ka->[0]; my $va = $ka->[1];
  261         235  
1310 261         209 my $fb = $kb->[0]; my $vb = $kb->[1];
  261         206  
1311 261 100 66     648 if ($fa == 0 && $fb == 0) { return $va <=> $vb } # both numeric
  60 50 33     87  
1312 201         286 elsif ($fa == 1 && $fb == 1) { return $va cmp $vb } # both string
1313 0         0 else { return $fa <=> $fb } # mixed: numeric before string
1314             }
1315             }
1316              
1317             # (No iterator() override needed: _factory in {_items,_specs,_factory} objects
1318             # is handled by LTSV::LINQ::iterator(), which calls _factory->() each time.
1319             # Objects produced by Take/Where/Select etc. via ref($self)->new(sub{...})
1320             # store their closure in {iterator} and do not have _factory, so they use
1321             # the normal non-re-entrant path.)
1322              
1323             # _thenby($key_selector, $dir, $type) - shared implementation for all ThenBy*
1324             #
1325             # Non-destructive: builds a new spec list and returns a new
1326             # LTSV::LINQ::Ordered object. The original object is unchanged, so
1327             # branching sort chains work correctly:
1328             #
1329             # my $by_dept = From(\@data)->OrderBy(sub { $_[0]{dept} });
1330             # my $by_dept_name = $by_dept->ThenBy(sub { $_[0]{name} });
1331             # my $by_dept_salary = $by_dept->ThenByNum(sub { $_[0]{salary} });
1332             # # $by_dept_name and $by_dept_salary are independent queries
1333             sub _thenby {
1334 27     27   37 my($self, $key_selector, $dir, $type) = @_;
1335 27         27 my @new_specs = (@{$self->{_specs}}, { sel => $key_selector, dir => $dir, type => $type });
  27         70  
1336 27         39 return LTSV::LINQ::Ordered->_new_ordered($self->{_items}, \@new_specs);
1337             }
1338              
1339             # ThenBy - ascending secondary key, smart comparison
1340 12     12   19 sub ThenBy { my($s,$k)=@_; $s->_thenby($k, 1, 'smart') }
  12         25  
1341              
1342             # ThenByDescending - descending secondary key, smart comparison
1343 1     1   2 sub ThenByDescending { my($s,$k)=@_; $s->_thenby($k, -1, 'smart') }
  1         4  
1344              
1345             # ThenByStr - ascending secondary key, string comparison
1346 6     6   9 sub ThenByStr { my($s,$k)=@_; $s->_thenby($k, 1, 'str') }
  6         12  
1347              
1348             # ThenByStrDescending - descending secondary key, string comparison
1349 1     1   3 sub ThenByStrDescending { my($s,$k)=@_; $s->_thenby($k, -1, 'str') }
  1         4  
1350              
1351             # ThenByNum - ascending secondary key, numeric comparison
1352 5     5   30 sub ThenByNum { my($s,$k)=@_; $s->_thenby($k, 1, 'num') }
  5         12  
1353              
1354             # ThenByNumDescending - descending secondary key, numeric comparison
1355 2     2   3 sub ThenByNumDescending { my($s,$k)=@_; $s->_thenby($k, -1, 'num') }
  2         5  
1356              
1357             1;
1358              
1359             =encoding utf8
1360              
1361             =head1 NAME
1362              
1363             LTSV::LINQ - LINQ-style query interface for LTSV files
1364              
1365             =head1 VERSION
1366              
1367             Version 1.05
1368              
1369             =head1 SYNOPSIS
1370              
1371             use LTSV::LINQ;
1372              
1373             # Read LTSV file and query
1374             my @results = LTSV::LINQ->FromLTSV("access.log")
1375             ->Where(sub { $_[0]{status} eq '200' })
1376             ->Select(sub { $_[0]{url} })
1377             ->Distinct()
1378             ->ToArray();
1379              
1380             # DSL syntax for simple filtering
1381             my @errors = LTSV::LINQ->FromLTSV("access.log")
1382             ->Where(status => '404')
1383             ->ToArray();
1384              
1385             # Grouping and aggregation
1386             my @stats = LTSV::LINQ->FromLTSV("access.log")
1387             ->GroupBy(sub { $_[0]{status} })
1388             ->Select(sub {
1389             my $g = shift;
1390             return {
1391             Status => $g->{Key},
1392             Count => scalar(@{$g->{Elements}})
1393             };
1394             })
1395             ->OrderByDescending(sub { $_[0]{Count} })
1396             ->ToArray();
1397              
1398             =head1 TABLE OF CONTENTS
1399              
1400             =over 4
1401              
1402             =item * L
1403              
1404             =item * L - Complete method reference (60 methods)
1405              
1406             =item * L - 8 practical examples
1407              
1408             =item * L - Lazy evaluation, method chaining, DSL
1409              
1410             =item * L - Iterator design, execution flow
1411              
1412             =item * L - Memory usage, optimization tips
1413              
1414             =item * L - Perl 5.005+ support, pure Perl
1415              
1416             =item * L - Error messages
1417              
1418             =item * L - Common questions and answers
1419              
1420             =item * L - Common patterns
1421              
1422             =item * L - Related resources
1423              
1424             =item * L
1425              
1426             =item * L
1427              
1428             =back
1429              
1430             =head1 DESCRIPTION
1431              
1432             LTSV::LINQ provides a LINQ-style query interface for LTSV (Labeled
1433             Tab-Separated Values) files. It offers a fluent, chainable API for
1434             filtering, transforming, and aggregating LTSV data.
1435              
1436             Key features:
1437              
1438             =over 4
1439              
1440             =item * B<Lazy evaluation> - O(1) memory usage for most operations
1441              
1442             =item * B<Method chaining> - Fluent, readable query composition
1443              
1444             =item * B<DSL syntax> - Simple key-value filtering
1445              
1446             =item * B<60 LINQ methods> - Comprehensive query capabilities
1447              
1448             =item * B<Pure Perl> - No XS dependencies
1449              
1450             =item * B<Perl 5.005_03+> - Works on ancient and modern Perl
1451              
1452             =back
1453              
1454             =head2 What is LTSV?
1455              
1456             LTSV (Labeled Tab-Separated Values) is a text format for structured logs and
1457             data records. Each line consists of tab-separated fields, where each field is
1458             a C<label:value> pair. A single LTSV record occupies exactly one line.
1459              
1460             B<Example:>
1461              
1462             time:2026-02-13T10:00:00 host:192.0.2.1 status:200 url:/index.html bytes:1024
1463              
1464             =head3 LTSV Characteristics
1465              
1466             =over 4
1467              
1468             =item * B<One record per line>
1469              
1470             A complete record is always a single newline-terminated line. This makes
1471             streaming processing trivial: read a line, parse it, process it, discard it.
1472             There is no multi-line quoting problem, no block parser required.
1473              
1474             =item * B<Tab as field separator>
1475              
1476             Fields are separated by a single horizontal tab character (C<0x09>).
1477             The tab is a C0 control character in the ASCII range (C<0x00>-C<0x7F>),
1478             which has an important consequence for multibyte character encodings.
1479              
1480             =item * B<Colon as label-value separator>
1481              
1482             Within each field, the label and value are separated by a single colon
1483             (C<0x3A>, US-ASCII C<:>). This is also a plain ASCII character with the same
1484             multibyte-safety guarantees as the tab.
1485              
1486             =back
1487              
1488             =head3 LTSV Advantages
1489              
1490             =over 4
1491              
1492             =item * B<Multibyte-safe delimiters (tab and colon)>
1493              
1494             This is perhaps the most important technical advantage of LTSV over formats
1495             such as CSV (comma-delimited) or TSV without labels.
1496              
1497             In many multibyte character encodings used across Asia and beyond, a
1498             single logical character is represented by a sequence of two or more bytes.
1499             The danger in older encodings is that a byte within a multibyte sequence can
1500             coincidentally equal the byte value of an ASCII delimiter, causing a naive
1501             byte-level parser to split the field in the wrong place.
1502              
1503             The following table shows well-known encodings and their byte ranges:
1504              
1505             Encoding First byte range Following byte range
1506             ---------- -------------------- -------------------------------
1507             Big5 0x81-0xFE 0x40-0x7E, 0xA1-0xFE
1508             Big5-HKSCS 0x81-0xFE 0x40-0x7E, 0xA1-0xFE
1509             CP932X 0x81-0x9F, 0xE0-0xFC 0x40-0x7E, 0x80-0xFC
1510             EUC-JP 0x8E-0x8F, 0xA1-0xFE 0xA1-0xFE
1511             GB 18030 0x81-0xFE 0x30-0x39, 0x40-0x7E, 0x80-0xFE
1512             GBK 0x81-0xFE 0x40-0x7E, 0x80-0xFE
1513             Shift_JIS 0x81-0x9F, 0xE0-0xFC 0x40-0x7E, 0x80-0xFC
1514             RFC 2279 0xC2-0xF4 0x80-0xBF
1515             UHC 0x81-0xFE 0x41-0x5A, 0x61-0x7A, 0x81-0xFE
1516             UTF-8 0xC2-0xF4 0x80-0xBF
1517             WTF-8 0xC2-0xF4 0x80-0xBF
1518              
1519             The tab character is C<0x09>. The colon is C<0x3A>. Both values are
1520             strictly below C<0x40>, the lower bound of any following byte in the encodings
1521             listed above. Neither C<0x09> nor C<0x3A> appears anywhere as a first byte
1522             either. Therefore:
1523              
1524             TAB (0x09) never appears as a byte within any multibyte character
1525             in Big5, Big5-HKSCS, CP932X, EUC-JP, GB 18030, GBK, Shift_JIS,
1526             RFC 2279, UHC, UTF-8, or WTF-8.
1527             ':' (0x3A) never appears as a byte within any multibyte character
1528             in the same set of encodings.
1529              
1530             This means that LTSV files containing values in B<any> of those encodings
1531             can be parsed correctly by a B<simple byte-level split> on tab and colon,
1532             with no knowledge of the encoding whatsoever. There is no need to decode
1533             the text before parsing, and no risk of a misidentified delimiter.
1534              
1535             By contrast, CSV has encoding problems of a different kind.
1536             The comma (C<0x2C>) and the double-quote (C<0x22>) do B<not> appear as
1537             following bytes in Shift_JIS or Big5, so they are not directly confused with
1538             multibyte character content. However, the backslash (C<0x5C>) B<does>
1539             appear as a valid following byte in both Shift_JIS (following byte range
1540             C<0x40>-C<0x7E> includes C<0x5C>) and Big5 (same range). Many CSV
1541             parsers and the C runtime on Windows use backslash or backslash-like
1542             sequences for escaping, so a naive byte-level search for the escape
1543             character can be misled by a multibyte character whose second byte is
1544             C<0x5C>. Beyond this, CSV's quoting rules are underspecified (RFC 4180
1545             vs. Excel vs. custom dialects differ), which makes writing a correct,
1546             encoding-aware CSV parser considerably harder than parsing LTSV.
1547             LTSV sidesteps all of these issues by choosing delimiters (tab and colon)
1548             that fall below C<0x40>, outside every following-byte range of every traditional
1549             multibyte encoding.
1550              
1551             UTF-8 is safe for all ASCII delimiters because continuation bytes are
1552             always in the range C<0x80>-C<0xBF>, never overlapping ASCII. But LTSV's
1553             choice of tab and colon also makes it safe for the traditional multibyte
1554             encodings that predate Unicode, which is critical for systems that still
1555             operate on traditional-encoded data.
1556              
1557             =item * B<Self-describing fields>
1558              
1559             Every field carries its own label. A record is human-readable without a
1560             separate schema or header line. Fields can appear in any order, and
1561             optional fields can simply be omitted. Adding a new field to some records
1562             does not break parsers that do not know about it.
1563              
1564             =item * B<Streaming-friendly>
1565              
1566             Because each record is one line, LTSV files can be processed with line-by-line
1567             streaming. Memory usage is proportional to the longest single record, not
1568             the total file size. This is why C<FromLTSV> in this module uses a lazy
1569             iterator rather than loading the whole file.
1570              
1571             =item * B<Tool-friendly>
1572              
1573             Standard Unix text tools (C<grep>, C<awk>, C<sed>, C<sort>, C<cut>) work
1574             naturally on LTSV files. A field can be located with a pattern like
1575             C<status:500> without any special parser. This makes ad-hoc
1576             analysis and shell scripting straightforward.
1577              
1578             =item * B<No quoting complexity>
1579              
1580             CSV requires quoting fields that contain commas or newlines, and the quoting
1581             rules differ between implementations (RFC 4180 vs. Microsoft Excel vs. others).
1582             LTSV has no quoting: the tab delimiter and the colon separator do not appear
1583             inside values in any of the supported encodings (by the multibyte-safety
1584             argument above), so no escaping mechanism is needed.
1585              
1586             =item * B<Well-established in server logging>
1587              
1588             LTSV originated in the Japanese web industry as a structured log format for
1589             HTTP access logs. Many web servers (Apache, Nginx) and log aggregation tools
1590             support LTSV output or parsing. The format is particularly popular for
1591             application and infrastructure logging where grep-ability and streaming
1592             analysis matter.
1593              
1594             =back
1595              
1596             For the formal LTSV specification, see L<http://ltsv.org/>.
1597              
1598             =head2 What is LINQ?
1599              
1600             LINQ (Language Integrated Query) is a set of query capabilities introduced
1601             in the .NET Framework 3.5 (C# 3.0, 2007) by Microsoft. It defines a
1602             unified model for querying and transforming data from diverse sources --
1603             in-memory collections, relational databases (LINQ to SQL), XML documents
1604             (LINQ to XML), and more -- using a single, consistent API.
1605              
1606             This module brings LINQ-style querying to Perl, applied specifically to
1607             LTSV data sources.
1608              
1609             =head3 LINQ Characteristics
1610              
1611             =over 4
1612              
1613             =item * B<Unified query interface>
1614              
1615             LINQ provides a single set of operators that works uniformly across
1616             data sources. Whether the source is an array, a file, or a database,
1617             the same C<Where>, C<Select>, C<OrderBy> methods apply.
1618             LTSV::LINQ follows this principle: the same methods work on in-memory
1619             arrays (C<From>) and LTSV files (C<FromLTSV>) alike.
1620              
1621             =item * B<Declarative style>
1622              
1623             LINQ queries express I<what> to retrieve, not I<how> to retrieve it.
1624             A query like C<< ->Where(sub { $_[0]{status} >= 400 })->Select(...) >>
1625             describes the intent clearly, without explicit loop management.
1626             This reduces cognitive overhead and makes queries easier to read and verify.
1627              
1628             =item * B<Composability>
1629              
1630             Each LINQ operator takes a sequence and returns a new sequence (or a
1631             scalar result for terminal operators). Because operators are ordinary
1632             method calls that return objects, they compose naturally:
1633              
1634             $query->Where(...)->Select(...)->OrderBy(...)->GroupBy(...)->ToArray()
1635              
1636             Any intermediate result is itself a valid query object, ready for
1637             further transformation or immediate consumption.
1638              
1639             =item * B<Lazy evaluation (deferred execution)>
1640              
1641             Intermediate operators (C<Where>, C<Select>, C<Take>, etc.) do not
1642             execute immediately. They construct a chain of iterator closures.
1643             Evaluation is deferred until a terminal operator (C<ToArray>, C<Count>,
1644             C<First>, C<Sum>, C<ForEach>, etc.) pulls items through the chain.
1645             This means:
1646              
1647             =over 4
1648              
1649             =item - Memory usage is bounded by the window of data in flight, not by the
1650             total data size. A C<< Where->Select->Take(10) >> over a million-line
1651             file stops reading as soon as the tenth matching record has been produced.
1652              
1653             =item - Short-circuiting is free. C<First> stops at the first match.
1654             C<Any> stops as soon as one match is found.
1655              
1656             =item - Pipelines can be built without executing them, and executed
1657             multiple times by wrapping in a factory (see C<_from_snapshot>).
1658              
1659             =back
1660              
1661             =item * B<Method chaining>
1662              
1663             LINQ's design makes chaining natural. In C# this is supported by
1664             extension methods; in Perl it is supported by returning C<$self>-class
1665             objects from every intermediate operator. The result is readable,
1666             left-to-right query expressions.
1667              
1668             =item * B<Separation of query definition from execution>
1669              
1670             A LINQ query object is a description of a computation, not its result.
1671             You can pass query objects around, inspect them, extend them, and decide
1672             later when to execute them. This separation is valuable in library and
1673             framework code.
1674              
1675             =back
1676              
1677             =head3 LINQ Advantages for LTSV Processing
1678              
1679             =over 4
1680              
1681             =item * B<Readable log analysis>
1682              
1683             LTSV log analysis often involves the same logical steps: filter records
1684             by a condition, extract a field, aggregate. LINQ methods map directly
1685             onto these steps, making the code read like a description of the analysis.
1686              
1687             =item * B<Memory-efficient processing of large files>
1688              
1689             Web server access logs can be gigabytes in size. LTSV::LINQ's lazy
1690             C<FromLTSV> iterator reads one line at a time. Combined with C<Where>
1691             and C<Take>, only the needed records are ever in memory simultaneously.
1692              
1693             =item * B<No new language syntax>
1694              
1695             Unlike C# LINQ (which has query comprehension syntax C<from ... where ...
1696             select ...>), LTSV::LINQ works with ordinary Perl method calls and
1697             anonymous subroutines. There is no source filter, no parser extension,
1698             and no dependency on modern Perl features. The same code runs on Perl
1699             5.005_03 and Perl 5.40.
1700              
1701             =item * B<Reusable query definitions>
1702              
1703             A C<Where> clause stored in a variable can be applied to multiple
1704             data sources. Query logic can be parameterized and reused across scripts.
1705              
1706             =back
1707              
1708             For the original LINQ documentation, see
1709             L<https://learn.microsoft.com/en-us/dotnet/csharp/linq/>.
1710              
1711             =head1 METHODS
1712              
1713             =head2 Complete Method Reference
1714              
1715             This module implements 60 LINQ-style methods organized into 15 categories:
1716              
1717             =over 4
1718              
1719             =item * B<Data Source>: From, FromLTSV, Range, Empty, Repeat
1720              
1721             =item * B<Filtering>: Where (with DSL)
1722              
1723             =item * B<Projection>: Select, SelectMany
1724              
1725             =item * B<Concatenation>: Concat, Zip
1726              
1727             =item * B<Partitioning>: Take, Skip, TakeWhile, SkipWhile
1728              
1729             =item * B<Ordering>: OrderBy, OrderByDescending, OrderByStr, OrderByStrDescending, OrderByNum, OrderByNumDescending, Reverse, ThenBy, ThenByDescending, ThenByStr, ThenByStrDescending, ThenByNum, ThenByNumDescending
1730              
1731             =item * B<Grouping>: GroupBy
1732              
1733             =item * B<Set Operations>: Distinct, Union, Intersect, Except
1734              
1735             =item * B<Join>: Join, GroupJoin
1736              
1737             =item * B<Quantifiers>: All, Any, Contains
1738              
1739             =item * B<Comparison>: SequenceEqual
1740              
1741             =item * B<Element Access>: First, FirstOrDefault, Last, LastOrDefault, Single, SingleOrDefault, ElementAt, ElementAtOrDefault
1742              
1743             =item * B<Aggregation>: Count, Sum, Min, Max, Average, AverageOrDefault, Aggregate
1744              
1745             =item * B<Conversion>: ToArray, ToList, ToDictionary, ToLookup, ToLTSV, DefaultIfEmpty
1746              
1747             =item * B<Utility>: ForEach
1748              
1749             =back
1750              
1751             B<Method Summary Table:>
1752              
1753             Method Category Lazy? Returns
1754             ===================== ============== ===== ================
1755             From Data Source Yes Query
1756             FromLTSV Data Source Yes Query
1757             Range Data Source Yes Query
1758             Empty Data Source Yes Query
1759             Repeat Data Source Yes Query
1760             Where Filtering Yes Query
1761             Select Projection Yes Query
1762             SelectMany Projection Yes Query
1763             Concat Concatenation Yes Query
1764             Zip Concatenation Yes Query
1765             Take Partitioning Yes Query
1766             Skip Partitioning Yes Query
1767             TakeWhile Partitioning Yes Query
1768             SkipWhile Partitioning Yes Query
1769             OrderBy Ordering No* OrderedQuery
1770             OrderByDescending Ordering No* OrderedQuery
1771             OrderByStr Ordering No* OrderedQuery
1772             OrderByStrDescending Ordering No* OrderedQuery
1773             OrderByNum Ordering No* OrderedQuery
1774             OrderByNumDescending Ordering No* OrderedQuery
1775             Reverse Ordering No* Query
1776             ThenBy Ordering No* OrderedQuery
1777             ThenByDescending Ordering No* OrderedQuery
1778             ThenByStr Ordering No* OrderedQuery
1779             ThenByStrDescending Ordering No* OrderedQuery
1780             ThenByNum Ordering No* OrderedQuery
1781             ThenByNumDescending Ordering No* OrderedQuery
1782             GroupBy Grouping No* Query
1783             Distinct Set Operation Yes Query
1784             Union Set Operation No* Query
1785             Intersect Set Operation No* Query
1786             Except Set Operation No* Query
1787             Join Join No* Query
1788             GroupJoin Join No* Query
1789             All Quantifier No Boolean
1790             Any Quantifier No Boolean
1791             Contains Quantifier No Boolean
1792             SequenceEqual Comparison No Boolean
1793             First Element Access No Element
1794             FirstOrDefault Element Access No Element or undef
1795             Last Element Access No* Element
1796             LastOrDefault Element Access No* Element or undef
1797             Single Element Access No* Element
1798             SingleOrDefault Element Access No* Element or undef
1799             ElementAt Element Access No* Element
1800             ElementAtOrDefault Element Access No* Element or undef
1801             Count Aggregation No Integer
1802             Sum Aggregation No Number
1803             Min Aggregation No Number
1804             Max Aggregation No Number
1805             Average Aggregation No Number
1806             AverageOrDefault Aggregation No Number or undef
1807             Aggregate Aggregation No Any
1808             DefaultIfEmpty Conversion Yes Query
1809             ToArray Conversion No Array
1810             ToList Conversion No ArrayRef
1811             ToDictionary Conversion No HashRef
1812             ToLookup Conversion No HashRef
1813             ToLTSV Conversion No Boolean
1814             ForEach Utility No Void
1815              
1816             * Materializing operation (loads all data into memory)
1817             OrderedQuery = LTSV::LINQ::Ordered (subclass of LTSV::LINQ;
1818             all LTSV::LINQ methods available plus ThenBy* methods)
1819              
1820             =head2 Data Source Methods
1821              
1822             =over 4
1823              
1824             =item B<From(\@array)>
1825              
1826             Create a query from an array.
1827              
1828             my $query = LTSV::LINQ->From([{name => 'Alice'}, {name => 'Bob'}]);
1829              
1830             =item B<FromLTSV($filename)>
1831              
1832             Create a query from an LTSV file.
1833              
1834             my $query = LTSV::LINQ->FromLTSV("access.log");
1835              
1836             B<File handle management:> C<FromLTSV> opens the file immediately and
1837             holds the file handle open until the iterator reaches end-of-file.
1838             If the query is not fully consumed (e.g. you call C<First> or C<Take>
1839             and stop early), the file handle remains open until the query object
1840             is garbage collected.
1841              
1842             This is harmless for a small number of files, but if you open many
1843             LTSV files concurrently without consuming them fully, you may exhaust
1844             the OS file descriptor limit. In such cases, consume the query fully
1845             or use C<ToArray()> to materialise the data and close the file
1846             immediately:
1847              
1848             # File closed as soon as all records are loaded
1849             my @records = LTSV::LINQ->FromLTSV("access.log")->ToArray();
1850              
1851             =item B<Range($start, $count)>
1852              
1853             Generate a sequence of integers.
1854              
1855             my $query = LTSV::LINQ->Range(1, 10); # 1, 2, ..., 10
1856              
1857             =item B<Empty()>
1858              
1859             Create an empty sequence.
1860              
1861             B<Returns:> Empty LTSV::LINQ query
1862              
1863             B<Examples:>
1864              
1865             my $empty = LTSV::LINQ->Empty();
1866             $empty->Count(); # 0
1867              
1868             # Conditional empty sequence
1869             my $result = $condition ? $query : LTSV::LINQ->Empty();
1870              
1871             B<Note:> Equivalent to C<From([])> but more explicit.
1872              
1873             =item B<Repeat($element, $count)>
1874              
1875             Repeat the same element a specified number of times.
1876              
1877             B<Parameters:>
1878              
1879             =over 4
1880              
1881             =item * C<$element> - Element to repeat
1882              
1883             =item * C<$count> - Number of times to repeat
1884              
1885             =back
1886              
1887             B<Returns:> LTSV::LINQ query with repeated elements
1888              
1889             B<Examples:>
1890              
1891             # Repeat scalar
1892             LTSV::LINQ->Repeat('x', 5)->ToArray(); # ('x', 'x', 'x', 'x', 'x')
1893              
1894             # Repeat reference (same reference repeated)
1895             my $item = {id => 1};
1896             LTSV::LINQ->Repeat($item, 3)->ToArray(); # ($item, $item, $item)
1897              
1898             # Generate default values
1899             LTSV::LINQ->Repeat(0, 10)->ToArray(); # (0, 0, 0, ..., 0)
1900              
1901             B<Note:> The element reference is repeated, not cloned.
1902              
1903             =back
1904              
1905             =head2 Filtering Methods
1906              
1907             =over 4
1908              
1909             =item B<Where($predicate)>
1910              
1911             =item B<< Where(key => value, ...) >>
1912              
1913             Filter elements. Accepts either a code reference or DSL form.
1914              
1915             B<Code Reference Form:>
1916              
1917             ->Where(sub { $_[0]{status} == 200 })
1918             ->Where(sub { $_[0]{status} >= 400 && $_[0]{bytes} > 1000 })
1919              
1920             The code reference receives each element as C<$_[0]> and should return
1921             true to include the element, false to exclude it.
1922              
1923             B<DSL Form:>
1924              
1925             The DSL (Domain Specific Language) form provides a concise syntax for
1926             simple equality comparisons. All conditions are combined with AND logic.
1927              
1928             # Single condition
1929             ->Where(status => '200')
1930              
1931             # Multiple conditions (AND)
1932             ->Where(status => '200', method => 'GET')
1933              
1934             # Equivalent to:
1935             ->Where(sub {
1936             $_[0]{status} eq '200' && $_[0]{method} eq 'GET'
1937             })
1938              
1939             B<DSL Specifications:>
1940              
1941             =over 4
1942              
1943             =item * Arguments must be an even number of C<< key => value >> pairs
1944              
1945             The DSL form interprets its arguments as a flat list of key-value pairs.
1946             Passing an odd number of arguments produces a Perl warning
1947             (C<Odd number of elements in hash assignment>) and the unpaired key
1948             receives C<undef> as its value, which will never match. Always use
1949             complete pairs:
1950              
1951             ->Where(status => '200') # correct: 1 pair
1952             ->Where(status => '200', method => 'GET') # correct: 2 pairs
1953             ->Where(status => '200', 'method') # wrong: 3 args, Perl warning
1954              
1955             =item * All comparisons are string equality (C<eq>)
1956              
1957             =item * All conditions are combined with AND
1958              
1959             =item * Undefined values are treated as failures
1960              
1961             =item * For numeric or OR logic, use code reference form
1962              
1963             =back
1964              
1965             B<Examples:>
1966              
1967             # DSL: Simple and readable
1968             ->Where(status => '200')
1969             ->Where(user => 'alice', role => 'admin')
1970              
1971             # Code ref: Complex logic
1972             ->Where(sub { $_[0]{status} >= 400 && $_[0]{status} < 500 })
1973             ->Where(sub { $_[0]{user} eq 'alice' || $_[0]{user} eq 'bob' })
1974              
1975             =back
1976              
1977             =head2 Projection Methods
1978              
1979             =over 4
1980              
1981             =item B<Select($selector)>
1982              
1983             Transform each element using the provided selector function.
1984              
1985             The selector receives each element as C<$_[0]> and should return
1986             the transformed value.
1987              
1988             B<Parameters:>
1989              
1990             =over 4
1991              
1992             =item * C<$selector> - Code reference that transforms each element
1993              
1994             =back
1995              
1996             B<Returns:> New query with transformed elements (lazy)
1997              
1998             B<Examples:>
1999              
2000             # Extract single field
2001             ->Select(sub { $_[0]{url} })
2002              
2003             # Transform to new structure
2004             ->Select(sub {
2005             {
2006             path => $_[0]{url},
2007             code => $_[0]{status}
2008             }
2009             })
2010              
2011             # Calculate derived values
2012             ->Select(sub { $_[0]{bytes} * 8 }) # bytes to bits
2013              
2014             B<Note:> Select preserves one-to-one mapping. For one-to-many, use
2015             SelectMany.
2016              
2017             =item B<SelectMany($selector)>
2018              
2019             Flatten nested sequences into a single sequence.
2020              
2021             The selector should return an array reference. All arrays are flattened
2022             into a single sequence.
2023              
2024             B<Parameters:>
2025              
2026             =over 4
2027              
2028             =item * C<$selector> - Code reference returning array reference
2029              
2030             =back
2031              
2032             B<Returns:> New query with flattened elements (lazy)
2033              
2034             B<Examples:>
2035              
2036             # Flatten array of arrays
2037             my @nested = ([1, 2], [3, 4], [5]);
2038             LTSV::LINQ->From(\@nested)
2039             ->SelectMany(sub { $_[0] })
2040             ->ToArray(); # (1, 2, 3, 4, 5)
2041              
2042             # Expand related records
2043             ->SelectMany(sub {
2044             my $user = shift;
2045             return [ map {
2046             { user => $user->{name}, role => $_ }
2047             } @{$user->{roles}} ];
2048             })
2049              
2050             B<Use Cases:>
2051              
2052             =over 4
2053              
2054             =item * Flattening nested arrays
2055              
2056             =item * Expanding one-to-many relationships
2057              
2058             =item * Generating multiple outputs per input
2059              
2060             =back
2061              
2062             B<Important:> The selector B<must> return an ARRAY reference. If it returns
2063             any other value (e.g. a hashref or scalar), this method throws an exception:
2064              
2065             die "SelectMany: selector must return an ARRAY reference"
2066              
2067             This matches the behaviour of .NET LINQ's C<SelectMany>, which requires
2068             the selector to return an C<IEnumerable>. Always wrap results in C<[...]>:
2069              
2070             ->SelectMany(sub { [ $_[0]{items} ] }) # correct: arrayref
2071             ->SelectMany(sub { $_[0]{items} }) # wrong: dies at runtime
2072              
2073             =back
2074              
2075             =head2 Concatenation Methods
2076              
2077             =over 4
2078              
2079             =item B<Concat($second)>
2080              
2081             Concatenate two sequences into one.
2082              
2083             B<Parameters:>
2084              
2085             =over 4
2086              
2087             =item * C<$second> - Second sequence (LTSV::LINQ object)
2088              
2089             =back
2090              
2091             B<Returns:> New query with both sequences concatenated (lazy)
2092              
2093             B<Examples:>
2094              
2095             # Combine two data sources
2096             my $q1 = LTSV::LINQ->From([1, 2, 3]);
2097             my $q2 = LTSV::LINQ->From([4, 5, 6]);
2098             $q1->Concat($q2)->ToArray(); # (1, 2, 3, 4, 5, 6)
2099              
2100             # Merge LTSV files
2101             LTSV::LINQ->FromLTSV("jan.log")
2102             ->Concat(LTSV::LINQ->FromLTSV("feb.log"))
2103             ->Where(status => '500')
2104              
2105             B<Note:> This operation is lazy - sequences are read on-demand.
2106              
2107             =item B<Zip($second, $result_selector)>
2108              
2109             Combine two sequences element-wise using a result selector function.
2110              
2111             B<Parameters:>
2112              
2113             =over 4
2114              
2115             =item * C<$second> - Second sequence (LTSV::LINQ object)
2116              
2117             =item * C<$result_selector> - Function to combine elements: ($first, $second) -> $result
2118              
2119             =back
2120              
2121             B<Returns:> New query with combined elements (lazy)
2122              
2123             B<Examples:>
2124              
2125             # Combine numbers
2126             my $numbers = LTSV::LINQ->From([1, 2, 3]);
2127             my $letters = LTSV::LINQ->From(['a', 'b', 'c']);
2128             $numbers->Zip($letters, sub {
2129             my($num, $letter) = @_;
2130             return "$num-$letter";
2131             })->ToArray(); # ('1-a', '2-b', '3-c')
2132              
2133             # Create key-value pairs
2134             my $keys = LTSV::LINQ->From(['name', 'age', 'city']);
2135             my $values = LTSV::LINQ->From(['Alice', 30, 'NYC']);
2136             $keys->Zip($values, sub {
2137             return {$_[0] => $_[1]};
2138             })->ToArray();
2139              
2140             # Stops at shorter sequence
2141             LTSV::LINQ->From([1, 2, 3, 4])
2142             ->Zip(LTSV::LINQ->From(['a', 'b']), sub { [$_[0], $_[1]] })
2143             ->ToArray(); # ([1, 'a'], [2, 'b'])
2144              
2145             B<Note:> Iteration stops when either sequence ends.
2146              
2147             =back
2148              
2149             =head2 Partitioning Methods
2150              
2151             =over 4
2152              
2153             =item B<Take($count)>
2154              
2155             Take the first N elements from the sequence.
2156              
2157             B<Parameters:>
2158              
2159             =over 4
2160              
2161             =item * C<$count> - Number of elements to take (integer >= 0)
2162              
2163             =back
2164              
2165             B<Returns:> New query limited to first N elements (lazy)
2166              
2167             B<Examples:>
2168              
2169             # Top 10 results
2170             ->OrderByDescending(sub { $_[0]{score} })
2171             ->Take(10)
2172              
2173             # First record only
2174             ->Take(1)->ToArray()
2175              
2176             # Limit large file processing
2177             LTSV::LINQ->FromLTSV("huge.log")->Take(1000)
2178              
2179             B<Note:> Take(0) returns empty sequence. Negative values treated as 0.
2180              
2181             =item B<Skip($count)>
2182              
2183             Skip the first N elements, return the rest.
2184              
2185             B<Parameters:>
2186              
2187             =over 4
2188              
2189             =item * C<$count> - Number of elements to skip (integer >= 0)
2190              
2191             =back
2192              
2193             B<Returns:> New query skipping first N elements (lazy)
2194              
2195             B<Examples:>
2196              
2197             # Skip header row
2198             ->Skip(1)
2199              
2200             # Pagination: page 3, size 20
2201             ->Skip(40)->Take(20)
2202              
2203             # Skip first batch
2204             ->Skip(1000)->ForEach(sub { ... })
2205              
2206             B<Use Cases:>
2207              
2208             =over 4
2209              
2210             =item * Pagination
2211              
2212             =item * Skipping header rows
2213              
2214             =item * Processing in batches
2215              
2216             =back
2217              
2218             =item B<TakeWhile($predicate)>
2219              
2220             Take elements while the predicate is true. Stops at first false.
2221              
2222             B<Parameters:>
2223              
2224             =over 4
2225              
2226             =item * C<$predicate> - Code reference returning boolean
2227              
2228             =back
2229              
2230             B<Returns:> New query taking elements while predicate holds (lazy)
2231              
2232             B<Examples:>
2233              
2234             # Take while value is small
2235             ->TakeWhile(sub { $_[0]{count} < 100 })
2236              
2237             # Take while timestamp is in range
2238             ->TakeWhile(sub { $_[0]{time} lt '2026-02-01' })
2239              
2240             # Process until error
2241             ->TakeWhile(sub { $_[0]{status} < 400 })
2242              
2243             B<Important:> TakeWhile stops immediately when predicate returns false.
2244             It does NOT filter - it terminates the sequence.
2245              
2246             # Different from Where:
2247             ->TakeWhile(sub { $_[0] < 5 }) # 1,2,3,4 then STOP
2248             ->Where(sub { $_[0] < 5 }) # 1,2,3,4 (checks all)
2249              
2250             =item B<SkipWhile($predicate)>
2251              
2252             Skip elements while the predicate is true. Returns rest after first false.
2253              
2254             B<Parameters:>
2255              
2256             =over 4
2257              
2258             =item * C<$predicate> - Code reference returning boolean
2259              
2260             =back
2261              
2262             B<Returns:> New query skipping initial elements (lazy)
2263              
2264             B<Examples:>
2265              
2266             # Skip header lines
2267             ->SkipWhile(sub { $_[0]{line} =~ /^#/ })
2268              
2269             # Skip while value is small
2270             ->SkipWhile(sub { $_[0]{count} < 100 })
2271              
2272             # Process after certain timestamp
2273             ->SkipWhile(sub { $_[0]{time} lt '2026-02-01' })
2274              
2275             B<Important:> SkipWhile only skips initial elements. Once predicate is
2276             false, all remaining elements are included.
2277              
2278             [1,2,3,4,5,2,1]->SkipWhile(sub { $_[0] < 4 }) # (4,5,2,1)
2279              
2280             =back
2281              
2282             =head2 Ordering Methods
2283              
2284             B<Sort stability:> C<OrderBy*> and C<ThenBy*> use a Schwartzian-Transform
2285             decorated-array technique that appends the original element index as a
2286             final tie-breaker. This guarantees completely stable multi-key sorting on
2287             B<all Perl versions including 5.005_03>, where built-in C<sort> stability
2288             is not guaranteed.
2289              
2290             B<Comparison type:> LTSV::LINQ provides three families:
2291              
2292             =over 4
2293              
2294             =item * C<OrderBy> / C<OrderByDescending> / C<ThenBy> / C<ThenByDescending>
2295              
2296             Smart comparison: numeric (C<< <=> >>) when both keys look numeric,
2297             string (C<cmp>) otherwise. Convenient for LTSV data where field values
2298             are always strings but commonly hold numbers.
2299              
2300             =item * C<OrderByStr> / C<OrderByStrDescending> / C<ThenByStr> / C<ThenByStrDescending>
2301              
2302             Unconditional string comparison (C<cmp>). Use when keys must sort
2303             lexicographically regardless of content (e.g. version strings, codes).
2304              
2305             =item * C<OrderByNum> / C<OrderByNumDescending> / C<ThenByNum> / C<ThenByNumDescending>
2306              
2307             Unconditional numeric comparison (C<E<lt>=E<gt>>). Use when keys are
2308             always numeric. Undefined or empty values are treated as C<0>.
2309              
2310             =back
2311              
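2312             B<IOrderedEnumerable:> C<OrderBy*> methods return a C<LTSV::LINQ::Ordered>
2313             object (a subclass of C<LTSV::LINQ>). This mirrors the way .NET LINQ's
2314             C<OrderBy> returns C<< IOrderedEnumerable<T> >>, which exposes C<ThenBy> and
2315             C<ThenByDescending>. All C<LTSV::LINQ> methods (C<Where>, C<Select>,
2316             C<Take>, etc.) are available on the returned object through inheritance.
2317             C<ThenBy*> methods are B<only> available on C<LTSV::LINQ::Ordered> objects,
2318             not on plain C<LTSV::LINQ> objects.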
2319              
2320             B<Non-destructive:> C<ThenBy*> always returns a B<new> C<LTSV::LINQ::Ordered>
2321             object; the original is unchanged. Branching sort chains work correctly:
2322              
2323             my $by_dept = LTSV::LINQ->From(\@data)->OrderBy(sub { $_[0]{dept} });
2324             my $asc = $by_dept->ThenBy(sub { $_[0]{name} });
2325             my $desc = $by_dept->ThenByNum(sub { $_[0]{salary} });
2326             # $asc and $desc are completely independent queries
2327              
2328             =over 4
2329              
2330             =item B<OrderBy($key_selector)>
2331              
2332             Sort in ascending order using smart comparison: if both keys look like
2333             numbers (integers, decimals, negative, or exponential notation), numeric
2334             comparison (C<E<lt>=E<gt>>) is used; otherwise string comparison (C<cmp>)
2335             is used. Returns a C<LTSV::LINQ::Ordered> object.
2336              
2337             ->OrderBy(sub { $_[0]{timestamp} }) # string keys: lexicographic
2338             ->OrderBy(sub { $_[0]{bytes} }) # "1024", "256" -> numeric (256, 1024)
2339              
2340             B<Note:> When you need explicit control over the comparison type, use
2341             C<OrderByStr> (always C<cmp>) or C<OrderByNum> (always C<E<lt>=E<gt>>).
2342              
2343             =item B<OrderByDescending($key_selector)>
2344              
2345             Sort in descending order using the same smart comparison as C<OrderBy>.
2346             Returns a C<LTSV::LINQ::Ordered> object.
2347              
2348             ->OrderByDescending(sub { $_[0]{count} })
2349              
2350             =item B<OrderByStr($key_selector)>
2351              
2352             Sort in ascending order using string comparison (C<cmp>) unconditionally.
2353             Returns a C<LTSV::LINQ::Ordered> object.
2354              
2355             ->OrderByStr(sub { $_[0]{code} }) # "10" lt "9" (lexicographic)
2356              
2357             =item B<OrderByStrDescending($key_selector)>
2358              
2359             Sort in descending order using string comparison (C<cmp>) unconditionally.
2360             Returns a C<LTSV::LINQ::Ordered> object.
2361              
2362             ->OrderByStrDescending(sub { $_[0]{name} })
2363              
2364             =item B<OrderByNum($key_selector)>
2365              
2366             Sort in ascending order using numeric comparison (C<E<lt>=E<gt>>)
2367             unconditionally. Returns a C<LTSV::LINQ::Ordered> object.
2368              
2369             ->OrderByNum(sub { $_[0]{bytes} }) # 9 < 10 (numeric)
2370              
2371             B<Note:> Undefined or empty values are treated as C<0>.
2372              
2373             =item B<OrderByNumDescending($key_selector)>
2374              
2375             Sort in descending order using numeric comparison (C<E<lt>=E<gt>>)
2376             unconditionally. Returns a C<LTSV::LINQ::Ordered> object.
2377              
2378             ->OrderByNumDescending(sub { $_[0]{response_time} })
2379              
2380             =item B<Reverse()>
2381              
2382             Reverse the order.
2383              
2384             ->Reverse()
2385              
2386             =item B<ThenBy($key_selector)>
2387              
2388             Add an ascending secondary sort key using smart comparison. Must be
2389             called on a C<LTSV::LINQ::Ordered> object (i.e., after C<OrderBy*>).
2390             Returns a new C<LTSV::LINQ::Ordered> object; the original is unchanged.
2391              
2392             ->OrderBy(sub { $_[0]{dept} })->ThenBy(sub { $_[0]{name} })
2393              
2394             =item B<ThenByDescending($key_selector)>
2395              
2396             Add a descending secondary sort key using smart comparison.
2397              
2398             ->OrderBy(sub { $_[0]{dept} })->ThenByDescending(sub { $_[0]{salary} })
2399              
2400             =item B<ThenByStr($key_selector)>
2401              
2402             Add an ascending secondary sort key using string comparison (C<cmp>).
2403              
2404             ->OrderByStr(sub { $_[0]{dept} })->ThenByStr(sub { $_[0]{code} })
2405              
2406             =item B<ThenByStrDescending($key_selector)>
2407              
2408             Add a descending secondary sort key using string comparison (C<cmp>).
2409              
2410             ->OrderByStr(sub { $_[0]{dept} })->ThenByStrDescending(sub { $_[0]{name} })
2411              
2412             =item B<ThenByNum($key_selector)>
2413              
2414             Add an ascending secondary sort key using numeric comparison (C<E<lt>=E<gt>>).
2415              
2416             ->OrderByStr(sub { $_[0]{dept} })->ThenByNum(sub { $_[0]{salary} })
2417              
2418             =item B<ThenByNumDescending($key_selector)>
2419              
2420             Add a descending secondary sort key using numeric comparison (C<E<lt>=E<gt>>).
2421             Undefined or empty values are treated as C<0>.
2422              
2423             ->OrderByStr(sub { $_[0]{host} })->ThenByNumDescending(sub { $_[0]{bytes} })
2424              
2425             =back
2426              
2427             =head2 Grouping Methods
2428              
2429             =over 4
2430              
2431             =item B<GroupBy($key_selector [, $element_selector])>
2432              
2433             Group elements by key.
2434              
2435             B<Returns:> New query where each element is a hashref with two fields:
2436              
2437             =over 4
2438              
2439             =item * C<Key> - The group key (string)
2440              
2441             =item * C<Elements> - Array reference of elements in the group
2442              
2443             =back
2444              
2445             B<Note:> This operation is eager - the entire sequence is loaded into memory
2446             immediately. Groups are returned in the order their keys first appear in
2447             the source sequence, matching the behaviour of .NET LINQ's C<GroupBy>.
2448              
2449             B<Examples:>
2450              
2451             # Group access log by status code
2452             my @groups = LTSV::LINQ->FromLTSV('access.log')
2453             ->GroupBy(sub { $_[0]{status} })
2454             ->ToArray();
2455              
2456             for my $g (@groups) {
2457             printf "status=%s count=%d\n", $g->{Key}, scalar @{$g->{Elements}};
2458             }
2459              
2460             # With element selector
2461             ->GroupBy(sub { $_[0]{status} }, sub { $_[0]{path} })
2462              
2463             B<Note:> C<Elements> is a plain array reference, not a LTSV::LINQ object.
2464             To apply further LINQ operations on a group, wrap it with C<From>:
2465              
2466             for my $g (@groups) {
2467             my $total = LTSV::LINQ->From($g->{Elements})
2468             ->Sum(sub { $_[0]{bytes} });
2469             printf "status=%s total_bytes=%d\n", $g->{Key}, $total;
2470             }
2471              
2472             =back
2473              
2474             =head2 Set Operations
2475              
2476             B<Note on lazy vs. eager evaluation:>
2477              
2478             =over 4
2479              
2480             =item * C<Distinct> is fully lazy: elements are tested one by one as the
2481             output sequence is consumed.
2482              
2483             =item * C<Union>, C<Intersect>, C<Except> are B<partially eager>: when
2484             the method is called, the B<second> sequence is consumed in full and
2485             stored in an in-memory hash for O(1) lookup. The B<first> sequence is
2486             then iterated lazily. This matches the behaviour of .NET LINQ, which
2487             also buffers the second (hash-side) sequence up front.
2488              
2489             =back
2490              
2491             =over 4
2492              
2493             =item B<Distinct([$key_selector])>
2494              
2495             Remove duplicate elements.
2496              
2497             B<Parameters:>
2498              
2499             =over 4
2500              
2501             =item * C<$key_selector> - (Optional) Code ref: C<($element) -E<gt> $key>.
2502             Extracts a comparison key from each element. This is a single-argument
2503             function (unlike Perl's C<sort> comparator), and is I<not> a two-argument
2504             comparison function.
2505              
2506             =back
2507              
2508             ->Distinct()
2509             ->Distinct(sub { lc($_[0]) }) # case-insensitive strings
2510             ->Distinct(sub { $_[0]{id} }) # hashref: dedupe by field
2511              
2512             =item B<Union($second [, $key_selector])>
2513              
2514             Produce set union of two sequences (no duplicates).
2515              
2516             B<Parameters:>
2517              
2518             =over 4
2519              
2520             =item * C<$second> - Second sequence (LTSV::LINQ object)
2521              
2522             =item * C<$key_selector> - (Optional) Code ref: C<($element) -E<gt> $key>.
2523             Single-argument key extraction function (not a two-argument sort comparator).
2524              
2525             =back
2526              
2527             B<Returns:> New query with elements from both sequences (distinct)
2528              
2529             B<Evaluation:> B<Partially eager.> The first sequence is iterated lazily;
2530             the second is fully consumed at call time and stored in memory.
2531              
2532             B<Examples:>
2533              
2534             # Simple union
2535             my $q1 = LTSV::LINQ->From([1, 2, 3]);
2536             my $q2 = LTSV::LINQ->From([3, 4, 5]);
2537             $q1->Union($q2)->ToArray(); # (1, 2, 3, 4, 5)
2538              
2539             # Case-insensitive union
2540             ->Union($other, sub { lc($_[0]) })
2541              
2542             B<Note:> Equivalent to Concat()->Distinct(). Automatically removes duplicates.
2543              
2544             =item B<Intersect($second [, $key_selector])>
2545              
2546             Produce set intersection of two sequences.
2547              
2548             B<Parameters:>
2549              
2550             =over 4
2551              
2552             =item * C<$second> - Second sequence (LTSV::LINQ object)
2553              
2554             =item * C<$key_selector> - (Optional) Code ref: C<($element) -E<gt> $key>.
2555             Single-argument key extraction function (not a two-argument sort comparator).
2556              
2557             =back
2558              
2559             B<Returns:> New query with common elements only (distinct)
2560              
2561             B<Evaluation:> B<Partially eager.> The second sequence is fully consumed
2562             at call time and stored in a hash; the first is iterated lazily.
2563              
2564             B<Examples:>
2565              
2566             # Common elements
2567             LTSV::LINQ->From([1, 2, 3])
2568             ->Intersect(LTSV::LINQ->From([2, 3, 4]))
2569             ->ToArray(); # (2, 3)
2570              
2571             # Find users in both lists
2572             $users1->Intersect($users2, sub { $_[0]{id} })
2573              
2574             B<Note:> Only includes elements present in both sequences.
2575              
2576             =item B<Except($second [, $key_selector])>
2577              
2578             Produce set difference (elements in first but not in second).
2579              
2580             B<Parameters:>
2581              
2582             =over 4
2583              
2584             =item * C<$second> - Second sequence (LTSV::LINQ object)
2585              
2586             =item * C<$key_selector> - (Optional) Code ref: C<($element) -E<gt> $key>.
2587             Single-argument key extraction function (not a two-argument sort comparator).
2588              
2589             =back
2590              
2591             B<Returns:> New query with elements only in first sequence (distinct)
2592              
2593             B<Evaluation:> B<Partially eager.> The second sequence is fully consumed
2594             at call time and stored in a hash; the first is iterated lazily.
2595              
2596             B<Examples:>
2597              
2598             # Set difference
2599             LTSV::LINQ->From([1, 2, 3])
2600             ->Except(LTSV::LINQ->From([2, 3, 4]))
2601             ->ToArray(); # (1)
2602              
2603             # Find users in first list but not second
2604             $all_users->Except($inactive_users, sub { $_[0]{id} })
2605              
2606             B<Note:> Returns elements from first sequence not present in second.
2607              
2608             =back
2609              
2610             =head2 Join Operations
2611              
2612             B<Evaluation model:> Both C<Join> and C<GroupJoin> are B<partially eager>:
2613             when the method is called, the B<inner> sequence is consumed in full and
2614             stored in an in-memory lookup table (hash of arrays, keyed by inner key).
2615             The B<outer> sequence is then iterated lazily, producing results on demand.
2616              
2617             This matches the behaviour of .NET LINQ's hash-join implementation.
2618             The memory cost is O(inner size); for very large inner sequences, consider
2619             reversing the join or pre-filtering the inner sequence before passing it.
2620              
2621             =over 4
2622              
2623             =item B<Join($inner, $outer_key_selector, $inner_key_selector, $result_selector)>
2624              
2625             Correlate elements of two sequences based on matching keys (inner join).
2626              
2627             B<Parameters:>
2628              
2629             =over 4
2630              
2631             =item * C<$inner> - Inner sequence (LTSV::LINQ object)
2632              
2633             =item * C<$outer_key_selector> - Function to extract key from outer element
2634              
2635             =item * C<$inner_key_selector> - Function to extract key from inner element
2636              
2637             =item * C<$result_selector> - Function to create result: ($outer_item, $inner_item) -> $result
2638              
2639             =back
2640              
2641             B<Returns:> Query with joined results
2642              
2643             B<Examples:>
2644              
2645             # Join users with their orders
2646             my $users = LTSV::LINQ->From([
2647             {id => 1, name => 'Alice'},
2648             {id => 2, name => 'Bob'}
2649             ]);
2650              
2651             my $orders = LTSV::LINQ->From([
2652             {user_id => 1, product => 'Book'},
2653             {user_id => 1, product => 'Pen'},
2654             {user_id => 2, product => 'Notebook'}
2655             ]);
2656              
2657             $users->Join(
2658             $orders,
2659             sub { $_[0]{id} }, # outer key
2660             sub { $_[0]{user_id} }, # inner key
2661             sub {
2662             my($user, $order) = @_;
2663             return {
2664             name => $user->{name},
2665             product => $order->{product}
2666             };
2667             }
2668             )->ToArray();
2669             # [{name => 'Alice', product => 'Book'},
2670             # {name => 'Alice', product => 'Pen'},
2671             # {name => 'Bob', product => 'Notebook'}]
2672              
2673             # Join LTSV files by request ID
2674             LTSV::LINQ->FromLTSV('access.log')->Join(
2675             LTSV::LINQ->FromLTSV('error.log'),
2676             sub { $_[0]{request_id} },
2677             sub { $_[0]{request_id} },
2678             sub {
2679             my($access, $error) = @_;
2680             return {
2681             url => $access->{url},
2682             error => $error->{message}
2683             };
2684             }
2685             )
2686              
2687             B<Note:> This is an inner join - only matching elements are returned.
2688             The inner sequence is fully loaded into memory.
2689              
2690             =item B<GroupJoin($inner, $outer_key_selector, $inner_key_selector, $result_selector)>
2691              
2692             Correlates elements of two sequences with group join (LEFT OUTER JOIN-like).
2693             Each outer element is matched with a group of inner elements (possibly empty).
2694              
2695             B<Parameters:>
2696              
2697             =over 4
2698              
2699             =item * C<$inner> - Inner sequence (LTSV::LINQ object)
2700              
2701             =item * C<$outer_key_selector> - Function to extract key from outer element
2702              
2703             =item * C<$inner_key_selector> - Function to extract key from inner element
2704              
2705             =item * C<$result_selector> - Function: ($outer_item, $inner_group) -> $result.
2706             The C<$inner_group> is a LTSV::LINQ object containing matched inner elements
2707             (empty sequence if no matches).
2708              
2709             =back
2710              
2711             B<Returns:> New query with one result per outer element (lazy)
2712              
2713             B<Examples:>
2714              
2715             # Order count per user (including users with no orders)
2716             my $users = LTSV::LINQ->From([
2717             {id => 1, name => 'Alice'},
2718             {id => 2, name => 'Bob'},
2719             {id => 3, name => 'Carol'}
2720             ]);
2721              
2722             my $orders = LTSV::LINQ->From([
2723             {user_id => 1, product => 'Book', amount => 10},
2724             {user_id => 1, product => 'Pen', amount => 5},
2725             {user_id => 2, product => 'Notebook', amount => 15}
2726             ]);
2727              
2728             $users->GroupJoin(
2729             $orders,
2730             sub { $_[0]{id} },
2731             sub { $_[0]{user_id} },
2732             sub {
2733             my($user, $orders) = @_;
2734             return {
2735             name => $user->{name},
2736             count => $orders->Count(),
2737             total => $orders->Sum(sub { $_[0]{amount} })
2738             };
2739             }
2740             )->ToArray();
2741             # [
2742             # {name => 'Alice', count => 2, total => 15},
2743             # {name => 'Bob', count => 1, total => 15},
2744             # {name => 'Carol', count => 0, total => 0}, # no orders
2745             # ]
2746              
2747             # Flat list with no-match rows included (LEFT OUTER JOIN, cf. Join for inner join)
2748             $users->GroupJoin(
2749             $orders,
2750             sub { $_[0]{id} },
2751             sub { $_[0]{user_id} },
2752             sub {
2753             my($user, $user_orders) = @_;
2754             my @rows = $user_orders->ToArray();
2755             return @rows
2756             ? [ map { {name => $user->{name}, product => $_->{product}} } @rows ]
2757             : [ {name => $user->{name}, product => 'none'} ];
2758             }
2759             )->SelectMany(sub { $_[0] }) # Flatten the array references
2760             ->ToArray();
2761              
2762             B<Note:> Unlike Join, every outer element appears in the result even when
2763             there are no matching inner elements (LEFT OUTER JOIN semantics).
2764             The inner sequence is fully loaded into memory.
2765              
2766             B<Note:> The C<$inner_group> LTSV::LINQ object is highly flexible.
2767             It is specifically designed to be iterated multiple times within the
2768             result selector (e.g., calling C<Count()> followed by C<Sum()>) because
2769             it generates a fresh iterator for every terminal operation.
2770              
2771             =back
2772              
2773             =head2 Quantifier Methods
2774              
2775             =over 4
2776              
2777             =item B<All($predicate)>
2778              
2779             Test if all elements satisfy condition.
2780              
2781             ->All(sub { $_[0]{status} == 200 })
2782              
2783             =item B<Any([$predicate])>
2784              
2785             Test if any element satisfies condition.
2786              
2787             ->Any(sub { $_[0]{status} >= 400 })
2788             ->Any() # Test if sequence is non-empty
2789              
2790             =item B<Contains($value [, $comparer])>
2791              
2792             Check if sequence contains specified element.
2793              
2794             B<Parameters:>
2795              
2796             =over 4
2797              
2798             =item * C<$value> - Value to search for
2799              
2800             =item * C<$comparer> - (Optional) Custom comparison function
2801              
2802             =back
2803              
2804             B<Returns:> Boolean (1 or 0)
2805              
2806             B<Examples:>
2807              
2808             # Simple search
2809             ->Contains(5) # 1 if found, 0 otherwise
2810              
2811             # Case-insensitive search
2812             ->Contains('foo', sub { lc($_[0]) eq lc($_[1]) })
2813              
2814             # Check for undef
2815             ->Contains(undef)
2816              
2817             =item B<SequenceEqual($second [, $comparer])>
2818              
2819             Determine if two sequences are equal (same elements in same order).
2820              
2821             B<Parameters:>
2822              
2823             =over 4
2824              
2825             =item * C<$second> - Second sequence (LTSV::LINQ object)
2826              
2827             =item * C<$comparer> - (Optional) Comparison function ($a, $b) -> boolean
2828              
2829             =back
2830              
2831             B<Returns:> Boolean (1 if equal, 0 otherwise)
2832              
2833             B<Examples:>
2834              
2835             # Same sequences
2836             LTSV::LINQ->From([1, 2, 3])
2837             ->SequenceEqual(LTSV::LINQ->From([1, 2, 3])) # 1 (true)
2838              
2839             # Different elements
2840             LTSV::LINQ->From([1, 2, 3])
2841             ->SequenceEqual(LTSV::LINQ->From([1, 2, 4])) # 0 (false)
2842              
2843             # Different lengths
2844             LTSV::LINQ->From([1, 2])
2845             ->SequenceEqual(LTSV::LINQ->From([1, 2, 3])) # 0 (false)
2846              
2847             # Case-insensitive comparison
2848             $seq1->SequenceEqual($seq2, sub { lc($_[0]) eq lc($_[1]) })
2849              
2850             B<Note:> Order matters. Both content AND order must match.
2851              
2852             =back
2853              
2854             =head2 Element Access Methods
2855              
2856             =over 4
2857              
2858             =item B<First([$predicate])>
2859              
2860             Get first element. Dies if empty.
2861              
2862             ->First()
2863             ->First(sub { $_[0]{status} == 404 })
2864              
2865             =item B<FirstOrDefault([$predicate [, $default]])>
2866              
2867             Get first element or default value.
2868              
2869             ->FirstOrDefault(undef, {})
2870              
2871             =item B<Last([$predicate])>
2872              
2873             Get last element. Dies if empty.
2874              
2875             ->Last()
2876              
2877             =item B<LastOrDefault([$predicate [, $default]])>
2878              
2879             Get last element or default value. Never throws exceptions.
2880              
2881             B<Parameters:>
2882              
2883             =over 4
2884              
2885             =item * C<$predicate> - (Optional) Condition
2886              
2887             =item * C<$default> - (Optional) Value to return when no element is found.
2888             Defaults to C<undef> when omitted.
2889              
2890             =back
2891              
2892             B<Returns:> Last element or C<$default>
2893              
2894             B<Examples:>
2895              
2896             # Get last element (undef if empty)
2897             ->LastOrDefault()
2898              
2899             # Specify a default value
2900             LTSV::LINQ->From([])->LastOrDefault(undef, 0) # 0
2901              
2902             # With predicate and default
2903             ->LastOrDefault(sub { $_[0] % 2 == 0 }, -1) # Last even, or -1
2904              
2905             =item B<Single([$predicate])>
2906              
2907             Get the only element. Dies if sequence has zero or more than one element.
2908              
2909             B<Parameters:>
2910              
2911             =over 4
2912              
2913             =item * C<$predicate> - (Optional) Condition
2914              
2915             =back
2916              
2917             B<Returns:> Single element
2918              
2919             B<Exceptions:>
2920             - Dies with "Sequence contains no elements" if empty
2921             - Dies with "Sequence contains more than one element" if multiple elements
2922              
2923             B<.NET LINQ Compatibility:> Exception messages match .NET LINQ behavior exactly.
2924              
2925             B<Performance:> Uses lazy evaluation. Stops iterating immediately when
2926             second element is found (does not load entire sequence).
2927              
2928             B<Examples:>
2929              
2930             # Exactly one element
2931             LTSV::LINQ->From([5])->Single() # 5
2932              
2933             # With predicate
2934             ->Single(sub { $_[0] > 10 })
2935              
2936             # Memory-efficient: stops at 2nd element
2937             LTSV::LINQ->FromLTSV("huge.log")->Single(sub { $_[0]{id} eq '999' })
2938              
2939             =item B<SingleOrDefault([$predicate])>
2940              
2941             Get the only element, or undef if zero or multiple elements.
2942              
2943             B<Returns:> Single element or undef (if 0 or 2+ elements)
2944              
2945             B<.NET LINQ Compatibility:> B<Note:> .NET's C<SingleOrDefault> throws
2946             C<InvalidOperationException> when the sequence contains more than one
2947             element. LTSV::LINQ returns C<undef> in that case instead of throwing,
2948             which makes it more convenient for Perl code that checks return values.
2949             If you require the strict .NET behaviour (exception on multiple elements),
2950             use C<Single()> wrapped in C<eval>.
2951              
2952             B<Performance:> Uses lazy evaluation. Memory-efficient.
2953              
2954             B<Examples:>
2955              
2956             LTSV::LINQ->From([5])->SingleOrDefault() # 5
2957             LTSV::LINQ->From([])->SingleOrDefault() # undef (empty)
2958             LTSV::LINQ->From([1,2])->SingleOrDefault() # undef (multiple)
2959              
2960             =item B<ElementAt($index)>
2961              
2962             Get element at specified index. Dies if out of range.
2963              
2964             B<Parameters:>
2965              
2966             =over 4
2967              
2968             =item * C<$index> - Zero-based index
2969              
2970             =back
2971              
2972             B<Returns:> Element at index
2973              
2974             B<Exceptions:> Dies if index is negative or out of range
2975              
2976             B<Performance:> Uses lazy evaluation (iterator-based). Does NOT load
2977             entire sequence into memory. Stops iterating once target index is reached.
2978              
2979             B<Examples:>
2980              
2981             ->ElementAt(0) # First element
2982             ->ElementAt(2) # Third element
2983              
2984             # Memory-efficient for large files
2985             LTSV::LINQ->FromLTSV("huge.log")->ElementAt(10) # Reads only 11 lines
2986              
2987             =item B<ElementAtOrDefault($index)>
2988              
2989             Get element at index, or undef if out of range.
2990              
2991             B<Returns:> Element or undef
2992              
2993             B<Performance:> Uses lazy evaluation (iterator-based). Memory-efficient.
2994              
2995             B<Examples:>
2996              
2997             ->ElementAtOrDefault(0) # First element
2998             ->ElementAtOrDefault(99) # undef if out of range
2999              
3000             =back
3001              
3002             =head2 Aggregation Methods
3003              
3004             All aggregation methods are B<terminal operations> - they consume the
3005             entire sequence and return a scalar value.
3006              
3007             =over 4
3008              
3009             =item B<Count([$predicate])>
3010              
3011             Count the number of elements.
3012              
3013             B<Parameters:>
3014              
3015             =over 4
3016              
3017             =item * C<$predicate> - (Optional) Code reference to filter elements
3018              
3019             =back
3020              
3021             B<Returns:> Integer count
3022              
3023             B<Examples:>
3024              
3025             # Count all
3026             ->Count() # 1000
3027              
3028             # Count with condition
3029             ->Count(sub { $_[0]{status} >= 400 }) # 42
3030              
3031             # Equivalent to
3032             ->Where(sub { $_[0]{status} >= 400 })->Count()
3033              
3034             B<Performance:> O(n) - must iterate entire sequence
3035              
3036             =item B<Sum([$selector])>
3037              
3038             Calculate sum of numeric values.
3039              
3040             B<Parameters:>
3041              
3042             =over 4
3043              
3044             =item * C<$selector> - (Optional) Code reference to extract value.
3045             Default: identity function
3046              
3047             =back
3048              
3049             B<Returns:> Numeric sum
3050              
3051             B<Examples:>
3052              
3053             # Sum of values
3054             LTSV::LINQ->From([1, 2, 3, 4, 5])->Sum() # 15
3055              
3056             # Sum of field
3057             ->Sum(sub { $_[0]{bytes} })
3058              
3059             # Sum with transformation
3060             ->Sum(sub { $_[0]{price} * $_[0]{quantity} })
3061              
3062             B<Note:> Non-numeric values may produce warnings. Use numeric context.
3063              
3064             B<Empty sequence:> Returns C<0>.
3065              
3066             =item B<Min([$selector])>
3067              
3068             Find minimum value.
3069              
3070             B<Parameters:>
3071              
3072             =over 4
3073              
3074             =item * C<$selector> - (Optional) Code reference to extract value
3075              
3076             =back
3077              
3078             B<Returns:> Minimum value, or C<undef> if sequence is empty.
3079              
3080             B<Examples:>
3081              
3082             # Minimum of values
3083             ->Min()
3084              
3085             # Minimum of field
3086             ->Min(sub { $_[0]{response_time} })
3087              
3088             # Oldest timestamp
3089             ->Min(sub { $_[0]{timestamp} })
3090              
3091             =item B<Max([$selector])>
3092              
3093             Find maximum value.
3094              
3095             B<Parameters:>
3096              
3097             =over 4
3098              
3099             =item * C<$selector> - (Optional) Code reference to extract value
3100              
3101             =back
3102              
3103             B<Returns:> Maximum value, or C<undef> if sequence is empty.
3104              
3105             B<Examples:>
3106              
3107             # Maximum of values
3108             ->Max()
3109              
3110             # Maximum of field
3111             ->Max(sub { $_[0]{bytes} })
3112              
3113             # Latest timestamp
3114             ->Max(sub { $_[0]{timestamp} })
3115              
3116             =item B<Average([$selector])>
3117              
3118             Calculate arithmetic mean.
3119              
3120             B<Parameters:>
3121              
3122             =over 4
3123              
3124             =item * C<$selector> - (Optional) Code reference to extract value
3125              
3126             =back
3127              
3128             B<Returns:> Numeric average (floating point)
3129              
3130             B<Examples:>
3131              
3132             # Average of values
3133             LTSV::LINQ->From([1, 2, 3, 4, 5])->Average() # 3
3134              
3135             # Average of field
3136             ->Average(sub { $_[0]{bytes} })
3137              
3138             # Average response time
3139             ->Average(sub { $_[0]{response_time} })
3140              
3141             B<Empty sequence:> Dies with "Sequence contains no elements".
3142             Unlike C<Sum> (returns 0) and C<Min>/C<Max> (return C<undef>), C<Average>
3143             throws on an empty sequence. Use C<AverageOrDefault> to avoid the exception.
3144              
3145             B<Note:> Returns floating point. Use C<int()> for integer result.
3146              
3147             =item B<AverageOrDefault([$selector])>
3148              
3149             Calculate arithmetic mean, or return undef if sequence is empty.
3150              
3151             B<Parameters:>
3152              
3153             =over 4
3154              
3155             =item * C<$selector> - (Optional) Code reference to extract value
3156              
3157             =back
3158              
3159             B<Returns:> Numeric average (floating point), or undef if empty
3160              
3161             B<Examples:>
3162              
3163             # Safe average - returns undef for empty sequence
3164             my @empty = ();
3165             my $avg = LTSV::LINQ->From(\@empty)->AverageOrDefault(); # undef
3166              
3167             # With data
3168             LTSV::LINQ->From([1, 2, 3])->AverageOrDefault(); # 2
3169              
3170             # With selector
3171             ->AverageOrDefault(sub { $_[0]{value} })
3172              
3173             B<Note:> Unlike Average(), this method never throws an exception.
3174              
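3175             =item B<Aggregate([$seed,] $func [, $result_selector])>
3176              
3177             Apply an accumulator function over a sequence.
3178              
3179             B<Signatures:>
3180              
3181             =over 4
3182              
3183             =item * C<Aggregate($func)> - Use first element as seed
3184              
3185             =item * C<Aggregate($seed, $func)> - Explicit seed value
3186              
3187             =item * C<Aggregate($seed, $func, $result_selector)> - Transform result
3188              
3189             =back
3190              
3191             B<Parameters:>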
3192              
3193             =over 4
3194              
3195             =item * C<$seed> - Initial accumulator value (optional for first signature)
3196              
3197             =item * C<$func> - Code reference: ($accumulator, $element) -> $new_accumulator
3198              
3199             =item * C<$result_selector> - (Optional) Transform final result
3200              
3201             =back
3202              
3203             B<Returns:> Accumulated value
3204              
3205             B<Examples:>
3206              
3207             # Sum (without seed)
3208             LTSV::LINQ->From([1,2,3,4])->Aggregate(sub { $_[0] + $_[1] }) # 10
3209              
3210             # Product (with seed)
3211             LTSV::LINQ->From([2,3,4])->Aggregate(1, sub { $_[0] * $_[1] }) # 24
3212              
3213             # Concatenate strings
3214             LTSV::LINQ->From(['a','b','c'])
3215             ->Aggregate('', sub { $_[0] ? "$_[0],$_[1]" : $_[1] }) # 'a,b,c'
3216              
3217             # With result selector
3218             LTSV::LINQ->From([1,2,3])
3219             ->Aggregate(0,
3220             sub { $_[0] + $_[1] }, # accumulate
3221             sub { "Sum: $_[0]" }) # transform result
3222             # "Sum: 6"
3223              
3224             # Build complex structure
3225             ->Aggregate([], sub {
3226             my($list, $item) = @_;
3227             push @$list, uc($item);
3228             return $list;
3229             })
3230              
3231             B<.NET LINQ Compatibility:> Supports all three .NET signatures.
3232              
3233             =back
3234              
3235             =head2 Conversion Methods
3236              
3237             =over 4
3238              
3239             =item B<ToArray()>
3240              
3241             Convert to array.
3242              
3243             my @array = $query->ToArray();
3244              
3245             =item B<ToList()>
3246              
3247             Convert to array reference.
3248              
3249             my $arrayref = $query->ToList();
3250              
3251             =item B<ToDictionary($key_selector [, $value_selector])>
3252              
3253             Convert sequence to hash reference with unique keys.
3254              
3255             B<Parameters:>
3256              
3257             =over 4
3258              
3259             =item * C<$key_selector> - Function to extract key from element
3260              
3261             =item * C<$value_selector> - (Optional) Function to extract value, defaults to element itself
3262              
3263             =back
3264              
3265             B<Returns:> Hash reference
3266              
3267             B<Examples:>
3268              
3269             # ID to name mapping
3270             my $users = LTSV::LINQ->From([
3271             {id => 1, name => 'Alice'},
3272             {id => 2, name => 'Bob'}
3273             ]);
3274              
3275             my $dict = $users->ToDictionary(
3276             sub { $_[0]{id} },
3277             sub { $_[0]{name} }
3278             );
3279             # {1 => 'Alice', 2 => 'Bob'}
3280              
3281             # Without value selector (stores entire element)
3282             my $dict = $users->ToDictionary(sub { $_[0]{id} });
3283             # {1 => {id => 1, name => 'Alice'}, 2 => {id => 2, name => 'Bob'}}
3284              
3285             # Quick lookup table
3286             my $status_codes = LTSV::LINQ->FromLTSV('access.log')
3287             ->Select(sub { $_[0]{status} })
3288             ->Distinct()
3289             ->ToDictionary(sub { $_[0] }, sub { 1 });
3290              
3291             B<Note:> If duplicate keys exist, later values overwrite earlier ones.
3292              
3293             B<.NET LINQ Compatibility:> .NET's C<ToDictionary> throws C<ArgumentException>
3294             on duplicate keys. This module silently overwrites with the later value,
3295             following Perl hash semantics. Use C<ToLookup> if you need to preserve all
3296             values for each key.
3297              
3298             =item B<ToLookup($key_selector [, $value_selector])>
3299              
3300             Convert sequence to hash reference with grouped values (multi-value dictionary).
3301              
3302             B<Parameters:>
3303              
3304             =over 4
3305              
3306             =item * C<$key_selector> - Function to extract key from element
3307              
3308             =item * C<$value_selector> - (Optional) Function to extract value, defaults to element itself
3309              
3310             =back
3311              
3312             B<Returns:> Hash reference where values are array references
3313              
3314             B<Examples:>
3315              
3316             # Group orders by user ID
3317             my $orders = LTSV::LINQ->From([
3318             {user_id => 1, product => 'Book'},
3319             {user_id => 1, product => 'Pen'},
3320             {user_id => 2, product => 'Notebook'}
3321             ]);
3322              
3323             my $lookup = $orders->ToLookup(
3324             sub { $_[0]{user_id} },
3325             sub { $_[0]{product} }
3326             );
3327             # {
3328             # 1 => ['Book', 'Pen'],
3329             # 2 => ['Notebook']
3330             # }
3331              
3332             # Group LTSV by status code
3333             my $by_status = LTSV::LINQ->FromLTSV('access.log')
3334             ->ToLookup(sub { $_[0]{status} });
3335             # {
3336             # '200' => [{...}, {...}, ...],
3337             # '404' => [{...}, ...],
3338             # '500' => [{...}]
3339             # }
3340              
3341             B<Note:> Unlike ToDictionary, this preserves all values for each key.
3342              
3343             =item B<DefaultIfEmpty([$default_value])>
3344              
3345             Return default value if sequence is empty, otherwise return the sequence.
3346              
3347             B<Parameters:>
3348              
3349             =over 4
3350              
3351             =item * C<$default_value> - (Optional) Default value, defaults to undef
3352              
3353             =back
3354              
3355             B<Returns:> New query with default value if empty (lazy)
3356              
3357             B<Examples:>
3358              
3359             # Return 0 if empty
3360             ->DefaultIfEmpty(0)->ToArray() # (0) if empty, or original data
3361              
3362             # With undef default
3363             ->DefaultIfEmpty()->FirstOrDefault() # undef if empty
3364              
3365             # Useful for left joins
3366             ->Where(condition)->DefaultIfEmpty({id => 0, name => 'None'})
3367              
3368             B<Note:> This is useful for ensuring a sequence always has at least
3369             one element.
3370              
3371             =item B<ToLTSV($filename)>
3372              
3373             Write to LTSV file.
3374              
3375             $query->ToLTSV("output.ltsv");
3376              
3377             =back
3378              
3379             =head2 Utility Methods
3380              
3381             =over 4
3382              
3383             =item B<ForEach($action)>
3384              
3385             Execute action for each element.
3386              
3387             $query->ForEach(sub { print $_[0]{url}, "\n" });
3388              
3389             =back
3390              
3391             =head1 EXAMPLES
3392              
3393             =head2 Basic Filtering
3394              
3395             use LTSV::LINQ;
3396              
3397             # DSL syntax
3398             my @successful = LTSV::LINQ->FromLTSV("access.log")
3399             ->Where(status => '200')
3400             ->ToArray();
3401              
3402             # Code reference
3403             my @errors = LTSV::LINQ->FromLTSV("access.log")
3404             ->Where(sub { $_[0]{status} >= 400 })
3405             ->ToArray();
3406              
3407             =head2 Aggregation
3408              
3409             # Count errors
3410             my $error_count = LTSV::LINQ->FromLTSV("access.log")
3411             ->Where(sub { $_[0]{status} >= 400 })
3412             ->Count();
3413              
3414             # Average bytes for successful requests
3415             my $avg_bytes = LTSV::LINQ->FromLTSV("access.log")
3416             ->Where(status => '200')
3417             ->Average(sub { $_[0]{bytes} });
3418              
3419             print "Average bytes: $avg_bytes\n";
3420              
3421             =head2 Grouping and Ordering
3422              
3423             # Top 10 URLs by request count
3424             my @top_urls = LTSV::LINQ->FromLTSV("access.log")
3425             ->Where(sub { $_[0]{status} eq '200' })
3426             ->GroupBy(sub { $_[0]{url} })
3427             ->Select(sub {
3428             my $g = shift;
3429             return {
3430             URL => $g->{Key},
3431             Count => scalar(@{$g->{Elements}}),
3432             TotalBytes => LTSV::LINQ->From($g->{Elements})
3433             ->Sum(sub { $_[0]{bytes} })
3434             };
3435             })
3436             ->OrderByDescending(sub { $_[0]{Count} })
3437             ->Take(10)
3438             ->ToArray();
3439              
3440             for my $stat (@top_urls) {
3441             printf "%5d requests - %s (%d bytes)\n",
3442             $stat->{Count}, $stat->{URL}, $stat->{TotalBytes};
3443             }
3444              
3445             =head2 Complex Query Chain
3446              
3447             # Multi-step analysis
3448             my @result = LTSV::LINQ->FromLTSV("access.log")
3449             ->Where(status => '200') # Filter successful
3450             ->Select(sub { $_[0]{bytes} }) # Extract bytes
3451             ->Where(sub { $_[0] > 1000 }) # Large responses only
3452             ->OrderByDescending(sub { $_[0] }) # Sort descending
3453             ->Take(100) # Top 100
3454             ->ToArray();
3455              
3456             print "Largest 100 successful responses:\n";
3457             print " ", join(", ", @result), "\n";
3458              
3459             =head2 Lazy Processing of Large Files
3460              
3461             # Process huge file with constant memory
3462             LTSV::LINQ->FromLTSV("huge.log")
3463             ->Where(sub { $_[0]{level} eq 'ERROR' })
3464             ->ForEach(sub {
3465             my $rec = shift;
3466             print "ERROR at $rec->{time}: $rec->{message}\n";
3467             });
3468              
3469             =head2 Quantifiers
3470              
3471             # Check if all requests are successful
3472             my $all_ok = LTSV::LINQ->FromLTSV("access.log")
3473             ->All(sub { $_[0]{status} < 400 });
3474              
3475             print $all_ok ? "All OK\n" : "Some errors\n";
3476              
3477             # Check if any errors exist
3478             my $has_errors = LTSV::LINQ->FromLTSV("access.log")
3479             ->Any(sub { $_[0]{status} >= 500 });
3480              
3481             print "Server errors detected\n" if $has_errors;
3482              
3483             =head2 Data Transformation
3484              
3485             # Read LTSV, transform, write back
3486             LTSV::LINQ->FromLTSV("input.ltsv")
3487             ->Select(sub {
3488             my $rec = shift;
3489             return {
3490             %$rec,
3491             processed => 1,
3492             timestamp => time(),
3493             };
3494             })
3495             ->ToLTSV("output.ltsv");
3496              
3497             =head2 Working with Arrays
3498              
3499             # Query in-memory data
3500             my @data = (
3501             {name => 'Alice', age => 30, city => 'Tokyo'},
3502             {name => 'Bob', age => 25, city => 'Osaka'},
3503             {name => 'Carol', age => 35, city => 'Tokyo'},
3504             );
3505              
3506             my @tokyo_residents = LTSV::LINQ->From(\@data)
3507             ->Where(city => 'Tokyo')
3508             ->OrderBy(sub { $_[0]{age} })
3509             ->ToArray();
3510              
3511             =head1 FEATURES
3512              
3513             =head2 Lazy Evaluation
3514              
3515             All query operations use lazy evaluation via iterators. Data is
3516             processed on-demand, not all at once.
3517              
3518             # Only reads 10 records from file
3519             my @top10 = LTSV::LINQ->FromLTSV("huge.log")
3520             ->Take(10)
3521             ->ToArray();
3522              
3523             =head2 Method Chaining
3524              
3525             All methods (except terminal operations like ToArray) return a new
3526             query object, enabling fluent method chaining.
3527              
3528             ->Where(...)->Select(...)->OrderBy(...)->Take(10)
3529              
3530             =head2 DSL Syntax
3531              
3532             Simple key-value filtering without code references.
3533              
3534             # Readable and concise
3535             ->Where(status => '200', method => 'GET')
3536              
3537             # Instead of
3538             ->Where(sub { $_[0]{status} eq '200' && $_[0]{method} eq 'GET' })
3539              
3540             =head1 ARCHITECTURE
3541              
3542             =head2 Iterator-Based Design
3543              
3544             LTSV::LINQ uses an iterator-based architecture for lazy evaluation.
3545              
3546             B<Core Concept:>
3547              
3548             Each query operation returns a new query object wrapping an iterator
3549             (a code reference that produces one element per call).
3550              
3551             my $iter = sub {
3552             # Read next element
3553             # Apply transformation
3554             # Return element or undef
3555             };
3556              
3557             my $query = LTSV::LINQ->new($iter);
3558              
3559             B<Benefits:>
3560              
3561             =over 4
3562              
3563             =item * B<Memory Efficient> - O(1) memory for most operations
3564              
3565             =item * B<Lazy Evaluation> - Elements computed on-demand
3566              
3567             =item * B<Composable> - Iterators chain naturally
3568              
3569             =item * B<Early Termination> - Stop processing when done
3570              
3571             =back
3572              
3573             =head2 Method Categories
3574              
3575             The table below shows, for every method, whether it is lazy or eager,
3576             and what it returns. Knowing this prevents surprises about memory use
3577             and iterator consumption.
3578              
3579             Method Category Evaluation Returns
3580             ------ -------- ---------- -------
3581             From Source Lazy (factory) Query
3582             FromLTSV Source Lazy (factory) Query
3583             Range Source Lazy Query
3584             Empty Source Lazy Query
3585             Repeat Source Lazy Query
3586             Where Filter Lazy Query
3587             Select Projection Lazy Query
3588             SelectMany Projection Lazy Query
3589             Concat Concatenation Lazy Query
3590             Zip Concatenation Lazy Query
3591             Take Partitioning Lazy Query
3592             Skip Partitioning Lazy Query
3593             TakeWhile Partitioning Lazy Query
3594             SkipWhile Partitioning Lazy Query
3595             Distinct Set Operation Lazy (1st seq) Query
3596             DefaultIfEmpty Conversion Lazy Query
3597             OrderBy Ordering Eager (full) Query
3598             OrderByDescending Ordering Eager (full) Query
3599             OrderByStr Ordering Eager (full) Query
3600             OrderByStrDescending Ordering Eager (full) Query
3601             OrderByNum Ordering Eager (full) Query
3602             OrderByNumDescending Ordering Eager (full) Query
3603             Reverse Ordering Eager (full) Query
3604             GroupBy Grouping Eager (full) Query
3605             Union Set Operation Eager (2nd seq) Query
3606             Intersect Set Operation Eager (2nd seq) Query
3607             Except Set Operation Eager (2nd seq) Query
3608             Join Join Eager (inner seq) Query
3609             GroupJoin Join Eager (inner seq) Query
3610             All Quantifier Lazy (early exit) Boolean
3611             Any Quantifier Lazy (early exit) Boolean
3612             Contains Quantifier Lazy (early exit) Boolean
3613             SequenceEqual Comparison Lazy (early exit) Boolean
3614             First Element Access Lazy (early exit) Element
3615             FirstOrDefault Element Access Lazy (early exit) Element
3616             Last Element Access Eager (full) Element
3617             LastOrDefault Element Access Eager (full) Element
3618             Single Element Access Lazy (stops at 2) Element
3619             SingleOrDefault Element Access Lazy (stops at 2) Element
3620             ElementAt Element Access Lazy (early exit) Element
3621             ElementAtOrDefault Element Access Lazy (early exit) Element
3622             Count Aggregation Eager (full) Integer
3623             Sum Aggregation Eager (full) Number
3624             Min Aggregation Eager (full) Number
3625             Max Aggregation Eager (full) Number
3626             Average Aggregation Eager (full) Number
3627             AverageOrDefault Aggregation Eager (full) Number or undef
3628             Aggregate Aggregation Eager (full) Scalar
3629             ToArray Conversion Eager (full) Array
3630             ToList Conversion Eager (full) ArrayRef
3631             ToDictionary Conversion Eager (full) HashRef
3632             ToLookup Conversion Eager (full) HashRef
3633             ToLTSV Conversion Eager (full) (file written)
3634             ForEach Utility Eager (full) (void)
3635              
3636             B<Legend:>
3637              
3638             =over 4
3639              
3640             =item * B<Lazy> - returns a new Query immediately; no data is read yet.
3641              
3642             =item * B<Lazy (early exit)> - reads only as many elements as needed, then stops.
3643              
3644             =item * B<Lazy (stops at 2)> - reads until it finds a second match, then stops.
3645              
3646             =item * B<Eager (full)> - must read the entire input sequence before returning.
3647              
3648             =item * B<Eager (2nd seq)> / B<Eager (inner seq)> - the indicated sequence is read
3649             in full up front; the other sequence remains lazy.
3650              
3651             =back
3652              
3653             B<Practical guidelines:>
3654              
3655             =over 4
3656              
3657             =item * Chain lazy operations freely - no cost until a terminal is called.
3658              
3659             =item * Each terminal operation exhausts the iterator; to reuse data, call
3660             C<ToArray> first and rebuild with C<From>.
3661              
3662             =item * For very large files, avoid eager operations (C<OrderBy>, C<GroupBy>,
3663             C<ToArray>, etc.) unless the data fits in memory, or pre-filter with C<Where>
3664             to reduce the working set first.
3665              
3666             =back
3667              
3668             =head2 Query Execution Flow
3669              
3670             # Build query (lazy - no execution yet)
3671             my $query = LTSV::LINQ->FromLTSV("access.log")
3672             ->Where(status => '200') # Lazy
3673             ->Select(sub { $_[0]{url} }) # Lazy
3674             ->Distinct(); # Lazy
3675              
3676             # Execute query (terminal operation)
3677             my @results = $query->ToArray(); # Now executes entire chain
3678              
3679             B<Execution Order:>
3680              
3681             1. FromLTSV opens file and creates iterator
3682             2. Where wraps iterator with filter
3683             3. Select wraps with transformation
3684             4. Distinct wraps with deduplication
3685             5. ToArray pulls elements through chain
3686              
3687             Each element flows through the entire chain before the next element
3688             is read.
3689              
3690             =head2 Memory Characteristics
3691              
3692             B<O(1) memory (streaming) operations:>
3693              
3694             These hold at most one element in memory at a time:
3695              
3696             =over 4
3697              
3698             =item * Where, Select, SelectMany, Concat, Zip
3699              
3700             =item * Take, Skip, TakeWhile, SkipWhile
3701              
3702             =item * DefaultIfEmpty
3703              
3704             =item * ForEach, Count, Sum, Min, Max, Average, AverageOrDefault
3705              
3706             =item * First, FirstOrDefault, Any, All, Contains
3707              
3708             =item * Single, SingleOrDefault, ElementAt, ElementAtOrDefault
3709              
3710             =back
3711              
3712             B<Memory proportional to distinct keys:>
3713              
3714             =over 4
3715              
3716             =item * Distinct - hash grows with the number of distinct keys seen
3717              
3718             =back
3719              
3720             B<Partially eager operations:>
3721              
3722             The following are partially eager: one sequence is buffered in full,
3723             the other is streamed:
3724              
3725             =over 4
3726              
3727             =item * Union, Intersect, Except - second sequence is fully loaded
3728              
3729             =item * Join, GroupJoin - inner sequence is fully loaded
3730              
3731             =back
3732              
3733             B<Fully eager operations (consume the entire sequence):>
3734              
3735             =over 4
3736              
3737             =item * ToArray, ToList, ToDictionary, ToLookup, ToLTSV (O(n))
3738              
3739             =item * OrderBy, OrderByDescending and Str/Num variants, Reverse (O(n))
3740              
3741             =item * GroupBy (O(n))
3742              
3743             =item * Last, LastOrDefault (O(n))
3744              
3745             =item * Aggregate (O(n), O(1) intermediate accumulator)
3746              
3747             =back
3748              
3749             =head1 PERFORMANCE
3750              
3751             =head2 Memory Efficiency
3752              
3753             Lazy evaluation means memory usage is O(1) for most operations,
3754             regardless of input size.
3755              
3756             # Processes 1GB file with constant memory
3757             LTSV::LINQ->FromLTSV("1gb.log")
3758             ->Where(status => '500')
3759             ->ForEach(sub { print $_[0]{url}, "\n" });
3760              
3761             =head2 Terminal Operations
3762              
3763             These operations materialize the entire result set:
3764              
3765             =over 4
3766              
3767             =item * ToArray, ToList
3768              
3769             =item * OrderBy, OrderByDescending, Reverse
3770              
3771             =item * GroupBy
3772              
3773             =item * Last
3774              
3775             =back
3776              
3777             For large datasets, use these operations carefully.
3778              
3779             =head2 Optimization Tips
3780              
3781             =over 4
3782              
3783             =item * Filter early: Place Where clauses first
3784              
3785             # Good: Filter before expensive operations
3786             ->Where(status => '200')->OrderBy(...)->Take(10)
3787              
3788             # Bad: Order all data, then filter
3789             ->OrderBy(...)->Where(status => '200')->Take(10)
3790              
3791             =item * Limit early: Use Take to reduce processing
3792              
3793             # Process only what you need
3794             ->Take(1000)->GroupBy(...)
3795              
3796             =item * Avoid repeated ToArray: Reuse results
3797              
3798             # Bad: Calls ToArray twice
3799             my $count = scalar($query->ToArray());
3800             my @items = $query->ToArray();
3801              
3802             # Good: Call once, reuse
3803             my @items = $query->ToArray();
3804             my $count = scalar(@items);
3805              
3806             =back
3807              
3808             =head1 COMPATIBILITY
3809              
3810             =head2 Perl Version Support
3811              
3812             This module is compatible with B<Perl 5.005_03 and later>.
3813              
3814             Tested on:
3815              
3816             =over 4
3817              
3818             =item * Perl 5.005_03 (released 1999)
3819              
3820             =item * Perl 5.6.x
3821              
3822             =item * Perl 5.8.x
3823              
3824             =item * Perl 5.10.x - 5.42.x
3825              
3826             =back
3827              
3828             =head2 Compatibility Policy
3829              
3830             B<Core Philosophy:>
3831              
3832             This module maintains compatibility with Perl 5.005_03 through careful
3833             coding practices:
3834              
3835             =over 4
3836              
3837             =item * No use of features introduced after 5.005
3838              
3839             =item * C<warnings> compatibility shim for pre-5.6
3840              
3841             =item * C<our> keyword avoided (5.6+ feature)
3842              
3843             =item * Three-argument C<open> used on Perl 5.6 and later (two-argument form retained for 5.005_03)
3844              
3845             =item * No Unicode features required
3846              
3847             =item * No module dependencies beyond core
3848              
3849             =back
3850              
3851             B<Why Perl 5.005_03 Specification?>
3852              
3853             This module adheres to the B<Perl 5.005_03 specification>, which was the
3854             final version of JPerl (Japanese Perl). This is not about using the old
3855             interpreter, but about maintaining the B<simple, original Perl programming model>
3856             that made Perl enjoyable.
3857              
3858             B<The Strength of Modern Times:>
3859              
3860             Some people think the strength of modern times is the ability to use
3861             modern technology. That thinking is insufficient. The strength of modern
3862             times is the ability to use B<all> technology up to the present day.
3863              
3864             By adhering to the Perl 5.005_03 specification, we gain access to the
3865             entire history of Perl--from 5.005_03 to 5.42 and beyond--rather than
3866             limiting ourselves to only the latest versions.
3867              
3868             Key reasons:
3869              
3870             =over 4
3871              
3872             =item * B<Simplicity> - The original Perl approach keeps programming fun and easy
3873              
3874             Perl 5.6 and later introduced character encoding complexity that made
3875             programming harder. The confusion around character handling contributed
3876             to Perl's decline. By staying with the 5.005_03 specification, we maintain
3877             the simplicity that made Perl "rakuda" (camel) -> "raku" (easy/fun).
3878              
3879             =item * B<JPerl Compatibility> - Preserves the last JPerl version
3880              
3881             Perl 5.005_03 was the final version of JPerl, which handled Japanese text
3882             naturally. Later versions abandoned this approach for Unicode, adding
3883             unnecessary complexity for many use cases.
3884              
3885             =item * B<Universal Compatibility> - Runs on ANY Perl version
3886              
3887             Code written to the 5.005_03 specification runs on B<all> Perl versions
3888             from 5.005_03 through 5.42 and beyond. This maximizes compatibility across
3889             more than two decades of Perl releases.
3890              
3891             =item * B<Production Stability> - Real-world enterprise needs
3892              
3893             Many production systems, embedded environments, and enterprise deployments
3894             still run Perl 5.005, 5.6, or 5.8. This module provides modern query
3895             capabilities without requiring upgrades.
3896              
3897             =item * B<Philosophy> - Programming should be enjoyable
3898              
3899             As readers of the "Camel Book" (Programming Perl) know, Perl was designed
3900             to make programming enjoyable. The 5.005_03 specification preserves this
3901             original vision.
3902              
3903             =back
3904              
3905             B<The ina Approach:>
3906              
3907             All modules under the ina CPAN account (including mb, Jacode, UTF8-R2,
3908             mb-JSON, and this module) follow this principle: Write to the Perl 5.005_03
3909             specification, test on all versions, maintain programming joy.
3910              
3911             This is not nostalgia--it's a commitment to:
3912              
3913             =over 4
3914              
3915             =item * Simple, maintainable code
3916              
3917             =item * Maximum compatibility
3918              
3919             =item * The original Perl philosophy
3920              
3921             =item * Making programming "raku" (easy and fun)
3922              
3923             =back
3924              
3925             B<Build System:>
3926              
3927             This module uses C<pmake.bat> instead of traditional make, since Perl 5.005_03
3928             on Microsoft Windows lacks make. All tests pass on Perl 5.005_03 through
3929             modern versions.
3930              
3931             =head2 .NET LINQ Compatibility
3932              
3933             This section documents where LTSV::LINQ's behaviour matches .NET LINQ
3934             exactly, where it intentionally differs, and where it cannot differ due
3935             to Perl's type system.
3936              
3937             B
3938              
3939             =over 4
3940              
3941             =item * C - throws when sequence is empty or has more than one element
3942              
3943             =item * C, C - throw when sequence is empty or no element matches
3944              
3945             =item * C and C
3946             - matching 2- and 3-argument forms
3947              
3948             =item * C - groups are returned in insertion order (first-seen key order)
3949              
3950             =item * C<GroupJoin> - every outer element appears even with zero inner matches
3951              
3952             =item * C<Join> - inner join semantics; unmatched outer elements are dropped
3953              
3954             =item * C / C / C - partially eager (second/inner
3955             sequence buffered up front), matching .NET's hash-join approach
3956              
3957             =item * C, C, C, C - identical semantics
3958              
3959             =item * C<Any> / C<All> with early exit
3960              
3961             =back
3962              
3963             B
3964              
3965             =over 4
3966              
3967             =item * C<SingleOrDefault>
3968              
3969             .NET throws C<InvalidOperationException> when the sequence contains more
3970             than one element. LTSV::LINQ returns C<undef> instead. This makes it
3971             more natural in Perl code that checks return values with C<defined>.
3972              
3973             If you require strict .NET behaviour (exception on multiple elements),
3974             use C<Single> inside an C<eval>:
3975              
3976             my $val = eval { $query->Single() };
3977             # $val is undef and $@ is set if empty or multiple
3978              
3979             =item * C<DefaultIfEmpty(undef)>
3980              
3981             .NET's C<DefaultIfEmpty> can return a sequence containing C<null>
3982             (the reference-type default). LTSV::LINQ cannot: the iterator protocol
3983             uses C<undef> to signal end-of-sequence, so a default value of C<undef>
3984             is indistinguishable from EOF and is silently lost.
3985              
3986             # .NET: seq.DefaultIfEmpty() produces one null element
3987             # Perl:
3988             LTSV::LINQ->From([])->DefaultIfEmpty(undef)->ToArray() # () - empty!
3989             LTSV::LINQ->From([])->DefaultIfEmpty(0)->ToArray() # (0) - works
3990              
3991             Use a sentinel value (C<0>, C<''>, C<{}>) and handle it explicitly.
3992              
3993             =item * C<OrderBy> smart comparison
3994              
3995             .NET's C<OrderBy> is strongly typed: the key type determines the
3996             comparison. In Perl there is no static type, so LTSV::LINQ's C<OrderBy>
3997             uses a heuristic: if both keys look like numbers, C<E<lt>=E<gt>> is used;
3998             otherwise C<cmp>. For explicit control, use C<OrderByStr> (always C<cmp>)
3999             or C<OrderByNum> (always C<E<lt>=E<gt>>).
4000              
4001             =item * EqualityComparer / IComparer
4002              
4003             .NET LINQ accepts C<IEqualityComparer> and C<IComparer> interface objects
4004             for custom equality and ordering. LTSV::LINQ uses code references (C<sub { ... }>)
4005             that extract a I<key> from each element. This is equivalent in power but
4006             different in calling convention: the sub receives one element and returns a
4007             key, rather than receiving two elements and returning a comparison result.
4008              
4009             =item * C on typed sequences
4010              
4011             .NET's C is type-checked. LTSV::LINQ accepts any two sequences
4012             regardless of element type.
4013              
4014             =item * No query expression syntax
4015              
4016             .NET's C<from x in seq where ... select ...> syntax compiles to LINQ
4017             method calls. Perl has no equivalent; use method chaining directly.
4018              
4019             =back
4020              
4021             =head2 Pure Perl Implementation
4022              
4023             B
4024              
4025             This module is implemented in Pure Perl with no XS (C extensions).
4026             Benefits:
4027              
4028             =over 4
4029              
4030             =item * Works on any Perl installation
4031              
4032             =item * No C compiler required
4033              
4034             =item * Easy installation in restricted environments
4035              
4036             =item * Consistent behavior across platforms
4037              
4038             =item * Simpler debugging and maintenance
4039              
4040             =back
4041              
4042             =head2 Core Module Dependencies
4043              
4044             B This module uses only Perl core features available since 5.005.
4045              
4046             No CPAN dependencies required.
4047              
4048             =head1 DIAGNOSTICS
4049              
4050             =head2 Error Messages
4051              
4052             This module may throw the following exceptions:
4053              
4054             =over 4
4055              
4056             =item C
4057              
4058             Thrown by From() when the argument is not an array reference.
4059              
4060             Example:
4061              
4062             LTSV::LINQ->From("string"); # Dies
4063             LTSV::LINQ->From([1, 2, 3]); # OK
4064              
4065             =item C
4066              
4067             Thrown by SelectMany() when the selector function returns anything
4068             other than an ARRAY reference. Wrap the return value in C<[...]>:
4069              
4070             # Wrong - hashref causes die
4071             ->SelectMany(sub { {key => 'val'} })
4072              
4073             # Correct - arrayref
4074             ->SelectMany(sub { [{key => 'val'}] })
4075              
4076             # Correct - empty array for "no results" case
4077             ->SelectMany(sub { [] })
4078              
4079             =item C<Sequence contains no elements>
4080              
4081             Thrown by First(), Last(), or Average() when called on an empty sequence.
4082              
4083             Methods that throw this error:
4084              
4085             =over 4
4086              
4087             =item * First()
4088              
4089             =item * Last()
4090              
4091             =item * Average()
4092              
4093             =back
4094              
4095             To avoid this error, use the OrDefault variants:
4096              
4097             =over 4
4098              
4099             =item * FirstOrDefault() - returns undef instead of dying
4100              
4101             =item * LastOrDefault() - returns undef instead of dying
4102              
4103             =item * AverageOrDefault() - returns undef instead of dying
4104              
4105             =back
4106              
4107             Example:
4108              
4109             my @empty = ();
4110             LTSV::LINQ->From(\@empty)->First(); # Dies
4111             LTSV::LINQ->From(\@empty)->FirstOrDefault(); # Returns undef
4112              
4113             =item C
4114              
4115             Thrown by First() or Last() with a predicate when no element matches.
4116              
4117             Example:
4118              
4119             my @data = (1, 2, 3);
4120             LTSV::LINQ->From(\@data)->First(sub { $_[0] > 10 }); # Dies
4121             LTSV::LINQ->From(\@data)->FirstOrDefault(sub { $_[0] > 10 }); # Returns undef
4122              
4123             =item C
4124              
4125             File I/O error when FromLTSV() cannot open the specified file.
4126              
4127             Common causes:
4128              
4129             =over 4
4130              
4131             =item * File does not exist
4132              
4133             =item * Insufficient permissions
4134              
4135             =item * Invalid path
4136              
4137             =back
4138              
4139             Example:
4140              
4141             LTSV::LINQ->FromLTSV("/nonexistent/file.ltsv"); # Dies with this error
4142              
4143             =back
4144              
4145             =head2 Methods That May Throw Exceptions
4146              
4147             =over 4
4148              
4149             =item B<From()>
4150              
4151             Dies if argument is not an array reference.
4152              
4153             =item B<FromLTSV()>
4154              
4155             Dies if file cannot be opened.
4156              
4157             B The file handle is held open until the iterator is fully
4158             consumed. Partially consumed queries keep their file handles open.
4159             See C in L for details.
4160              
4161             =item B<First()>
4162              
4163             Dies if sequence is empty or no element matches predicate.
4164              
4165             Safe alternative: FirstOrDefault()
4166              
4167             =item B<Last()>
4168              
4169             Dies if sequence is empty or no element matches predicate.
4170              
4171             Safe alternative: LastOrDefault()
4172              
4173             =item B<Average()>
4174              
4175             Dies if sequence is empty.
4176              
4177             Safe alternative: AverageOrDefault()
4178              
4179             =back
4180              
4181             =head2 Safe Alternatives
4182              
4183             For methods that may throw exceptions, use the OrDefault variants:
4184              
4185             First() -> FirstOrDefault() (returns undef)
4186             Last() -> LastOrDefault() (returns undef)
4187             Average() -> AverageOrDefault() (returns undef)
4188              
4189             Example:
4190              
4191             # Unsafe - may die
4192             my $first = LTSV::LINQ->From(\@data)->First();
4193              
4194             # Safe - returns undef if empty
4195             my $first = LTSV::LINQ->From(\@data)->FirstOrDefault();
4196             if (defined $first) {
4197             # Process $first
4198             }
4199              
4200             =head2 Exception Format and Stack Traces
4201              
4202             All exceptions thrown by this module are plain strings produced by
4203             C<die "message">. Because no trailing newline is appended, Perl
4204             automatically appends the source location:
4205              
4206             Sequence contains no elements at lib/LTSV/LINQ.pm line 764.
4207              
4208             This is intentional: the location helps when diagnosing unexpected
4209             failures during development.
4210              
4211             When catching exceptions with C<eval>, the full string including the
4212             location suffix is available in C<$@>. Use a prefix match if you want
4213             to test only the message text:
4214              
4215             eval { LTSV::LINQ->From([])->First() };
4216             if ($@ =~ /^Sequence contains no elements/) {
4217             # handle empty sequence
4218             }
4219              
4220             If you prefer exceptions without the location suffix, wrap the call
4221             in a thin eval and re-die with a newline:
4222              
4223             eval { $result = $query->First() };
4224             if ($@) { (my $msg = $@) =~ s/ at .+? line \d+.*\z//s; die "$msg\n" } # strip " at ... line N"; the newline stops Perl re-appending it
4225              
4226             =head1 FAQ
4227              
4228             =head2 General Questions
4229              
4230             =over 4
4231              
4232             =item B
4233              
4234             A: LINQ provides:
4235              
4236             =over 4
4237              
4238             =item * Method chaining (more Perl-like)
4239              
4240             =item * Type safety through code
4241              
4242             =item * No string parsing required
4243              
4244             =item * Composable queries
4245              
4246             =back
4247              
4248             =item B
4249              
4250             A: No. Query objects use iterators that can only be consumed once.
4251              
4252             # Wrong - iterator consumed by first ToArray
4253             my $query = LTSV::LINQ->FromLTSV("file.ltsv");
4254             my @first = $query->ToArray(); # OK
4255             my @second = $query->ToArray(); # Empty! Iterator exhausted
4256              
4257             # Right - create new query for each use
4258             my $query1 = LTSV::LINQ->FromLTSV("file.ltsv");
4259             my @first = $query1->ToArray();
4260              
4261             my $query2 = LTSV::LINQ->FromLTSV("file.ltsv");
4262             my @second = $query2->ToArray();
4263              
4264             =item B
4265              
4266             A: Use code reference form with C<||>:
4267              
4268             # OR condition requires code reference
4269             ->Where(sub {
4270             $_[0]{status} == 200 || $_[0]{status} == 304
4271             })
4272              
4273             # DSL only supports AND
4274             ->Where(status => '200') # Single condition only
4275              
4276             =item B
4277              
4278             A: Some operations require multiple passes:
4279              
4280             # This reads the file TWICE
4281             my $avg = $query->Average(...); # Pass 1: Calculate
4282             my @all = $query->ToArray(); # Pass 2: Collect (iterator reset!)
4283              
4284             # Save result instead
4285             my @all = $query->ToArray();
4286             my $avg = LTSV::LINQ->From(\@all)->Average(...);
4287              
4288             =back
4289              
4290             =head2 Performance Questions
4291              
4292             =over 4
4293              
4294             =item B
4295              
4296             A: Use lazy operations and avoid materializing:
4297              
4298             # Good - constant memory
4299             LTSV::LINQ->FromLTSV("huge.log")
4300             ->Where(status => '500')
4301             ->ForEach(sub { print $_[0]{message}, "\n" });
4302              
4303             # Bad - loads everything into memory
4304             my @all = LTSV::LINQ->FromLTSV("huge.log")->ToArray();
4305              
4306             =item B
4307              
4308             A: OrderBy must load all elements into memory to sort them.
4309              
4310             # Slow on 1GB file - loads everything
4311             ->OrderBy(sub { $_[0]{timestamp} })->Take(10)
4312              
4313             # Faster - limit before sorting (if possible)
4314             ->Where(status => '500')->OrderBy(...)->Take(10)
4315              
4316             =item B
4317              
4318             A: Use ForEach or streaming terminal operations:
4319              
4320             # Process 100GB file with 1KB memory
4321             my $error_count = 0;
4322             LTSV::LINQ->FromLTSV("100gb.log")
4323             ->Where(sub { $_[0]{level} eq 'ERROR' })
4324             ->ForEach(sub { $error_count++ });
4325              
4326             print "Errors: $error_count\n";
4327              
4328             =back
4329              
4330             =head2 DSL Questions
4331              
4332             =over 4
4333              
4334             =item B
4335              
4336             A: No. DSL uses string equality (C<eq>). Use code reference for numeric:
4337              
4338             # DSL - string comparison
4339             ->Where(status => '200') # $_[0]{status} eq '200'
4340              
4341             # Code ref - numeric comparison
4342             ->Where(sub { $_[0]{status} == 200 })
4343             ->Where(sub { $_[0]{bytes} > 1000 })
4344              
4345             =item B
4346              
4347             A: DSL doesn't support it. Use code reference:
4348              
4349             # Case-insensitive requires code reference
4350             ->Where(sub { lc($_[0]{method}) eq 'get' })
4351              
4352             =item B
4353              
4354             A: No. Use code reference:
4355              
4356             # Regex requires code reference
4357             ->Where(sub { $_[0]{url} =~ m{^/api/} })
4358              
4359             =back
4360              
4361             =head2 Compatibility Questions
4362              
4363             =over 4
4364              
4365             =item B
4366              
4367             A: Yes. Tested on Perl 5.005_03 through 5.40+.
4368              
4369             =item B
4370              
4371             A: No. Pure Perl with no dependencies beyond core.
4372              
4373             =item B
4374              
4375             A: Yes. Pure Perl works on all platforms.
4376              
4377             =item B
4378              
4379             A: Many production systems cannot upgrade. This module provides
4380             modern query capabilities without requiring upgrades.
4381              
4382             =back
4383              
4384             =head1 COOKBOOK
4385              
4386             =head2 Common Patterns
4387              
4388             =over 4
4389              
4390             =item B
4391              
4392             ->OrderByDescending(sub { $_[0]{score} })
4393             ->Take(10)
4394             ->ToArray()
4395              
4396             =item B
4397              
4398             ->GroupBy(sub { $_[0]{category} })
4399             ->Select(sub {
4400             {
4401             Category => $_[0]{Key},
4402             Count => scalar(@{$_[0]{Elements}})
4403             }
4404             })
4405             ->ToArray()
4406              
4407             =item B
4408              
4409             my $total = 0;
4410             ->Select(sub {
4411             $total += $_[0]{amount};
4412             { %{$_[0]}, running_total => $total }
4413             })
4414              
4415             =item B
4416              
4417             # Page 3, size 20
4418             ->Skip(40)->Take(20)->ToArray()
4419              
4420             =item B
4421              
4422             ->Select(sub { $_[0]{category} })
4423             ->Distinct()
4424             ->ToArray()
4425              
4426             =item B
4427              
4428             Note: A query object can only be consumed once. To compute multiple
4429             aggregations over the same source, materialise it first with C<ToArray>.
4430              
4431             my @all = LTSV::LINQ->FromLTSV("access.log")->ToArray();
4432              
4433             my $success_avg = LTSV::LINQ->From(\@all)
4434             ->Where(status => '200')
4435             ->Average(sub { $_[0]{response_time} });
4436              
4437             my $error_avg = LTSV::LINQ->From(\@all)
4438             ->Where(sub { $_[0]{status} >= 400 })
4439             ->Average(sub { $_[0]{response_time} });
4440              
4441             =item B
4442              
4443             A query object wraps a single-pass iterator. Once consumed, it is
4444             exhausted and subsequent terminal operations return empty results or die.
4445              
4446             # WRONG - $q is exhausted after the first Count()
4447             my $q = LTSV::LINQ->FromLTSV("access.log")->Where(status => '200');
4448             my $n = $q->Count(); # OK
4449             my $first = $q->First(); # WRONG: iterator already at EOF
4450              
4451             # RIGHT - snapshot into array, then query as many times as needed
4452             my @rows = LTSV::LINQ->FromLTSV("access.log")->Where(status => '200')->ToArray();
4453             my $n = LTSV::LINQ->From(\@rows)->Count();
4454             my $first = LTSV::LINQ->From(\@rows)->First();
4455              
4456             The snapshot approach is also the correct pattern for any multi-pass
4457             computation such as computing both average and standard deviation,
4458             comparing the same sequence against two different filters, or iterating
4459             once to validate and once to transform.
4460              
4461             =item B
4462              
4463             For files too large to fit in memory, keep the chain fully lazy by
4464             ensuring only one terminal operation is performed per pass:
4465              
4466             # One pass - pick only what you need
4467             my @slow = LTSV::LINQ->FromLTSV("access.log")
4468             ->Where(sub { $_[0]{response_time} > 1000 })
4469             ->OrderByNum(sub { $_[0]{response_time} })
4470             ->Take(20)
4471             ->ToArray();
4472              
4473             # Never do two passes on the same FromLTSV object -
4474             # open the file again for a second pass:
4475             my $count = LTSV::LINQ->FromLTSV("access.log")->Count();
4476             my $sum = LTSV::LINQ->FromLTSV("access.log")
4477             ->Sum(sub { $_[0]{bytes} });
4478              
4479             =back
4480              
4481             =head1 DESIGN PHILOSOPHY
4482              
4483             =head2 Historical Compatibility: Perl 5.005_03
4484              
4485             This module maintains compatibility with Perl 5.005_03 (released 1999-03-28),
4486             following the B<Universal Consensus 1998>.
4487              
4488             B
4489              
4490             =over 4
4491              
4492             =item * B
4493              
4494             Code written in 1998-era Perl should still run in 2026 and beyond.
4495             This demonstrates Perl's commitment to backwards compatibility.
4496              
4497             =item * B
4498              
4499             Some production systems, embedded devices, and enterprise environments
4500             cannot easily upgrade Perl. Maintaining compatibility ensures this module
4501             remains useful in those contexts.
4502              
4503             =item * B
4504              
4505             By avoiding modern Perl features, this module has zero non-core dependencies.
4506             It works with only the Perl core that has existed since 1999.
4507              
4508             =back
4509              
4510             B
4511              
4512             =over 4
4513              
4514             =item * No C<our> keyword - uses package variables
4515              
4516             =item * No C<warnings> pragma - uses C<local $^W=1>
4517              
4518             =item * No C improvements from 5.6+
4519              
4520             =item * All features implemented with Perl 5.005-era constructs
4521              
4522             =back
4523              
4524             The code comment C<# use 5.008001; # Lancaster Consensus 2013 for toolchains>
4525             marks where modern code would typically start. We intentionally stay below
4526             this line.
4527              
4528             =head2 US-ASCII Only Policy
4529              
4530             All source code is strictly US-ASCII (bytes 0x00-0x7F). No UTF-8, no
4531             extended characters.
4532              
4533             B
4534              
4535             =over 4
4536              
4537             =item * B
4538              
4539             US-ASCII works everywhere - ancient terminals, modern IDEs, web browsers,
4540             email systems. No encoding issues, ever.
4541              
4542             =item * B
4543              
4544             The code behaves identically regardless of system locale settings.
4545              
4546             =item * B
4547              
4548             Source code (ASCII) vs. data (any encoding). The module processes LTSV
4549             data in any encoding, but its own code remains pure ASCII.
4550              
4551             =back
4552              
4553             This policy is verified by C.
4554              
4555             =head2 The C<$VERSION = $VERSION> Idiom
4556              
4557             You may notice:
4558              
4559             $VERSION = '1.05';
4560             $VERSION = $VERSION;
4561              
4562             This is B<intentional>, not a typo. Under C<use strict>, a variable used
4563             only once triggers a warning. The self-assignment ensures C<$VERSION>
4564             appears twice, silencing the warning without requiring C<our> (which
4565             doesn't exist in Perl 5.005).
4566              
4567             This is a well-known idiom from the pre-C<our> era.
4568              
4569             =head2 Design Principles
4570              
4571             =over 4
4572              
4573             =item * B
4574              
4575             Operations return query objects, not arrays. Data is processed on-demand
4576             when terminal operations (C<ToArray>, C<Count>, etc.) are called.
4577              
4578             =item * B
4579              
4580             All query operations return new query objects, enabling fluent syntax:
4581              
4582             $query->Where(...)->Select(...)->OrderBy(...)->ToArray()
4583              
4584             =item * B
4585              
4586             Query operations never modify the source data. They create new lazy
4587             iterators.
4588              
4589             =item * B
4590              
4591             We follow LINQ's method names and semantics, but use Perl idioms for
4592             implementation (closures for iterators, hash refs for records).
4593              
4594             =item * B
4595              
4596             This module has zero non-core dependencies. It works with only the Perl
4597             core that has existed since 1999. Even C<warnings> is optional (stubbed
4598             for Perl E<lt> 5.6). This ensures installation succeeds on minimal Perl
4599             installations, avoids dependency chain vulnerabilities, and provides
4600             permanence - the code will work decades into the future.
4601              
4602             =back
4603              
4604             =head1 LIMITATIONS AND KNOWN ISSUES
4605              
4606             =head2 Current Limitations
4607              
4608             =over 4
4609              
4610             =item * B
4611              
4612             Query objects can only be consumed once. The iterator is exhausted
4613             after terminal operations.
4614              
4615             Workaround: Create new query object or save ToArray() result.
4616              
4617             =item * B
4618              
4619             Due to iterator-based design, undef cannot be distinguished from end-of-sequence.
4620             Sequences containing undef values may not work correctly with all operations.
4621              
4622             This is not a practical limitation for LTSV data (which uses hash references),
4623             but affects operations on plain arrays containing undef.
4624              
4625             # Works fine (LTSV data - hash references)
4626             LTSV::LINQ->FromLTSV("file.ltsv")->Contains({status => '200'})
4627              
4628             # Limitation (plain array with undef)
4629             LTSV::LINQ->From([1, undef, 3])->Contains(undef) # May not work
4630              
4631             =item * B
4632              
4633             All operations execute sequentially in a single thread.
4634              
4635             =item * B
4636              
4637             All filtering requires full scan. No index optimization.
4638              
4639             =item * B
4640              
4641             Distinct with custom comparer uses stringified keys. May not work
4642             correctly for complex objects.
4643              
4644             =item * B
4645              
4646             Because the iterator protocol uses C<undef> to signal end-of-sequence,
4647             C<DefaultIfEmpty(undef)> cannot reliably deliver its C<undef> default
4648             to downstream operations.
4649              
4650             # Works correctly (non-undef default)
4651             LTSV::LINQ->From([])->DefaultIfEmpty(0)->ToArray() # (0)
4652             LTSV::LINQ->From([])->DefaultIfEmpty({})->ToArray() # ({})
4653              
4654             # Does NOT work (undef default is indistinguishable from EOF)
4655             LTSV::LINQ->From([])->DefaultIfEmpty(undef)->ToArray() # () - empty!
4656              
4657             Workaround: Use a sentinel value such as C<0>, C<''>, or C<{}> instead
4658             of C<undef>, and treat it as "no element" after the fact.
4659              
4660             =back
4661              
4662             =head2 Not Implemented
4663              
4664             The following LINQ methods from the .NET standard library are intentionally
4665             not implemented in LTSV::LINQ. This section explains the design rationale
4666             for each omission.
4667              
4668             =head3 Parallel LINQ (PLINQ) Methods
4669              
4670             The following methods belong to B<Parallel LINQ (PLINQ)>, the .NET
4671             parallel-execution extension to LINQ introduced in .NET 4.0. They exist
4672             to distribute query execution across multiple CPU cores using the .NET
4673             Thread Pool and Task Parallel Library.
4674              
4675             Perl does not have native shared-memory multithreading that maps onto
4676             this execution model. Perl threads (C<threads>) copy the interpreter
4677             state and communicate through shared variables, making them unsuitable
4678             for the fine-grained, automatic work-stealing parallelism that PLINQ
4679             provides. LTSV::LINQ's iterator-based design assumes a single sequential
4680             execution context; introducing PLINQ semantics would require a completely
4681             different architecture and would add heavy dependencies.
4682              
4683             Furthermore, the primary use case for LTSV::LINQ -- parsing and querying
4684             LTSV log files -- is typically I/O-bound rather than CPU-bound.
4685             Parallelizing I/O over a single file provides little benefit and
4686             considerable complexity.
4687              
4688             For these reasons, the entire PLINQ surface is omitted:
4689              
4690             =over 4
4691              
4692             =item * B<AsParallel>
4693              
4694             Entry point for PLINQ. Converts an C<IEnumerableE<lt>TE<gt>> into a
4695             C<ParallelQueryE<lt>TE<gt>> that the .NET runtime executes in parallel using
4696             multiple threads. Not applicable: Perl lacks the runtime infrastructure.
4697              
4698             =item * B<AsSequential>
4699              
4700             Converts a C<ParallelQueryE<lt>TE<gt>> back to a sequential C<IEnumerableE<lt>TE<gt>>,
4701             forcing subsequent operators to run on a single thread. Since
4702             C<AsParallel> is not implemented, C<AsSequential> has no counterpart
4703             to convert from.
4704              
4705             =item * B<AsOrdered>
4706              
4707             Instructs PLINQ to preserve the source order in the output even during
4708             parallel execution. This is a hint to the PLINQ scheduler; it does not
4709             exist outside of PLINQ. Not applicable.
4710              
4711             =item * B<AsUnordered>
4712              
4713             Instructs PLINQ that output order does not need to match source order,
4714             potentially allowing more efficient parallel execution. Not applicable.
4715              
4716             =item * B<ForAll>
4717              
4718             PLINQ terminal operator that applies an action to each element in
4719             parallel, without collecting results. It is the parallel equivalent of
4720             C<foreach>. LTSV::LINQ provides C<ForEach> for sequential iteration.
4721             A parallel C<ForAll> is not applicable.
4722              
4723             =item * B<WithCancellation>
4724              
4725             Attaches a .NET C<CancellationToken> to a C<ParallelQueryE<lt>TE<gt>>, allowing
4726             cooperative cancellation of a running parallel query. Cancellation tokens
4727             are a .NET threading primitive. Not applicable.
4728              
4729             =item * B<WithDegreeOfParallelism>
4730              
4731             Sets the maximum number of concurrent tasks that PLINQ may use. A
4732             tuning knob for the PLINQ scheduler. Not applicable.
4733              
4734             =item * B<WithExecutionMode>
4735              
4736             Controls whether PLINQ may choose sequential execution for efficiency
4737             (C<Default>) or is forced to parallelize (C<ForceParallelism>). Not
4738             applicable.
4739              
4740             =item * B<WithMergeOptions>
4741              
4742             Controls how PLINQ merges results from parallel partitions back into the
4743             output stream (buffered, auto-buffered, or not-buffered). Not applicable.
4744              
4745             =back
4746              
4747             =head3 .NET Type System Methods
4748              
4749             The following methods are specific to .NET's static type system. They
4750             exist to work with .NET generics and interface hierarchies, which have
4751             no Perl equivalent.
4752              
4753             =over 4
4754              
4755             =item * B<Cast>
4756              
4757             Casts each element of a non-generic C<IEnumerable> to a specified type
4758             C<T>, returning C<< IEnumerable<T> >>. In .NET, C<< Cast<T> >> is needed when
4759             working with legacy APIs that return C<IEnumerable> (without a type
4760             parameter) and you need to treat the elements as a specific type.
4761              
4762             Perl is dynamically typed. Every Perl value already holds type
4763             information at runtime (scalar, reference, blessed object), and Perl
4764             does not have a concept of a "non-generic enumerable" that needs to be
4765             explicitly cast before it can be queried. There is no meaningful
4766             operation to implement.
4767              
4768             =item * B<OfType>
4769              
4770             Filters elements of a non-generic C<IEnumerable>, returning only those
4771             that can be successfully cast to a specified type C<T>. Like C<Cast>,
4772             it exists to bridge generic and non-generic .NET APIs.
4773              
4774             In LTSV::LINQ, all records from C<FromLTSV> are hash references.
4775             Records from C<From> are whatever the caller puts in the array.
4776             Perl's C<ref()>, C<isa()>, or a C<Where> predicate can
4777             perform any type-based filtering the caller needs. A dedicated
4778             C<OfType> adds no expressiveness.
4779              
4780             # Perl equivalent of OfType for blessed objects of class "Foo":
4781             $query->Where(sub { ref($_[0]) && $_[0]->isa('Foo') })
4782              
4783             =back
4784              
4785             =head3 64-bit and Large-Count Methods
4786              
4787             =over 4
4788              
4789             =item * B<LongCount>
4790              
4791             Returns the number of elements as a 64-bit integer (C<long> in .NET).
4792             On 32-bit .NET platforms, a sequence can theoretically contain more than
4793             C<2**31 - 1> (~2 billion) elements, which would overflow C<int>; hence
4794             the need for C<LongCount>.
4795              
4796             In Perl, integers are represented as native signed integers or
4797             floating-point doubles (C<NV>). On 64-bit Perl (which is universal in practice
4798             today), the native integer type is 64 bits, so C<Count> already handles
4799             any realistic sequence length. On 32-bit Perl, the floating-point C<NV>
4800             provides 53 bits of integer precision (~9 quadrillion), far exceeding
4801             any in-memory sequence. There is no semantic gap between C<Count> and
4802             C<LongCount> in Perl.
4803              
4804             =back
4805              
4806             =head3 IEnumerable Conversion Method
4807              
4808             =over 4
4809              
4810             =item * B<AsEnumerable>
4811              
4812             In .NET, C<< AsEnumerable<T> >> is used to force evaluation of a query as
4813             C<< IEnumerable<T> >> rather than, for example, C<< IQueryable<T> >> (which
4814             might be translated to SQL). It is a type-cast at the interface level,
4815             not a data transformation.
4816              
4817             LTSV::LINQ has only one query type: C<LTSV::LINQ>. There is no
4818             C<IQueryable> counterpart that would benefit from being downgraded to
4819             C<IEnumerable>. The method has no meaningful semantics to implement.
4820              
4821             =back
4822              
4823             =head1 BUGS
4824              
4825             Please report any bugs or feature requests to:
4826              
4827             =over 4
4828              
4829             =item * Email: C<ina@cpan.org>
4830              
4831             =back
4832              
4833             =head1 SUPPORT
4834              
4835             =head2 Documentation
4836              
4837             Full documentation is available via:
4838              
4839             perldoc LTSV::LINQ
4840              
4841             =head2 CPAN
4842              
4843             https://metacpan.org/pod/LTSV::LINQ
4844              
4845             =head1 SEE ALSO
4846              
4847             =over 4
4848              
4849             =item * LTSV specification
4850              
4851             http://ltsv.org/
4852              
4853             =item * Microsoft LINQ documentation
4854              
4855             https://learn.microsoft.com/en-us/dotnet/csharp/linq/
4856              
4857             =back
4858              
4859             =head1 AUTHOR
4860              
4861             INABA Hitoshi E<lt>ina@cpan.orgE<gt>
4862              
4863             =head2 Contributors
4864              
4865             Contributions are welcome! See file: CONTRIBUTING.
4866              
4867             =head1 ACKNOWLEDGEMENTS
4868              
4869             =head2 LINQ Technology
4870              
4871             This module is inspired by LINQ (Language Integrated Query), which was
4872             developed by Microsoft Corporation for the .NET Framework.
4873              
4874             LINQ(R) is a registered trademark of Microsoft Corporation.
4875              
4876             We are grateful to Microsoft for pioneering the LINQ technology and
4877             making it a widely recognized programming pattern. The elegance and
4878             power of LINQ has influenced query interfaces across many programming
4879             languages, and this module brings that same capability to LTSV data
4880             processing in Perl.
4881              
4882             This module is not affiliated with, endorsed by, or sponsored by
4883             Microsoft Corporation.
4884              
4885             =head2 References
4886              
4887             This module was inspired by:
4888              
4889             =over 4
4890              
4891             =item * Microsoft LINQ (Language Integrated Query)
4892              
4893             L<https://learn.microsoft.com/en-us/dotnet/csharp/linq/>
4894              
4895             =item * LTSV specification
4896              
4897             L<http://ltsv.org/>
4898              
4899             =back
4900              
4901             =head1 COPYRIGHT AND LICENSE
4902              
4903             Copyright (c) 2026 INABA Hitoshi
4904              
4905             This library is free software; you can redistribute it and/or modify
4906             it under the same terms as Perl itself.
4907              
4908             =head2 License Details
4909              
4910             This module is released under the same license as Perl itself:
4911              
4912             =over 4
4913              
4914             =item * Artistic License 1.0
4915              
4916             L<https://dev.perl.org/licenses/artistic.html>
4917              
4918             =item * GNU General Public License version 1 or later
4919              
4920             L<https://www.gnu.org/licenses/old-licenses/gpl-1.0.html>
4921              
4922             =back
4923              
4924             You may choose either license.
4925              
4926             =head1 DISCLAIMER OF WARRANTY
4927              
4928             BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
4929             FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT
4930             WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER
4931             PARTIES PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND,
4932             EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
4933             WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
4934             THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS
4935             WITH YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF
4936             ALL NECESSARY SERVICING, REPAIR, OR CORRECTION.
4937              
4938             IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
4939             WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
4940             REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENSE, BE LIABLE
4941             TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, OR
4942             CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
4943             SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
4944             RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
4945             FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
4946             SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
4947             SUCH DAMAGES.
4948              
4949             =cut