|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Parse::ExuberantCTags::Merge;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
47439
 | 
 use 5.006001;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
63
 | 
    | 
| 
4
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
11
 | 
 use strict;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
64
 | 
    | 
| 
5
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
9
 | 
 use warnings;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
115
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '1.01';  | 
| 
8
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
10
 | 
 use constant DEBUG => 0;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
174
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
10
 | 
 use constant SMALL_DEFAULT       => 2**22;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
84
 | 
    | 
| 
11
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
10
 | 
 use constant SUPER_SMALL_DEFAULT => 2**17;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
87
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
13
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
11
 | 
 use constant FILENAME            => 0;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
90
 | 
    | 
| 
14
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
7
 | 
 use constant SORTED              => 1;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
152
 | 
    | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
16
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
8
 | 
 use constant MRG_LINE            => 0;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
92
 | 
    | 
| 
17
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
20
 | 
 use constant MRG_FH              => 1;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
133
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use Class::XSAccessor  | 
| 
20
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
   constructor => 'new',  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   accessors => {  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     small_size_threshold       => 'small_size_threshold',  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     super_small_size_threshold => 'super_small_size_threshold',  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     tempdir                    => 'tempdir',  | 
| 
25
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
1669
 | 
   };  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5819
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
27
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
568
 | 
 use Carp ();  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
    | 
| 
28
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
2421
 | 
 use File::Temp ();  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
54941
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
    | 
| 
29
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
37
 | 
 use File::Spec ();  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
    | 
| 
30
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
2314
 | 
 use Parse::ExuberantCTags::Merge::SimpleScopeGuard;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3785
 | 
    | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub add_file {  | 
| 
33
 | 
21
 | 
 
 | 
 
 | 
  
21
  
 | 
  
1
  
 | 
14360
 | 
   my $self = shift;  | 
| 
34
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
   my $file = shift;  | 
| 
35
 | 
21
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
55
 | 
   Carp::croak("Need file argument")  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if not defined $file;  | 
| 
37
 | 
21
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
319
 | 
   Carp::croak("Input file '$file' does not exist")  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if not -f $file;  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
40
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
91
 | 
   my %opts = @_;  | 
| 
41
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
   my $sorted = $opts{sorted};  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
21
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
93
 | 
   $self->{files} ||= [];  | 
| 
44
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
   push @{$self->{files}}, [$file, $sorted];  | 
| 
 
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
    | 
| 
45
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
103
 | 
   return();  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub write {  | 
| 
50
 | 
12
 | 
 
 | 
 
 | 
  
12
  
 | 
  
0
  
 | 
696
 | 
   my $self = shift;  | 
| 
51
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
   my $outfile = shift;  | 
| 
52
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
29
 | 
   Carp::croak("Need output file argument")  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if not defined $outfile;  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # determine temporary directory  | 
| 
56
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
   my $tmpdir = $self->tempdir;  | 
| 
57
 | 
12
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
39
 | 
   if (not defined $tmpdir or not -d $tmpdir) {  | 
| 
58
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
188
 | 
     $tmpdir = File::Spec->tmpdir();  | 
| 
59
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     $self->tempdir($tmpdir);  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
   my $total_size = 0;  | 
| 
63
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
   my $sorted_size   = 0;  | 
| 
64
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
   my $unsorted_size = 0;  | 
| 
65
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
   my @sorted;  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my @unsorted;  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
68
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
   my $files = $self->{files};  | 
| 
69
 | 
12
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
61
 | 
   Carp::croak("Need input files")  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if not defined $files or @$files == 0;  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # only one sorted input file => copy  | 
| 
73
 | 
12
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
53
 | 
   if (@$files == 1 and $files->[0][SORTED]) {  | 
| 
74
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     warn "Only one sorted input file => copying" if DEBUG;  | 
| 
75
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
     my $infile = $files->[0][FILENAME];  | 
| 
76
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     open my $fh, '<', $infile  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       or die "Opening input file '$infile' for reading failed: $!";  | 
| 
78
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
65
 | 
     open my $ofh, '>', $outfile  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       or die "Opening output file '$outfile' for writing failed: $!";  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
81
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     print $ofh "!_TAG_FILE_SORTED	1	  /0=unsorted, 1=sorted/\n";  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
83
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     local $/ = \1000000;  | 
| 
84
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
60
 | 
     while (<$fh>) {  | 
| 
85
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
       print $ofh $_;  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
87
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     close $fh;  | 
| 
88
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
92
 | 
     close $ofh;  | 
| 
89
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     return(1);  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # calculate the file sizes  | 
| 
93
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
   foreach my $file (@$files) {  | 
| 
94
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     my $fname = $file->[FILENAME];  | 
| 
95
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
228
 | 
     my $s = -s $fname;  | 
| 
96
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     $total_size += $s;  | 
| 
97
 | 
20
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
46
 | 
     if ($file->[SORTED]) {  | 
| 
98
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
       $sorted_size += $s;  | 
| 
99
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
       push @sorted, $fname;  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
102
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
       $unsorted_size += $s;  | 
| 
103
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
       push @unsorted, $fname;  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # get size thresholds  | 
| 
108
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
   my $threshold_super_small = $self->super_small_size_threshold();  | 
| 
109
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
27
 | 
   $threshold_super_small = SUPER_SMALL_DEFAULT if not defined $threshold_super_small;  | 
| 
110
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
   my $threshold_small = $self->small_size_threshold();  | 
| 
111
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
26
 | 
   $threshold_small = SMALL_DEFAULT if not defined $threshold_small;  | 
| 
112
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
   warn "Thresholds: tiny=$threshold_super_small small=$threshold_small" if DEBUG > 1;  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # storage of temporary files and guard to clean them up on scope exit  | 
| 
115
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
   my @tmpfiles;  | 
| 
116
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
128
 | 
   my $guard = Parse::ExuberantCTags::Merge::SimpleScopeGuard->new(files => \@tmpfiles);  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # select sort strategy  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # everything small, sort all in memory regardless  | 
| 
121
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
26
 | 
   if ($total_size < $threshold_super_small) {  | 
| 
122
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     warn "Total size < super-small-threshold => memory sort" if DEBUG;  | 
| 
123
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
305
 | 
     open my $ofh, '>', $outfile  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       or die "Could not open output file '$outfile' for writing: $!";  | 
| 
125
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     return $self->_memory_sort($ofh, @sorted, @unsorted);  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # This must handle the unsorted files  | 
| 
129
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
   if (@unsorted) {  | 
| 
130
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     warn "There are unsorted files..." if DEBUG;  | 
| 
131
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     if ($unsorted_size < $threshold_small) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       # unsorted files are small and will be sorted in memory  | 
| 
133
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
       warn "Unsorted files small => memory sort" if DEBUG;  | 
| 
134
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
       my ($tfh, $tmpfile);  | 
| 
135
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
       if (@sorted) { # if there are sorted files (must be largish), use a tempfile  | 
| 
136
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         ($tfh, $tmpfile) = File::Temp::tempfile(  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           "ctagsSortXXXXXX", UNLINK => 0, DIR => $tmpdir  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         );  | 
| 
139
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
365
 | 
         push @tmpfiles, $tmpfile;  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else { # unsorted only => use real output file  | 
| 
142
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
50
 | 
         open $tfh, '>', $outfile  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           or die "Could not open output file '$outfile' for writing: $!";  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
145
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
       $self->_memory_sort($tfh, @unsorted);  | 
| 
146
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
71
 | 
       close $tfh;  | 
| 
147
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
       if (not @sorted) { # only unsorted data => done!  | 
| 
148
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         return 1;  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
150
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
       push @sorted, $tmpfile;  | 
| 
151
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
       $sorted_size += -s $tmpfile;  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ($sorted_size < $threshold_small) {  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       # handle everything with Sort::External  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       # don't bother with merge-sorting the small sorted files  | 
| 
156
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
       warn "Sorted files small or not existant => external sort for all" if DEBUG;  | 
| 
157
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
46
 | 
       open my $ofh, '>', $outfile  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         or die "Could not open output file '$outfile' for writing: $!";  | 
| 
159
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
75
 | 
       return $self->_external_sort($ofh, @unsorted, @sorted);  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       # both are large. First do an external sort on the unsorted files,  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       # then do a merge sort  | 
| 
164
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
       warn "potentially large files => external sort for unsorted files" if DEBUG;  | 
| 
165
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
       my ($tfh, $tmpfile) = File::Temp::tempfile(  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         "ctagsSortXXXXXX", UNLINK => 0, DIR => $tmpdir  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       );  | 
| 
168
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
383
 | 
       push @tmpfiles, $tmpfile;  | 
| 
169
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
       $self->_external_sort($tfh, @unsorted);  | 
| 
170
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
300
 | 
       close $tfh;  | 
| 
171
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
       push @sorted, $tmpfile;  | 
| 
172
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
       $sorted_size += -s $tmpfile;  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } # end if there is unsorted data  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # at this point, there should be only sorted files  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # left => merge sort  | 
| 
178
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
   warn "running merge sort" if DEBUG;  | 
| 
179
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
343
 | 
   open my $ofh, '>', $outfile  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     or die "Could not open output file '$outfile' for writing: $!";  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
182
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
   return $self->_merge_sort($ofh, @sorted);  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _merge_sort {  | 
| 
187
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
6
 | 
   warn "running _merge_sort" if DEBUG;  | 
| 
188
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
   my $self = shift;  | 
| 
189
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   my $ofh = shift;  | 
| 
190
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
   my @infiles = @_;  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
192
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
   print $ofh "!_TAG_FILE_SORTED	1	  /0=unsorted, 1=sorted/\n";  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
194
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
   local $/ = "\n";  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # get the first lines and create a list of simple structs for sorting  | 
| 
197
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
242
 | 
   my @files =  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     map {  | 
| 
199
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
       open my $fh, '<', $_ or die "Can't open input file '$_' for reading: $!";  | 
| 
200
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
138
 | 
       my $first = <$fh>;  | 
| 
201
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
28
 | 
       $first = <$fh> if $first =~ /^!_TAG_FILE_SORTED\t/; # skip magic line  | 
| 
202
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
       [$first, $fh]  | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     @infiles;  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # initial sort of the first lines  | 
| 
207
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
   @files = sort {$a->[MRG_LINE] cmp $b->[MRG_LINE]} @files;  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # keep sorting until all sources run out  | 
| 
210
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
   while (@files) {  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # first file in the list always has the next "lowest" line  | 
| 
212
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     my $next = $files[0];  | 
| 
213
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     print $ofh $next->[MRG_LINE];  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # fetch a new line for this file handle  | 
| 
216
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     my $fh = $next->[MRG_FH];  | 
| 
217
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
     $next->[MRG_LINE] = <$fh>;  | 
| 
218
 | 
24
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     if (not defined $next->[MRG_LINE]) {  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       # eof, lose the file  | 
| 
220
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
       splice(@files, 0, 1);  | 
| 
221
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
141
 | 
       next;  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # one pass of bubble sort to propagate the new line to its place  | 
| 
225
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
     for (my $i = 1; $i < @files; ++$i) {  | 
| 
226
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
20
 | 
       if (($files[$i-1][MRG_LINE] cmp $files[$i][MRG_LINE]) == 1) {  | 
| 
227
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my $tmp = $files[$i-1];  | 
| 
228
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $files[$i-1] = $files[$i];  | 
| 
229
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $files[$i] = $tmp;  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
231
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
         last;  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } # end while there are files  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
236
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
278
 | 
   return(1);  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _external_sort {  | 
| 
242
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
4
 | 
   warn "running _external_sort" if DEBUG;  | 
| 
243
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
   my $self = shift;  | 
| 
244
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   my $ofh = shift;  | 
| 
245
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
   my @infiles = @_;  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
247
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
   print $ofh "!_TAG_FILE_SORTED	1	  /0=unsorted, 1=sorted/\n";  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
249
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
921
 | 
   require Sort::External;  | 
| 
250
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3308
 | 
   my $exsort = Sort::External->new(  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     mem_threshold => 1024**2 * 32, # todo: configuration  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   );  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
254
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1040
 | 
   local $/ = "\n";  | 
| 
255
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
   foreach my $infile (@infiles) {  | 
| 
256
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
96
 | 
     open my $fh, '<', $infile  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       or die "Could not open input file '$infile' for reading: $!";  | 
| 
258
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
51
 | 
     my $first_line = <$fh>;  | 
| 
259
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
24
 | 
     $exsort->feed($first_line) if $first_line !~ /^!_TAG_FILE_SORTED/;  | 
| 
260
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     while (<$fh>) {  | 
| 
261
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
       $exsort->feed($_);  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
263
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     close $fh;  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
265
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
   $exsort->finish();  | 
| 
266
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
   while (defined($_ = $exsort->fetch)) {  | 
| 
267
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
     print $ofh $_;  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
270
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
   return(1);  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _memory_sort {  | 
| 
275
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
8
 | 
   warn "running _memory_sort" if DEBUG;  | 
| 
276
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
   my $self = shift;  | 
| 
277
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
   my $ofh = shift;  | 
| 
278
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
   my @infiles = @_;  | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
280
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
80
 | 
   local $/ = "\n";  | 
| 
281
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
   my @records;  | 
| 
282
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
   foreach my $infile (@infiles) {  | 
| 
283
 | 
11
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
353
 | 
     open my $fh, '<', $infile  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       or die "Could not open input file '$infile' for reading: $!";  | 
| 
285
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
144
 | 
     my $first_line = <$fh>;  | 
| 
286
 | 
11
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
79
 | 
     push @records, $first_line if $first_line !~ /^!_TAG_FILE_SORTED/;  | 
| 
287
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
88
 | 
     push @records, <$fh>;  | 
| 
288
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
148
 | 
     close $fh;  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
290
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
   @records = sort @records; # check fast inplace sort  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
292
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
   print $ofh "!_TAG_FILE_SORTED	1	  /0=unsorted, 1=sorted/\n";  | 
| 
293
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
   print $ofh @records;  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
295
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
320
 | 
   return(1);  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |