line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# Number::Range::Regex::Util |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# Copyright 2012 Brian Szymanski. All rights reserved. This module is |
4
|
|
|
|
|
|
|
# free software; you can redistribute it and/or modify it under the same |
5
|
|
|
|
|
|
|
# terms as Perl itself. |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
package Number::Range::Regex::Util; |
8
|
|
|
|
|
|
|
|
9
|
15
|
|
|
15
|
|
45654
|
use strict; |
|
15
|
|
|
|
|
29
|
|
|
15
|
|
|
|
|
678
|
|
10
|
15
|
|
|
15
|
|
4052
|
use vars qw ( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION ); |
|
15
|
|
|
|
|
33
|
|
|
15
|
|
|
|
|
2429
|
|
11
|
|
|
|
|
|
|
eval { require warnings; }; #it's ok if we can't load warnings |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
require Exporter; |
14
|
15
|
|
|
15
|
|
96
|
use base 'Exporter'; |
|
15
|
|
|
|
|
25
|
|
|
15
|
|
|
|
|
21914
|
|
15
|
|
|
|
|
|
|
@ISA = qw( Exporter ); |
16
|
|
|
|
|
|
|
@EXPORT = qw ( option_mangler has_regex_overloading |
17
|
|
|
|
|
|
|
multi_union empty_set |
18
|
|
|
|
|
|
|
base_chr base_ord base_digits base_next base_prev |
19
|
|
|
|
|
|
|
_calculate_digit_range ); |
20
|
|
|
|
|
|
|
@EXPORT_OK = qw ( _order_by_min ) ; |
21
|
|
|
|
|
|
|
%EXPORT_TAGS = ( all => [ @EXPORT, @EXPORT_OK ] ); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
$VERSION = '0.32'; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
require overload; |
26
|
|
|
|
|
|
|
sub has_regex_overloading { |
27
|
|
|
|
|
|
|
# http://www.gossamer-threads.com/lists/perl/porters/244314 |
28
|
|
|
|
|
|
|
# http://search.cpan.org/~jesse/perl-5.12.0/pod/perl5120delta.pod#qr_overload$ |
29
|
|
|
|
|
|
|
# 1.08, 1.09 are too low. 1.10: works |
30
|
|
|
|
|
|
|
# http://search.cpan.org/~jesse/perl-5.11.1/lib/overload.pm |
31
|
11
|
|
33
|
11
|
0
|
1779
|
return defined $overload::VERSION && $overload::VERSION > '1.09'; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub empty_set { |
35
|
585
|
|
|
585
|
0
|
1486
|
shift; |
36
|
585
|
|
|
|
|
3028
|
return Number::Range::Regex::CompoundRange->new( @_ ); |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub multi_union { |
40
|
568
|
100
|
|
568
|
0
|
2151
|
my $opts = option_mangler( ref $_[-1] eq 'HASH' ? pop : undef ); |
41
|
568
|
|
|
|
|
1894
|
my $warn_overlap = delete $opts->{warn_overlap}; |
42
|
568
|
|
|
|
|
1433
|
my @ranges = @_; |
43
|
568
|
|
|
|
|
1339
|
my $self = empty_set( $opts ); |
44
|
568
|
|
|
|
|
4100
|
$self = $self->union( $_, { warn_overlap => $warn_overlap } ) for @ranges; |
45
|
|
|
|
|
|
|
# $self->{opts} = $opts; |
46
|
568
|
|
|
|
|
7728
|
return $self; |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# local options can override defaults |
50
|
|
|
|
|
|
|
sub option_mangler { |
51
|
13077
|
|
|
13077
|
0
|
52093
|
my (@passed_opts) = grep defined, @_; |
52
|
|
|
|
|
|
|
# next line is redundant but an optimization |
53
|
13077
|
100
|
|
|
|
36211
|
return $Number::Range::Regex::Range::default_opts unless @passed_opts; |
54
|
10093
|
|
|
|
|
17304
|
unshift @passed_opts, $Number::Range::Regex::Range::default_opts; |
55
|
10093
|
|
|
|
|
12170
|
my $opts; |
56
|
10093
|
|
|
|
|
17034
|
foreach my $opts_ref ( @passed_opts ) { |
57
|
26053
|
100
|
|
|
|
62459
|
die "too many arguments from ".join(":", caller())." $opts_ref" unless ref $opts_ref eq 'HASH'; |
58
|
|
|
|
|
|
|
# make a copy of options hashref, add overrides |
59
|
26052
|
|
|
|
|
98214
|
while (my ($key, $val) = each %$opts_ref) { |
60
|
301132
|
|
|
|
|
1103427
|
$opts->{$key} = $val; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
} |
63
|
10092
|
|
|
|
|
38665
|
return $opts; |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
sub _order_by_min { |
67
|
901
|
|
|
901
|
|
1362
|
my ($a, $b) = @_; |
68
|
901
|
100
|
|
|
|
5405
|
return $a->{min} < $b->{min} ? ($a, $b) : ($b, $a); |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
sub base_digits { |
72
|
2673
|
|
|
2673
|
0
|
4114
|
my ($base) = @_; |
73
|
2673
|
|
|
|
|
7226
|
return join '', map { $Number::Range::Regex::Range::STANDARD_DIGIT_ORDER[$_] } (0..$base-1); |
|
26710
|
|
|
|
|
57649
|
|
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
sub base_next { |
77
|
317
|
|
|
317
|
0
|
8650
|
my ($c, $base_digits) = @_; |
78
|
317
|
|
|
|
|
787
|
my $ord = base_ord($c, $base_digits); |
79
|
317
|
100
|
|
|
|
1100
|
return if $ord+1 == length $base_digits; |
80
|
250
|
|
|
|
|
512
|
return base_chr($ord+1, $base_digits); |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
sub base_prev { |
84
|
317
|
|
|
317
|
0
|
24193
|
my ($c, $base_digits) = @_; |
85
|
317
|
|
|
|
|
688
|
my $ord = base_ord($c, $base_digits); |
86
|
317
|
100
|
|
|
|
2476
|
return if $ord == 0; |
87
|
240
|
|
|
|
|
507
|
return base_chr($ord-1, $base_digits); |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
#TODO: memoize base_ord, base_chr for performance? |
91
|
|
|
|
|
|
|
sub base_ord { |
92
|
6562
|
|
|
6562
|
0
|
8996
|
my ($c, $base_digits) = @_; |
93
|
6562
|
50
|
|
|
|
24188
|
return -1 if $c eq -1; |
94
|
6562
|
50
|
|
|
|
13607
|
return 1+length $base_digits if length $c > 1; |
95
|
6562
|
|
|
|
|
10883
|
my $ord = index $base_digits, $c; |
96
|
6562
|
50
|
|
|
|
11370
|
die "$c not found in $base_digits" if $ord == -1; |
97
|
6562
|
|
|
|
|
16558
|
return $ord; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
sub base_chr { |
101
|
25197
|
|
|
25197
|
0
|
30711
|
my ($n, $base_digits) = @_; |
102
|
25197
|
|
|
|
|
32253
|
my $chr = substr($base_digits, $n, 1); |
103
|
25197
|
50
|
|
|
|
47589
|
die "offset out of range: $n > ".length($base_digits) if !length $chr; |
104
|
25197
|
|
|
|
|
101614
|
return $chr; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
#TODO: should _calculate_digit_range() be in Util? |
108
|
|
|
|
|
|
|
# calculate the tersest possible representation of a digit range |
109
|
|
|
|
|
|
|
# '1' -> 1 |
110
|
|
|
|
|
|
|
# '12' -> [12] |
111
|
|
|
|
|
|
|
# '123' -> [1-3] #preferred stylistically to [123] |
112
|
|
|
|
|
|
|
# '1234' -> [1-4] |
113
|
|
|
|
|
|
|
# '0123456789' -> \d |
114
|
|
|
|
|
|
|
# '123456789abc' -> [1-9a-c] |
115
|
|
|
|
|
|
|
sub _calculate_digit_range { |
116
|
3108
|
|
|
3108
|
|
5449
|
my ($digit_min, $digit_max, $base_digits) = @_; |
117
|
3108
|
100
|
66
|
|
|
18258
|
return unless defined $digit_min && defined $digit_max; |
118
|
2964
|
|
|
|
|
7245
|
my $ord_min = base_ord( $digit_min, $base_digits ); |
119
|
2964
|
|
|
|
|
5790
|
my $ord_max = base_ord( $digit_max, $base_digits ); |
120
|
2964
|
100
|
|
|
|
5797
|
return if $ord_min > $ord_max; |
121
|
2846
|
100
|
|
|
|
5237
|
return $digit_min if $ord_min == $ord_max; |
122
|
2742
|
|
|
|
|
3225
|
my @range_chars; |
123
|
2742
|
|
|
|
|
6234
|
for(my $n=$ord_min; $n <= $ord_max; ++$n) { |
124
|
24707
|
|
|
|
|
60265
|
push @range_chars, base_chr( $n, $base_digits ); |
125
|
|
|
|
|
|
|
} |
126
|
2742
|
|
|
|
|
3980
|
my $last = $range_chars[0]; |
127
|
2742
|
|
|
|
|
3498
|
my $n = 1; |
128
|
2742
|
|
|
|
|
6663
|
while($n < @range_chars) { |
129
|
21965
|
|
|
|
|
29383
|
my $this = $range_chars[$n]; |
130
|
21965
|
100
|
|
|
|
39392
|
if(1 == ord($this)-ord($last)) { |
131
|
21962
|
|
|
|
|
32440
|
$range_chars[$n-1] .= $this; |
132
|
21962
|
|
|
|
|
27535
|
splice @range_chars, $n, 1; |
133
|
|
|
|
|
|
|
} else { |
134
|
3
|
|
|
|
|
4
|
$n++; |
135
|
|
|
|
|
|
|
} |
136
|
21965
|
|
|
|
|
79846
|
$last = $this; |
137
|
|
|
|
|
|
|
} |
138
|
2742
|
|
|
|
|
6528
|
foreach my $n (0..$#range_chars) { |
139
|
2745
|
|
|
|
|
3930
|
my $str = $range_chars[$n]; |
140
|
2745
|
|
|
|
|
3327
|
my $len = length $str; |
141
|
2745
|
50
|
|
|
|
5930
|
die "internal error" if $len == 0; |
142
|
2745
|
100
|
|
|
|
5020
|
next if $len == 1; # 'a' is as terse as possible |
143
|
2744
|
100
|
|
|
|
5665
|
next if $len == 2; # 'bc' is also as terse as possible |
144
|
|
|
|
|
|
|
# collapse e.g. 234567 into 2-7 |
145
|
2582
|
|
|
|
|
3977
|
my $first = substr($str, 0, 1); |
146
|
2582
|
|
|
|
|
3557
|
my $last = substr($str, -1, 1); |
147
|
2582
|
100
|
100
|
|
|
16697
|
$range_chars[$n] = ($first eq '0' && $last eq '9') ? '\d' : "$first-$last"; |
148
|
|
|
|
|
|
|
} |
149
|
2742
|
100
|
|
|
|
7126
|
if(1==@range_chars) { |
150
|
2739
|
|
|
|
|
3458
|
my $ret = $range_chars[0]; |
151
|
|
|
|
|
|
|
# we don't need brackets if all we have is \d or a single digit |
152
|
2739
|
100
|
66
|
|
|
19044
|
return $ret if $ret eq '\d' || length($ret)==1; |
153
|
|
|
|
|
|
|
} |
154
|
514
|
|
|
|
|
2353
|
return join '', '[', @range_chars, ']'; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
1; |
158
|
|
|
|
|
|
|
|