line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Locale::TextDomain::OO::Extract::Base::RegexBasedExtractor; ## no critic (TidyCode)
|
2
|
|
|
|
|
|
|
|
3
|
7
|
|
|
7
|
|
4156
|
use strict;
|
|
7
|
|
|
|
|
22
|
|
|
7
|
|
|
|
|
245
|
|
4
|
7
|
|
|
7
|
|
44
|
use warnings;
|
|
7
|
|
|
|
|
17
|
|
|
7
|
|
|
|
|
208
|
|
5
|
7
|
|
|
7
|
|
40
|
use Carp qw(confess);
|
|
7
|
|
|
|
|
15
|
|
|
7
|
|
|
|
|
333
|
|
6
|
7
|
|
|
7
|
|
2857
|
use Clone qw(clone);
|
|
7
|
|
|
|
|
17161
|
|
|
7
|
|
|
|
|
393
|
|
7
|
7
|
|
|
7
|
|
673
|
use Data::Dumper ();
|
|
7
|
|
|
|
|
6599
|
|
|
7
|
|
|
|
|
124
|
|
8
|
7
|
|
|
7
|
|
38
|
use Moo;
|
|
7
|
|
|
|
|
18
|
|
|
7
|
|
|
|
|
75
|
|
9
|
7
|
|
|
7
|
|
5475
|
use MooX::StrictConstructor;
|
|
7
|
|
|
|
|
93322
|
|
|
7
|
|
|
|
|
36
|
|
10
|
7
|
|
|
7
|
|
161524
|
use MooX::Types::MooseLike::Base qw(ArrayRef CodeRef RegexpRef ScalarRef);
|
|
7
|
|
|
|
|
21
|
|
|
7
|
|
|
|
|
500
|
|
11
|
7
|
|
|
7
|
|
80
|
use namespace::autoclean;
|
|
7
|
|
|
|
|
22
|
|
|
7
|
|
|
|
|
69
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $VERSION = '2.007';
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
has content_ref => (
|
16
|
|
|
|
|
|
|
is => 'rw',
|
17
|
|
|
|
|
|
|
isa => ScalarRef,
|
18
|
|
|
|
|
|
|
);
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
has start_rule => (
|
21
|
|
|
|
|
|
|
is => 'rw',
|
22
|
|
|
|
|
|
|
isa => RegexpRef,
|
23
|
|
|
|
|
|
|
);
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
has rules => (
|
26
|
|
|
|
|
|
|
is => 'rw',
|
27
|
|
|
|
|
|
|
isa => ArrayRef,
|
28
|
|
|
|
|
|
|
);
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
has debug_code => (
|
31
|
|
|
|
|
|
|
is => 'rw',
|
32
|
|
|
|
|
|
|
isa => CodeRef,
|
33
|
|
|
|
|
|
|
clearer => 'clear_debug_code',
|
34
|
|
|
|
|
|
|
);
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
has stack => (
|
37
|
|
|
|
|
|
|
is => 'rw',
|
38
|
|
|
|
|
|
|
isa => ArrayRef,
|
39
|
|
|
|
|
|
|
);
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
sub _parse_pos {
|
42
|
28
|
|
|
28
|
|
64
|
my $self = shift;
|
43
|
|
|
|
|
|
|
|
44
|
28
|
|
|
|
|
672
|
my $regex = $self->start_rule;
|
45
|
28
|
|
|
|
|
626
|
my $content_ref = $self->content_ref;
|
46
|
28
|
50
|
|
|
|
174
|
defined ${$content_ref}
|
|
28
|
|
|
|
|
120
|
|
47
|
|
|
|
|
|
|
or return confess 'content_ref is a reference to undef';
|
48
|
28
|
|
|
|
|
73
|
my @stack;
|
49
|
28
|
|
|
|
|
59
|
while ( ${$content_ref} =~ m{ \G .*? ( $regex ) }xmsgc ) {
|
|
473
|
|
|
|
|
8772
|
|
50
|
|
|
|
|
|
|
push @stack, {
|
51
|
445
|
|
|
|
|
922
|
start_pos => pos( ${$content_ref} ) - length $1,
|
|
445
|
|
|
|
|
1743
|
|
52
|
|
|
|
|
|
|
};
|
53
|
|
|
|
|
|
|
}
|
54
|
28
|
|
|
|
|
686
|
$self->stack(\@stack);
|
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# debug if requested
|
57
|
28
|
50
|
|
|
|
2000
|
$self->debug_code
|
58
|
|
|
|
|
|
|
or return $self;
|
59
|
0
|
|
|
|
|
0
|
my $dump = Data::Dumper ## no critic (LongChainsOfMethodCalls)
|
60
|
|
|
|
|
|
|
->new([ $self->stack ], [ qw(stack) ])
|
61
|
|
|
|
|
|
|
->Indent(1)
|
62
|
|
|
|
|
|
|
->Quotekeys(0)
|
63
|
|
|
|
|
|
|
->Sortkeys(1)
|
64
|
|
|
|
|
|
|
->Useqq(1)
|
65
|
|
|
|
|
|
|
->Dump;
|
66
|
0
|
|
|
|
|
0
|
chomp $dump;
|
67
|
0
|
|
|
|
|
0
|
$self->debug_code->('stack start', $dump);
|
68
|
|
|
|
|
|
|
|
69
|
0
|
|
|
|
|
0
|
return $self;
|
70
|
|
|
|
|
|
|
}
|
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub _parse_rules { ## no critic (ExcessComplexity)
|
73
|
28
|
|
|
28
|
|
66
|
my $self = shift;
|
74
|
|
|
|
|
|
|
|
75
|
28
|
|
|
|
|
496
|
my $content_ref = $self->content_ref;
|
76
|
28
|
|
|
|
|
208
|
for my $stack_item ( @{ $self->stack } ) {
|
|
28
|
|
|
|
|
480
|
|
77
|
445
|
|
|
|
|
8224
|
my $rules = clone( $self->rules );
|
78
|
445
|
|
|
|
|
166019
|
my $pos = $stack_item->{start_pos};
|
79
|
445
|
|
|
|
|
1018
|
my $level = 0;
|
80
|
445
|
|
|
|
|
1155
|
my @level_matched = ( 1 );
|
81
|
445
|
|
|
|
|
812
|
my $has_matched = 0;
|
82
|
445
|
50
|
|
|
|
10625
|
$self->debug_code
|
83
|
|
|
|
|
|
|
and $self->debug_code->('rules start', "$level: Starting at pos $pos.");
|
84
|
445
|
|
|
|
|
3328
|
my (@parent_rules, @parent_pos, %level_and_of, @stack_result);
|
85
|
|
|
|
|
|
|
RULE: {
|
86
|
445
|
|
|
|
|
760
|
my $rule = shift @{$rules};
|
|
32164
|
|
|
|
|
51761
|
|
|
32164
|
|
|
|
|
70831
|
|
87
|
32164
|
100
|
|
|
|
74389
|
if (! $rule) {
|
88
|
2358
|
50
|
|
|
|
39128
|
$self->debug_code
|
89
|
|
|
|
|
|
|
and $self->debug_code->('rules last', "$level: No more rules found.");
|
90
|
2358
|
100
|
|
|
|
16450
|
if (@parent_rules) {
|
91
|
1913
|
|
|
|
|
3630
|
$rules = pop @parent_rules;
|
92
|
1913
|
|
|
|
|
3911
|
() = pop @parent_pos;
|
93
|
1913
|
50
|
|
|
|
31298
|
$self->debug_code
|
94
|
|
|
|
|
|
|
and $self->debug_code->('rules parent', "$level: Going back to parent.");
|
95
|
|
|
|
|
|
|
# delete the parent and match
|
96
|
1913
|
100
|
|
|
|
12199
|
if ( ! $has_matched ) {
|
97
|
|
|
|
|
|
|
LEVEL: ## no critic (DeepNests)
|
98
|
14
|
|
|
|
|
57
|
for my $parent_level ( reverse 0 .. ( $level - 1 ) ) {
|
99
|
14
|
100
|
|
|
|
57
|
if ( exists $level_and_of{$parent_level} ) { ## no critic (DeepNests)
|
100
|
1
|
|
|
|
|
3
|
$level_and_of{$parent_level} = 0;
|
101
|
1
|
|
|
|
|
3
|
last LEVEL;
|
102
|
|
|
|
|
|
|
}
|
103
|
|
|
|
|
|
|
}
|
104
|
|
|
|
|
|
|
}
|
105
|
1913
|
|
|
|
|
3331
|
--$level;
|
106
|
1913
|
|
|
|
|
4213
|
redo RULE;
|
107
|
|
|
|
|
|
|
}
|
108
|
445
|
|
|
|
|
2103
|
last RULE;
|
109
|
|
|
|
|
|
|
}
|
110
|
|
|
|
|
|
|
# goto child
|
111
|
29806
|
100
|
|
|
|
73392
|
if ( ref $rule eq 'ARRAY' ) {
|
112
|
7737
|
|
|
|
|
15560
|
push @parent_rules, $rules;
|
113
|
7737
|
|
|
|
|
14936
|
push @parent_pos, $pos;
|
114
|
7737
|
|
|
|
|
137232
|
$rules = clone($rule);
|
115
|
7737
|
50
|
|
|
|
142091
|
$self->debug_code
|
116
|
|
|
|
|
|
|
and $self->debug_code->('rules child', "$level: Going to child.");
|
117
|
7737
|
|
|
|
|
50657
|
$level_matched[ ++$level ] = 1;
|
118
|
7737
|
|
|
|
|
20568
|
redo RULE;
|
119
|
|
|
|
|
|
|
}
|
120
|
|
|
|
|
|
|
# alternative
|
121
|
22069
|
100
|
|
|
|
52469
|
if ( lc $rule eq 'or' ) {
|
122
|
6231
|
100
|
|
|
|
13665
|
if ($has_matched) {
|
123
|
781
|
|
|
|
|
1554
|
$rules = pop @parent_rules;
|
124
|
781
|
|
|
|
|
9252
|
() = pop @parent_pos;
|
125
|
781
|
|
|
|
|
1529
|
$has_matched = 0;
|
126
|
781
|
50
|
|
|
|
14124
|
$self->debug_code
|
127
|
|
|
|
|
|
|
and $self->debug_code->('rules ignore', "$level: Matched before 'or' so ignore alternatives. Going back to parent.");
|
128
|
781
|
|
|
|
|
5003
|
--$level;
|
129
|
781
|
|
|
|
|
1644
|
redo RULE;
|
130
|
|
|
|
|
|
|
}
|
131
|
|
|
|
|
|
|
$self->debug_code
|
132
|
5450
|
50
|
|
|
|
93502
|
and $self->debug_code->('rules try', "$level: Not matched so try next alternative.");
|
133
|
5450
|
|
|
|
|
33628
|
$level_matched[$level] = 1;
|
134
|
5450
|
|
|
|
|
10682
|
redo RULE;
|
135
|
|
|
|
|
|
|
}
|
136
|
|
|
|
|
|
|
# to expect the next match
|
137
|
15838
|
100
|
|
|
|
34749
|
if ( lc $rule eq 'and' ) {
|
138
|
2085
|
100
|
|
|
|
5003
|
if ( ! exists $level_and_of{$level} ) {
|
139
|
403
|
|
|
|
|
999
|
$level_and_of{$level} = 1;
|
140
|
|
|
|
|
|
|
}
|
141
|
2085
|
50
|
|
|
|
4574
|
if ( $level_matched[$level] ) {
|
142
|
2085
|
50
|
|
|
|
36392
|
$self->debug_code
|
143
|
|
|
|
|
|
|
and $self->debug_code->('rules next', "$level: And next rule.");
|
144
|
2085
|
|
|
|
|
13262
|
redo RULE;
|
145
|
|
|
|
|
|
|
}
|
146
|
0
|
|
|
|
|
0
|
$rules = pop @parent_rules;
|
147
|
0
|
|
|
|
|
0
|
() = pop @parent_pos;
|
148
|
0
|
0
|
|
|
|
0
|
$self->debug_code
|
149
|
|
|
|
|
|
|
and $self->debug_code->('rules ignore following', "$level: Ignore following. Going back to parent.");
|
150
|
0
|
|
|
|
|
0
|
--$level;
|
151
|
0
|
|
|
|
|
0
|
redo RULE;
|
152
|
|
|
|
|
|
|
}
|
153
|
13753
|
100
|
|
|
|
30242
|
if ( lc $rule eq 'begin' ) {
|
154
|
5327
|
|
|
|
|
9552
|
@stack_result = ();
|
155
|
5327
|
50
|
|
|
|
92485
|
$self->debug_code
|
156
|
|
|
|
|
|
|
and $self->debug_code->('rules begin', "$level: Begin.");
|
157
|
5327
|
|
|
|
|
34332
|
redo RULE;
|
158
|
|
|
|
|
|
|
}
|
159
|
|
|
|
|
|
|
# done
|
160
|
8426
|
100
|
|
|
|
19396
|
if ( lc $rule eq 'end' ) {
|
161
|
|
|
|
|
|
|
my $is_and
|
162
|
|
|
|
|
|
|
= ! exists $level_and_of{$level}
|
163
|
|
|
|
|
|
|
|| exists $level_and_of{$level}
|
164
|
444
|
|
66
|
|
|
2160
|
&& $level_and_of{$level};
|
165
|
444
|
50
|
|
|
|
1037
|
if ($is_and) {
|
166
|
444
|
|
|
|
|
748
|
push @{ $stack_item->{match} }, @stack_result;
|
|
444
|
|
|
|
|
1930
|
|
167
|
444
|
50
|
|
|
|
8110
|
$self->debug_code
|
168
|
|
|
|
|
|
|
and $self->debug_code->('rules end', "$level: End, so store data.");
|
169
|
|
|
|
|
|
|
}
|
170
|
444
|
|
|
|
|
3046
|
redo RULE;
|
171
|
|
|
|
|
|
|
}
|
172
|
|
|
|
|
|
|
# ref $rule is 'Regexp' or $rule is code
|
173
|
7982
|
|
|
|
|
12626
|
pos ${$content_ref} = $pos;
|
|
7982
|
|
|
|
|
106374
|
|
174
|
7982
|
50
|
|
|
|
144221
|
$self->debug_code
|
175
|
|
|
|
|
|
|
and $self->debug_code->('rules current pos', "$level: Set the current pos to $pos.");
|
176
|
|
|
|
|
|
|
$has_matched
|
177
|
|
|
|
|
|
|
= my ($full_match, @result)
|
178
|
|
|
|
|
|
|
= ref $rule eq 'CODE'
|
179
|
|
|
|
|
|
|
? $rule->($content_ref)
|
180
|
7982
|
100
|
|
|
|
56949
|
: ${$content_ref} =~ m{ \G ( $rule ) }xms;
|
|
7888
|
|
|
|
|
353029
|
|
181
|
7982
|
|
66
|
|
|
49866
|
$level_matched[$level] &&= $has_matched;
|
182
|
7982
|
100
|
|
|
|
19387
|
if ( exists $level_and_of{$level} ) {
|
183
|
1241
|
|
100
|
|
|
3911
|
$level_and_of{$level} &&= $has_matched;
|
184
|
|
|
|
|
|
|
}
|
185
|
7982
|
100
|
|
|
|
17641
|
if ($has_matched) {
|
186
|
2530
|
|
|
|
|
5446
|
push @stack_result, @result;
|
187
|
2530
|
|
|
|
|
6615
|
$pos += length $full_match;
|
188
|
|
|
|
|
|
|
$self->debug_code
|
189
|
2530
|
50
|
|
|
|
48429
|
and do {
|
190
|
0
|
0
|
|
|
|
0
|
my $rule_qr = ref $rule eq 'CODE' ? $rule->() : $rule;
|
191
|
0
|
|
|
|
|
0
|
$self->debug_code->(
|
192
|
|
|
|
|
|
|
'rules match',
|
193
|
|
|
|
|
|
|
"$level: Rule\n$rule_qr\nhas matched\n$full_match\nThe current pos is $pos.",
|
194
|
|
|
|
|
|
|
);
|
195
|
|
|
|
|
|
|
};
|
196
|
2530
|
|
|
|
|
19451
|
redo RULE;
|
197
|
|
|
|
|
|
|
}
|
198
|
5452
|
|
|
|
|
10647
|
$rules = pop @parent_rules;
|
199
|
5452
|
|
|
|
|
17779
|
$pos = pop @parent_pos;
|
200
|
|
|
|
|
|
|
$self->debug_code
|
201
|
5452
|
50
|
|
|
|
106509
|
and do {
|
202
|
0
|
0
|
|
|
|
0
|
my $rule_qr = ref $rule eq 'CODE' ? $rule->() : $rule;
|
203
|
0
|
|
|
|
|
0
|
$self->debug_code->(
|
204
|
|
|
|
|
|
|
'rules no match',
|
205
|
|
|
|
|
|
|
"$level: Rule\n$rule_qr\nhas not matched. Going back to parent.",
|
206
|
|
|
|
|
|
|
);
|
207
|
|
|
|
|
|
|
};
|
208
|
5452
|
|
|
|
|
36078
|
--$level;
|
209
|
5452
|
|
|
|
|
13772
|
redo RULE;
|
210
|
|
|
|
|
|
|
}
|
211
|
|
|
|
|
|
|
}
|
212
|
|
|
|
|
|
|
|
213
|
28
|
|
|
|
|
106
|
return $self;
|
214
|
|
|
|
|
|
|
}
|
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
sub _cleanup_and_calculate_reference {
|
217
|
28
|
|
|
28
|
|
69
|
my $self = shift;
|
218
|
|
|
|
|
|
|
|
219
|
28
|
|
|
|
|
545
|
my $stack = $self->stack;
|
220
|
28
|
|
|
|
|
661
|
my $content_ref = $self->content_ref;
|
221
|
28
|
|
|
|
|
91
|
@{$stack} = map {
|
222
|
|
|
|
|
|
|
exists $_->{match}
|
223
|
445
|
100
|
|
|
|
956
|
? do {
|
224
|
|
|
|
|
|
|
# calculate reference
|
225
|
444
|
|
|
|
|
669
|
my $pre_match = substr ${$content_ref}, 0, $_->{start_pos};
|
|
444
|
|
|
|
|
6246
|
|
226
|
444
|
|
|
|
|
2244
|
my $newline_count = $pre_match =~ tr{\n}{\n};
|
227
|
444
|
|
|
|
|
1197
|
$_->{line_number} = $newline_count + 1;
|
228
|
444
|
|
|
|
|
1174
|
$_;
|
229
|
|
|
|
|
|
|
}
|
230
|
|
|
|
|
|
|
# cleanup
|
231
|
|
|
|
|
|
|
: ();
|
232
|
28
|
|
|
|
|
203
|
} @{$stack};
|
|
28
|
|
|
|
|
86
|
|
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
# debug if requested
|
235
|
28
|
50
|
|
|
|
562
|
$self->debug_code
|
236
|
|
|
|
|
|
|
or return $self;
|
237
|
0
|
|
|
|
|
0
|
my $dump = Data::Dumper ## no critic (LongChainsOfMethodCalls)
|
238
|
|
|
|
|
|
|
->new([ $self->stack ], [ qw(stack) ])
|
239
|
|
|
|
|
|
|
->Indent(1)
|
240
|
|
|
|
|
|
|
->Quotekeys(0)
|
241
|
|
|
|
|
|
|
->Sortkeys(1)
|
242
|
|
|
|
|
|
|
->Useqq(1)
|
243
|
|
|
|
|
|
|
->Dump;
|
244
|
0
|
|
|
|
|
0
|
chomp $dump;
|
245
|
0
|
|
|
|
|
0
|
$self->debug_code->('stack clean', $dump);
|
246
|
|
|
|
|
|
|
|
247
|
0
|
|
|
|
|
0
|
return $self;
|
248
|
|
|
|
|
|
|
}
|
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub extract {
|
251
|
28
|
|
|
28
|
1
|
128
|
my ($self, $arg_ref) = @_;
|
252
|
|
|
|
|
|
|
|
253
|
28
|
|
|
|
|
156
|
$self->_parse_pos;
|
254
|
28
|
|
|
|
|
300
|
$self->_parse_rules;
|
255
|
28
|
|
|
|
|
200
|
$self->_cleanup_and_calculate_reference;
|
256
|
|
|
|
|
|
|
|
257
|
28
|
|
|
|
|
298
|
return $self;
|
258
|
|
|
|
|
|
|
}
|
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable;
|
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
1;
|
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
__END__
|