line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Locale::TextDomain::OO::Extract::Base::RegexBasedExtractor; ## no critic (TidyCode)
|
2
|
|
|
|
|
|
|
|
3
|
7
|
|
|
7
|
|
3168
|
use strict;
|
|
7
|
|
|
|
|
20
|
|
|
7
|
|
|
|
|
203
|
|
4
|
7
|
|
|
7
|
|
45
|
use warnings;
|
|
7
|
|
|
|
|
19
|
|
|
7
|
|
|
|
|
227
|
|
5
|
7
|
|
|
7
|
|
38
|
use Carp qw(confess);
|
|
7
|
|
|
|
|
18
|
|
|
7
|
|
|
|
|
374
|
|
6
|
7
|
|
|
7
|
|
1734
|
use Clone qw(clone);
|
|
7
|
|
|
|
|
13755
|
|
|
7
|
|
|
|
|
382
|
|
7
|
7
|
|
|
7
|
|
464
|
use Data::Dumper ();
|
|
7
|
|
|
|
|
4584
|
|
|
7
|
|
|
|
|
130
|
|
8
|
7
|
|
|
7
|
|
41
|
use Moo;
|
|
7
|
|
|
|
|
15
|
|
|
7
|
|
|
|
|
45
|
|
9
|
7
|
|
|
7
|
|
4385
|
use MooX::StrictConstructor;
|
|
7
|
|
|
|
|
76177
|
|
|
7
|
|
|
|
|
36
|
|
10
|
7
|
|
|
7
|
|
133969
|
use MooX::Types::MooseLike::Base qw(ArrayRef CodeRef RegexpRef ScalarRef);
|
|
7
|
|
|
|
|
18
|
|
|
7
|
|
|
|
|
504
|
|
11
|
7
|
|
|
7
|
|
54
|
use namespace::autoclean;
|
|
7
|
|
|
|
|
17
|
|
|
7
|
|
|
|
|
61
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $VERSION = '2.007';
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
has content_ref => (
|
16
|
|
|
|
|
|
|
is => 'rw',
|
17
|
|
|
|
|
|
|
isa => ScalarRef,
|
18
|
|
|
|
|
|
|
);
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
has start_rule => (
|
21
|
|
|
|
|
|
|
is => 'rw',
|
22
|
|
|
|
|
|
|
isa => RegexpRef,
|
23
|
|
|
|
|
|
|
);
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
has rules => (
|
26
|
|
|
|
|
|
|
is => 'rw',
|
27
|
|
|
|
|
|
|
isa => ArrayRef,
|
28
|
|
|
|
|
|
|
);
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
has debug_code => (
|
31
|
|
|
|
|
|
|
is => 'rw',
|
32
|
|
|
|
|
|
|
isa => CodeRef,
|
33
|
|
|
|
|
|
|
clearer => 'clear_debug_code',
|
34
|
|
|
|
|
|
|
);
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
has stack => (
|
37
|
|
|
|
|
|
|
is => 'rw',
|
38
|
|
|
|
|
|
|
isa => ArrayRef,
|
39
|
|
|
|
|
|
|
);
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
sub _parse_pos {
|
42
|
28
|
|
|
28
|
|
60
|
my $self = shift;
|
43
|
|
|
|
|
|
|
|
44
|
28
|
|
|
|
|
527
|
my $regex = $self->start_rule;
|
45
|
28
|
|
|
|
|
586
|
my $content_ref = $self->content_ref;
|
46
|
28
|
50
|
|
|
|
170
|
defined ${$content_ref}
|
|
28
|
|
|
|
|
131
|
|
47
|
|
|
|
|
|
|
or return confess 'content_ref is a reference to undef';
|
48
|
28
|
|
|
|
|
70
|
my @stack;
|
49
|
28
|
|
|
|
|
52
|
while ( ${$content_ref} =~ m{ \G .*? ( $regex ) }xmsgc ) {
|
|
473
|
|
|
|
|
8485
|
|
50
|
|
|
|
|
|
|
push @stack, {
|
51
|
445
|
|
|
|
|
910
|
start_pos => pos( ${$content_ref} ) - length $1,
|
|
445
|
|
|
|
|
1875
|
|
52
|
|
|
|
|
|
|
};
|
53
|
|
|
|
|
|
|
}
|
54
|
28
|
|
|
|
|
603
|
$self->stack(\@stack);
|
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# debug if requested
|
57
|
28
|
50
|
|
|
|
1942
|
$self->debug_code
|
58
|
|
|
|
|
|
|
or return $self;
|
59
|
0
|
|
|
|
|
0
|
my $dump = Data::Dumper ## no critic (LongChainsOfMethodCalls)
|
60
|
|
|
|
|
|
|
->new([ $self->stack ], [ qw(stack) ])
|
61
|
|
|
|
|
|
|
->Indent(1)
|
62
|
|
|
|
|
|
|
->Quotekeys(0)
|
63
|
|
|
|
|
|
|
->Sortkeys(1)
|
64
|
|
|
|
|
|
|
->Useqq(1)
|
65
|
|
|
|
|
|
|
->Dump;
|
66
|
0
|
|
|
|
|
0
|
chomp $dump;
|
67
|
0
|
|
|
|
|
0
|
$self->debug_code->('stack start', $dump);
|
68
|
|
|
|
|
|
|
|
69
|
0
|
|
|
|
|
0
|
return $self;
|
70
|
|
|
|
|
|
|
}
|
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub _parse_rules { ## no critic (ExcessComplexity)
|
73
|
28
|
|
|
28
|
|
67
|
my $self = shift;
|
74
|
|
|
|
|
|
|
|
75
|
28
|
|
|
|
|
431
|
my $content_ref = $self->content_ref;
|
76
|
28
|
|
|
|
|
153
|
for my $stack_item ( @{ $self->stack } ) {
|
|
28
|
|
|
|
|
410
|
|
77
|
445
|
|
|
|
|
8001
|
my $rules = clone( $self->rules );
|
78
|
445
|
|
|
|
|
191308
|
my $pos = $stack_item->{start_pos};
|
79
|
445
|
|
|
|
|
1017
|
my $level = 0;
|
80
|
445
|
|
|
|
|
1182
|
my @level_matched = ( 1 );
|
81
|
445
|
|
|
|
|
817
|
my $has_matched = 0;
|
82
|
445
|
50
|
|
|
|
10865
|
$self->debug_code
|
83
|
|
|
|
|
|
|
and $self->debug_code->('rules start', "$level: Starting at pos $pos.");
|
84
|
445
|
|
|
|
|
3357
|
my (@parent_rules, @parent_pos, %level_and_of, @stack_result);
|
85
|
|
|
|
|
|
|
RULE: {
|
86
|
445
|
|
|
|
|
734
|
my $rule = shift @{$rules};
|
|
32164
|
|
|
|
|
48620
|
|
|
32164
|
|
|
|
|
66165
|
|
87
|
32164
|
100
|
|
|
|
67834
|
if (! $rule) {
|
88
|
2358
|
50
|
|
|
|
39449
|
$self->debug_code
|
89
|
|
|
|
|
|
|
and $self->debug_code->('rules last', "$level: No more rules found.");
|
90
|
2358
|
100
|
|
|
|
15959
|
if (@parent_rules) {
|
91
|
1913
|
|
|
|
|
3806
|
$rules = pop @parent_rules;
|
92
|
1913
|
|
|
|
|
4095
|
() = pop @parent_pos;
|
93
|
1913
|
50
|
|
|
|
31250
|
$self->debug_code
|
94
|
|
|
|
|
|
|
and $self->debug_code->('rules parent', "$level: Going back to parent.");
|
95
|
|
|
|
|
|
|
# delete the parent and match
|
96
|
1913
|
100
|
|
|
|
12684
|
if ( ! $has_matched ) {
|
97
|
|
|
|
|
|
|
LEVEL: ## no critic (DeepNests)
|
98
|
14
|
|
|
|
|
56
|
for my $parent_level ( reverse 0 .. ( $level - 1 ) ) {
|
99
|
14
|
100
|
|
|
|
52
|
if ( exists $level_and_of{$parent_level} ) { ## no critic (DeepNests)
|
100
|
1
|
|
|
|
|
3
|
$level_and_of{$parent_level} = 0;
|
101
|
1
|
|
|
|
|
3
|
last LEVEL;
|
102
|
|
|
|
|
|
|
}
|
103
|
|
|
|
|
|
|
}
|
104
|
|
|
|
|
|
|
}
|
105
|
1913
|
|
|
|
|
3504
|
--$level;
|
106
|
1913
|
|
|
|
|
3890
|
redo RULE;
|
107
|
|
|
|
|
|
|
}
|
108
|
445
|
|
|
|
|
2242
|
last RULE;
|
109
|
|
|
|
|
|
|
}
|
110
|
|
|
|
|
|
|
# goto child
|
111
|
29806
|
100
|
|
|
|
68773
|
if ( ref $rule eq 'ARRAY' ) {
|
112
|
7737
|
|
|
|
|
14683
|
push @parent_rules, $rules;
|
113
|
7737
|
|
|
|
|
14877
|
push @parent_pos, $pos;
|
114
|
7737
|
|
|
|
|
140675
|
$rules = clone($rule);
|
115
|
7737
|
50
|
|
|
|
137366
|
$self->debug_code
|
116
|
|
|
|
|
|
|
and $self->debug_code->('rules child', "$level: Going to child.");
|
117
|
7737
|
|
|
|
|
49978
|
$level_matched[ ++$level ] = 1;
|
118
|
7737
|
|
|
|
|
20280
|
redo RULE;
|
119
|
|
|
|
|
|
|
}
|
120
|
|
|
|
|
|
|
# alternative
|
121
|
22069
|
100
|
|
|
|
50611
|
if ( lc $rule eq 'or' ) {
|
122
|
6231
|
100
|
|
|
|
12161
|
if ($has_matched) {
|
123
|
781
|
|
|
|
|
1408
|
$rules = pop @parent_rules;
|
124
|
781
|
|
|
|
|
15392
|
() = pop @parent_pos;
|
125
|
781
|
|
|
|
|
1471
|
$has_matched = 0;
|
126
|
781
|
50
|
|
|
|
13120
|
$self->debug_code
|
127
|
|
|
|
|
|
|
and $self->debug_code->('rules ignore', "$level: Matched before 'or' so ignore alternatives. Going back to parent.");
|
128
|
781
|
|
|
|
|
4728
|
--$level;
|
129
|
781
|
|
|
|
|
1475
|
redo RULE;
|
130
|
|
|
|
|
|
|
}
|
131
|
|
|
|
|
|
|
$self->debug_code
|
132
|
5450
|
50
|
|
|
|
88094
|
and $self->debug_code->('rules try', "$level: Not matched so try next alternative.");
|
133
|
5450
|
|
|
|
|
37139
|
$level_matched[$level] = 1;
|
134
|
5450
|
|
|
|
|
10963
|
redo RULE;
|
135
|
|
|
|
|
|
|
}
|
136
|
|
|
|
|
|
|
# to expect the next match
|
137
|
15838
|
100
|
|
|
|
33106
|
if ( lc $rule eq 'and' ) {
|
138
|
2085
|
100
|
|
|
|
5605
|
if ( ! exists $level_and_of{$level} ) {
|
139
|
403
|
|
|
|
|
1073
|
$level_and_of{$level} = 1;
|
140
|
|
|
|
|
|
|
}
|
141
|
2085
|
50
|
|
|
|
4603
|
if ( $level_matched[$level] ) {
|
142
|
2085
|
50
|
|
|
|
36130
|
$self->debug_code
|
143
|
|
|
|
|
|
|
and $self->debug_code->('rules next', "$level: And next rule.");
|
144
|
2085
|
|
|
|
|
13618
|
redo RULE;
|
145
|
|
|
|
|
|
|
}
|
146
|
0
|
|
|
|
|
0
|
$rules = pop @parent_rules;
|
147
|
0
|
|
|
|
|
0
|
() = pop @parent_pos;
|
148
|
0
|
0
|
|
|
|
0
|
$self->debug_code
|
149
|
|
|
|
|
|
|
and $self->debug_code->('rules ignore following', "$level: Ignore following. Going back to parent.");
|
150
|
0
|
|
|
|
|
0
|
--$level;
|
151
|
0
|
|
|
|
|
0
|
redo RULE;
|
152
|
|
|
|
|
|
|
}
|
153
|
13753
|
100
|
|
|
|
29308
|
if ( lc $rule eq 'begin' ) {
|
154
|
5327
|
|
|
|
|
8576
|
@stack_result = ();
|
155
|
5327
|
50
|
|
|
|
86107
|
$self->debug_code
|
156
|
|
|
|
|
|
|
and $self->debug_code->('rules begin', "$level: Begin.");
|
157
|
5327
|
|
|
|
|
31759
|
redo RULE;
|
158
|
|
|
|
|
|
|
}
|
159
|
|
|
|
|
|
|
# done
|
160
|
8426
|
100
|
|
|
|
18594
|
if ( lc $rule eq 'end' ) {
|
161
|
|
|
|
|
|
|
my $is_and
|
162
|
|
|
|
|
|
|
= ! exists $level_and_of{$level}
|
163
|
|
|
|
|
|
|
|| exists $level_and_of{$level}
|
164
|
444
|
|
66
|
|
|
2377
|
&& $level_and_of{$level};
|
165
|
444
|
50
|
|
|
|
1137
|
if ($is_and) {
|
166
|
444
|
|
|
|
|
768
|
push @{ $stack_item->{match} }, @stack_result;
|
|
444
|
|
|
|
|
2334
|
|
167
|
444
|
50
|
|
|
|
7703
|
$self->debug_code
|
168
|
|
|
|
|
|
|
and $self->debug_code->('rules end', "$level: End, so store data.");
|
169
|
|
|
|
|
|
|
}
|
170
|
444
|
|
|
|
|
2941
|
redo RULE;
|
171
|
|
|
|
|
|
|
}
|
172
|
|
|
|
|
|
|
# ref $rule is 'Regexp' or $rule is code
|
173
|
7982
|
|
|
|
|
12503
|
pos ${$content_ref} = $pos;
|
|
7982
|
|
|
|
|
92936
|
|
174
|
7982
|
50
|
|
|
|
138859
|
$self->debug_code
|
175
|
|
|
|
|
|
|
and $self->debug_code->('rules current pos', "$level: Set the current pos to $pos.");
|
176
|
|
|
|
|
|
|
$has_matched
|
177
|
|
|
|
|
|
|
= my ($full_match, @result)
|
178
|
|
|
|
|
|
|
= ref $rule eq 'CODE'
|
179
|
|
|
|
|
|
|
? $rule->($content_ref)
|
180
|
7982
|
100
|
|
|
|
54289
|
: ${$content_ref} =~ m{ \G ( $rule ) }xms;
|
|
7888
|
|
|
|
|
334399
|
|
181
|
7982
|
|
66
|
|
|
47199
|
$level_matched[$level] &&= $has_matched;
|
182
|
7982
|
100
|
|
|
|
18703
|
if ( exists $level_and_of{$level} ) {
|
183
|
1241
|
|
100
|
|
|
4072
|
$level_and_of{$level} &&= $has_matched;
|
184
|
|
|
|
|
|
|
}
|
185
|
7982
|
100
|
|
|
|
16464
|
if ($has_matched) {
|
186
|
2530
|
|
|
|
|
5484
|
push @stack_result, @result;
|
187
|
2530
|
|
|
|
|
6903
|
$pos += length $full_match;
|
188
|
|
|
|
|
|
|
$self->debug_code
|
189
|
2530
|
50
|
|
|
|
49105
|
and do {
|
190
|
0
|
0
|
|
|
|
0
|
my $rule_qr = ref $rule eq 'CODE' ? $rule->() : $rule;
|
191
|
0
|
|
|
|
|
0
|
$self->debug_code->(
|
192
|
|
|
|
|
|
|
'rules match',
|
193
|
|
|
|
|
|
|
"$level: Rule\n$rule_qr\nhas matched\n$full_match\nThe current pos is $pos.",
|
194
|
|
|
|
|
|
|
);
|
195
|
|
|
|
|
|
|
};
|
196
|
2530
|
|
|
|
|
19524
|
redo RULE;
|
197
|
|
|
|
|
|
|
}
|
198
|
5452
|
|
|
|
|
9787
|
$rules = pop @parent_rules;
|
199
|
5452
|
|
|
|
|
16887
|
$pos = pop @parent_pos;
|
200
|
|
|
|
|
|
|
$self->debug_code
|
201
|
5452
|
50
|
|
|
|
100437
|
and do {
|
202
|
0
|
0
|
|
|
|
0
|
my $rule_qr = ref $rule eq 'CODE' ? $rule->() : $rule;
|
203
|
0
|
|
|
|
|
0
|
$self->debug_code->(
|
204
|
|
|
|
|
|
|
'rules no match',
|
205
|
|
|
|
|
|
|
"$level: Rule\n$rule_qr\nhas not matched. Going back to parent.",
|
206
|
|
|
|
|
|
|
);
|
207
|
|
|
|
|
|
|
};
|
208
|
5452
|
|
|
|
|
33750
|
--$level;
|
209
|
5452
|
|
|
|
|
13302
|
redo RULE;
|
210
|
|
|
|
|
|
|
}
|
211
|
|
|
|
|
|
|
}
|
212
|
|
|
|
|
|
|
|
213
|
28
|
|
|
|
|
95
|
return $self;
|
214
|
|
|
|
|
|
|
}
|
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
sub _cleanup_and_calculate_reference {
|
217
|
28
|
|
|
28
|
|
58
|
my $self = shift;
|
218
|
|
|
|
|
|
|
|
219
|
28
|
|
|
|
|
481
|
my $stack = $self->stack;
|
220
|
28
|
|
|
|
|
587
|
my $content_ref = $self->content_ref;
|
221
|
28
|
|
|
|
|
94
|
@{$stack} = map {
|
222
|
|
|
|
|
|
|
exists $_->{match}
|
223
|
445
|
100
|
|
|
|
903
|
? do {
|
224
|
|
|
|
|
|
|
# calculate reference
|
225
|
444
|
|
|
|
|
623
|
my $pre_match = substr ${$content_ref}, 0, $_->{start_pos};
|
|
444
|
|
|
|
|
5459
|
|
226
|
444
|
|
|
|
|
2061
|
my $newline_count = $pre_match =~ tr{\n}{\n};
|
227
|
444
|
|
|
|
|
1158
|
$_->{line_number} = $newline_count + 1;
|
228
|
444
|
|
|
|
|
1124
|
$_;
|
229
|
|
|
|
|
|
|
}
|
230
|
|
|
|
|
|
|
# cleanup
|
231
|
|
|
|
|
|
|
: ();
|
232
|
28
|
|
|
|
|
201
|
} @{$stack};
|
|
28
|
|
|
|
|
84
|
|
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
# debug if requested
|
235
|
28
|
50
|
|
|
|
530
|
$self->debug_code
|
236
|
|
|
|
|
|
|
or return $self;
|
237
|
0
|
|
|
|
|
0
|
my $dump = Data::Dumper ## no critic (LongChainsOfMethodCalls)
|
238
|
|
|
|
|
|
|
->new([ $self->stack ], [ qw(stack) ])
|
239
|
|
|
|
|
|
|
->Indent(1)
|
240
|
|
|
|
|
|
|
->Quotekeys(0)
|
241
|
|
|
|
|
|
|
->Sortkeys(1)
|
242
|
|
|
|
|
|
|
->Useqq(1)
|
243
|
|
|
|
|
|
|
->Dump;
|
244
|
0
|
|
|
|
|
0
|
chomp $dump;
|
245
|
0
|
|
|
|
|
0
|
$self->debug_code->('stack clean', $dump);
|
246
|
|
|
|
|
|
|
|
247
|
0
|
|
|
|
|
0
|
return $self;
|
248
|
|
|
|
|
|
|
}
|
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub extract {
|
251
|
28
|
|
|
28
|
1
|
86
|
my ($self, $arg_ref) = @_;
|
252
|
|
|
|
|
|
|
|
253
|
28
|
|
|
|
|
125
|
$self->_parse_pos;
|
254
|
28
|
|
|
|
|
288
|
$self->_parse_rules;
|
255
|
28
|
|
|
|
|
157
|
$self->_cleanup_and_calculate_reference;
|
256
|
|
|
|
|
|
|
|
257
|
28
|
|
|
|
|
253
|
return $self;
|
258
|
|
|
|
|
|
|
}
|
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable;
|
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
1;
|
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
__END__
|