| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
5
|
|
|
5
|
|
519602
|
use strictures 1; |
|
|
5
|
|
|
|
|
41
|
|
|
|
5
|
|
|
|
|
137
|
|
|
2
|
|
|
|
|
|
|
package Mojito::Page::Parse; |
|
3
|
|
|
|
|
|
|
{ |
|
4
|
|
|
|
|
|
|
$Mojito::Page::Parse::VERSION = '0.24'; |
|
5
|
|
|
|
|
|
|
} |
|
6
|
5
|
|
|
5
|
|
578
|
use 5.010; |
|
|
5
|
|
|
|
|
15
|
|
|
|
5
|
|
|
|
|
285
|
|
|
7
|
5
|
|
|
5
|
|
3232
|
use Moo; |
|
|
5
|
|
|
|
|
54119
|
|
|
|
5
|
|
|
|
|
34
|
|
|
8
|
5
|
|
|
5
|
|
11135
|
use MooX::Types::MooseLike::Base qw(:all); |
|
|
5
|
|
|
|
|
54977
|
|
|
|
5
|
|
|
|
|
2422
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
5
|
|
|
5
|
|
4804
|
use Data::Dumper::Concise; |
|
|
5
|
|
|
|
|
60611
|
|
|
|
5
|
|
|
|
|
6368
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 Name |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Mojito::Page::Parse - turn page source into a page structure |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=cut |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# This is the page source |
|
19
|
|
|
|
|
|
|
has 'page' => ( |
|
20
|
|
|
|
|
|
|
is => 'rw', |
|
21
|
|
|
|
|
|
|
isa => Value, |
|
22
|
|
|
|
|
|
|
); |
|
23
|
|
|
|
|
|
|
has 'sections' => ( |
|
24
|
|
|
|
|
|
|
is => 'ro', |
|
25
|
|
|
|
|
|
|
isa => ArrayRef[HashRef], |
|
26
|
|
|
|
|
|
|
lazy => 1, |
|
27
|
|
|
|
|
|
|
builder => 'build_sections', |
|
28
|
|
|
|
|
|
|
); |
|
29
|
|
|
|
|
|
|
has 'page_structure' => ( |
|
30
|
|
|
|
|
|
|
is => 'rw', |
|
31
|
|
|
|
|
|
|
isa => HashRef, |
|
32
|
|
|
|
|
|
|
lazy => 1, |
|
33
|
|
|
|
|
|
|
builder => 'build_page_structure', |
|
34
|
|
|
|
|
|
|
); |
|
35
|
|
|
|
|
|
|
has 'default_format' => ( |
|
36
|
|
|
|
|
|
|
is => 'rw', |
|
37
|
|
|
|
|
|
|
isa => Value, |
|
38
|
|
|
|
|
|
|
lazy => 1, |
|
39
|
|
|
|
|
|
|
default => sub { 'HTML' }, |
|
40
|
|
|
|
|
|
|
); |
|
41
|
|
|
|
|
|
|
has 'created' => ( |
|
42
|
|
|
|
|
|
|
is => 'ro', |
|
43
|
|
|
|
|
|
|
isa => Int, |
|
44
|
|
|
|
|
|
|
); |
|
45
|
|
|
|
|
|
|
has 'last_modified' => ( |
|
46
|
|
|
|
|
|
|
is => 'ro', |
|
47
|
|
|
|
|
|
|
isa => Int, |
|
48
|
|
|
|
|
|
|
default => sub { time() }, |
|
49
|
|
|
|
|
|
|
); |
|
50
|
|
|
|
|
|
|
has 'section_open_regex' => ( |
|
51
|
|
|
|
|
|
|
is => 'ro', |
|
52
|
|
|
|
|
|
|
isa => RegexpRef, |
|
53
|
|
|
|
|
|
|
default => sub { qr/<sx\.[^>]+>/ }, |
|
54
|
|
|
|
|
|
|
); |
|
55
|
|
|
|
|
|
|
has 'section_close_regex' => ( |
|
56
|
|
|
|
|
|
|
is => 'ro', |
|
57
|
|
|
|
|
|
|
isa => RegexpRef, |
|
58
|
|
|
|
|
|
|
default => sub { qr(</sx>) }, |
|
59
|
|
|
|
|
|
|
); |
|
60
|
|
|
|
|
|
|
has 'debug' => ( |
|
61
|
|
|
|
|
|
|
is => 'rw', |
|
62
|
|
|
|
|
|
|
isa => Bool, |
|
63
|
|
|
|
|
|
|
default => sub { 0 }, |
|
64
|
|
|
|
|
|
|
); |
|
65
|
|
|
|
|
|
|
has 'messages' => ( |
|
66
|
|
|
|
|
|
|
is => 'rw', |
|
67
|
|
|
|
|
|
|
isa => ArrayRef, |
|
68
|
|
|
|
|
|
|
default => sub { [] }, |
|
69
|
|
|
|
|
|
|
); |
|
70
|
|
|
|
|
|
|
has 'message_string' => ( |
|
71
|
|
|
|
|
|
|
is => 'ro', |
|
72
|
|
|
|
|
|
|
isa => Value, |
|
73
|
|
|
|
|
|
|
lazy => 1, |
|
74
|
|
|
|
|
|
|
builder => '_build_message_string', |
|
75
|
|
|
|
|
|
|
); |
|
76
|
|
|
|
|
|
|
sub _build_message_string { |
|
77
|
2
|
|
|
2
|
|
2570
|
my ($self) = (shift); |
|
78
|
2
|
50
|
|
|
|
33
|
return join ', ', @{$self->messages} if $self->messages; |
|
|
2
|
|
|
|
|
667
|
|
|
79
|
0
|
|
|
|
|
0
|
return; |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head2 has_nested_section |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Test if we have nested sections. |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=cut |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
sub has_nested_section { |
|
89
|
5
|
|
|
5
|
1
|
2089
|
my ($self) = @_; |
|
90
|
|
|
|
|
|
|
|
|
91
|
5
|
|
|
|
|
31
|
my $section_open_regex = $self->section_open_regex; |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
#die "Got no page" if !$self->page; |
|
94
|
5
|
|
|
|
|
74
|
my @stuff_between_section_opens = |
|
95
|
|
|
|
|
|
|
$self->page =~ m/${section_open_regex}(.*?)${section_open_regex}/si; |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# If when find a section ending tag in the middle of the two consecutive |
|
98
|
|
|
|
|
|
|
# opening section tags then we know first section has been closed and thus |
|
99
|
|
|
|
|
|
|
# does NOT contain a nested section. |
|
100
|
5
|
|
|
|
|
7034
|
foreach my $tweener (@stuff_between_section_opens) { |
|
101
|
4
|
100
|
|
|
|
24
|
if ( $tweener =~ m/<\/sx>/ ) { |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# The tweener section could cause us to think we're not nested |
|
104
|
|
|
|
|
|
|
# due to an nested section of the general type (not the class=mc_ type) |
|
105
|
|
|
|
|
|
|
# In this case we need to count the number of open and closed sections |
|
106
|
|
|
|
|
|
|
# If they are the same then we dont' have </sec> left over to close the first |
|
107
|
|
|
|
|
|
|
# and thus we have a nest. |
|
108
|
2
|
|
|
|
|
10
|
my @opens = $tweener =~ m/(<sx[^>]*>)/sg; |
|
109
|
2
|
|
|
|
|
29
|
my @closes = $tweener =~ m/(<\/sx>)/sg; |
|
110
|
2
|
50
|
|
|
|
15
|
if ( scalar @opens == scalar @closes ) { |
|
111
|
0
|
|
|
|
|
0
|
return 1; |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
} |
|
114
|
|
|
|
|
|
|
else { |
|
115
|
2
|
|
|
|
|
13
|
return 1; |
|
116
|
|
|
|
|
|
|
} |
|
117
|
|
|
|
|
|
|
} |
|
118
|
|
|
|
|
|
|
|
|
119
|
3
|
|
|
|
|
19
|
return 0; |
|
120
|
|
|
|
|
|
|
} |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head2 add_implicit_sections |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Add implicit sections to assist the building of the page_struct. |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=cut |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub add_implicit_sections { |
|
129
|
8
|
|
|
8
|
1
|
2899
|
my ($self) = @_; |
|
130
|
|
|
|
|
|
|
|
|
131
|
8
|
|
|
|
|
209
|
my $page = $self->page; |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Add implicit sections in between explicit sections (if needed) |
|
134
|
8
|
100
|
|
|
|
99
|
if ( $page =~ m/<\/sx>(?!\s*<sx\.).*?<sx\./si ) { |
|
135
|
5
|
|
|
|
|
61
|
$page =~ s/<\/sx>(?!\s*<sx\.)(.*?)<sx\./<\/sx>\n<sx.Implicit>$1<\/sx>\n<sx./sig; |
|
136
|
|
|
|
|
|
|
} |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# Add implicit section at the beginning (if needed) |
|
139
|
8
|
|
|
|
|
58
|
$page =~ s/(?<!<sx\.\w)(<sx\.\w)/<\/sx>\n$1/si; |
|
140
|
8
|
|
|
|
|
27
|
$page = "\n<sx.Implicit>\n${page}"; |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Add implicit section at the end (if needed) |
|
143
|
8
|
|
|
|
|
68
|
$page =~ s/(<\/sx>)(?!.*<\/sx>)/$1\n<sx.Implicit>/si; |
|
144
|
8
|
|
|
|
|
17
|
$page .= '</sx>'; |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# cut empty implicits |
|
147
|
8
|
|
|
|
|
43
|
$page =~ s/<sx\.Implicit>\s*<\/sx>//sig; |
|
148
|
|
|
|
|
|
|
|
|
149
|
8
|
50
|
|
|
|
174
|
if ( $self->debug ) { |
|
150
|
0
|
|
|
|
|
0
|
say "PREMATCH: ", ${^PREMATCH}; |
|
151
|
0
|
|
|
|
|
0
|
say "MATCH: ${^MATCH}"; |
|
152
|
0
|
|
|
|
|
0
|
say "POSTMATCH: ", ${^POSTMATCH}; |
|
153
|
0
|
|
|
|
|
0
|
say "page: $page"; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
8
|
|
|
|
|
25596
|
return $page; |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 parse_sections |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Extract section class and content from the page. |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=cut |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub parse_sections { |
|
166
|
3
|
|
|
3
|
1
|
36
|
my ( $self, $page ) = @_; |
|
167
|
|
|
|
|
|
|
|
|
168
|
3
|
|
|
|
|
6
|
my $sections; |
|
169
|
3
|
|
|
|
|
46
|
my @sections = $page =~ m/(<sx\.[^>]+>.*?<\/sx>)/sig; |
|
170
|
3
|
|
|
|
|
10
|
foreach my $sx (@sections) { |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# Extract class and content |
|
173
|
11
|
|
|
|
|
64
|
my ( $class, $content ) = $sx =~ m/<sx\.([^>]+)>(.*)?<\/sx>/si; |
|
174
|
11
|
|
|
|
|
20
|
push @{$sections}, { class => $class, content => $content }; |
|
|
11
|
|
|
|
|
54
|
|
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
|
|
177
|
3
|
|
|
|
|
86
|
return $sections; |
|
178
|
|
|
|
|
|
|
} |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 build_sections |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Wrap up the getting of sections process. |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=cut |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub build_sections { |
|
187
|
3
|
|
|
3
|
1
|
1975
|
my $self = shift; |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# Deal with nested sections gracefully by adding a message |
|
190
|
|
|
|
|
|
|
# to bubble up to the view and display in the #message_area. |
|
191
|
3
|
100
|
|
|
|
17
|
if ( $self->has_nested_section ) { |
|
192
|
1
|
|
|
|
|
3
|
$self->messages( [ @{$self->messages}, 'haz nested sexes'] ); |
|
|
1
|
|
|
|
|
5
|
|
|
193
|
|
|
|
|
|
|
} |
|
194
|
3
|
|
|
|
|
734
|
my $page = $self->add_implicit_sections; |
|
195
|
|
|
|
|
|
|
|
|
196
|
3
|
|
|
|
|
40
|
return $self->parse_sections($page); |
|
197
|
|
|
|
|
|
|
} |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=head2 build_page_structure |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
It's just an href that we'll persist as a Mongo document. |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
sub build_page_structure { |
|
206
|
1
|
|
|
1
|
1
|
769
|
my $self = shift; |
|
207
|
|
|
|
|
|
|
|
|
208
|
1
|
|
|
|
|
6
|
my $return = { |
|
209
|
|
|
|
|
|
|
sections => $self->sections, |
|
210
|
|
|
|
|
|
|
default_format => $self->default_format, |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# created => '1234567890', |
|
213
|
|
|
|
|
|
|
# last_modified => time(), |
|
214
|
|
|
|
|
|
|
page_source => $self->page, |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# Set the message last to pick any builder message above |
|
217
|
|
|
|
|
|
|
# e.g. ->sections can set a 'nested sections' message. |
|
218
|
|
|
|
|
|
|
message => $self->message_string, |
|
219
|
|
|
|
|
|
|
}; |
|
220
|
1
|
|
|
|
|
89
|
return $return; |
|
221
|
|
|
|
|
|
|
} |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
1 |