line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
5
|
|
|
5
|
|
519602
|
use strictures 1; |
|
5
|
|
|
|
|
41
|
|
|
5
|
|
|
|
|
137
|
|
2
|
|
|
|
|
|
|
package Mojito::Page::Parse; |
3
|
|
|
|
|
|
|
{ |
4
|
|
|
|
|
|
|
$Mojito::Page::Parse::VERSION = '0.24'; |
5
|
|
|
|
|
|
|
} |
6
|
5
|
|
|
5
|
|
578
|
use 5.010; |
|
5
|
|
|
|
|
15
|
|
|
5
|
|
|
|
|
285
|
|
7
|
5
|
|
|
5
|
|
3232
|
use Moo; |
|
5
|
|
|
|
|
54119
|
|
|
5
|
|
|
|
|
34
|
|
8
|
5
|
|
|
5
|
|
11135
|
use MooX::Types::MooseLike::Base qw(:all); |
|
5
|
|
|
|
|
54977
|
|
|
5
|
|
|
|
|
2422
|
|
9
|
|
|
|
|
|
|
|
10
|
5
|
|
|
5
|
|
4804
|
use Data::Dumper::Concise; |
|
5
|
|
|
|
|
60611
|
|
|
5
|
|
|
|
|
6368
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 Name |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Mojito::Page::Parse - turn page source into a page structure |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=cut |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# This is the page source |
19
|
|
|
|
|
|
|
has 'page' => ( |
20
|
|
|
|
|
|
|
is => 'rw', |
21
|
|
|
|
|
|
|
isa => Value, |
22
|
|
|
|
|
|
|
); |
23
|
|
|
|
|
|
|
has 'sections' => ( |
24
|
|
|
|
|
|
|
is => 'ro', |
25
|
|
|
|
|
|
|
isa => ArrayRef[HashRef], |
26
|
|
|
|
|
|
|
lazy => 1, |
27
|
|
|
|
|
|
|
builder => 'build_sections', |
28
|
|
|
|
|
|
|
); |
29
|
|
|
|
|
|
|
has 'page_structure' => ( |
30
|
|
|
|
|
|
|
is => 'rw', |
31
|
|
|
|
|
|
|
isa => HashRef, |
32
|
|
|
|
|
|
|
lazy => 1, |
33
|
|
|
|
|
|
|
builder => 'build_page_structure', |
34
|
|
|
|
|
|
|
); |
35
|
|
|
|
|
|
|
has 'default_format' => ( |
36
|
|
|
|
|
|
|
is => 'rw', |
37
|
|
|
|
|
|
|
isa => Value, |
38
|
|
|
|
|
|
|
lazy => 1, |
39
|
|
|
|
|
|
|
default => sub { 'HTML' }, |
40
|
|
|
|
|
|
|
); |
41
|
|
|
|
|
|
|
has 'created' => ( |
42
|
|
|
|
|
|
|
is => 'ro', |
43
|
|
|
|
|
|
|
isa => Int, |
44
|
|
|
|
|
|
|
); |
45
|
|
|
|
|
|
|
has 'last_modified' => ( |
46
|
|
|
|
|
|
|
is => 'ro', |
47
|
|
|
|
|
|
|
isa => Int, |
48
|
|
|
|
|
|
|
default => sub { time() }, |
49
|
|
|
|
|
|
|
); |
50
|
|
|
|
|
|
|
has 'section_open_regex' => ( |
51
|
|
|
|
|
|
|
is => 'ro', |
52
|
|
|
|
|
|
|
isa => RegexpRef, |
53
|
|
|
|
|
|
|
default => sub { qr/<sx\.[^>]+>/ }, |
54
|
|
|
|
|
|
|
); |
55
|
|
|
|
|
|
|
has 'section_close_regex' => ( |
56
|
|
|
|
|
|
|
is => 'ro', |
57
|
|
|
|
|
|
|
isa => RegexpRef, |
58
|
|
|
|
|
|
|
default => sub { qr(</sx>) }, |
59
|
|
|
|
|
|
|
); |
60
|
|
|
|
|
|
|
has 'debug' => ( |
61
|
|
|
|
|
|
|
is => 'rw', |
62
|
|
|
|
|
|
|
isa => Bool, |
63
|
|
|
|
|
|
|
default => sub { 0 }, |
64
|
|
|
|
|
|
|
); |
65
|
|
|
|
|
|
|
has 'messages' => ( |
66
|
|
|
|
|
|
|
is => 'rw', |
67
|
|
|
|
|
|
|
isa => ArrayRef, |
68
|
|
|
|
|
|
|
default => sub { [] }, |
69
|
|
|
|
|
|
|
); |
70
|
|
|
|
|
|
|
has 'message_string' => ( |
71
|
|
|
|
|
|
|
is => 'ro', |
72
|
|
|
|
|
|
|
isa => Value, |
73
|
|
|
|
|
|
|
lazy => 1, |
74
|
|
|
|
|
|
|
builder => '_build_message_string', |
75
|
|
|
|
|
|
|
); |
76
|
|
|
|
|
|
|
sub _build_message_string { |
77
|
2
|
|
|
2
|
|
2570
|
my ($self) = (shift); |
78
|
2
|
50
|
|
|
|
33
|
return join ', ', @{$self->messages} if $self->messages; |
|
2
|
|
|
|
|
667
|
|
79
|
0
|
|
|
|
|
0
|
return; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head2 has_nested_section |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Test if we have nested sections. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=cut |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
sub has_nested_section { |
89
|
5
|
|
|
5
|
1
|
2089
|
my ($self) = @_; |
90
|
|
|
|
|
|
|
|
91
|
5
|
|
|
|
|
31
|
my $section_open_regex = $self->section_open_regex; |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
#die "Got no page" if !$self->page; |
94
|
5
|
|
|
|
|
74
|
my @stuff_between_section_opens = |
95
|
|
|
|
|
|
|
$self->page =~ m/${section_open_regex}(.*?)${section_open_regex}/si; |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# If when find a section ending tag in the middle of the two consecutive |
98
|
|
|
|
|
|
|
# opening section tags then we know first section has been closed and thus |
99
|
|
|
|
|
|
|
# does NOT contain a nested section. |
100
|
5
|
|
|
|
|
7034
|
foreach my $tweener (@stuff_between_section_opens) { |
101
|
4
|
100
|
|
|
|
24
|
if ( $tweener =~ m/<\/sx>/ ) { |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# The tweener section could cause us to think we're not nested |
104
|
|
|
|
|
|
|
# due to an nested section of the general type (not the class=mc_ type) |
105
|
|
|
|
|
|
|
# In this case we need to count the number of open and closed sections |
106
|
|
|
|
|
|
|
# If they are the same then we dont' have </sec> left over to close the first |
107
|
|
|
|
|
|
|
# and thus we have a nest. |
108
|
2
|
|
|
|
|
10
|
my @opens = $tweener =~ m/(<sx[^>]*>)/sg; |
109
|
2
|
|
|
|
|
29
|
my @closes = $tweener =~ m/(<\/sx>)/sg; |
110
|
2
|
50
|
|
|
|
15
|
if ( scalar @opens == scalar @closes ) { |
111
|
0
|
|
|
|
|
0
|
return 1; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
else { |
115
|
2
|
|
|
|
|
13
|
return 1; |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
3
|
|
|
|
|
19
|
return 0; |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head2 add_implicit_sections |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Add implicit sections to assist the building of the page_struct. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=cut |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub add_implicit_sections { |
129
|
8
|
|
|
8
|
1
|
2899
|
my ($self) = @_; |
130
|
|
|
|
|
|
|
|
131
|
8
|
|
|
|
|
209
|
my $page = $self->page; |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Add implicit sections in between explicit sections (if needed) |
134
|
8
|
100
|
|
|
|
99
|
if ( $page =~ m/<\/sx>(?!\s*<sx\.).*?<sx\./si ) { |
135
|
5
|
|
|
|
|
61
|
$page =~ s/<\/sx>(?!\s*<sx\.)(.*?)<sx\./<\/sx>\n<sx.Implicit>$1<\/sx>\n<sx./sig; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# Add implicit section at the beginning (if needed) |
139
|
8
|
|
|
|
|
58
|
$page =~ s/(?<!<sx\.\w)(<sx\.\w)/<\/sx>\n$1/si; |
140
|
8
|
|
|
|
|
27
|
$page = "\n<sx.Implicit>\n${page}"; |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Add implicit section at the end (if needed) |
143
|
8
|
|
|
|
|
68
|
$page =~ s/(<\/sx>)(?!.*<\/sx>)/$1\n<sx.Implicit>/si; |
144
|
8
|
|
|
|
|
17
|
$page .= '</sx>'; |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# cut empty implicits |
147
|
8
|
|
|
|
|
43
|
$page =~ s/<sx\.Implicit>\s*<\/sx>//sig; |
148
|
|
|
|
|
|
|
|
149
|
8
|
50
|
|
|
|
174
|
if ( $self->debug ) { |
150
|
0
|
|
|
|
|
0
|
say "PREMATCH: ", ${^PREMATCH}; |
151
|
0
|
|
|
|
|
0
|
say "MATCH: ${^MATCH}"; |
152
|
0
|
|
|
|
|
0
|
say "POSTMATCH: ", ${^POSTMATCH}; |
153
|
0
|
|
|
|
|
0
|
say "page: $page"; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
8
|
|
|
|
|
25596
|
return $page; |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 parse_sections |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Extract section class and content from the page. |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=cut |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub parse_sections { |
166
|
3
|
|
|
3
|
1
|
36
|
my ( $self, $page ) = @_; |
167
|
|
|
|
|
|
|
|
168
|
3
|
|
|
|
|
6
|
my $sections; |
169
|
3
|
|
|
|
|
46
|
my @sections = $page =~ m/(<sx\.[^>]+>.*?<\/sx>)/sig; |
170
|
3
|
|
|
|
|
10
|
foreach my $sx (@sections) { |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# Extract class and content |
173
|
11
|
|
|
|
|
64
|
my ( $class, $content ) = $sx =~ m/<sx\.([^>]+)>(.*)?<\/sx>/si; |
174
|
11
|
|
|
|
|
20
|
push @{$sections}, { class => $class, content => $content }; |
|
11
|
|
|
|
|
54
|
|
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
3
|
|
|
|
|
86
|
return $sections; |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 build_sections |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Wrap up the getting of sections process. |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=cut |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub build_sections { |
187
|
3
|
|
|
3
|
1
|
1975
|
my $self = shift; |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# Deal with nested sections gracefully by adding a message |
190
|
|
|
|
|
|
|
# to bubble up to the view and display in the #message_area. |
191
|
3
|
100
|
|
|
|
17
|
if ( $self->has_nested_section ) { |
192
|
1
|
|
|
|
|
3
|
$self->messages( [ @{$self->messages}, 'haz nested sexes'] ); |
|
1
|
|
|
|
|
5
|
|
193
|
|
|
|
|
|
|
} |
194
|
3
|
|
|
|
|
734
|
my $page = $self->add_implicit_sections; |
195
|
|
|
|
|
|
|
|
196
|
3
|
|
|
|
|
40
|
return $self->parse_sections($page); |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=head2 build_page_structure |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
It's just an href that we'll persist as a Mongo document. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
sub build_page_structure { |
206
|
1
|
|
|
1
|
1
|
769
|
my $self = shift; |
207
|
|
|
|
|
|
|
|
208
|
1
|
|
|
|
|
6
|
my $return = { |
209
|
|
|
|
|
|
|
sections => $self->sections, |
210
|
|
|
|
|
|
|
default_format => $self->default_format, |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# created => '1234567890', |
213
|
|
|
|
|
|
|
# last_modified => time(), |
214
|
|
|
|
|
|
|
page_source => $self->page, |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# Set the message last to pick any builder message above |
217
|
|
|
|
|
|
|
# e.g. ->sections can set a 'nested sections' message. |
218
|
|
|
|
|
|
|
message => $self->message_string, |
219
|
|
|
|
|
|
|
}; |
220
|
1
|
|
|
|
|
89
|
return $return; |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
1 |