File Coverage

lib/App/Followme/Web.pm

Criterion	Covered	Total	%
statement	180	185	97.3
branch	80	94	85.1
condition	8	12	66.6
subroutine	19	19	100.0
pod	9	13	69.2
total	296	323	91.6

line	stmt	bran	cond	sub	pod	time	code
1							package App::Followme::Web;
2
3	20			20		2514	use 5.008005;
	20					115
4	20			20		116	use strict;
	20					46
	20					538
5	20			20		108	use warnings;
	20					56
	20					679
6	20			20		698	use integer;
	20					63
	20					139
7	20			20		1179	use lib '../..';
	20					761
	20					126
8
9	20			20		2965	use Carp;
	20					66
	20					47979
10
11							require Exporter;
12							our @ISA = qw(Exporter);
13							our @EXPORT = qw(web_has_variables web_match_tags web_parse_sections
14							web_parse_tag web_only_tags web_only_text web_split_at_tags
15							web_substitute_sections web_substitute_tags
16							web_titled_sections);
17
18							our $VERSION = "2.01";
19
20							#----------------------------------------------------------------------
21							# Extract a list of parsed tags from a text
22
23							sub web_extract_tags {
24	230			230	0	388	my ($text) = @_;
25
26	230					437	my @tokens = web_split_at_tags($text);
27	230					515	return web_only_tags(@tokens);
28							}
29
30							#----------------------------------------------------------------------
31							# Return true if any of a list of variables is present in a template
32
33							sub web_has_variables {
34	16			16	1	3846	my ($text, @search_variables) = @_;
35
36	16					327	my @template_variables = $text =~ /([\$\@]\w+)/g;
37	16					55	my %template_variables = map {$_ => 1} @template_variables;
	175					361
38
39	16					61	for my $variable (@search_variables) {
40	29	100				92	return 1 if $template_variables{$variable};
41							}
42
43	14					84	return 0;
44							}
45
46							#----------------------------------------------------------------------
47							# Is the token an html tag?
48
49							sub web_is_tag {
50	8368			8368	0	18006	my ($token) = @_;
51	8368	100				24100	return $token =~ /^<[^!]/ ? 1 : 0;
52							}
53
54							#----------------------------------------------------------------------
55							# Call a function after a set of tags is matched
56
57							sub web_match_tags {
58	191			191	1	2824	my ($pattern, $text, $matcher, $metadata, $global) = @_;
59
60	191					253	my @tokens;
61	191					261	my $in = 0;
62	191					252	my $match_count = 0;
63	191					397	my @matches = web_extract_tags($pattern);
64
65	191					379	foreach my $token (web_split_at_tags($text)) {
66	1895	100				2838	if (web_is_tag($token)) {
67	943					1518	my $tag = web_parse_tag($token);
68	943	100				1817	if (web_same_tag($matches[$in], $tag)) {
69	407					545	$in += 1;
70	407	100				986	if ($in >= @matches) {
71	270					505	push(@tokens, $token);
72	270					813	$matcher->($metadata, @tokens);
73	270					464	$match_count += 1;
74	270					432	@tokens = ();
75	270					371	$in = 0;
76
77	270	100				774	last unless $global;
78							}
79							}
80							}
81	1758	100				3750	push(@tokens, $token) if $in > 0;
82							}
83
84	191					895	return $match_count;
85							}
86
87							#----------------------------------------------------------------------
88							# Extract a list of parsed tags from a set of tokens
89
90							sub web_only_tags {
91	364			364	1	730	my (@tokens) = @_;
92
93	364					478	my @tags;
94	364					608	foreach my $token (@tokens) {
95	546	100				943	if (web_is_tag($token)) {
96	541					1084	push(@tags, web_parse_tag($token));
97							}
98							}
99
100	364					965	return @tags;
101							}
102
103							#----------------------------------------------------------------------
104							# Parse a text string from a set of tokens
105
106							sub web_only_text {
107	195			195	1	1333	my (@tokens) = @_;
108
109	195					292	my @text;
110	195					351	foreach my $token (@tokens) {
111	617	100				948	push(@text, $token) unless web_is_tag($token);
112							}
113
114	195					540	my $text = join(' ', @text);
115	195					1206	$text =~ s/\s+/ /g;
116	195					489	$text =~ s/^\s+//;
117	195					602	$text =~ s/\s+$//;
118
119	195					606	return $text;
120							}
121
122							#----------------------------------------------------------------------
123							# Extract sections from file, store in hash
124
125							sub web_parse_sections {
126	50			50	1	1743	my ($text) = @_;
127
128	50					92	my $name;
129							my %section;
130
131							# Extract sections from input
132
133	50					858	my @tokens = split (/(<!--\s(?:section\|endsection)\s+.?-->)/, $text);
134
135	50					155	foreach my $token (@tokens) {
136	622	100				2152	if ($token =~ /^<!--\ssection\s+(\w+).?-->/) {
		100
		100
137	143	50				275	if (defined $name) {
138	0					0	die "Nested sections in input: $token\n";
139							}
140	143					328	$name = $1;
141
142							} elsif ($token =~ /^<!--\sendsection\s+(\w+).?-->/) {
143	143	50				365	if ($name ne $1) {
144	0					0	die "Nested sections in input: $token\n";
145							}
146	143					231	undef $name;
147
148							} elsif (defined $name) {
149	143					416	$section{$name} = $token;
150							}
151							}
152
153	50	50				124	die "Unmatched section (<!-- section $name -->)\n" if $name;
154	50					205	return \%section;
155							}
156
157							#----------------------------------------------------------------------
158							# Parse a web tag into attributes and their values
159
160							sub web_parse_tag {
161	2767			2767	1	9965	my ($tag) = @_;
162	2767	50				3967	croak "Not a tag: ($tag)" unless web_is_tag($tag);
163
164	2767					4118	my @pattern;
165	2767					3524	my $side = 0;
166	2767					4355	my @pair = (undef, undef);
167
168	2767					9675	while ($tag =~ /(=\|"[^"]*"\|[^<>="\s]+)/gs) {
169	6056					11208	my $token = $1;
170
171	6056	100				12826	if ($token eq '=') {
		100
172	978					1197	$side = 1;
173	978					1307	undef $token;
174							} elsif ($token =~ /^"/) {
175	884					2637	$token =~ s/"//g;
176							}
177
178	6056	100				11866	if (defined $token) {
179	5078	100				8447	if (defined $pair[$side]) {
180	1333	50				2155	if (defined $pair[0]) {
181	1333					2235	push(@pattern, @pair);
182	1333					2424	@pair = (undef, undef);
183	1333					1800	$side = 0;
184							}
185							}
186
187	5078	100				9557	$token = lc($token) if $side == 0;
188	5078					16330	$pair[$side] = $token;
189							}
190							}
191
192	2767	50				6575	push(@pattern, @pair) if defined $pair[0];
193
194	2767	50	33			8748	if (@pattern < 2 \|\| defined $pattern[1]) {
195	0					0	unshift(@pattern, undef, undef);
196							}
197
198	2767					4999	$tag = shift @pattern;
199	2767					3504	shift @pattern;
200
201	2767					6774	my %pattern = ('_', $tag, @pattern);
202	2767					6931	return \%pattern;
203							}
204
205							#----------------------------------------------------------------------
206							# Test if two parsed tags have the same name and attributes.
207
208							sub web_same_tag {
209	2221			2221	0	4321	my ($match, $tag) = @_;
210
211	2221	50				4260	croak "Match not parsed: $match" unless ref $match;
212	2221	50				3696	croak "Tag not parsed: $tag" unless ref $tag;
213
214	2221					4720	foreach my $name (keys %$match) {
215	2500	100				4625	return 0 unless exists $tag->{$name};
216	2345					3288	my $value = $match->{$name};
217	2345	100	100			9329	return 0 if $value ne '*' && $tag->{$name} ne $value;
218							}
219
220	523					1179	return 1;
221							}
222
223							#----------------------------------------------------------------------
224							# Return a list of tokens, split at tag boundaries
225
226							sub web_split_at_tags {
227	468			468	0	2302	my ($text) = @_;
228
229	468					620	my @tokens;
230	468	100				892	if ($text) {
231	464					7049	@tokens = split(/(<!--.?-->\|<[^">](?:"[^"]")[^>]*>)/s, $text);
232	464					992	@tokens = grep {length} @tokens;
	6726					10825
233							}
234
235	468					1923	return @tokens;
236							}
237
238							#----------------------------------------------------------------------
239							# Substitue comment delimeted sections for same blocks in template
240
241							sub web_substitute_sections {
242	64			64	1	1692	my ($text, $section) = @_;
243
244	64					126	my $name;
245							my @output;
246
247	64					1002	my @tokens = split (/(<!--\s(?:section\|endsection)\s+.?-->)/, $text);
248
249	64					172	foreach my $token (@tokens) {
250	744	100				2536	if ($token =~ /^<!--\ssection\s+(\w+).?-->/) {
		100
		100
251	170	50				328	if (defined $name) {
252	0					0	die "Nested sections in template: $name\n";
253							}
254
255	170					321	$name = $1;
256	170					302	push(@output, $token);
257
258							} elsif ($token =~ /^\s<!--\sendsection\s+(\w+).*?-->/) {
259	170	50				433	if ($name ne $1) {
260	0					0	die "Nested sections in template: $name\n";
261							}
262
263	170					237	undef $name;
264	170					326	push(@output, $token);
265
266							} elsif (defined $name) {
267	170		66			589	$section->{$name} \|\|= $token;
268	170					312	push(@output, $section->{$name});
269
270							} else {
271	234					442	push(@output, $token);
272							}
273							}
274
275	64					522	return join('', @output);
276							}
277
278							#----------------------------------------------------------------------
279							# Call a function after a set of tags is matched to generate substitute
280
281							sub web_substitute_tags {
282	36			36	1	1950	my ($pattern, $text, $substituter, $output, $global) = @_;
283
284	36					57	my @tokens;
285							my @all_tokens;
286
287	36					47	my $in = 0;
288	36	50				80	my $match_count = $global ? 99999 : 1;
289	36					65	my @matches = web_extract_tags($pattern);
290
291	36					77	foreach my $token (web_split_at_tags($text)) {
292	1060	100				1565	if (web_is_tag($token)) {
293	502					826	my $tag = web_parse_tag($token);
294	502	100				955	if (web_same_tag($matches[$in], $tag)) {
295	58	50				112	$in += 1 if $match_count;
296	58	100				132	if ($in >= @matches) {
297	35					59	push(@tokens, $token);
298	35					92	$token = $substituter->($output, @tokens);
299	35					65	$match_count -= 1;
300	35					60	@tokens = ();
301	35					75	$in = 0;
302							}
303							}
304							}
305
306	1060	100				1836	if ($in > 0) {
307	46					85	push(@tokens, $token);
308							} else {
309	1014					1819	push(@all_tokens, $token);
310							}
311							}
312
313	36	50				137	push(@all_tokens, @tokens) if @tokens;
314	36					383	return join('', @all_tokens);
315							}
316
317							#----------------------------------------------------------------------
318							# Divide html into sections based on contents of header tags
319
320							sub web_titled_sections {
321	3			3	1	9	my ($pattern, $text, $titler) = @_;
322
323	3					13	my $title;
324							my @tokens;
325	3					0	my %section;
326
327	3					6	my $in = 0;
328	3					10	my @matches = web_extract_tags($pattern);
329
330	3					8	foreach my $token (web_split_at_tags($text)) {
331	1480	100				2305	if (web_is_tag($token)) {
332	772					1186	my $tag = web_parse_tag($token);
333	772	100				1409	if (web_same_tag($matches[$in], $tag)) {
334	56					71	$in += 1;
335	56	100				135	if ($in >= @matches) {
336	28					48	push(@tokens, $token);
337	28					77	$title = $titler->(@tokens);
338	28					55	@tokens = ();
339	28					39	undef $token;
340	28					57	$in = 0;
341							}
342							}
343							}
344
345	1480	100				2785	if ($in > 0) {
346	56					81	push(@tokens, $token);
347	56					81	undef $title;
348	56					79	undef $token;
349							}
350
351	1480	100	66			3844	if (defined $token && defined $title) {
352	1396	100				2822	$section{$title} = [] unless exists $section{$title};
353	1396					1694	push(@{$section{$title}}, $token);
	1396					3026
354							}
355							}
356
357	3					125	foreach my $title (keys %section) {
358	28					46	$section{$title} = join('', @{$section{$title}});
	28					237
359	28					397	$section{$title} =~ s/^\s+//;
360							}
361
362	3					20	return \%section;
363							}
364
365							1;
366
367							=pod
368
369							=encoding utf-8
370
371							=head1 NAME
372
373							App::Followme::Web - Functions to parse html
374
375							=head1 SYNOPSIS
376
377							use App::Followme::Web;
378
379							=head1 DESCRIPTION
380
381							This module contains the subroutines followme uses to parse html. The code
382							is placed in a separate module because it is used by more than one other
383							module.
384
385							=head1 SUBROUTINES
386
387							=over 4
388
389							=item $flag = web_has_variables($text, @search_variables);
390
391							Return true if any of a list of variables is present in a template. The
392							variable names must include the sigil.
393
394							=item $match_count = web_match_tags($pattern, $text, $matcher,
395							$metadata, $global);
396
397							Match a tag pattern ($pattern) in a text ($text), pass the matched text to a
398							function ($matcher), which processes it and places it in a hash ($metadata).
399							Repeat this process for the entire text if the flag ($global) is set.
400
401							=item $section = web_parse_sections($text);
402
403							Place the text inside section tags into a hash indexed by the section names.
404
405							=item $parsed_tag = web_parse_tag($tag);
406
407							Parse a single html tag into a hash indexed by attribute name.
408
409							=item $tags = web_only_tags(@tokens);
410
411							Extract the tags from a text that has been split into tokens.
412
413							=item $text = web_only_text(@tokens);
414
415							Extract the text from a text that has been split into tokens.
416
417							=item $text = web_substitute_sections($text, $section);
418
419							Replace sections in a text by sections of the same name stored in a hash.
420
421							=item $text = web_substitute_tags($pattern, $text, $substituter,
422							$output, $global);
423
424							Match a tag pattern ($pattern) in a text ($text), pass the matched text to a
425							function ($substituter), which processes it and places it in a hash ($output) as
426							well as replaces the matched text. Repeat this process for the entire text if
427							the flag ($global) is set. Return the text with the substitutions.
428
429							=item $sections = web_titled_sections($pattern, $text, $titler);
430
431							Return a hash of sections from html, where the name of each section is derived
432							from the header tags that precede it. The title is built by calling the
433							subroutine passed in as $titler. It is passed the set of tags matched by
434							$pattern. A hash of sections that preceded by a matching set of header tags is
435							returned.
436
437							=back
438
439							=head1 LICENSE
440
441							Copyright (C) Bernie Simon.
442
443							This library is free software; you can redistribute it and/or modify
444							it under the same terms as Perl itself.
445
446							=head1 AUTHOR
447
448							Bernie Simon E<lt>bernie.simon@gmail.comE<gt>
449
450							=cut