File Coverage

lib/App/Followme/Web.pm

Criterion	Covered	Total	%
statement	180	185	97.3
branch	80	94	85.1
condition	8	12	66.6
subroutine	19	19	100.0
pod	9	13	69.2
total	296	323	91.6

line	stmt	bran	cond	sub	pod	time	code
1							package App::Followme::Web;
2
3	20			20		2611	use 5.008005;
	20					98
4	20			20		185	use strict;
	20					41
	20					556
5	20			20		108	use warnings;
	20					51
	20					636
6	20			20		627	use integer;
	20					46
	20					144
7	20			20		1056	use lib '../..';
	20					745
	20					125
8
9	20			20		2801	use Carp;
	20					63
	20					46143
10
11							require Exporter;
12							our @ISA = qw(Exporter);
13							our @EXPORT = qw(web_has_variables web_match_tags web_parse_sections
14							web_parse_tag web_only_tags web_only_text web_split_at_tags
15							web_substitute_sections web_substitute_tags
16							web_titled_sections);
17
18							our $VERSION = "2.02";
19
20							#----------------------------------------------------------------------
21							# Extract a list of parsed tags from a text
22
23							sub web_extract_tags {
24	260			260	0	417	my ($text) = @_;
25
26	260					484	my @tokens = web_split_at_tags($text);
27	260					589	return web_only_tags(@tokens);
28							}
29
30							#----------------------------------------------------------------------
31							# Return true if any of a list of variables is present in a template
32
33							sub web_has_variables {
34	16			16	1	3028	my ($text, @search_variables) = @_;
35
36	16					341	my @template_variables = $text =~ /([\$\@]\w+)/g;
37	16					56	my %template_variables = map {$_ => 1} @template_variables;
	175					353
38
39	16					61	for my $variable (@search_variables) {
40	29	100				92	return 1 if $template_variables{$variable};
41							}
42
43	14					87	return 0;
44							}
45
46							#----------------------------------------------------------------------
47							# Is the token an html tag?
48
49							sub web_is_tag {
50	9028			9028	0	18253	my ($token) = @_;
51	9028	100				25968	return $token =~ /^<[^!]/ ? 1 : 0;
52							}
53
54							#----------------------------------------------------------------------
55							# Call a function after a set of tags is matched
56
57							sub web_match_tags {
58	221			221	1	2373	my ($pattern, $text, $matcher, $metadata, $global) = @_;
59
60	221					316	my @tokens;
61	221					308	my $in = 0;
62	221					286	my $match_count = 0;
63	221					414	my @matches = web_extract_tags($pattern);
64
65	221					425	foreach my $token (web_split_at_tags($text)) {
66	2175	100				3293	if (web_is_tag($token)) {
67	1083					1762	my $tag = web_parse_tag($token);
68	1083	100				2074	if (web_same_tag($matches[$in], $tag)) {
69	487					703	$in += 1;
70	487	100				1029	if ($in >= @matches) {
71	330					552	push(@tokens, $token);
72	330					933	$matcher->($metadata, @tokens);
73	330					480	$match_count += 1;
74	330					513	@tokens = ();
75	330					457	$in = 0;
76
77	330	100				878	last unless $global;
78							}
79							}
80							}
81	2018	100				4479	push(@tokens, $token) if $in > 0;
82							}
83
84	221					1065	return $match_count;
85							}
86
87							#----------------------------------------------------------------------
88							# Extract a list of parsed tags from a set of tokens
89
90							sub web_only_tags {
91	434			434	1	848	my (@tokens) = @_;
92
93	434					582	my @tags;
94	434					762	foreach my $token (@tokens) {
95	636	100				1066	if (web_is_tag($token)) {
96	631					1175	push(@tags, web_parse_tag($token));
97							}
98							}
99
100	434					1136	return @tags;
101							}
102
103							#----------------------------------------------------------------------
104							# Parse a text string from a set of tokens
105
106							sub web_only_text {
107	215			215	1	1180	my (@tokens) = @_;
108
109	215					272	my @text;
110	215					354	foreach my $token (@tokens) {
111	677	100				1073	push(@text, $token) unless web_is_tag($token);
112							}
113
114	215					621	my $text = join(' ', @text);
115	215					1242	$text =~ s/\s+/ /g;
116	215					526	$text =~ s/^\s+//;
117	215					672	$text =~ s/\s+$//;
118
119	215					664	return $text;
120							}
121
122							#----------------------------------------------------------------------
123							# Extract sections from file, store in hash
124
125							sub web_parse_sections {
126	58			58	1	1379	my ($text) = @_;
127
128	58					98	my $name;
129							my %section;
130
131							# Extract sections from input
132
133	58					967	my @tokens = split (/(<!--\s(?:section\|endsection)\s+.?-->)/, $text);
134
135	58					172	foreach my $token (@tokens) {
136	730	100				2449	if ($token =~ /^<!--\ssection\s+(\w+).?-->/) {
		100
		100
137	168	50				323	if (defined $name) {
138	0					0	die "Nested sections in input: $token\n";
139							}
140	168					379	$name = $1;
141
142							} elsif ($token =~ /^<!--\sendsection\s+(\w+).?-->/) {
143	168	50				411	if ($name ne $1) {
144	0					0	die "Nested sections in input: $token\n";
145							}
146	168					271	undef $name;
147
148							} elsif (defined $name) {
149	168					412	$section{$name} = $token;
150							}
151							}
152
153	58	50				133	die "Unmatched section (<!-- section $name -->)\n" if $name;
154	58					268	return \%section;
155							}
156
157							#----------------------------------------------------------------------
158							# Parse a web tag into attributes and their values
159
160							sub web_parse_tag {
161	2997			2997	1	9224	my ($tag) = @_;
162	2997	50				4301	croak "Not a tag: ($tag)" unless web_is_tag($tag);
163
164	2997					4309	my @pattern;
165	2997					3675	my $side = 0;
166	2997					4642	my @pair = (undef, undef);
167
168	2997					10269	while ($tag =~ /(=\|"[^"]*"\|[^<>="\s]+)/gs) {
169	6986					12692	my $token = $1;
170
171	6986	100				14925	if ($token eq '=') {
		100
172	1178					1461	$side = 1;
173	1178					1539	undef $token;
174							} elsif ($token =~ /^"/) {
175	1064					3086	$token =~ s/"//g;
176							}
177
178	6986	100				13397	if (defined $token) {
179	5808	100				9603	if (defined $pair[$side]) {
180	1633	50				2640	if (defined $pair[0]) {
181	1633					2912	push(@pattern, @pair);
182	1633					2762	@pair = (undef, undef);
183	1633					2175	$side = 0;
184							}
185							}
186
187	5808	100				10788	$token = lc($token) if $side == 0;
188	5808					18530	$pair[$side] = $token;
189							}
190							}
191
192	2997	50				7273	push(@pattern, @pair) if defined $pair[0];
193
194	2997	50	33			8949	if (@pattern < 2 \|\| defined $pattern[1]) {
195	0					0	unshift(@pattern, undef, undef);
196							}
197
198	2997					5223	$tag = shift @pattern;
199	2997					3886	shift @pattern;
200
201	2997					7965	my %pattern = ('_', $tag, @pattern);
202	2997					7501	return \%pattern;
203							}
204
205							#----------------------------------------------------------------------
206							# Test if two parsed tags have the same name and attributes.
207
208							sub web_same_tag {
209	2361			2361	0	4379	my ($match, $tag) = @_;
210
211	2361	50				4425	croak "Match not parsed: $match" unless ref $match;
212	2361	50				3776	croak "Tag not parsed: $tag" unless ref $tag;
213
214	2361					5175	foreach my $name (keys %$match) {
215	2721	100				4941	return 0 unless exists $tag->{$name};
216	2543					3545	my $value = $match->{$name};
217	2543	100	100			9776	return 0 if $value ne '*' && $tag->{$name} ne $value;
218							}
219
220	603					1343	return 1;
221							}
222
223							#----------------------------------------------------------------------
224							# Return a list of tokens, split at tag boundaries
225
226							sub web_split_at_tags {
227	528			528	0	2059	my ($text) = @_;
228
229	528					693	my @tokens;
230	528	100				970	if ($text) {
231	524					7266	@tokens = split(/(<!--.?-->\|<[^">](?:"[^"]")[^>]*>)/s, $text);
232	524					1147	@tokens = grep {length} @tokens;
	7216					11714
233							}
234
235	528					2138	return @tokens;
236							}
237
238							#----------------------------------------------------------------------
239							# Substitue comment delimeted sections for same blocks in template
240
241							sub web_substitute_sections {
242	67			67	1	1474	my ($text, $section) = @_;
243
244	67					132	my $name;
245							my @output;
246
247	67					1007	my @tokens = split (/(<!--\s(?:section\|endsection)\s+.?-->)/, $text);
248
249	67					182	foreach my $token (@tokens) {
250	779	100				2622	if ($token =~ /^<!--\ssection\s+(\w+).?-->/) {
		100
		100
251	178	50				370	if (defined $name) {
252	0					0	die "Nested sections in template: $name\n";
253							}
254
255	178					357	$name = $1;
256	178					306	push(@output, $token);
257
258							} elsif ($token =~ /^\s<!--\sendsection\s+(\w+).*?-->/) {
259	178	50				499	if ($name ne $1) {
260	0					0	die "Nested sections in template: $name\n";
261							}
262
263	178					259	undef $name;
264	178					302	push(@output, $token);
265
266							} elsif (defined $name) {
267	178		66			607	$section->{$name} \|\|= $token;
268	178					316	push(@output, $section->{$name});
269
270							} else {
271	245					467	push(@output, $token);
272							}
273							}
274
275	67					530	return join('', @output);
276							}
277
278							#----------------------------------------------------------------------
279							# Call a function after a set of tags is matched to generate substitute
280
281							sub web_substitute_tags {
282	36			36	1	1684	my ($pattern, $text, $substituter, $output, $global) = @_;
283
284	36					55	my @tokens;
285							my @all_tokens;
286
287	36					56	my $in = 0;
288	36	50				71	my $match_count = $global ? 99999 : 1;
289	36					90	my @matches = web_extract_tags($pattern);
290
291	36					72	foreach my $token (web_split_at_tags($text)) {
292	1060	100				1543	if (web_is_tag($token)) {
293	502					742	my $tag = web_parse_tag($token);
294	502	100				887	if (web_same_tag($matches[$in], $tag)) {
295	58	50				118	$in += 1 if $match_count;
296	58	100				148	if ($in >= @matches) {
297	35					54	push(@tokens, $token);
298	35					111	$token = $substituter->($output, @tokens);
299	35					73	$match_count -= 1;
300	35					60	@tokens = ();
301	35					76	$in = 0;
302							}
303							}
304							}
305
306	1060	100				1805	if ($in > 0) {
307	46					135	push(@tokens, $token);
308							} else {
309	1014					1833	push(@all_tokens, $token);
310							}
311							}
312
313	36	50				134	push(@all_tokens, @tokens) if @tokens;
314	36					368	return join('', @all_tokens);
315							}
316
317							#----------------------------------------------------------------------
318							# Divide html into sections based on contents of header tags
319
320							sub web_titled_sections {
321	3			3	1	10	my ($pattern, $text, $titler) = @_;
322
323	3					9	my $title;
324							my @tokens;
325	3					0	my %section;
326
327	3					6	my $in = 0;
328	3					12	my @matches = web_extract_tags($pattern);
329
330	3					7	foreach my $token (web_split_at_tags($text)) {
331	1480	100				2205	if (web_is_tag($token)) {
332	772					1198	my $tag = web_parse_tag($token);
333	772	100				1419	if (web_same_tag($matches[$in], $tag)) {
334	56					76	$in += 1;
335	56	100				128	if ($in >= @matches) {
336	28					52	push(@tokens, $token);
337	28					83	$title = $titler->(@tokens);
338	28					56	@tokens = ();
339	28					46	undef $token;
340	28					57	$in = 0;
341							}
342							}
343							}
344
345	1480	100				2648	if ($in > 0) {
346	56					76	push(@tokens, $token);
347	56					72	undef $title;
348	56					77	undef $token;
349							}
350
351	1480	100	66			3696	if (defined $token && defined $title) {
352	1396	100				2620	$section{$title} = [] unless exists $section{$title};
353	1396					1606	push(@{$section{$title}}, $token);
	1396					3291
354							}
355							}
356
357	3					108	foreach my $title (keys %section) {
358	28					42	$section{$title} = join('', @{$section{$title}});
	28					252
359	28					415	$section{$title} =~ s/^\s+//;
360							}
361
362	3					23	return \%section;
363							}
364
365							1;
366
367							=pod
368
369							=encoding utf-8
370
371							=head1 NAME
372
373							App::Followme::Web - Functions to parse html
374
375							=head1 SYNOPSIS
376
377							use App::Followme::Web;
378
379							=head1 DESCRIPTION
380
381							This module contains the subroutines followme uses to parse html. The code
382							is placed in a separate module because it is used by more than one other
383							module.
384
385							=head1 SUBROUTINES
386
387							=over 4
388
389							=item $flag = web_has_variables($text, @search_variables);
390
391							Return true if any of a list of variables is present in a template. The
392							variable names must include the sigil.
393
394							=item $match_count = web_match_tags($pattern, $text, $matcher,
395							$metadata, $global);
396
397							Match a tag pattern ($pattern) in a text ($text), pass the matched text to a
398							function ($matcher), which processes it and places it in a hash ($metadata).
399							Repeat this process for the entire text if the flag ($global) is set.
400
401							=item $section = web_parse_sections($text);
402
403							Place the text inside section tags into a hash indexed by the section names.
404
405							=item $parsed_tag = web_parse_tag($tag);
406
407							Parse a single html tag into a hash indexed by attribute name.
408
409							=item $tags = web_only_tags(@tokens);
410
411							Extract the tags from a text that has been split into tokens.
412
413							=item $text = web_only_text(@tokens);
414
415							Extract the text from a text that has been split into tokens.
416
417							=item $text = web_substitute_sections($text, $section);
418
419							Replace sections in a text by sections of the same name stored in a hash.
420
421							=item $text = web_substitute_tags($pattern, $text, $substituter,
422							$output, $global);
423
424							Match a tag pattern ($pattern) in a text ($text), pass the matched text to a
425							function ($substituter), which processes it and places it in a hash ($output) as
426							well as replaces the matched text. Repeat this process for the entire text if
427							the flag ($global) is set. Return the text with the substitutions.
428
429							=item $sections = web_titled_sections($pattern, $text, $titler);
430
431							Return a hash of sections from html, where the name of each section is derived
432							from the header tags that precede it. The title is built by calling the
433							subroutine passed in as $titler. It is passed the set of tags matched by
434							$pattern. A hash of sections that preceded by a matching set of header tags is
435							returned.
436
437							=back
438
439							=head1 LICENSE
440
441							Copyright (C) Bernie Simon.
442
443							This library is free software; you can redistribute it and/or modify
444							it under the same terms as Perl itself.
445
446							=head1 AUTHOR
447
448							Bernie Simon E<lt>bernie.simon@gmail.comE<gt>
449
450							=cut