File Coverage

lib/Document/Parser.pm

Criterion	Covered	Total	%
statement	6	88	6.8
branch	0	40	0.0
condition	0	33	0.0
subroutine	2	14	14.2
pod	0	12	0.0
total	8	187	4.2

line	stmt	bran	cond	sub	pod	time	code
1							## Base Class for Creating Text Format Parsers
2							#
3							# Document::Parser is a base class that you can use to easily generate a
4							# parser for text document markups (like Wiki or POD markups).
5							#
6							# See this parser as an example:
7							#
8							# http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Parser.pm
9							#
10							# And this module for usage of the parser:
11							#
12							# http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Formatter2.pm
13							#
14							# Copyright (c) 2007. Ingy döt Net. All rights reserved.
15							#
16							# Licensed under the same terms as Perl itself.
17							##
18							package Document::Parser;
19	1			1		367	use strict;
	1					1
	1					27
20	1			1		4	use warnings;
	1					2
	1					1120
21
22							## Synopsis:
23							#
24							# package MyParser;
25							# use base 'Document::Parser';
26							#
27							# sub create_grammar {
28							# return {
29							# # ... define a grammar hash here ...
30							# };
31							# }
32							##
33
34							##------------------------------------------------------------------------------
35							# Parser object constructor/initializer
36							##------------------------------------------------------------------------------
37							sub new {
38	0			0	0		my $class = shift;
39	0		0				return bless { @_ }, ref($class) \|\| $class;
40							}
41
42							##------------------------------------------------------------------------------
43							# $parsed = $parser->parse($wikitext);
44							##------------------------------------------------------------------------------
45							sub parse {
46	0			0	0		my $self = shift;
47	0		0				$self->{input} \|\|= shift;
48	0		0				$self->{grammar} \|\|= $self->set_grammar;
49	0		0				$self->{receiver} \|\|= $self->set_receiver;
50	0						$self->{receiver}->init;
51	0						$self->parse_blocks('top');
52	0						return $self->{receiver}->content;
53							}
54
55							##
56							# Call `set_receiver` to reset the receiver for a new parse.
57							sub set_receiver {
58	0			0	0		my $self = shift;
59	0		0				$self->{receiver} = shift \|\| $self->create_receiver;
60							}
61
62							sub set_grammar {
63	0			0	0		my $self = shift;
64	0		0				$self->{grammar} = shift \|\| $self->create_grammar;
65							}
66
67							#-------------------------------------------------------------------------------
68							# Parse input into a series of blocks. With each iteration the parser must
69							# match a block at position 0 of the text, and remove that block from the
70							# input reparse it further. This continues until there is no input left.
71							#-------------------------------------------------------------------------------
72							sub parse_blocks {
73	0			0	0		my $self = shift;
74	0						my $container_type = shift;
75	0						my $types = $self->{grammar}{$container_type}{blocks};
76	0						while (my $length = length $self->{input}) {
77	0						for my $type (@$types) {
78	0	0					my $matched = $self->find_match(matched_block => $type) or next;
79	0						substr($self->{input}, 0, $matched->{end}, '');
80	0						$self->handle_match($type, $matched);
81	0						last;
82							}
83	0	0					die $self->reduction_error
84							unless length($self->{input}) < $length;
85							}
86	0						return;
87							}
88
89							#-------------------------------------------------------------------------------
90							# This code parses a chunk into interleaved pieces of plain text and
91							# phrases. It repeatedly tries to match every possible phrase and
92							# then takes the match closest to the start. Everything before a
93							# match is written as text. Matched phrases are subparsed according
94							# to their rules. This continues until the input is all eaten.
95							#-------------------------------------------------------------------------------
96							sub parse_phrases {
97	0			0	0		my $self = shift;
98	0						my $container_type = shift;
99	0						my $types = $self->{grammar}{$container_type}{phrases};
100	0						while (length $self->{input}) {
101	0						my $match;
102	0						for my $type (@$types) {
103	0	0					my $matched = $self->find_match(matched_phrase => $type) or next;
104	0	0	0				if (not defined $match or $matched->{begin} < $match->{begin}) {
105	0						$match = $matched;
106	0						$match->{type} = $type;
107	0	0					last if $match->{begin} == 0;
108							}
109							}
110	0	0					if (! $match) {
111	0						$self->{receiver}->text_node($self->{input});
112	0						last;
113							}
114	0						my ($begin, $end, $type) = @{$match}{qw(begin end type)};
	0
115	0	0					$self->{receiver}->text_node(substr($self->{input}, 0, $begin))
116							unless $begin == 0;
117	0						substr($self->{input}, 0, $end, '');
118	0						$type = $match->{type};
119	0						$self->handle_match($type, $match);
120							}
121	0						return;
122							}
123
124							sub find_match {
125	0			0	0		my ($self, $matched_func, $type) = @_;
126	0						my $matched;
127	0	0					if (my $regexp = $self->{grammar}{$type}{match}) {
128	0	0					if (ref($regexp) eq 'ARRAY') {
129	0						for my $re (@$regexp) {
130	0	0					if ($self->{input} =~ $re) {
131	0						$matched = $self->$matched_func;
132	0						last;
133							}
134							}
135	0	0					return unless $matched;
136							}
137							else {
138	0	0					return unless $self->{input} =~ $regexp;
139	0						$matched = $self->$matched_func;
140							}
141							}
142							else {
143	0						my $func = "match_$type";
144	0	0					$matched = $self->$func or return;
145							}
146	0						return $matched;
147							}
148
149							sub handle_match {
150	0			0	0		my ($self, $type, $match) = @_;
151	0						my $func = "handle_$type";
152	0	0					if ($self->can($func)) {
153	0						$self->$func($match, $type);
154							}
155							else {
156	0						my $grammar = $self->{grammar}{$type};
157	0	0					my $parse = $grammar->{blocks}
158							? 'parse_blocks'
159							: 'parse_phrases';
160	0	0					my @filter = $grammar->{filter}
161							? ($grammar->{filter})
162							: ();
163	0						$self->subparse($parse, $match, $type, @filter);
164							}
165							}
166
167							sub subparse {
168	0			0	0		my ($self, $func, $match, $type, $filter) = @_;
169	0						$self->{receiver}->begin_node($type);
170							my $parser = $self->new(
171							grammar => $self->{grammar},
172							receiver => $self->{receiver}->new,
173							input => $filter
174	0	0					? do { $_ = $match->{text}; &$filter(); $_ }
	0
	0
	0
175							: $match->{text},
176							);
177	0						$parser->$func($type);
178	0						$self->{receiver}->insert($parser->{receiver});
179	0						$self->{receiver}->end_node($type);
180							}
181
182							#-------------------------------------------------------------------------------
183							# Helper functions
184							#
185							# These are the odds and ends called by the code above.
186							#-------------------------------------------------------------------------------
187
188							sub reduction_error {
189	0			0	0		my $self = shift;
190	0						return ref($self) . qq[ reduction error for:\n"$self->{input}"];
191							}
192
193							sub matched_block {
194	0	0		0	0		my $begin = defined $_[2] ? $_[2] : $-[0];
195	0	0					die "All blocks must match at position 0"
196							if "$begin" ne "0";
197
198							return +{
199	0		0				text => ($_[1] \|\| $1),
			0
200							end => ($_[3] \|\| $+[0]),
201							};
202							}
203
204							sub matched_phrase {
205							return +{
206	0	0	0	0	0		text => ($_[1] \|\| $1),
			0
207							begin => (defined $_[2] ? $_[2] : $-[0]),
208							end => ($_[3] \|\| $+[0]),
209							};
210							}
211
212							1;
213
214							=for perldoc
215							This POD generated by Perldoc-0.21.
216							DO NOT EDIT. Your changes will be lost.
217
218							=encoding utf8
219
220							=head1 NAME
221
222							Document::Parser - Base Class for Creating Text Format Parsers
223
224							=head1 SYNOPSIS
225
226							package MyParser;
227							use base 'Document::Parser';
228
229							sub create_grammar {
230							return {
231							# ... define a grammar hash here ...
232							};
233							}
234
235							=head1 DESCRIPTION
236
237							Document::Parser is a base class that you can use to easily generate a
238							parser for text document markups (like Wiki or POD markups).
239
240							See this parser as an example:
241
242							http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Parser.pm
243
244							And this module for usage of the parser:
245
246							http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Formatter2.pm
247
248							=head1 AUTHOR
249
250							Ingy döt Net
251
252							=head1 COPYRIGHT
253
254							Copyright (c) 2007. Ingy döt Net. All rights reserved.
255
256							This program is free software; you can redistribute it and/or modify it
257							under the same terms as Perl itself.
258
259							See http://www.perl.com/perl/misc/Artistic.html
260
261							=cut