File Coverage

lib/Document/Parser.pm
Criterion Covered Total %
statement 6 88 6.8
branch 0 40 0.0
condition 0 33 0.0
subroutine 2 14 14.2
pod 0 12 0.0
total 8 187 4.2


line stmt bran cond sub pod time code
1             ## Base Class for Creating Text Format Parsers
2             #
3             # Document::Parser is a base class that you can use to easily generate a
4             # parser for text document markups (like Wiki or POD markups).
5             #
6             # See this parser as an example:
7             #
8             # http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Parser.pm
9             #
10             # And this module for usage of the parser:
11             #
12             # http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Formatter2.pm
13             #
14             # Copyright (c) 2007. Ingy döt Net. All rights reserved.
15             #
16             # Licensed under the same terms as Perl itself.
17             ##
18             package Document::Parser;
19 1     1   367 use strict;
  1         1  
  1         27  
20 1     1   4 use warnings;
  1         2  
  1         1120  
21              
22             ## Synopsis:
23             #
24             # package MyParser;
25             # use base 'Document::Parser';
26             #
27             # sub create_grammar {
28             # return {
29             # # ... define a grammar hash here ...
30             # };
31             # }
32             ##
33              
34             ##------------------------------------------------------------------------------
35             # Parser object constructor/initializer
36             ##------------------------------------------------------------------------------
37             sub new {
38 0     0 0   my $class = shift;
39 0   0       return bless { @_ }, ref($class) || $class;
40             }
41              
42             ##------------------------------------------------------------------------------
43             # $parsed = $parser->parse($wikitext);
44             ##------------------------------------------------------------------------------
45             sub parse {
46 0     0 0   my $self = shift;
47 0   0       $self->{input} ||= shift;
48 0   0       $self->{grammar} ||= $self->set_grammar;
49 0   0       $self->{receiver} ||= $self->set_receiver;
50 0           $self->{receiver}->init;
51 0           $self->parse_blocks('top');
52 0           return $self->{receiver}->content;
53             }
54              
55             ##
56             # Call `set_receiver` to reset the receiver for a new parse.
57             sub set_receiver {
58 0     0 0   my $self = shift;
59 0   0       $self->{receiver} = shift || $self->create_receiver;
60             }
61              
62             sub set_grammar {
63 0     0 0   my $self = shift;
64 0   0       $self->{grammar} = shift || $self->create_grammar;
65             }
66              
67             #-------------------------------------------------------------------------------
68             # Parse input into a series of blocks. With each iteration the parser must
69             # match a block at position 0 of the text, and remove that block from the
70             # input reparse it further. This continues until there is no input left.
71             #-------------------------------------------------------------------------------
72             sub parse_blocks {
73 0     0 0   my $self = shift;
74 0           my $container_type = shift;
75 0           my $types = $self->{grammar}{$container_type}{blocks};
76 0           while (my $length = length $self->{input}) {
77 0           for my $type (@$types) {
78 0 0         my $matched = $self->find_match(matched_block => $type) or next;
79 0           substr($self->{input}, 0, $matched->{end}, '');
80 0           $self->handle_match($type, $matched);
81 0           last;
82             }
83 0 0         die $self->reduction_error
84             unless length($self->{input}) < $length;
85             }
86 0           return;
87             }
88              
89             #-------------------------------------------------------------------------------
90             # This code parses a chunk into interleaved pieces of plain text and
91             # phrases. It repeatedly tries to match every possible phrase and
92             # then takes the match closest to the start. Everything before a
93             # match is written as text. Matched phrases are subparsed according
94             # to their rules. This continues until the input is all eaten.
95             #-------------------------------------------------------------------------------
96             sub parse_phrases {
97 0     0 0   my $self = shift;
98 0           my $container_type = shift;
99 0           my $types = $self->{grammar}{$container_type}{phrases};
100 0           while (length $self->{input}) {
101 0           my $match;
102 0           for my $type (@$types) {
103 0 0         my $matched = $self->find_match(matched_phrase => $type) or next;
104 0 0 0       if (not defined $match or $matched->{begin} < $match->{begin}) {
105 0           $match = $matched;
106 0           $match->{type} = $type;
107 0 0         last if $match->{begin} == 0;
108             }
109             }
110 0 0         if (! $match) {
111 0           $self->{receiver}->text_node($self->{input});
112 0           last;
113             }
114 0           my ($begin, $end, $type) = @{$match}{qw(begin end type)};
  0            
115 0 0         $self->{receiver}->text_node(substr($self->{input}, 0, $begin))
116             unless $begin == 0;
117 0           substr($self->{input}, 0, $end, '');
118 0           $type = $match->{type};
119 0           $self->handle_match($type, $match);
120             }
121 0           return;
122             }
123              
124             sub find_match {
125 0     0 0   my ($self, $matched_func, $type) = @_;
126 0           my $matched;
127 0 0         if (my $regexp = $self->{grammar}{$type}{match}) {
128 0 0         if (ref($regexp) eq 'ARRAY') {
129 0           for my $re (@$regexp) {
130 0 0         if ($self->{input} =~ $re) {
131 0           $matched = $self->$matched_func;
132 0           last;
133             }
134             }
135 0 0         return unless $matched;
136             }
137             else {
138 0 0         return unless $self->{input} =~ $regexp;
139 0           $matched = $self->$matched_func;
140             }
141             }
142             else {
143 0           my $func = "match_$type";
144 0 0         $matched = $self->$func or return;
145             }
146 0           return $matched;
147             }
148              
149             sub handle_match {
150 0     0 0   my ($self, $type, $match) = @_;
151 0           my $func = "handle_$type";
152 0 0         if ($self->can($func)) {
153 0           $self->$func($match, $type);
154             }
155             else {
156 0           my $grammar = $self->{grammar}{$type};
157 0 0         my $parse = $grammar->{blocks}
158             ? 'parse_blocks'
159             : 'parse_phrases';
160 0 0         my @filter = $grammar->{filter}
161             ? ($grammar->{filter})
162             : ();
163 0           $self->subparse($parse, $match, $type, @filter);
164             }
165             }
166              
167             sub subparse {
168 0     0 0   my ($self, $func, $match, $type, $filter) = @_;
169 0           $self->{receiver}->begin_node($type);
170             my $parser = $self->new(
171             grammar => $self->{grammar},
172             receiver => $self->{receiver}->new,
173             input => $filter
174 0 0         ? do { $_ = $match->{text}; &$filter(); $_ }
  0            
  0            
  0            
175             : $match->{text},
176             );
177 0           $parser->$func($type);
178 0           $self->{receiver}->insert($parser->{receiver});
179 0           $self->{receiver}->end_node($type);
180             }
181              
182             #-------------------------------------------------------------------------------
183             # Helper functions
184             #
185             # These are the odds and ends called by the code above.
186             #-------------------------------------------------------------------------------
187              
188             sub reduction_error {
189 0     0 0   my $self = shift;
190 0           return ref($self) . qq[ reduction error for:\n"$self->{input}"];
191             }
192              
193             sub matched_block {
194 0 0   0 0   my $begin = defined $_[2] ? $_[2] : $-[0];
195 0 0         die "All blocks must match at position 0"
196             if "$begin" ne "0";
197              
198             return +{
199 0   0       text => ($_[1] || $1),
      0        
200             end => ($_[3] || $+[0]),
201             };
202             }
203              
204             sub matched_phrase {
205             return +{
206 0 0 0 0 0   text => ($_[1] || $1),
      0        
207             begin => (defined $_[2] ? $_[2] : $-[0]),
208             end => ($_[3] || $+[0]),
209             };
210             }
211              
212             1;
213              
214             =for perldoc
215             This POD generated by Perldoc-0.21.
216             DO NOT EDIT. Your changes will be lost.
217              
218             =encoding utf8
219              
220             =head1 NAME
221              
222             Document::Parser - Base Class for Creating Text Format Parsers
223              
224             =head1 SYNOPSIS
225              
226             package MyParser;
227             use base 'Document::Parser';
228              
229             sub create_grammar {
230             return {
231             # ... define a grammar hash here ...
232             };
233             }
234              
235             =head1 DESCRIPTION
236              
237             Document::Parser is a base class that you can use to easily generate a
238             parser for text document markups (like Wiki or POD markups).
239              
240             See this parser as an example:
241              
242             http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Parser.pm
243              
244             And this module for usage of the parser:
245              
246             http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Formatter2.pm
247              
248             =head1 AUTHOR
249              
250             Ingy döt Net
251              
252             =head1 COPYRIGHT
253              
254             Copyright (c) 2007. Ingy döt Net. All rights reserved.
255              
256             This program is free software; you can redistribute it and/or modify it
257             under the same terms as Perl itself.
258              
259             See http://www.perl.com/perl/misc/Artistic.html
260              
261             =cut