File Coverage

/.cpan/build/RTF-HTMLConverter-0.05-BVlWEy/blib/lib/RTF/Lexer.pm

Criterion	Covered	Total	%
statement	81	92	88.0
branch	12	26	46.1
condition	1	3	33.3
subroutine	22	25	88.0
pod	3	7	42.8
total	119	153	77.7

line	stmt	bran	cond	sub	pod	time	code
1							package RTF::Lexer;
2	1			1		7	use strict;
	1					1
	1					55
3	1			1		7	use Exporter;
	1					3
	1					48
4	1			1		6	use DynaLoader;
	1					2
	1					68
5
6							our @ISA = qw(DynaLoader Exporter);
7							our $VERSION = '0.03';
8
9	1			1		6	use constant CWORD => 256;
	1					1
	1					95
10	1			1		6	use constant CSYMB => 257;
	1					1
	1					51
11	1			1		5	use constant CUNDF => 258;
	1					2
	1					53
12	1			1		6	use constant CPARM => 259;
	1					2
	1					56
13	1			1		6	use constant CNOPR => 260;
	1					2
	1					60
14	1			1		6	use constant PTEXT => 261;
	1					3
	1					47
15	1			1		5	use constant ENTER => 262;
	1					2
	1					53
16	1			1		5	use constant LEAVE => 263;
	1					2
	1					44
17	1			1		5	use constant DESTN => 264;
	1					2
	1					50
18	1			1		5	use constant ENHEX => 265;
	1					2
	1					49
19	1			1		13	use constant ENBIN => 266;
	1					2
	1					42
20	1			1		5	use constant WRHEX => 268;
	1					3
	1					50
21	1			1		6	use constant OKEOF => 300;
	1					2
	1					54
22	1			1		5	use constant UNEOF => 301;
	1					2
	1					51
23	1			1		6	use constant UNBRC => 302;
	1					2
	1					874
24
25							our @EXPORT_OK;
26							our %EXPORT_TAGS;
27							{
28							my $package = __PACKAGE__;
29							@EXPORT_OK = (grep { $_ } map { /^$package\::(\w+)$/ } keys %constant::declared);
30							%EXPORT_TAGS = (all => [@EXPORT_OK]);
31							}
32
33							sub new {
34	1			1	1	2	my $proto = shift;
35	1		33			8	my $self = bless({}, ref($proto) \|\| $proto);
36	1					4	$self->init(@_);
37	1					3	return $self;
38							}
39
40							sub init{
41	1			1	0	3	my ($self, %opts) = @_;
42	1					9	$self->{RTF_Lexer_backpack} = [];
43	1					2	$self->{_RTF_Lexer_backpack} = undef;
44	1	50				5	if($opts{in}){
45	1	50				4	unless(ref $opts{in}){
46	0	0				0	open my $fh, "< $opts{in}" or die "Can't open '$opts{in}': $!!\n";
47	0					0	$opts{in} = $fh;
48							}
49	1					68	$self->_set_source($opts{in});
50	1					3	$self->{_RTF_Lexer_IN} = $opts{in};
51							}
52							}
53
54							bootstrap RTF::Lexer $VERSION;
55
56							sub get_token {
57	1115			1115	1	995	my $self = shift;
58	1115					1097	my $token;
59	1115	50				990	if(@{$self->{RTF_Lexer_backpack}}){
	1115					1920
60	0					0	$token = shift @{$self->{RTF_Lexer_backpack}};
	0					0
61							}else{
62	1115	50				1505	if($self->{_RTF_Lexer_backpack}){
63	0					0	$token = $self->{_RTF_Lexer_backpack};
64	0					0	$self->{_RTF_Lexer_backpack} = undef;
65							}else{
66	1115					1738	$token = [0, ''];
67	1115					3065	$token->[0] = $self->_get_token($token->[1]);
68							}
69	1115	100				1990	if($token->[0] == CWORD){
70	641					552	my ($type, $text);
71	641					1198	$type = $self->_get_token($text);
72	641	100				1193	if($type == CPARM){ # Parameter that follows
		50
73	371					572	$token->[2] = $text;
74	371					690	$type = $self->_get_token($text); # Space delimiter if any
75	371	50				724	$self->{_RTF_Lexer_backpack} = [$type, $text] unless $type == CNOPR;
76							}elsif($type != CNOPR){ # Not a space delimiter
77	0					0	$self->{_RTF_Lexer_backpack} = [$type, $text];
78							}
79							}
80							}
81	1115	50				3655	return $token->[0] ? $token : undef;
82							}
83
84	0			0	1	0	sub unget_token { push @{$_[0]->{RTF_Lexer_backpack}}, $_[1] }
	0					0
85
86							{
87							my %stop_tokens = map { $_ => 1 } (OKEOF, UNBRC, UNEOF);
88	1115	50		1115	0	5843	sub is_stop_token { $stop_tokens{ref($_[1]) ? $_[1]->[0] : $_[1]} }
89	0	0		0	0		sub add_stop_token { $stop_tokens{ref($_[1]) ? $_[1]->[0] : $_[1]} = 1 }
90	0	0		0	0		sub del_stop_token { delete $stop_tokens{ref($_[1]) ? $_[1]->[0] : $_[1]} }
91							}
92
93
94							1;
95
96							=head1 NAME
97
98							RTF::Lexer - Rich Text Format (RTF) lexical analyzer.
99
100							=head1 SYNOPSIS
101
102							use RTF::Lexer qw(:all);
103
104							my $parser = RTF::Lexer->new(in => 'text.rtf');
105							my $token;
106							do {
107							$token = $parser->get_token();
108							} until $parser->is_stop_token($token);
109
110
111							=head1 DESCRIPTION
112
113							RTF::Lexer is a low-level RTF format lexical analyzer. It splits the
114							input stream into separate tokens, which can be handled by other
115							high-level modules.
116
117							=head1 METHODS
118
119							=over 4
120
121							=item new
122
123							The constructor. Accepts the only argument C which must be an input
124							file handle or a file name. In the latter case if there is a failure
125							while opening the file method C throws an exception. By default
126							the input is read from C.
127
128							=item get_token
129
130							Returns the next token from the input stream. The token is a reference to
131							an array those first element is a numeric id of the token type. The second
132							element is a string representation of the token. The third element may
133							exists only if the token is a control word and represents the numerical
134							parameter of this control word.
135
136							The following token types are recognized by RTF::Lexer, that are declared
137							as constants in this module:
138
139							=over 8
140
141							=item CWORD
142
143							Control word (eg. C<\rtf1>, C<\trowd>).
144
145							=item CSYMB
146
147							Control symbol, mentioned in RTF Specification version 1.7.
148
149							=item CUNDF
150
151							Unknown control symbol (i.e. not mentioned in RTF Specification).
152
153							=item PTEXT
154
155							Plain text.
156
157							=item ENTER
158
159							Start of group (C<{>).
160
161							=item LEAVE
162
163							End of group (C<}>).
164
165							=item DESTN
166
167							End of destination group (C<}> that turns off destination mode).
168
169							=item ENHEX
170
171							Data in hexadecimal format that follows C<\'> control symbol.
172
173							=item ENBIN
174
175							End of binary data block (started by C<\bin> control word).
176
177							=item WRHEX
178
179							Symbol which is not a hexadecimal digit found where ENHEX token expected.
180
181							=item OKEOF
182
183							Normal end of input stream.
184
185							=item UNEOF
186
187							Unexpected end of input stream.
188
189							=item UNBRC
190
191							End of group that does not match any start of group.
192
193							=back
194
195							These constants are not exported by default. Any of them may be exported by request.
196							All of them may be exported by the use of C<:all> export tag.
197
198							=item unget_token($token)
199
200							Pushes back token C<$token> so the next call to C will return it.
201
202							=item set_destination
203
204							Turns on the destination mode, i.e. all tokens will be ignored until the end of
205							current group.
206
207							=back
208
209							=head1 SEE ALSO
210
211							RTF::Tokenizer, Rich Text Format (RTF) Specification.
212
213							=head1 BUGS
214
215							It is impossible to have more then one RTF::Lexer objects in a single process.
216
217							=head1 AUTHOR
218
219							Vadim O. Ustiansky
220