| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package RTF::Lexer; |
|
2
|
1
|
|
|
1
|
|
7
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
55
|
|
|
3
|
1
|
|
|
1
|
|
7
|
use Exporter; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
48
|
|
|
4
|
1
|
|
|
1
|
|
6
|
use DynaLoader; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
68
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our @ISA = qw(DynaLoader Exporter); |
|
7
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
|
8
|
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
6
|
use constant CWORD => 256; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
95
|
|
|
10
|
1
|
|
|
1
|
|
6
|
use constant CSYMB => 257; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
51
|
|
|
11
|
1
|
|
|
1
|
|
5
|
use constant CUNDF => 258; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
53
|
|
|
12
|
1
|
|
|
1
|
|
6
|
use constant CPARM => 259; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
56
|
|
|
13
|
1
|
|
|
1
|
|
6
|
use constant CNOPR => 260; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
60
|
|
|
14
|
1
|
|
|
1
|
|
6
|
use constant PTEXT => 261; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
47
|
|
|
15
|
1
|
|
|
1
|
|
5
|
use constant ENTER => 262; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
53
|
|
|
16
|
1
|
|
|
1
|
|
5
|
use constant LEAVE => 263; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
44
|
|
|
17
|
1
|
|
|
1
|
|
5
|
use constant DESTN => 264; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
50
|
|
|
18
|
1
|
|
|
1
|
|
5
|
use constant ENHEX => 265; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
49
|
|
|
19
|
1
|
|
|
1
|
|
13
|
use constant ENBIN => 266; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
42
|
|
|
20
|
1
|
|
|
1
|
|
5
|
use constant WRHEX => 268; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
50
|
|
|
21
|
1
|
|
|
1
|
|
6
|
use constant OKEOF => 300; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
54
|
|
|
22
|
1
|
|
|
1
|
|
5
|
use constant UNEOF => 301; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
51
|
|
|
23
|
1
|
|
|
1
|
|
6
|
use constant UNBRC => 302; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
874
|
|
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
our @EXPORT_OK; |
|
26
|
|
|
|
|
|
|
our %EXPORT_TAGS; |
|
27
|
|
|
|
|
|
|
{ |
|
28
|
|
|
|
|
|
|
my $package = __PACKAGE__; |
|
29
|
|
|
|
|
|
|
@EXPORT_OK = (grep { $_ } map { /^$package\::(\w+)$/ } keys %constant::declared); |
|
30
|
|
|
|
|
|
|
%EXPORT_TAGS = (all => [@EXPORT_OK]); |
|
31
|
|
|
|
|
|
|
} |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
sub new { |
|
34
|
1
|
|
|
1
|
1
|
2
|
my $proto = shift; |
|
35
|
1
|
|
33
|
|
|
8
|
my $self = bless({}, ref($proto) || $proto); |
|
36
|
1
|
|
|
|
|
4
|
$self->init(@_); |
|
37
|
1
|
|
|
|
|
3
|
return $self; |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub init{ |
|
41
|
1
|
|
|
1
|
0
|
3
|
my ($self, %opts) = @_; |
|
42
|
1
|
|
|
|
|
9
|
$self->{RTF_Lexer_backpack} = []; |
|
43
|
1
|
|
|
|
|
2
|
$self->{_RTF_Lexer_backpack} = undef; |
|
44
|
1
|
50
|
|
|
|
5
|
if($opts{in}){ |
|
45
|
1
|
50
|
|
|
|
4
|
unless(ref $opts{in}){ |
|
46
|
0
|
0
|
|
|
|
0
|
open my $fh, "< $opts{in}" or die "Can't open '$opts{in}': $!!\n"; |
|
47
|
0
|
|
|
|
|
0
|
$opts{in} = $fh; |
|
48
|
|
|
|
|
|
|
} |
|
49
|
1
|
|
|
|
|
68
|
$self->_set_source($opts{in}); |
|
50
|
1
|
|
|
|
|
3
|
$self->{_RTF_Lexer_IN} = $opts{in}; |
|
51
|
|
|
|
|
|
|
} |
|
52
|
|
|
|
|
|
|
} |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
bootstrap RTF::Lexer $VERSION; |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub get_token { |
|
57
|
1115
|
|
|
1115
|
1
|
995
|
my $self = shift; |
|
58
|
1115
|
|
|
|
|
1097
|
my $token; |
|
59
|
1115
|
50
|
|
|
|
990
|
if(@{$self->{RTF_Lexer_backpack}}){ |
|
|
1115
|
|
|
|
|
1920
|
|
|
60
|
0
|
|
|
|
|
0
|
$token = shift @{$self->{RTF_Lexer_backpack}}; |
|
|
0
|
|
|
|
|
0
|
|
|
61
|
|
|
|
|
|
|
}else{ |
|
62
|
1115
|
50
|
|
|
|
1505
|
if($self->{_RTF_Lexer_backpack}){ |
|
63
|
0
|
|
|
|
|
0
|
$token = $self->{_RTF_Lexer_backpack}; |
|
64
|
0
|
|
|
|
|
0
|
$self->{_RTF_Lexer_backpack} = undef; |
|
65
|
|
|
|
|
|
|
}else{ |
|
66
|
1115
|
|
|
|
|
1738
|
$token = [0, '']; |
|
67
|
1115
|
|
|
|
|
3065
|
$token->[0] = $self->_get_token($token->[1]); |
|
68
|
|
|
|
|
|
|
} |
|
69
|
1115
|
100
|
|
|
|
1990
|
if($token->[0] == CWORD){ |
|
70
|
641
|
|
|
|
|
552
|
my ($type, $text); |
|
71
|
641
|
|
|
|
|
1198
|
$type = $self->_get_token($text); |
|
72
|
641
|
100
|
|
|
|
1193
|
if($type == CPARM){ # Parameter that follows |
|
|
|
50
|
|
|
|
|
|
|
73
|
371
|
|
|
|
|
572
|
$token->[2] = $text; |
|
74
|
371
|
|
|
|
|
690
|
$type = $self->_get_token($text); # Space delimiter if any |
|
75
|
371
|
50
|
|
|
|
724
|
$self->{_RTF_Lexer_backpack} = [$type, $text] unless $type == CNOPR; |
|
76
|
|
|
|
|
|
|
}elsif($type != CNOPR){ # Not a space delimiter |
|
77
|
0
|
|
|
|
|
0
|
$self->{_RTF_Lexer_backpack} = [$type, $text]; |
|
78
|
|
|
|
|
|
|
} |
|
79
|
|
|
|
|
|
|
} |
|
80
|
|
|
|
|
|
|
} |
|
81
|
1115
|
50
|
|
|
|
3655
|
return $token->[0] ? $token : undef; |
|
82
|
|
|
|
|
|
|
} |
|
83
|
|
|
|
|
|
|
|
|
84
|
0
|
|
|
0
|
1
|
0
|
sub unget_token { push @{$_[0]->{RTF_Lexer_backpack}}, $_[1] } |
|
|
0
|
|
|
|
|
0
|
|
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
{ |
|
87
|
|
|
|
|
|
|
my %stop_tokens = map { $_ => 1 } (OKEOF, UNBRC, UNEOF); |
|
88
|
1115
|
50
|
|
1115
|
0
|
5843
|
sub is_stop_token { $stop_tokens{ref($_[1]) ? $_[1]->[0] : $_[1]} } |
|
89
|
0
|
0
|
|
0
|
0
|
|
sub add_stop_token { $stop_tokens{ref($_[1]) ? $_[1]->[0] : $_[1]} = 1 } |
|
90
|
0
|
0
|
|
0
|
0
|
|
sub del_stop_token { delete $stop_tokens{ref($_[1]) ? $_[1]->[0] : $_[1]} } |
|
91
|
|
|
|
|
|
|
} |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
1; |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head1 NAME |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
RTF::Lexer - Rich Text Format (RTF) lexical analyzer. |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
use RTF::Lexer qw(:all); |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
my $parser = RTF::Lexer->new(in => 'text.rtf'); |
|
105
|
|
|
|
|
|
|
my $token; |
|
106
|
|
|
|
|
|
|
do { |
|
107
|
|
|
|
|
|
|
$token = $parser->get_token(); |
|
108
|
|
|
|
|
|
|
} until $parser->is_stop_token($token); |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
RTF::Lexer is a low-level RTF format lexical analyzer. It splits the |
|
114
|
|
|
|
|
|
|
input stream into separate tokens, which can be handled by other |
|
115
|
|
|
|
|
|
|
high-level modules. |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head1 METHODS |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=over 4 |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=item new |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
The constructor. Accepts the only argument C which must be an input |
|
124
|
|
|
|
|
|
|
file handle or a file name. In the latter case if there is a failure |
|
125
|
|
|
|
|
|
|
while opening the file method C throws an exception. By default |
|
126
|
|
|
|
|
|
|
the input is read from C. |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=item get_token |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Returns the next token from the input stream. The token is a reference to |
|
131
|
|
|
|
|
|
|
an array those first element is a numeric id of the token type. The second |
|
132
|
|
|
|
|
|
|
element is a string representation of the token. The third element may |
|
133
|
|
|
|
|
|
|
exists only if the token is a control word and represents the numerical |
|
134
|
|
|
|
|
|
|
parameter of this control word. |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
The following token types are recognized by RTF::Lexer, that are declared |
|
137
|
|
|
|
|
|
|
as constants in this module: |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=over 8 |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=item CWORD |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Control word (eg. C<\rtf1>, C<\trowd>). |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=item CSYMB |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
Control symbol, mentioned in RTF Specification version 1.7. |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=item CUNDF |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Unknown control symbol (i.e. not mentioned in RTF Specification). |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item PTEXT |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Plain text. |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item ENTER |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Start of group (C<{>). |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=item LEAVE |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
End of group (C<}>). |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=item DESTN |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
End of destination group (C<}> that turns off destination mode). |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=item ENHEX |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Data in hexadecimal format that follows C<\'> control symbol. |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=item ENBIN |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
End of binary data block (started by C<\bin> control word). |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=item WRHEX |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
Symbol which is not a hexadecimal digit found where ENHEX token expected. |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=item OKEOF |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
Normal end of input stream. |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=item UNEOF |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Unexpected end of input stream. |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=item UNBRC |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
End of group that does not match any start of group. |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=back |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
These constants are not exported by default. Any of them may be exported by request. |
|
196
|
|
|
|
|
|
|
All of them may be exported by the use of C<:all> export tag. |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=item unget_token($token) |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
Pushes back token C<$token> so the next call to C will return it. |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=item set_destination |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
Turns on the destination mode, i.e. all tokens will be ignored until the end of |
|
205
|
|
|
|
|
|
|
current group. |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=back |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
RTF::Tokenizer, Rich Text Format (RTF) Specification. |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=head1 BUGS |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
It is impossible to have more then one RTF::Lexer objects in a single process. |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head1 AUTHOR |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
Vadim O. Ustiansky |
|
220
|
|
|
|
|
|
|
|