line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package RTF::Parser; |
2
|
|
|
|
|
|
|
$RTF::Parser::VERSION = '1.12'; |
3
|
14
|
|
|
14
|
|
135526
|
use strict; |
|
14
|
|
|
|
|
27
|
|
|
14
|
|
|
|
|
470
|
|
4
|
14
|
|
|
14
|
|
72
|
use warnings; |
|
14
|
|
|
|
|
20
|
|
|
14
|
|
|
|
|
853
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 NAME |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
RTF::Parser - A DEPRECATED event-driven RTF Parser |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 VERSION |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
version 1.12 |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 DESCRIPTION |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
A DEPRECATED event-driven RTF Parser |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 PUBLIC SERVICE ANNOUNCEMENT |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
B
|
21
|
|
|
|
|
|
|
Need rtf2*? Google for pandoc.> |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
A very short history lesson... |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
C<1.07> of this module was released in 1999 by the original author, |
26
|
|
|
|
|
|
|
Philippe Verdret. I took over the module around 2004 with high intentions. I |
27
|
|
|
|
|
|
|
added almost all of the POD, all of the tests, and most of the comments, and |
28
|
|
|
|
|
|
|
rejigged the whole thing to use L<RTF::Tokenizer> for tokenizing the incoming |
29
|
|
|
|
|
|
|
RTF, which fixed a whole class of problems. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
The big problem is really that the whole module is an API which happens to have |
32
|
|
|
|
|
|
|
C and C stuck on top of it. Any serious changes involve |
33
|
|
|
|
|
|
|
breaking the API, and that seems the greater sin than telling people to go and |
34
|
|
|
|
|
|
|
get themselves a better RTF convertor suite. |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
I had high hopes of overhauling the whole thing, but it didn't happen. I handed |
37
|
|
|
|
|
|
|
over maintainership some years later, but no new version was forthcoming, and |
38
|
|
|
|
|
|
|
the module has languished since then. There are many open bugs on rt.cpan.org |
39
|
|
|
|
|
|
|
and in the reviews. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
In a moment of weakness, I've picked up the module again with the aim of |
42
|
|
|
|
|
|
|
adding this message, fixing one or two very minor bugs, and putting a version |
43
|
|
|
|
|
|
|
that doesn't have B in big red letters on the CPAN. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
I doubt I'll ever tackle the bigger bugs (Unicode support), but I will accept |
46
|
|
|
|
|
|
|
patches I can understand. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 IMPORTANT HINTS |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
RTF parsing is non-trivial. The inner workings of these modules are somewhat |
51
|
|
|
|
|
|
|
scary. You should go and read the 'Introduction' document included with this |
52
|
|
|
|
|
|
|
distribution before going any further - it explains how this distribution fits |
53
|
|
|
|
|
|
|
together, and is B reading. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
If you just want to convert RTF to HTML or text, from inside your own script, |
56
|
|
|
|
|
|
|
jump straight to the docs for L<RTF::HTML::Converter> or L<RTF::TEXT::Converter> |
57
|
|
|
|
|
|
|
respectively. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head1 SUBCLASSING RTF::PARSER |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
When you subclass RTF::Parser, you'll want to do two things. You'll firstly |
62
|
|
|
|
|
|
|
want to overwrite the methods below described as the API. This describes what |
63
|
|
|
|
|
|
|
we do when we have tokens that aren't control words (except 'symbols' - see below). |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Then you'll want to create a hash that maps control words to code references |
66
|
|
|
|
|
|
|
that you want executed. They'll get passed a copy of the RTF::Parser object, |
67
|
|
|
|
|
|
|
the name of the control word (say, 'b'), any arguments passed with the control |
68
|
|
|
|
|
|
|
word, and then 'start'. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head2 An example... |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
The following code removes bold tags from RTF documents, and then spits back |
73
|
|
|
|
|
|
|
out RTF. |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
{ |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# Create our subclass |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
package UnboldRTF; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# We'll be doing lots of printing without newlines, so don't buffer output |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
$|++; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# Subclassing magic... |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
use RTF::Parser; |
88
|
|
|
|
|
|
|
@UnboldRTF::ISA = ( 'RTF::Parser' ); |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Redefine the API nicely |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
sub parse_start { print STDERR "Starting...\n"; } |
93
|
|
|
|
|
|
|
sub group_start { print '{' } |
94
|
|
|
|
|
|
|
sub group_end { print '}' } |
95
|
|
|
|
|
|
|
sub text { print "\n" . $_[1] } |
96
|
|
|
|
|
|
|
sub char { print "\\\'$_[1]" } |
97
|
|
|
|
|
|
|
sub symbol { print "\\$_[1]" } |
98
|
|
|
|
|
|
|
sub parse_end { print STDERR "All done...\n"; } |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
my %do_on_control = ( |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# What to do when we see any control we don't have |
105
|
|
|
|
|
|
|
# a specific action for... In this case, we print it. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
'__DEFAULT__' => sub { |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my ( $self, $type, $arg ) = @_; |
110
|
|
|
|
|
|
|
$arg = "\n" unless defined $arg; |
111
|
|
|
|
|
|
|
print "\\$type$arg"; |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
}, |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# When we come across a bold tag, we just ignore it. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
'b' => sub {}, |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
); |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# Grab STDIN... |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
my $data = join '', (<>); |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Create an instance of the class we created above |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
my $parser = UnboldRTF->new(); |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Prime the object with our control handlers... |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
$parser->control_definition( \%do_on_control ); |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Don't skip undefined destinations... |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
$parser->dont_skip_destinations(1); |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# Start the parsing! |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
$parser->parse_string( $data ); |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=head1 METHODS |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=cut |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
|
146
|
14
|
|
|
14
|
|
68
|
use vars qw($VERSION); |
|
14
|
|
|
|
|
34
|
|
|
14
|
|
|
|
|
835
|
|
147
|
|
|
|
|
|
|
|
148
|
14
|
|
|
14
|
|
75
|
use Carp; |
|
14
|
|
|
|
|
22
|
|
|
14
|
|
|
|
|
1449
|
|
149
|
14
|
|
|
14
|
|
28035
|
use RTF::Tokenizer 1.01; |
|
14
|
|
|
|
|
281456
|
|
|
14
|
|
|
|
|
456
|
|
150
|
14
|
|
|
14
|
|
16585
|
use RTF::Config; |
|
14
|
|
|
|
|
36
|
|
|
14
|
|
|
|
|
1836
|
|
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
my $DEBUG = 0; |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Debugging stuff I'm leaving in, in case someone is using it... |
155
|
14
|
|
|
14
|
|
79
|
use constant PARSER_TRACE => 0; |
|
14
|
|
|
|
|
26
|
|
|
14
|
|
|
|
|
8365
|
|
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# Abort with a full stack trace (Carp::confess). The file installs this as
# the INT and __DIE__ handler when the PARSER_TRACE constant is true.
sub backtrace {
    Carp::confess();
}
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
$SIG{'INT'} = \&backtrace if PARSER_TRACE; |
162
|
|
|
|
|
|
|
$SIG{__DIE__} = \&backtrace if PARSER_TRACE; |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head2 new |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Creates a new RTF::Parser object. Doesn't accept any arguments. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=cut |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# Constructor. Takes no arguments beyond the invocant, which may be either
# a class name or an existing object (whose class is then reused).
sub new {
    my ($invocant) = @_;

    # Resolve the real class name for both Class->new and $object->new
    my $class = ref($invocant) || $invocant;

    # By default, destinations without an explicit handler are skipped
    my $self = bless { _DONT_SKIP_DESTINATIONS => 0 }, $class;

    # Remember whether RTF::Control had been loaded when we were created
    $self->{_RTF_CONTROL_USED}++ if $INC{'RTF/Control.pm'};

    return $self;
}
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# For backwards compatibility, we import RTF::Control's %do_on_control |
189
|
|
|
|
|
|
|
# if we've loaded RTF::Control (which would suggest we're being subclassed |
190
|
|
|
|
|
|
|
# by RTF::Control). This isn't nice or pretty, but it doesn't break things. |
191
|
|
|
|
|
|
|
# I'd do this in new() but there's no guarantee it'll be set by then... |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# Make sure the object has a control-handler hash before parsing starts.
# An existing hash is left alone; otherwise we point at
# %RTF::Control::do_on_control when the _RTF_CONTROL_USED flag is set,
# or start with a fresh empty hash.
sub _install_do_on_control {
    my ($self) = @_;

    # Already primed (e.g. via control_definition) - nothing to do
    return if $self->{_DO_ON_CONTROL};

    $self->{_DO_ON_CONTROL} =
        $self->{_RTF_CONTROL_USED}
        ? \%RTF::Control::do_on_control
        : {};
}
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head2 parse_stream( \*FH ) |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
This function used to accept a second parameter - a function specifying how |
214
|
|
|
|
|
|
|
the filehandle should be read. This is deprecated, because I could find no |
215
|
|
|
|
|
|
|
examples of people using it, nor could I see why people might want to use it. |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Pass this function a reference to a filehandle (or, now, a filename! yay) to |
218
|
|
|
|
|
|
|
begin reading and processing. |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=cut |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# Parse RTF from a stream. $stream is handed to RTF::Tokenizer as its
# 'file' argument. The old third argument (a reader callback) is no longer
# supported and causes a die. Returns the parser object.
sub parse_stream {
    my ( $self, $stream, $reader ) = @_;

    $self->_install_do_on_control();

    if ($reader) {
        die("parse_stream no longer accepts a reader");
    }

    # Prime a tokenizer on the stream and run the main parse loop
    $self->{_TOKENIZER} = RTF::Tokenizer->new( file => $stream );
    $self->_parse();

    return $self;
}
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=head2 parse_string( $string ) |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
Pass this function a string to begin reading and processing. |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=cut |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# Parse RTF held in a string. The string is handed to RTF::Tokenizer as its
# 'string' argument. Returns the parser object.
sub parse_string {
    my ( $self, $string ) = @_;

    $self->_install_do_on_control();

    # Prime a tokenizer on the string and run the main parse loop
    $self->{_TOKENIZER} = RTF::Tokenizer->new( string => $string );
    $self->_parse();

    return $self;
}
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=head2 control_definition |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
The code that's executed when we trigger a control event is kept |
266
|
|
|
|
|
|
|
in a hash. We're holding this somewhere in our object. Earlier |
267
|
|
|
|
|
|
|
versions would make the assumption we're being subclassed by |
268
|
|
|
|
|
|
|
RTF::Control, which isn't something I want to assume. If you are |
269
|
|
|
|
|
|
|
using RTF::Control, you don't need to worry about this, because |
270
|
|
|
|
|
|
|
we're grabbing %RTF::Control::do_on_control, and using that. |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
Otherwise, you pass this method a reference to a hash where the keys |
273
|
|
|
|
|
|
|
are control words, and the values are coderefs that you want executed. |
274
|
|
|
|
|
|
|
This sets all the callbacks... The arguments passed to your coderefs |
275
|
|
|
|
|
|
|
are: $self, control word itself (like, say, 'par'), any parameter the |
276
|
|
|
|
|
|
|
control word had, and then 'start'. |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
If you don't pass it a reference, you get back the reference of the |
279
|
|
|
|
|
|
|
current control hash we're holding. |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=cut |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
# Get or set the hash mapping control words to handler coderefs.
#   - no argument:  returns the hashref currently held by the object
#   - one argument: must be a plain HASH reference, which becomes the
#                   new handler table; anything else dies
sub control_definition {
    my ( $self, @args ) = @_;

    # Getter
    return $self->{_DO_ON_CONTROL} unless @args;

    # Setter - only a plain hash reference is acceptable
    die "argument of control_definition() method must be an HASHREF"
        unless ref( $args[0] ) eq 'HASH';

    $self->{_DO_ON_CONTROL} = $args[0];
}
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=head2 rtf_control_emulation |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
If you pass it a boolean argument, it'll set whether or not it thinks RTF::Control |
310
|
|
|
|
|
|
|
has been loaded. If you don't pass it an argument, it'll return what it thinks... |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=cut |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
# Get or set the flag recording whether RTF::Control is considered loaded.
# With a defined argument the flag is set to it; without one the current
# value is returned.
sub rtf_control_emulation {
    my ( $self, $bool ) = @_;

    # Getter
    return $self->{_RTF_CONTROL_USED} unless defined $bool;

    # Setter
    $self->{_RTF_CONTROL_USED} = $bool;
}
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 dont_skip_destinations |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
The RTF spec says that we skip any destinations that we don't have an explicit |
334
|
|
|
|
|
|
|
handler for. You could well not want this. Accepts a boolean argument, true |
335
|
|
|
|
|
|
|
to process destinations, 0 to skip the ones we don't understand. |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=cut |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
# Set whether destinations lacking an explicit handler are processed
# (true) or skipped (false). The flag is stored exactly as passed.
sub dont_skip_destinations {
    my ( $self, $bool ) = @_;

    return $self->{_DONT_SKIP_DESTINATIONS} = $bool;
}
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# This is how he decided to call control actions. Leaving |
349
|
|
|
|
|
|
|
# it to do the right thing at the moment... Users of the |
350
|
|
|
|
|
|
|
# module don't need to know our dirty little secret... |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
{

    # RTF::Action is the dispatch namespace for control words.
    # _control_execute() calls RTF::Action::<control word>(...); no such
    # subs are defined here, so every call lands in AUTOLOAD below, which
    # looks the word up in the calling parser's own handler table.
    package RTF::Action;
    $RTF::Action::VERSION = '1.12';
    use RTF::Config;

    use vars qw($AUTOLOAD);

    # The original RTF::Parser allowed $LOG_FILE to be set, which made
    # RTF::Config do fun things. When it is set, control words without a
    # specific handler are counted in %RTF::Control::not_processed.
    my $default;

    if ($LOG_FILE) {
        $default = sub { $RTF::Control::not_processed{ $_[1] }++ }
    }

    # Resolve and run the handler for one control word.
    #
    # Arguments (as passed by _control_execute):
    #   $_[0]  the parser object
    #   $_[1]  the control word
    #   $_[2]  the control word's parameter, if any
    #   $_[3]  the literal string 'start'
    #
    # Lookup order:
    #   1. the object's handler for this exact control word
    #   2. the $LOG_FILE counting handler, when logging is enabled
    #   3. the object's '__DEFAULT__' handler
    #   4. a no-op
    #
    # The handler is resolved on EVERY call and never installed into the
    # RTF::Action symbol table. Installing it would cache the first
    # object's handler (or the no-op) process-wide, so other parser
    # objects, and later control_definition() calls, would be ignored.
    sub AUTOLOAD {

        my $self = $_[0];

        # Strip the package qualifier from a copy, leaving the global
        # $AUTOLOAD untouched
        ( my $control = $AUTOLOAD ) =~ s/^.*:://;

        my $sub = $self->{_DO_ON_CONTROL}->{$control};

        if ( !defined $sub ) {
            $sub = $default
                || $self->{_DO_ON_CONTROL}->{'__DEFAULT__'}
                || sub { };
        }

        # goto keeps @_ intact and removes AUTOLOAD from the call stack
        goto &$sub;

    }

}
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=head1 API |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
These are some methods that you're going to want to over-ride if you |
414
|
|
|
|
|
|
|
subclass this module. In general though, people seem to want to subclass |
415
|
|
|
|
|
|
|
RTF::Control, which subclasses this module. |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=head2 parse_start |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Called before we start parsing... |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=head2 parse_end |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Called when we're finished parsing |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head2 group_start |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Called when we encounter an opening { |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=head2 group_end |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Called when we encounter a closing } |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head2 text |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Called when we encounter plain-text. Is given the text as its |
436
|
|
|
|
|
|
|
first argument |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=head2 char |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
Called when we encounter a hex-escaped character. The hex characters |
441
|
|
|
|
|
|
|
are passed as the first argument. |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=head2 symbol |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
Called when we come across a control character. This is interesting, because, |
446
|
|
|
|
|
|
|
I'd have treated these as control words, so, I'm using Philippe's list as control |
447
|
|
|
|
|
|
|
words that'll trigger this for you. These are C<-_~:|{}*'\>. This needs to be |
448
|
|
|
|
|
|
|
tested. |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head2 bitmap |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
Called when we come across a command that's talking about a linked bitmap |
453
|
|
|
|
|
|
|
file. You're given the file name. |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=head2 binary |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
Called when we have binary data. You get passed it. |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=cut |
460
|
|
|
|
|
|
|
|
461
|
4
|
|
|
4
|
1
|
8
|
# Default event handlers. Every one is a deliberate no-op; subclasses
# override the events they care about.
sub parse_start { return }    # before parsing begins
sub parse_end   { return }    # after the last token
sub group_start { return }    # opening {
sub group_end   { return }    # closing }
sub text        { return }    # plain text; the text is the first argument
sub char        { return }    # hex-escaped character; hex digits passed in
sub symbol      { return }    # control symbol: -_~:|{}*'\
sub bitmap      { return }    # \{bm(?:[clr]|cwd)
sub binary      { return }    # binary data, passed as the first argument
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
# This is the big, bad parse routine that isn't called directly. |
472
|
|
|
|
|
|
|
# We loop around RTF::Tokenizer, making event calls when we need to. |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
# Main parse loop - not meant to be called directly. Pulls tokens from the
# tokenizer stored in $self->{_TOKENIZER} and fires the matching event
# until a token that is not control, text or group (i.e. EOF) arrives.
# Returns the parser object.
sub _parse {
    my ($self) = @_;

    # Pre-parse hook
    $self->parse_start();

  TOKEN:
    while (1) {

        my ( $kind, $argument, $parameter ) =
            $self->{_TOKENIZER}->get_token();

        # Control words get their own dispatcher
        if ( $kind eq 'control' ) {
            $self->_control( $argument, $parameter );
            next TOKEN;
        }

        # Plain text
        if ( $kind eq 'text' ) {
            $self->text($argument);
            next TOKEN;
        }

        # Groups: a true argument opens one, a false argument closes one
        if ( $kind eq 'group' ) {
            if ($argument) {
                $self->group_start;
            }
            else {
                $self->group_end;
            }
            next TOKEN;
        }

        # Anything else is treated as EOF
        last TOKEN;
    }

    # Post-parse hook
    $self->parse_end();

    return $self;
}
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
# Control word handler (yeuch) |
524
|
|
|
|
|
|
|
# purl, be RTF barbie is Control words are *HARD*! |
525
|
|
|
|
|
|
|
# Dispatch a single control token.
#   $type - the control word or control symbol
#   $arg  - its parameter, if any
#
# Handles, in order: the '*' destination marker, 'bin' binary data,
# control symbols, hex-escaped characters ('), and finally ordinary
# control words, which go to _control_execute().
sub _control {
    my ( $self, $type, $arg ) = @_;

    # Destination marker
    if ( $type eq '*' ) {

        # Caller asked us not to skip unknown destinations
        return $self->_control_execute('*')
            if $self->{_DONT_SKIP_DESTINATIONS};

        # Otherwise look at the control word naming the destination
        my ( $next_type, $next_word, $next_param ) =
            $self->{_TOKENIZER}->get_token();

        croak('Malformed RTF - \* not followed by a control...')
            unless $next_type eq 'control';

        # Known destination: run its handler
        return $self->_control_execute( $next_word, $next_param )
            if defined $self->{_DO_ON_CONTROL}->{$next_word};

        # Unknown destination: swallow the rest of the group, then report
        # the group's end, since its closing brace was consumed too
        $self->_skip_group();
        return $self->group_end();
    }

    # Binary data: the payload arrives as the following text token
    if ( $type eq 'bin' ) {

        my ( $next_type, $payload ) = $self->{_TOKENIZER}->get_token();

        croak('Malformed RTF - \bin not followed by text...')
            unless $next_type eq 'text';

        return $self->binary($payload);

        # Implement a bitmap handler here
    }

    # Control symbols
    return $self->symbol($type) if $type =~ m/[-_~:|{}*\\]/;

    # Hex-escaped character (entity)
    return $self->char($arg) if $type eq "'";

    # Any other control word goes to the default executer
    return $self->_control_execute( $type, $arg );
}
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
# Control word executer (this is nasty) |
599
|
|
|
|
|
|
|
# Run the action for a control word by calling the sub named
# RTF::Action::<type> with ( $self, $type, $arg, 'start' ).
# Returns whatever that sub returns.
sub _control_execute {
    my ( $self, $type, $arg ) = @_;

    my $action = "RTF::Action::$type";

    # The call goes through a symbolic reference, by name
    no strict 'refs';
    return &$action( $self, $type, $arg, 'start' );
}
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
# Skip a group |
611
|
|
|
|
|
|
|
# Discard tokens up to and including the brace that closes the group we
# are currently inside. Nested groups are tracked by depth. Stops early
# on an 'eof' token so a truncated document cannot loop forever.
sub _skip_group {
    my ($self) = @_;

    # We start inside one open group
    my $depth = 1;

    while ($depth) {

        my ( $kind, $opening ) = $self->{_TOKENIZER}->get_token();

        # Make sure we can't loop forever
        last if $kind eq 'eof';

        # Only group tokens change the nesting depth
        next unless $kind eq 'group';

        if ($opening) {
            $depth++;
        }
        else {
            $depth--;
        }
    }

}
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
1; |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
=head1 AUTHOR |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
Peter Sergeant C, originally by Philippe Verdret |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
=head1 COPYRIGHT |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
Copyright 2004 B. |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under |
649
|
|
|
|
|
|
|
the same terms as Perl itself. |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
=head1 CREDITS |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
This work was carried out under a grant generously provided by The Perl Foundation - |
654
|
|
|
|
|
|
|
give them money! |