line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
2
|
|
|
|
|
|
|
# File: RTF.pm |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Description: Read Rich Text Format meta information |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Revisions: 2010/06/17 - P. Harvey Created |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# References: 1) http://download.microsoft.com/download/2/f/5/2f599e18-07ee-4ec5-a1e7-f4e6a9423592/Word2007RTFSpec9.doc |
9
|
|
|
|
|
|
|
# 2) http://search.cpan.org/dist/RTF-Writer/lib/RTF/Cookbook.pod |
10
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
package Image::ExifTool::RTF; |
13
|
|
|
|
|
|
|
|
14
|
1
|
|
|
1
|
|
3629
|
use strict; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
32
|
|
15
|
1
|
|
|
1
|
|
5
|
use vars qw($VERSION); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
42
|
|
16
|
1
|
|
|
1
|
|
4
|
use Image::ExifTool qw(:DataAccess :Utils); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1751
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
$VERSION = '1.04'; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
sub ProcessUserProps($$$); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# supported RTF character entities |
23
|
|
|
|
|
|
|
my %rtfEntity = ( |
24
|
|
|
|
|
|
|
par => 0x0a, |
25
|
|
|
|
|
|
|
tab => 0x09, |
26
|
|
|
|
|
|
|
endash => 0x2013, |
27
|
|
|
|
|
|
|
emdash => 0x2014, |
28
|
|
|
|
|
|
|
lquote => 0x2018, |
29
|
|
|
|
|
|
|
rquote => 0x2019, |
30
|
|
|
|
|
|
|
ldblquote => 0x201c, |
31
|
|
|
|
|
|
|
rdblquote => 0x201d, |
32
|
|
|
|
|
|
|
bullet => 0x2022, |
33
|
|
|
|
|
|
|
); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# RTF tags (ref 1) |
36
|
|
|
|
|
|
|
%Image::ExifTool::RTF::Main = ( |
37
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
38
|
|
|
|
|
|
|
NOTES => q{ |
39
|
|
|
|
|
|
|
This table lists standard tags of the RTF information group, but ExifTool |
40
|
|
|
|
|
|
|
will also extract any non-standard tags found in this group. As well, |
41
|
|
|
|
|
|
|
ExifTool will extract any custom properties that are found. See |
42
|
|
|
|
|
|
|
L for the |
43
|
|
|
|
|
|
|
specification. |
44
|
|
|
|
|
|
|
}, |
45
|
|
|
|
|
|
|
title => { }, |
46
|
|
|
|
|
|
|
subject => { }, |
47
|
|
|
|
|
|
|
author => { Groups => { 2 => 'Author' } }, |
48
|
|
|
|
|
|
|
manager => { }, |
49
|
|
|
|
|
|
|
company => { }, |
50
|
|
|
|
|
|
|
copyright=> { Groups => { 2 => 'Author' } }, # (written by Apple TextEdit) |
51
|
|
|
|
|
|
|
operator => { Name => 'LastModifiedBy' }, |
52
|
|
|
|
|
|
|
category => { }, |
53
|
|
|
|
|
|
|
keywords => { }, |
54
|
|
|
|
|
|
|
comment => { }, |
55
|
|
|
|
|
|
|
doccomm => { Name => 'Comments' }, |
56
|
|
|
|
|
|
|
hlinkbase=> { Name => 'HyperlinkBase' }, |
57
|
|
|
|
|
|
|
creatim => { |
58
|
|
|
|
|
|
|
Name => 'CreateDate', |
59
|
|
|
|
|
|
|
Format => 'date', |
60
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
61
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
62
|
|
|
|
|
|
|
}, |
63
|
|
|
|
|
|
|
revtim => { |
64
|
|
|
|
|
|
|
Name => 'ModifyDate', |
65
|
|
|
|
|
|
|
Format => 'date', |
66
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
67
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
68
|
|
|
|
|
|
|
}, |
69
|
|
|
|
|
|
|
printim => { |
70
|
|
|
|
|
|
|
Name => 'LastPrinted', |
71
|
|
|
|
|
|
|
Format => 'date', |
72
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
73
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
74
|
|
|
|
|
|
|
}, |
75
|
|
|
|
|
|
|
buptim => { |
76
|
|
|
|
|
|
|
Name => 'BackupTime', |
77
|
|
|
|
|
|
|
Format => 'date', |
78
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
79
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
80
|
|
|
|
|
|
|
}, |
81
|
|
|
|
|
|
|
edmins => { |
82
|
|
|
|
|
|
|
Name => 'TotalEditTime', # in minutes |
83
|
|
|
|
|
|
|
PrintConv => 'ConvertTimeSpan($val, 60)', |
84
|
|
|
|
|
|
|
}, |
85
|
|
|
|
|
|
|
nofpages => { Name => 'Pages' }, |
86
|
|
|
|
|
|
|
nofwords => { Name => 'Words' }, |
87
|
|
|
|
|
|
|
nofchars => { Name => 'Characters' }, |
88
|
|
|
|
|
|
|
nofcharsws=>{ |
89
|
|
|
|
|
|
|
Name => 'CharactersWithSpaces', |
90
|
|
|
|
|
|
|
Notes => q{ |
91
|
|
|
|
|
|
|
according to the 2007 Microsoft RTF specification this is clearly the number |
92
|
|
|
|
|
|
|
of characters NOT including spaces, but Microsoft Word writes this as the |
93
|
|
|
|
|
|
|
number WITH spaces, so ExifTool names this tag according to the de facto |
94
|
|
|
|
|
|
|
standard |
95
|
|
|
|
|
|
|
}, |
96
|
|
|
|
|
|
|
}, |
97
|
|
|
|
|
|
|
id => { Name => 'InternalIDNumber' }, |
98
|
|
|
|
|
|
|
version => { Name => 'RevisionNumber' }, |
99
|
|
|
|
|
|
|
vern => { Name => 'InternalVersionNumber' }, |
100
|
|
|
|
|
|
|
); |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# lookup for user-defined properties |
103
|
|
|
|
|
|
|
# (none are pre-defined and this table doesn't appear in the docs) |
104
|
|
|
|
|
|
|
%Image::ExifTool::RTF::UserProps = ( |
105
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
106
|
|
|
|
|
|
|
); |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
109
|
|
|
|
|
|
|
# Read to nested closing curly bracket "}" |
110
|
|
|
|
|
|
|
# Inputs: 0) data ref, 1) optional RAF ref to read more data if available |
111
|
|
|
|
|
|
|
# Returns: text inside brackets, or undef on error |
112
|
|
|
|
|
|
|
# Notes: On entry the current position in the data must be set to immediately |
113
|
|
|
|
|
|
|
# after the command that opens the bracket. On return the current |
114
|
|
|
|
|
|
|
# position is immediately following the closing brace if the return |
115
|
|
|
|
|
|
|
# value is defined. |
116
|
|
|
|
|
|
|
sub ReadToNested($;$) |
117
|
|
|
|
|
|
|
{ |
118
|
14
|
|
|
14
|
0
|
25
|
my ($dataPt, $raf) = @_; |
119
|
14
|
|
|
|
|
18
|
my $pos = pos $$dataPt; |
120
|
14
|
|
|
|
|
16
|
my $level = 1; |
121
|
14
|
|
|
|
|
20
|
for (;;) { |
122
|
|
|
|
|
|
|
# look for the next bracket |
123
|
46
|
50
|
|
|
|
132
|
unless ($$dataPt =~ /(\\*)([{}])/g) { |
124
|
|
|
|
|
|
|
# must read some more data |
125
|
0
|
|
|
|
|
0
|
my $p = length $$dataPt; |
126
|
0
|
|
|
|
|
0
|
my $buff; |
127
|
0
|
0
|
0
|
|
|
0
|
last unless $raf and $raf->Read($buff, 65536); |
128
|
0
|
|
|
|
|
0
|
$$dataPt .= $buff; |
129
|
|
|
|
|
|
|
# rewind position to include any leading backslashes |
130
|
0
|
|
0
|
|
|
0
|
--$p while $p and substr($$dataPt, $p - 1, 1) eq '\\'; |
131
|
0
|
|
|
|
|
0
|
pos($$dataPt) = $p; # set position to continue search |
132
|
0
|
|
|
|
|
0
|
next; |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
# bracket is escaped if preceded by an odd number of backslashes |
135
|
46
|
100
|
100
|
|
|
93
|
next if $1 and length($1) & 0x01; |
136
|
42
|
100
|
|
|
|
68
|
$2 eq '{' and ++$level, next; |
137
|
28
|
100
|
|
|
|
50
|
next unless --$level <= 0; |
138
|
14
|
|
|
|
|
37
|
return substr($$dataPt, $pos, pos($$dataPt) - $pos - 1); |
139
|
|
|
|
|
|
|
} |
140
|
0
|
|
|
|
|
0
|
return undef; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
144
|
|
|
|
|
|
|
# Unescape RTF escape sequences |
145
|
|
|
|
|
|
|
# Inputs: 0) ExifTool ref, 1) RTF text, 2) RTF character set (for hex characters) |
146
|
|
|
|
|
|
|
# Returns: Unescaped text (in current ExifTool Charset) |
147
|
|
|
|
|
|
|
sub UnescapeRTF($$$) |
148
|
|
|
|
|
|
|
{ |
149
|
11
|
|
|
11
|
0
|
17
|
my ($et, $val, $charset) = @_; |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# return now unless we have a control sequence |
152
|
11
|
100
|
|
|
|
25
|
unless ($val =~ /\\/) { |
153
|
5
|
|
|
|
|
9
|
$val =~ tr/\n\r//d; # ignore CR's and LF's |
154
|
5
|
|
|
|
|
10
|
return $val; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
# CR/LF is significant if it terminates a control sequence (so change these to a space) |
157
|
|
|
|
|
|
|
# (was $val =~ s/(^|[^\\])((?:\\\\)*)(\\[a-zA-Z]+(?:-?\d+)?)[\n\r]/$1$2$3 /g;) |
158
|
6
|
100
|
|
|
|
26
|
$val =~ s/\\(?:([a-zA-Z]+(?:-?\d+)?)[\n\r]|(.))/'\\'.($1 ? "$1 " : $2)/sge; |
|
35
|
|
|
|
|
118
|
|
159
|
|
|
|
|
|
|
# protect the newline control sequence by converting to a \par command |
160
|
|
|
|
|
|
|
# (was $val =~ s/(^|[^\\])((?:\\\\)*)(\\[\n\r])/$1$2\\par /g;) |
161
|
6
|
100
|
|
|
|
21
|
$val =~ s/(\\[\n\r])|(\\.)/$2 || '\\par '/sge; |
|
35
|
|
|
|
|
130
|
|
162
|
|
|
|
|
|
|
# all other CR/LF's are ignored (so delete them) |
163
|
6
|
|
|
|
|
15
|
$val =~ tr/\n\r//d; |
164
|
|
|
|
|
|
|
|
165
|
6
|
|
|
|
|
9
|
my $rtnVal = ''; |
166
|
6
|
|
|
|
|
14
|
my $len = length $val; |
167
|
6
|
|
|
|
|
9
|
my $skip = 1; # default Unicode skip count |
168
|
6
|
|
|
|
|
8
|
my $p0 = 0; |
169
|
|
|
|
|
|
|
|
170
|
6
|
|
|
|
|
8
|
for (;;) { |
171
|
|
|
|
|
|
|
# find next backslash |
172
|
40
|
100
|
|
|
|
73
|
my $p1 = ($val =~ /\\/g) ? pos($val) : $len + 1; |
173
|
|
|
|
|
|
|
# add text up to start of this control sequence (or up to end) |
174
|
40
|
|
|
|
|
54
|
my $n = $p1 - $p0 - 1; |
175
|
40
|
100
|
|
|
|
66
|
$rtnVal .= substr($val, $p0, $n) if $n > 0; |
176
|
|
|
|
|
|
|
# all done if at the end or if control sequence is empty |
177
|
40
|
100
|
|
|
|
63
|
last if $p1 >= $len; |
178
|
|
|
|
|
|
|
# look for an ASCII-letter control word or Unicode control |
179
|
34
|
100
|
|
|
|
77
|
if ($val =~ /\G([a-zA-Z]+)(-?\d+)? ?/g) { |
180
|
|
|
|
|
|
|
# interpret command if recognized |
181
|
25
|
100
|
|
|
|
57
|
if ($1 eq 'uc') { # \ucN |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
182
|
2
|
|
|
|
|
4
|
$skip = $2; |
183
|
|
|
|
|
|
|
} elsif ($1 eq 'u') { # \uN |
184
|
20
|
50
|
|
|
|
34
|
if ($2 < 0) { |
185
|
0
|
|
|
|
|
0
|
$et->WarnOnce('Invalid Unicode character(s) in text'); |
186
|
0
|
|
|
|
|
0
|
$rtnVal .= '?'; |
187
|
|
|
|
|
|
|
} else { |
188
|
20
|
|
|
|
|
53
|
require Image::ExifTool::Charset; |
189
|
20
|
|
|
|
|
48
|
$rtnVal .= Image::ExifTool::Charset::Recompose($et, [$2]); |
190
|
20
|
100
|
|
|
|
38
|
if ($skip) { |
191
|
|
|
|
|
|
|
# must skip the specified number of characters |
192
|
|
|
|
|
|
|
# (not simple because RTF control words count as a single character) |
193
|
1
|
50
|
|
|
|
57
|
last unless $val =~ /\G([^\\]|\\([a-zA-Z]+)(-?\d+)? ?|\\'.{2}|\\.){$skip}/g; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
} elsif ($rtfEntity{$1}) { |
197
|
3
|
|
|
|
|
10
|
require Image::ExifTool::Charset; |
198
|
3
|
|
|
|
|
9
|
$rtnVal .= Image::ExifTool::Charset::Recompose($et, [$rtfEntity{$1}]); |
199
|
|
|
|
|
|
|
} # (else ignore the command) |
200
|
|
|
|
|
|
|
} else { |
201
|
9
|
|
|
|
|
19
|
my $ch = substr($val, $p1, 1); |
202
|
9
|
100
|
|
|
|
78
|
if ($ch eq "'") { |
203
|
|
|
|
|
|
|
# hex character code |
204
|
5
|
50
|
|
|
|
17
|
last if $p1 + 3 > $len; |
205
|
5
|
|
|
|
|
11
|
my $hex = substr($val, $p1 + 1, 2); |
206
|
5
|
50
|
|
|
|
14
|
if ($hex =~ /^[0-9a-fA-F]{2}$/) { |
207
|
5
|
|
|
|
|
561
|
require Image::ExifTool::Charset; |
208
|
5
|
|
|
|
|
25
|
$rtnVal .= $et->Decode(chr(hex($hex)), $charset); |
209
|
|
|
|
|
|
|
} |
210
|
5
|
|
|
|
|
15
|
pos($val) = $p1 + 3; # skip to after the hex code |
211
|
|
|
|
|
|
|
} else { |
212
|
|
|
|
|
|
|
# assume a standard control symbol (\, {, }, etc) |
213
|
|
|
|
|
|
|
# (note, this may not be valid for some uncommon |
214
|
|
|
|
|
|
|
# control symbols like \~ for non-breaking space) |
215
|
4
|
|
|
|
|
5
|
$rtnVal .= $ch; |
216
|
4
|
|
|
|
|
7
|
pos($val) = $p1 + 1; # skip to after this character |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
} |
219
|
34
|
|
|
|
|
46
|
$p0 = pos($val); |
220
|
|
|
|
|
|
|
} |
221
|
6
|
|
|
|
|
13
|
return $rtnVal; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
225
|
|
|
|
|
|
|
# Read information in a RTF document |
226
|
|
|
|
|
|
|
# Inputs: 0) ExifTool ref, 1) dirInfo ref |
227
|
|
|
|
|
|
|
# Returns: 1 on success, 0 if this wasn't a valid RTF file |
228
|
|
|
|
|
|
|
sub ProcessRTF($$) |
229
|
|
|
|
|
|
|
{ |
230
|
1
|
|
|
1
|
0
|
2
|
my ($et, $dirInfo) = @_; |
231
|
1
|
|
|
|
|
3
|
my $raf = $$dirInfo{RAF}; |
232
|
1
|
|
|
|
|
2
|
my ($buff, $buf2, $cs); |
233
|
|
|
|
|
|
|
|
234
|
1
|
50
|
33
|
|
|
4
|
return 0 unless $raf->Read($buff, 64) and $raf->Seek(0,0); |
235
|
1
|
50
|
|
|
|
7
|
return 0 unless $buff =~ /^[\n\r]*\{[\n\r]*\\rtf[^a-zA-Z]/; |
236
|
1
|
|
|
|
|
5
|
$et->SetFileType(); |
237
|
|
|
|
|
|
|
# |
238
|
|
|
|
|
|
|
# determine the RTF character set |
239
|
|
|
|
|
|
|
# |
240
|
1
|
50
|
|
|
|
6
|
if ($buff=~ /\\ansicpg(\d*)/) { |
|
|
0
|
|
|
|
|
|
241
|
1
|
|
|
|
|
4
|
$cs = "cp$1"; |
242
|
|
|
|
|
|
|
} elsif ($buff=~ /\\(ansi|mac|pc|pca)[^a-zA-Z]/) { |
243
|
0
|
|
|
|
|
0
|
my %trans = ( |
244
|
|
|
|
|
|
|
ansi => 'Latin', |
245
|
|
|
|
|
|
|
mac => 'MacRoman', |
246
|
|
|
|
|
|
|
pc => 'cp437', |
247
|
|
|
|
|
|
|
pca => 'cp850', |
248
|
|
|
|
|
|
|
); |
249
|
0
|
|
|
|
|
0
|
$cs = $trans{$1}; |
250
|
|
|
|
|
|
|
} else { |
251
|
0
|
|
|
|
|
0
|
$et->Warn('Unspecified RTF encoding. Will assume Latin'); |
252
|
0
|
|
|
|
|
0
|
$cs = 'Latin'; |
253
|
|
|
|
|
|
|
} |
254
|
1
|
|
|
|
|
5
|
my $charset = $Image::ExifTool::charsetName{lc $cs}; |
255
|
1
|
50
|
|
|
|
3
|
unless ($charset) { |
256
|
0
|
|
|
|
|
0
|
$et->Warn("Unsupported RTF encoding $cs. Will assume Latin."); |
257
|
0
|
|
|
|
|
0
|
$charset = 'Latin'; |
258
|
|
|
|
|
|
|
} |
259
|
1
|
|
|
|
|
3
|
my $tagTablePtr = GetTagTable('Image::ExifTool::RTF::Main'); |
260
|
1
|
|
|
|
|
3
|
undef $buff; |
261
|
|
|
|
|
|
|
# |
262
|
|
|
|
|
|
|
# scan for \info group |
263
|
|
|
|
|
|
|
# |
264
|
1
|
|
|
|
|
2
|
for (;;) { |
265
|
2
|
100
|
|
|
|
10
|
$raf->Read($buf2, 65536) or last; |
266
|
1
|
50
|
|
|
|
3
|
if (defined $buff) { |
267
|
|
|
|
|
|
|
# read more but leave some overlap for the match |
268
|
0
|
|
|
|
|
0
|
$buff = substr($buff, -16) . $buf2; |
269
|
|
|
|
|
|
|
} else { |
270
|
1
|
|
|
|
|
3
|
$buff = $buf2; |
271
|
|
|
|
|
|
|
} |
272
|
1
|
50
|
|
|
|
9
|
next unless $buff =~ /[^\\]\{[\n\r]*\\info([^a-zA-Z])/g; |
273
|
|
|
|
|
|
|
# anything but a space is included in the contents |
274
|
1
|
50
|
|
|
|
8
|
pos($buff) = pos($buff) - 1 if $1 ne ' '; |
275
|
1
|
|
|
|
|
5
|
my $info = ReadToNested(\$buff, $raf); |
276
|
1
|
50
|
|
|
|
3
|
unless (defined $info) { |
277
|
0
|
|
|
|
|
0
|
$et->Warn('Unterminated information group'); |
278
|
0
|
|
|
|
|
0
|
last; |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
# process info commands (eg. "\author", "\*\copyright"); |
281
|
1
|
|
|
|
|
8
|
while ($info =~ /\{[\n\r]*(\\\*[\n\r]*)?\\([a-zA-Z]+)([^a-zA-Z])/g) { |
282
|
8
|
100
|
|
|
|
26
|
pos($info) = pos($info) - 1 if $3 ne ' '; |
283
|
8
|
|
|
|
|
15
|
my $tag = $2; |
284
|
8
|
|
|
|
|
13
|
my $val = ReadToNested(\$info); |
285
|
8
|
50
|
|
|
|
16
|
last unless defined $val; |
286
|
8
|
|
|
|
|
17
|
my $tagInfo = $$tagTablePtr{$tag}; |
287
|
8
|
100
|
66
|
|
|
34
|
if ($tagInfo and $$tagInfo{Format} and $$tagInfo{Format} eq 'date') { |
|
|
|
66
|
|
|
|
|
288
|
|
|
|
|
|
|
# parse RTF date commands |
289
|
1
|
|
|
|
|
9
|
my %idx = (yr=>0,mo=>1,dy=>2,hr=>3,min=>4,sec=>5); |
290
|
1
|
|
|
|
|
4
|
my @t = (0) x 6; |
291
|
1
|
|
|
|
|
6
|
while ($val =~ /\\([a-z]+)(\d+)/g) { |
292
|
4
|
50
|
|
|
|
9
|
next unless defined $idx{$1}; |
293
|
4
|
|
|
|
|
13
|
$t[$idx{$1}] = $2; |
294
|
|
|
|
|
|
|
} |
295
|
1
|
|
|
|
|
8
|
$val = sprintf("%.4d:%.2d:%.2d %.2d:%.2d:%.2d", @t); |
296
|
|
|
|
|
|
|
} else { |
297
|
|
|
|
|
|
|
# unescape RTF string value |
298
|
7
|
|
|
|
|
14
|
$val = UnescapeRTF($et, $val, $charset); |
299
|
|
|
|
|
|
|
} |
300
|
|
|
|
|
|
|
# create tagInfo for unknown tags |
301
|
8
|
50
|
|
|
|
15
|
if (not $tagInfo) { |
302
|
0
|
|
|
|
|
0
|
AddTagToTable($tagTablePtr, $tag, { Name => ucfirst($tag) }); |
303
|
|
|
|
|
|
|
} |
304
|
8
|
|
|
|
|
20
|
$et->HandleTag($tagTablePtr, $tag, $val); |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
} |
307
|
1
|
50
|
|
|
|
3
|
return 1 unless defined $buff; |
308
|
|
|
|
|
|
|
# |
309
|
|
|
|
|
|
|
# scan for \userprops (but don't read more from file to find the start of this command) |
310
|
|
|
|
|
|
|
# |
311
|
1
|
|
|
|
|
3
|
pos($buff) = 0; |
312
|
1
|
|
|
|
|
11
|
while ($buff =~ /[^\\]\{[\n\r]*\\\*[\n\r]*\\userprops([^a-zA-Z])/g) { |
313
|
|
|
|
|
|
|
# Note: The RTF spec places brackets around each propinfo structure, |
314
|
|
|
|
|
|
|
# but Microsoft Word doesn't write it this way, so tolerate either. |
315
|
1
|
50
|
|
|
|
5
|
pos($buff) = pos($buff) - 1 if $1 ne ' '; |
316
|
1
|
|
|
|
|
3
|
my $props = ReadToNested(\$buff, $raf); |
317
|
1
|
|
|
|
|
4
|
$tagTablePtr = Image::ExifTool::GetTagTable('Image::ExifTool::RTF::UserProps'); |
318
|
1
|
50
|
|
|
|
3
|
unless (defined $props) { |
319
|
0
|
|
|
|
|
0
|
$et->Warn('Unterminated user properties'); |
320
|
0
|
|
|
|
|
0
|
last; |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
# process user properties |
323
|
1
|
|
|
|
|
2
|
my $tag; |
324
|
1
|
|
|
|
|
6
|
while ($props =~ /\{[\n\r]*(\\\*[\n\r]*)?\\([a-zA-Z]+)([^a-zA-Z])/g) { |
325
|
4
|
50
|
|
|
|
10
|
pos($props) = pos($props) - 1 if $3 ne ' '; |
326
|
4
|
|
|
|
|
7
|
my $t = $2; |
327
|
4
|
|
|
|
|
6
|
my $val = ReadToNested(\$props); |
328
|
4
|
50
|
|
|
|
9
|
last unless defined $val; |
329
|
4
|
|
|
|
|
8
|
$val = UnescapeRTF($et, $val, $charset); |
330
|
4
|
100
|
33
|
|
|
14
|
if ($t eq 'propname') { |
|
|
50
|
|
|
|
|
|
331
|
2
|
|
|
|
|
5
|
$tag = $val; |
332
|
2
|
|
|
|
|
7
|
next; |
333
|
|
|
|
|
|
|
} elsif ($t ne 'staticval' or not defined $tag) { |
334
|
0
|
|
|
|
|
0
|
next; # ignore \linkval and \proptype for now |
335
|
|
|
|
|
|
|
} |
336
|
2
|
|
|
|
|
5
|
$tag =~ s/\s(.)/\U$1/g; # capitalize all words in tag name |
337
|
2
|
|
|
|
|
3
|
$tag =~ tr/-_a-zA-Z0-9//dc; # remove illegal characters |
338
|
2
|
50
|
|
|
|
5
|
next unless $tag; |
339
|
|
|
|
|
|
|
# create tagInfo for unknown tags |
340
|
2
|
50
|
|
|
|
5
|
unless ($$tagTablePtr{$tag}) { |
341
|
2
|
|
|
|
|
21
|
AddTagToTable($tagTablePtr, $tag, { Name => $tag }); |
342
|
|
|
|
|
|
|
} |
343
|
2
|
|
|
|
|
6
|
$et->HandleTag($tagTablePtr, $tag, $val); |
344
|
|
|
|
|
|
|
} |
345
|
1
|
|
|
|
|
2
|
last; # (didn't really want to loop) |
346
|
|
|
|
|
|
|
} |
347
|
1
|
|
|
|
|
4
|
return 1; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
1; # end |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
__END__ |