| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 2 |  |  |  |  |  |  | # File:         RTF.pm | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # Description:  Read Rich Text Format meta information | 
| 5 |  |  |  |  |  |  | # | 
| 6 |  |  |  |  |  |  | # Revisions:    2010/06/17 - P. Harvey Created | 
| 7 |  |  |  |  |  |  | # | 
| 8 |  |  |  |  |  |  | # References:   1) http://download.microsoft.com/download/2/f/5/2f599e18-07ee-4ec5-a1e7-f4e6a9423592/Word2007RTFSpec9.doc | 
| 9 |  |  |  |  |  |  | #               2) http://search.cpan.org/dist/RTF-Writer/lib/RTF/Cookbook.pod | 
| 10 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | package Image::ExifTool::RTF; | 
| 13 |  |  |  |  |  |  |  | 
| 14 | 1 |  |  | 1 |  | 4421 | use strict; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 35 |  | 
| 15 | 1 |  |  | 1 |  | 5 | use vars qw($VERSION); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 41 |  | 
| 16 | 1 |  |  | 1 |  | 5 | use Image::ExifTool qw(:DataAccess :Utils); | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 2118 |  | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | $VERSION = '1.04'; | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | sub ProcessUserProps($$$); | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | # supported RTF character entities | 
| 23 |  |  |  |  |  |  | my %rtfEntity = ( | 
| 24 |  |  |  |  |  |  | par       => 0x0a, | 
| 25 |  |  |  |  |  |  | tab       => 0x09, | 
| 26 |  |  |  |  |  |  | endash    => 0x2013, | 
| 27 |  |  |  |  |  |  | emdash    => 0x2014, | 
| 28 |  |  |  |  |  |  | lquote    => 0x2018, | 
| 29 |  |  |  |  |  |  | rquote    => 0x2019, | 
| 30 |  |  |  |  |  |  | ldblquote => 0x201c, | 
| 31 |  |  |  |  |  |  | rdblquote => 0x201d, | 
| 32 |  |  |  |  |  |  | bullet    => 0x2022, | 
| 33 |  |  |  |  |  |  | ); | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | # RTF tags (ref 1) | 
| 36 |  |  |  |  |  |  | %Image::ExifTool::RTF::Main = ( | 
| 37 |  |  |  |  |  |  | GROUPS => { 2 => 'Document' }, | 
| 38 |  |  |  |  |  |  | NOTES => q{ | 
| 39 |  |  |  |  |  |  | This table lists standard tags of the RTF information group, but ExifTool | 
| 40 |  |  |  |  |  |  | will also extract any non-standard tags found in this group.  As well, | 
| 41 |  |  |  |  |  |  | ExifTool will extract any custom properties that are found.  See | 
| 42 |  |  |  |  |  |  | L for the | 
| 43 |  |  |  |  |  |  | specification. | 
| 44 |  |  |  |  |  |  | }, | 
| 45 |  |  |  |  |  |  | title    => { }, | 
| 46 |  |  |  |  |  |  | subject  => { }, | 
| 47 |  |  |  |  |  |  | author   => { Groups => { 2 => 'Author' } }, | 
| 48 |  |  |  |  |  |  | manager  => { }, | 
| 49 |  |  |  |  |  |  | company  => { }, | 
| 50 |  |  |  |  |  |  | copyright=> { Groups => { 2 => 'Author' } }, # (written by Apple TextEdit) | 
| 51 |  |  |  |  |  |  | operator => { Name => 'LastModifiedBy' }, | 
| 52 |  |  |  |  |  |  | category => { }, | 
| 53 |  |  |  |  |  |  | keywords => { }, | 
| 54 |  |  |  |  |  |  | comment  => { }, | 
| 55 |  |  |  |  |  |  | doccomm  => { Name => 'Comments' }, | 
| 56 |  |  |  |  |  |  | hlinkbase=> { Name => 'HyperlinkBase' }, | 
| 57 |  |  |  |  |  |  | creatim  => { | 
| 58 |  |  |  |  |  |  | Name => 'CreateDate', | 
| 59 |  |  |  |  |  |  | Format => 'date', | 
| 60 |  |  |  |  |  |  | Groups => { 2 => 'Time' }, | 
| 61 |  |  |  |  |  |  | PrintConv => '$self->ConvertDateTime($val)', | 
| 62 |  |  |  |  |  |  | }, | 
| 63 |  |  |  |  |  |  | revtim   => { | 
| 64 |  |  |  |  |  |  | Name => 'ModifyDate', | 
| 65 |  |  |  |  |  |  | Format => 'date', | 
| 66 |  |  |  |  |  |  | Groups => { 2 => 'Time' }, | 
| 67 |  |  |  |  |  |  | PrintConv => '$self->ConvertDateTime($val)', | 
| 68 |  |  |  |  |  |  | }, | 
| 69 |  |  |  |  |  |  | printim  => { | 
| 70 |  |  |  |  |  |  | Name => 'LastPrinted', | 
| 71 |  |  |  |  |  |  | Format => 'date', | 
| 72 |  |  |  |  |  |  | Groups => { 2 => 'Time' }, | 
| 73 |  |  |  |  |  |  | PrintConv => '$self->ConvertDateTime($val)', | 
| 74 |  |  |  |  |  |  | }, | 
| 75 |  |  |  |  |  |  | buptim   => { | 
| 76 |  |  |  |  |  |  | Name => 'BackupTime', | 
| 77 |  |  |  |  |  |  | Format => 'date', | 
| 78 |  |  |  |  |  |  | Groups => { 2 => 'Time' }, | 
| 79 |  |  |  |  |  |  | PrintConv => '$self->ConvertDateTime($val)', | 
| 80 |  |  |  |  |  |  | }, | 
| 81 |  |  |  |  |  |  | edmins   => { | 
| 82 |  |  |  |  |  |  | Name => 'TotalEditTime', # in minutes | 
| 83 |  |  |  |  |  |  | PrintConv => 'ConvertTimeSpan($val, 60)', | 
| 84 |  |  |  |  |  |  | }, | 
| 85 |  |  |  |  |  |  | nofpages => { Name => 'Pages' }, | 
| 86 |  |  |  |  |  |  | nofwords => { Name => 'Words' }, | 
| 87 |  |  |  |  |  |  | nofchars => { Name => 'Characters' }, | 
| 88 |  |  |  |  |  |  | nofcharsws=>{ | 
| 89 |  |  |  |  |  |  | Name => 'CharactersWithSpaces', | 
| 90 |  |  |  |  |  |  | Notes => q{ | 
| 91 |  |  |  |  |  |  | according to the 2007 Microsoft RTF specification this is clearly the number | 
| 92 |  |  |  |  |  |  | of characters NOT including spaces, but Microsoft Word writes this as the | 
| 93 |  |  |  |  |  |  | number WITH spaces, so ExifTool names this tag according to the de facto | 
| 94 |  |  |  |  |  |  | standard | 
| 95 |  |  |  |  |  |  | }, | 
| 96 |  |  |  |  |  |  | }, | 
| 97 |  |  |  |  |  |  | id       => { Name => 'InternalIDNumber' }, | 
| 98 |  |  |  |  |  |  | version  => { Name => 'RevisionNumber' }, | 
| 99 |  |  |  |  |  |  | vern     => { Name => 'InternalVersionNumber' }, | 
| 100 |  |  |  |  |  |  | ); | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | # lookup for user-defined properties | 
| 103 |  |  |  |  |  |  | # (none are pre-defined and this table doesn't appear in the docs) | 
| 104 |  |  |  |  |  |  | %Image::ExifTool::RTF::UserProps = ( | 
| 105 |  |  |  |  |  |  | GROUPS => { 2 => 'Document' }, | 
| 106 |  |  |  |  |  |  | ); | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 109 |  |  |  |  |  |  | # Read to nested closing curly bracket "}" | 
| 110 |  |  |  |  |  |  | # Inputs: 0) data ref, 1) optional RAF ref to read more data if available | 
| 111 |  |  |  |  |  |  | # Returns: text inside brackets, or undef on error | 
| 112 |  |  |  |  |  |  | # Notes: On entry the current position in the data must be set to immediately | 
| 113 |  |  |  |  |  |  | #        after the command that opens the bracket.  On return the current | 
| 114 |  |  |  |  |  |  | #        position is immediately following the closing brace if the return | 
| 115 |  |  |  |  |  |  | #        value is defined. | 
| 116 |  |  |  |  |  |  | sub ReadToNested($;$) | 
| 117 |  |  |  |  |  |  | { | 
| 118 | 14 |  |  | 14 | 0 | 28 | my ($dataPt, $raf) = @_; | 
| 119 | 14 |  |  |  |  | 25 | my $pos = pos $$dataPt; | 
| 120 | 14 |  |  |  |  | 18 | my $level = 1; | 
| 121 | 14 |  |  |  |  | 22 | for (;;) { | 
| 122 |  |  |  |  |  |  | # look for the next bracket | 
| 123 | 46 | 50 |  |  |  | 142 | unless ($$dataPt =~ /(\\*)([{}])/g) { | 
| 124 |  |  |  |  |  |  | # must read some more data | 
| 125 | 0 |  |  |  |  | 0 | my $p = length $$dataPt; | 
| 126 | 0 |  |  |  |  | 0 | my $buff; | 
| 127 | 0 | 0 | 0 |  |  | 0 | last unless $raf and $raf->Read($buff, 65536); | 
| 128 | 0 |  |  |  |  | 0 | $$dataPt .= $buff; | 
| 129 |  |  |  |  |  |  | # rewind position to include any leading backslashes | 
| 130 | 0 |  | 0 |  |  | 0 | --$p while $p and substr($$dataPt, $p - 1, 1) eq '\\'; | 
| 131 | 0 |  |  |  |  | 0 | pos($$dataPt) = $p; # set position to continue search | 
| 132 | 0 |  |  |  |  | 0 | next; | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  | # bracket is escaped if preceded by an odd number of backslashes | 
| 135 | 46 | 100 | 100 |  |  | 123 | next if $1 and length($1) & 0x01; | 
| 136 | 42 | 100 |  |  |  | 85 | $2 eq '{' and ++$level, next; | 
| 137 | 28 | 100 |  |  |  | 54 | next unless --$level <= 0; | 
| 138 | 14 |  |  |  |  | 53 | return substr($$dataPt, $pos, pos($$dataPt) - $pos - 1); | 
| 139 |  |  |  |  |  |  | } | 
| 140 | 0 |  |  |  |  | 0 | return undef; | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 144 |  |  |  |  |  |  | # Unescape RTF escape sequences | 
| 145 |  |  |  |  |  |  | # Inputs: 0) ExifTool ref, 1) RTF text, 2) RTF character set (for hex characters) | 
| 146 |  |  |  |  |  |  | # Returns: Unescaped text (in current ExifTool Charset) | 
| 147 |  |  |  |  |  |  | sub UnescapeRTF($$$) | 
| 148 |  |  |  |  |  |  | { | 
| 149 | 11 |  |  | 11 | 0 | 24 | my ($et, $val, $charset) = @_; | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | # return now unless we have a control sequence | 
| 152 | 11 | 100 |  |  |  | 29 | unless ($val =~ /\\/) { | 
| 153 | 5 |  |  |  |  | 11 | $val =~ tr/\n\r//d; # ignore CR's and LF's | 
| 154 | 5 |  |  |  |  | 12 | return $val; | 
| 155 |  |  |  |  |  |  | } | 
| 156 |  |  |  |  |  |  | # CR/LF is significant if it terminates a control sequence (so change these to a space) | 
| 157 |  |  |  |  |  |  | # (was $val =~ s/(^|[^\\])((?:\\\\)*)(\\[a-zA-Z]+(?:-?\d+)?)[\n\r]/$1$2$3 /g;) | 
| 158 | 6 | 100 |  |  |  | 30 | $val =~ s/\\(?:([a-zA-Z]+(?:-?\d+)?)[\n\r]|(.))/'\\'.($1 ? "$1 " : $2)/sge; | 
|  | 35 |  |  |  |  | 148 |  | 
| 159 |  |  |  |  |  |  | # protect the newline control sequence by converting to a \par command | 
| 160 |  |  |  |  |  |  | # (was $val =~ s/(^|[^\\])((?:\\\\)*)(\\[\n\r])/$1$2\\par /g;) | 
| 161 | 6 | 100 |  |  |  | 28 | $val =~ s/(\\[\n\r])|(\\.)/$2 || '\\par '/sge; | 
|  | 35 |  |  |  |  | 158 |  | 
| 162 |  |  |  |  |  |  | # all other CR/LF's are ignored (so delete them) | 
| 163 | 6 |  |  |  |  | 18 | $val =~ tr/\n\r//d; | 
| 164 |  |  |  |  |  |  |  | 
| 165 | 6 |  |  |  |  | 9 | my $rtnVal = ''; | 
| 166 | 6 |  |  |  |  | 11 | my $len = length $val; | 
| 167 | 6 |  |  |  |  | 8 | my $skip = 1;   # default Unicode skip count | 
| 168 | 6 |  |  |  |  | 9 | my $p0 = 0; | 
| 169 |  |  |  |  |  |  |  | 
| 170 | 6 |  |  |  |  | 8 | for (;;) { | 
| 171 |  |  |  |  |  |  | # find next backslash | 
| 172 | 40 | 100 |  |  |  | 109 | my $p1 = ($val =~ /\\/g) ? pos($val) : $len + 1; | 
| 173 |  |  |  |  |  |  | # add text up to start of this control sequence (or up to end) | 
| 174 | 40 |  |  |  |  | 59 | my $n = $p1 - $p0 - 1; | 
| 175 | 40 | 100 |  |  |  | 87 | $rtnVal .= substr($val, $p0, $n) if $n > 0; | 
| 176 |  |  |  |  |  |  | # all done if at the end or if control sequence is empty | 
| 177 | 40 | 100 |  |  |  | 75 | last if $p1 >= $len; | 
| 178 |  |  |  |  |  |  | # look for an ASCII-letter control word or Unicode control | 
| 179 | 34 | 100 |  |  |  | 102 | if ($val =~ /\G([a-zA-Z]+)(-?\d+)? ?/g) { | 
| 180 |  |  |  |  |  |  | # interpret command if recognized | 
| 181 | 25 | 100 |  |  |  | 75 | if ($1 eq 'uc') {       # \ucN | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 182 | 2 |  |  |  |  | 4 | $skip = $2; | 
| 183 |  |  |  |  |  |  | } elsif ($1 eq 'u') {   # \uN | 
| 184 | 20 | 50 |  |  |  | 45 | if ($2 < 0) { | 
| 185 | 0 |  |  |  |  | 0 | $et->WarnOnce('Invalid Unicode character(s) in text'); | 
| 186 | 0 |  |  |  |  | 0 | $rtnVal .= '?'; | 
| 187 |  |  |  |  |  |  | } else { | 
| 188 | 20 |  |  |  |  | 65 | require Image::ExifTool::Charset; | 
| 189 | 20 |  |  |  |  | 54 | $rtnVal .= Image::ExifTool::Charset::Recompose($et, [$2]); | 
| 190 | 20 | 100 |  |  |  | 45 | if ($skip) { | 
| 191 |  |  |  |  |  |  | # must skip the specified number of characters | 
| 192 |  |  |  |  |  |  | # (not simple because RTF control words count as a single character) | 
| 193 | 1 | 50 |  |  |  | 53 | last unless $val =~ /\G([^\\]|\\([a-zA-Z]+)(-?\d+)? ?|\\'.{2}|\\.){$skip}/g; | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  | } | 
| 196 |  |  |  |  |  |  | } elsif ($rtfEntity{$1}) { | 
| 197 | 3 |  |  |  |  | 12 | require Image::ExifTool::Charset; | 
| 198 | 3 |  |  |  |  | 13 | $rtnVal .= Image::ExifTool::Charset::Recompose($et, [$rtfEntity{$1}]); | 
| 199 |  |  |  |  |  |  | } # (else ignore the command) | 
| 200 |  |  |  |  |  |  | } else { | 
| 201 | 9 |  |  |  |  | 20 | my $ch = substr($val, $p1, 1); | 
| 202 | 9 | 100 |  |  |  | 19 | if ($ch eq "'") { | 
| 203 |  |  |  |  |  |  | # hex character code | 
| 204 | 5 | 50 |  |  |  | 12 | last if $p1 + 3 > $len; | 
| 205 | 5 |  |  |  |  | 9 | my $hex = substr($val, $p1 + 1, 2); | 
| 206 | 5 | 50 |  |  |  | 18 | if ($hex =~ /^[0-9a-fA-F]{2}$/) { | 
| 207 | 5 |  |  |  |  | 589 | require Image::ExifTool::Charset; | 
| 208 | 5 |  |  |  |  | 30 | $rtnVal .= $et->Decode(chr(hex($hex)), $charset); | 
| 209 |  |  |  |  |  |  | } | 
| 210 | 5 |  |  |  |  | 15 | pos($val) = $p1 + 3;    # skip to after the hex code | 
| 211 |  |  |  |  |  |  | } else { | 
| 212 |  |  |  |  |  |  | # assume a standard control symbol (\, {, }, etc) | 
| 213 |  |  |  |  |  |  | # (note, this may not be valid for some uncommon | 
| 214 |  |  |  |  |  |  | #  control symbols like \~ for non-breaking space) | 
| 215 | 4 |  |  |  |  | 6 | $rtnVal .= $ch; | 
| 216 | 4 |  |  |  |  | 10 | pos($val) = $p1 + 1;    # skip to after this character | 
| 217 |  |  |  |  |  |  | } | 
| 218 |  |  |  |  |  |  | } | 
| 219 | 34 |  |  |  |  | 61 | $p0 = pos($val); | 
| 220 |  |  |  |  |  |  | } | 
| 221 | 6 |  |  |  |  | 13 | return $rtnVal; | 
| 222 |  |  |  |  |  |  | } | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 225 |  |  |  |  |  |  | # Read information in a RTF document | 
| 226 |  |  |  |  |  |  | # Inputs: 0) ExifTool ref, 1) dirInfo ref | 
| 227 |  |  |  |  |  |  | # Returns: 1 on success, 0 if this wasn't a valid RTF file | 
| 228 |  |  |  |  |  |  | sub ProcessRTF($$) | 
| 229 |  |  |  |  |  |  | { | 
| 230 | 1 |  |  | 1 | 0 | 3 | my ($et, $dirInfo) = @_; | 
| 231 | 1 |  |  |  |  | 4 | my $raf = $$dirInfo{RAF}; | 
| 232 | 1 |  |  |  |  | 3 | my ($buff, $buf2, $cs); | 
| 233 |  |  |  |  |  |  |  | 
| 234 | 1 | 50 | 33 |  |  | 4 | return 0 unless $raf->Read($buff, 64) and $raf->Seek(0,0); | 
| 235 | 1 | 50 |  |  |  | 12 | return 0 unless $buff =~ /^[\n\r]*\{[\n\r]*\\rtf[^a-zA-Z]/; | 
| 236 | 1 |  |  |  |  | 8 | $et->SetFileType(); | 
| 237 |  |  |  |  |  |  | # | 
| 238 |  |  |  |  |  |  | # determine the RTF character set | 
| 239 |  |  |  |  |  |  | # | 
| 240 | 1 | 50 |  |  |  | 8 | if ($buff=~ /\\ansicpg(\d*)/) { | 
|  |  | 0 |  |  |  |  |  | 
| 241 | 1 |  |  |  |  | 4 | $cs = "cp$1"; | 
| 242 |  |  |  |  |  |  | } elsif ($buff=~ /\\(ansi|mac|pc|pca)[^a-zA-Z]/) { | 
| 243 | 0 |  |  |  |  | 0 | my %trans = ( | 
| 244 |  |  |  |  |  |  | ansi => 'Latin', | 
| 245 |  |  |  |  |  |  | mac  => 'MacRoman', | 
| 246 |  |  |  |  |  |  | pc   => 'cp437', | 
| 247 |  |  |  |  |  |  | pca  => 'cp850', | 
| 248 |  |  |  |  |  |  | ); | 
| 249 | 0 |  |  |  |  | 0 | $cs = $trans{$1}; | 
| 250 |  |  |  |  |  |  | } else { | 
| 251 | 0 |  |  |  |  | 0 | $et->Warn('Unspecified RTF encoding. Will assume Latin'); | 
| 252 | 0 |  |  |  |  | 0 | $cs = 'Latin'; | 
| 253 |  |  |  |  |  |  | } | 
| 254 | 1 |  |  |  |  | 5 | my $charset = $Image::ExifTool::charsetName{lc $cs}; | 
| 255 | 1 | 50 |  |  |  | 4 | unless ($charset) { | 
| 256 | 0 |  |  |  |  | 0 | $et->Warn("Unsupported RTF encoding $cs. Will assume Latin."); | 
| 257 | 0 |  |  |  |  | 0 | $charset = 'Latin'; | 
| 258 |  |  |  |  |  |  | } | 
| 259 | 1 |  |  |  |  | 4 | my $tagTablePtr = GetTagTable('Image::ExifTool::RTF::Main'); | 
| 260 | 1 |  |  |  |  | 4 | undef $buff; | 
| 261 |  |  |  |  |  |  | # | 
| 262 |  |  |  |  |  |  | # scan for \info group | 
| 263 |  |  |  |  |  |  | # | 
| 264 | 1 |  |  |  |  | 2 | for (;;) { | 
| 265 | 2 | 100 |  |  |  | 16 | $raf->Read($buf2, 65536) or last; | 
| 266 | 1 | 50 |  |  |  | 9 | if (defined $buff) { | 
| 267 |  |  |  |  |  |  | # read more but leave some overlap for the match | 
| 268 | 0 |  |  |  |  | 0 | $buff = substr($buff, -16) . $buf2; | 
| 269 |  |  |  |  |  |  | } else { | 
| 270 | 1 |  |  |  |  | 4 | $buff = $buf2; | 
| 271 |  |  |  |  |  |  | } | 
| 272 | 1 | 50 |  |  |  | 17 | next unless $buff =~ /[^\\]\{[\n\r]*\\info([^a-zA-Z])/g; | 
| 273 |  |  |  |  |  |  | # anything but a space is included in the contents | 
| 274 | 1 | 50 |  |  |  | 8 | pos($buff) = pos($buff) - 1 if $1 ne ' '; | 
| 275 | 1 |  |  |  |  | 7 | my $info = ReadToNested(\$buff, $raf); | 
| 276 | 1 | 50 |  |  |  | 5 | unless (defined $info) { | 
| 277 | 0 |  |  |  |  | 0 | $et->Warn('Unterminated information group'); | 
| 278 | 0 |  |  |  |  | 0 | last; | 
| 279 |  |  |  |  |  |  | } | 
| 280 |  |  |  |  |  |  | # process info commands (eg. "\author", "\*\copyright"); | 
| 281 | 1 |  |  |  |  | 9 | while ($info =~ /\{[\n\r]*(\\\*[\n\r]*)?\\([a-zA-Z]+)([^a-zA-Z])/g) { | 
| 282 | 8 | 100 |  |  |  | 32 | pos($info) = pos($info) - 1 if $3 ne ' '; | 
| 283 | 8 |  |  |  |  | 16 | my $tag = $2; | 
| 284 | 8 |  |  |  |  | 18 | my $val = ReadToNested(\$info); | 
| 285 | 8 | 50 |  |  |  | 23 | last unless defined $val; | 
| 286 | 8 |  |  |  |  | 13 | my $tagInfo = $$tagTablePtr{$tag}; | 
| 287 | 8 | 100 | 66 |  |  | 37 | if ($tagInfo and $$tagInfo{Format} and $$tagInfo{Format} eq 'date') { | 
|  |  |  | 66 |  |  |  |  | 
| 288 |  |  |  |  |  |  | # parse RTF date commands | 
| 289 | 1 |  |  |  |  | 9 | my %idx = (yr=>0,mo=>1,dy=>2,hr=>3,min=>4,sec=>5); | 
| 290 | 1 |  |  |  |  | 4 | my @t = (0) x 6; | 
| 291 | 1 |  |  |  |  | 8 | while ($val =~ /\\([a-z]+)(\d+)/g) { | 
| 292 | 4 | 50 |  |  |  | 12 | next unless defined $idx{$1}; | 
| 293 | 4 |  |  |  |  | 18 | $t[$idx{$1}] = $2; | 
| 294 |  |  |  |  |  |  | } | 
| 295 | 1 |  |  |  |  | 17 | $val = sprintf("%.4d:%.2d:%.2d %.2d:%.2d:%.2d", @t); | 
| 296 |  |  |  |  |  |  | } else { | 
| 297 |  |  |  |  |  |  | # unescape RTF string value | 
| 298 | 7 |  |  |  |  | 17 | $val = UnescapeRTF($et, $val, $charset); | 
| 299 |  |  |  |  |  |  | } | 
| 300 |  |  |  |  |  |  | # create tagInfo for unknown tags | 
| 301 | 8 | 50 |  |  |  | 19 | if (not $tagInfo) { | 
| 302 | 0 |  |  |  |  | 0 | AddTagToTable($tagTablePtr, $tag, { Name => ucfirst($tag) }); | 
| 303 |  |  |  |  |  |  | } | 
| 304 | 8 |  |  |  |  | 25 | $et->HandleTag($tagTablePtr, $tag, $val); | 
| 305 |  |  |  |  |  |  | } | 
| 306 |  |  |  |  |  |  | } | 
| 307 | 1 | 50 |  |  |  | 3 | return 1 unless defined $buff; | 
| 308 |  |  |  |  |  |  | # | 
| 309 |  |  |  |  |  |  | # scan for \userprops (but don't read more from file to find the start of this command) | 
| 310 |  |  |  |  |  |  | # | 
| 311 | 1 |  |  |  |  | 3 | pos($buff) = 0; | 
| 312 | 1 |  |  |  |  | 15 | while ($buff =~ /[^\\]\{[\n\r]*\\\*[\n\r]*\\userprops([^a-zA-Z])/g) { | 
| 313 |  |  |  |  |  |  | # Note: The RTF spec places brackets around each propinfo structure, | 
| 314 |  |  |  |  |  |  | # but Microsoft Word doesn't write it this way, so tolerate either. | 
| 315 | 1 | 50 |  |  |  | 9 | pos($buff) = pos($buff) - 1 if $1 ne ' '; | 
| 316 | 1 |  |  |  |  | 5 | my $props = ReadToNested(\$buff, $raf); | 
| 317 | 1 |  |  |  |  | 5 | $tagTablePtr = Image::ExifTool::GetTagTable('Image::ExifTool::RTF::UserProps'); | 
| 318 | 1 | 50 |  |  |  | 3 | unless (defined $props) { | 
| 319 | 0 |  |  |  |  | 0 | $et->Warn('Unterminated user properties'); | 
| 320 | 0 |  |  |  |  | 0 | last; | 
| 321 |  |  |  |  |  |  | } | 
| 322 |  |  |  |  |  |  | # process user properties | 
| 323 | 1 |  |  |  |  | 2 | my $tag; | 
| 324 | 1 |  |  |  |  | 8 | while ($props =~ /\{[\n\r]*(\\\*[\n\r]*)?\\([a-zA-Z]+)([^a-zA-Z])/g) { | 
| 325 | 4 | 50 |  |  |  | 13 | pos($props) = pos($props) - 1 if $3 ne ' '; | 
| 326 | 4 |  |  |  |  | 8 | my $t = $2; | 
| 327 | 4 |  |  |  |  | 10 | my $val = ReadToNested(\$props); | 
| 328 | 4 | 50 |  |  |  | 10 | last unless defined $val; | 
| 329 | 4 |  |  |  |  | 7 | $val = UnescapeRTF($et, $val, $charset); | 
| 330 | 4 | 100 | 33 |  |  | 26 | if ($t eq 'propname') { | 
|  |  | 50 |  |  |  |  |  | 
| 331 | 2 |  |  |  |  | 4 | $tag = $val; | 
| 332 | 2 |  |  |  |  | 10 | next; | 
| 333 |  |  |  |  |  |  | } elsif ($t ne 'staticval' or not defined $tag) { | 
| 334 | 0 |  |  |  |  | 0 | next;   # ignore \linkval and \proptype for now | 
| 335 |  |  |  |  |  |  | } | 
| 336 | 2 |  |  |  |  | 7 | $tag =~ s/\s(.)/\U$1/g;     # capitalize all words in tag name | 
| 337 | 2 |  |  |  |  | 4 | $tag =~ tr/-_a-zA-Z0-9//dc; # remove illegal characters | 
| 338 | 2 | 50 |  |  |  | 5 | next unless $tag; | 
| 339 |  |  |  |  |  |  | # create tagInfo for unknown tags | 
| 340 | 2 | 50 |  |  |  | 7 | unless ($$tagTablePtr{$tag}) { | 
| 341 | 2 |  |  |  |  | 19 | AddTagToTable($tagTablePtr, $tag, { Name => $tag }); | 
| 342 |  |  |  |  |  |  | } | 
| 343 | 2 |  |  |  |  | 5 | $et->HandleTag($tagTablePtr, $tag, $val); | 
| 344 |  |  |  |  |  |  | } | 
| 345 | 1 |  |  |  |  | 2 | last;   # (didn't really want to loop) | 
| 346 |  |  |  |  |  |  | } | 
| 347 | 1 |  |  |  |  | 4 | return 1; | 
| 348 |  |  |  |  |  |  | } | 
| 349 |  |  |  |  |  |  |  | 
| 350 |  |  |  |  |  |  | 1;  # end | 
| 351 |  |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  | __END__ |