line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# <@LICENSE> |
2
|
|
|
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more |
3
|
|
|
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with |
4
|
|
|
|
|
|
|
# this work for additional information regarding copyright ownership. |
5
|
|
|
|
|
|
|
# The ASF licenses this file to you under the Apache License, Version 2.0 |
6
|
|
|
|
|
|
|
# (the "License"); you may not use this file except in compliance with |
7
|
|
|
|
|
|
|
# the License. You may obtain a copy of the License at: |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software |
12
|
|
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
13
|
|
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14
|
|
|
|
|
|
|
# See the License for the specific language governing permissions and |
15
|
|
|
|
|
|
|
# limitations under the License. |
16
|
|
|
|
|
|
|
# </@LICENSE> |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 NAME |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Mail::SpamAssassin::Plugin::PDFInfo - PDFInfo Plugin for SpamAssassin |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 SYNOPSIS |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
loadplugin Mail::SpamAssassin::Plugin::PDFInfo |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 DESCRIPTION |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
This plugin helps detect spam using attached PDF files |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=over 4 |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=item See "Usage:" below - for more documentation see 20_pdfinfo.cf |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
Original info kept for history. For later changes see SVN repo |
35
|
|
|
|
|
|
|
------------------------------------------------------- |
36
|
|
|
|
|
|
|
PDFInfo Plugin for SpamAssassin |
37
|
|
|
|
|
|
|
Version: 0.8 |
38
|
|
|
|
|
|
|
Info: $Id: PDFInfo.pm 904 2007-08-12 01:36:23Z root $ |
39
|
|
|
|
|
|
|
Created: 2007-08-10 |
40
|
|
|
|
|
|
|
Modified: 2007-08-10 |
41
|
|
|
|
|
|
|
By: Dallas Engelken |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Changes: |
44
|
|
|
|
|
|
|
0.8 - added .fdf detection (thanks John Lundin) [axb] |
45
|
|
|
|
|
|
|
0.7 - fixed empty body/pdf count buglet(thanks Jeremy) [axb] |
46
|
|
|
|
|
|
|
0.6 - added support for tags - PDFCOUNT, PDFVERSION, PDFPRODUCER, etc. |
47
|
|
|
|
|
|
|
- fixed issue on perl 5.6.1 where pdf_match_details() failed to call |
48
|
|
|
|
|
|
|
_find_pdf_mime_parts(), resulting in no detection of pdf mime parts. |
49
|
|
|
|
|
|
|
- quoted-printable support - requires MIME::QuotedPrint (which should be in everyone's |
50
|
|
|
|
|
|
|
install as a part of the MIME-Base64 package which is a SA req) |
51
|
|
|
|
|
|
|
- added simple pdf_is_empty_body() function which counts the body bytes minus the |
52
|
|
|
|
|
|
|
subject line. can add optional <bytes> param if you need to allow for a few bytes. |
53
|
|
|
|
|
|
|
0.5 - fix warns for undef $pdf_tags |
54
|
|
|
|
|
|
|
- remove { } and \ before running eval in pdf_match_details to avoid eval error |
55
|
|
|
|
|
|
|
0.4 - added pdf_is_encrypted() function |
56
|
|
|
|
|
|
|
- added option to look for image HxW on same line |
57
|
|
|
|
|
|
|
0.3 - added 2nd fuzzy md5 which uses pdf tag layout as data |
58
|
|
|
|
|
|
|
- renamed pdf_image_named() to pdf_named() |
59
|
|
|
|
|
|
|
- PDF images are encapsulated and have no names. We are matching the PDF file name. |
60
|
|
|
|
|
|
|
- renamed pdf_image_name_regex() to pdf_name_regex() |
61
|
|
|
|
|
|
|
- PDF images are encapsulated and have no names. We are matching the PDF file name. |
62
|
|
|
|
|
|
|
- changed pdf_image_count() a bit and added pdf_count(). |
63
|
|
|
|
|
|
|
- pdf_count() checks how many pdf attachments there are on the mail |
64
|
|
|
|
|
|
|
- pdf_image_count() checks how many images are found within all pdfs in the mail. |
65
|
|
|
|
|
|
|
- removed the restriction of the pdf containing an image in order to md5 it. |
66
|
|
|
|
|
|
|
- added pdf_match_details() function to check the following 'details' |
67
|
|
|
|
|
|
|
- author: Author of PDF if specified |
68
|
|
|
|
|
|
|
- producer: Software used to produce PDF |
69
|
|
|
|
|
|
|
- creator: Software used to produce PDF, usually similar to producer |
70
|
|
|
|
|
|
|
- title: Title of PDF |
71
|
|
|
|
|
|
|
- created: Creation Date |
72
|
|
|
|
|
|
|
- modified: Last Modified |
73
|
|
|
|
|
|
|
0.2 - support PDF octet-stream |
74
|
|
|
|
|
|
|
0.1 - just ported over the imageinfo code, and renamed to pdfinfo. |
75
|
|
|
|
|
|
|
- removed all support for png, gif, and jpg from the code. |
76
|
|
|
|
|
|
|
- prepended pdf_ to all function names to avoid conflicts with ImageInfo in SA 3.2. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Usage: |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
pdf_count() |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
body RULENAME eval:pdf_count(<min>,[max]) |
83
|
|
|
|
|
|
|
min: required, message contains at least x pdf mime parts |
84
|
|
|
|
|
|
|
max: optional, if specified, must not contain more than x pdf mime parts |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
pdf_image_count() |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
body RULENAME eval:pdf_image_count(<min>,[max]) |
89
|
|
|
|
|
|
|
min: required, message contains at least x images in pdf attachments. |
90
|
|
|
|
|
|
|
max: optional, if specified, must not contain more than x pdf images |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
pdf_pixel_coverage() |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
body RULENAME eval:pdf_pixel_coverage(<min>,[max]) |
95
|
|
|
|
|
|
|
min: required, message contains at least this much pixel area |
96
|
|
|
|
|
|
|
max: optional, if specified, message must not contain more than this much pixel area |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
pdf_named() |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
body RULENAME eval:pdf_named(<string>) |
101
|
|
|
|
|
|
|
string: exact file name match, if you need partial match, see pdf_name_regex() |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
pdf_name_regex() |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
body RULENAME eval:pdf_name_regex(<regex>) |
106
|
|
|
|
|
|
|
regex: regular expression, see examples in ruleset |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
pdf_match_md5() |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
body RULENAME eval:pdf_match_md5(<string>) |
111
|
|
|
|
|
|
|
string: 32-byte md5 hex |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
pdf_match_fuzzy_md5() |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
body RULENAME eval:pdf_match_fuzzy_md5(<string>) |
116
|
|
|
|
|
|
|
string: 32-byte md5 hex - see ruleset for obtaining the fuzzy md5 |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
pdf_match_details() |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
body RULENAME eval:pdf_match_details(<detail>,<regex>); |
121
|
|
|
|
|
|
|
detail: author, creator, created, modified, producer, title |
122
|
|
|
|
|
|
|
regex: regular expression, see examples in ruleset |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
pdf_is_encrypted() |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
body RULENAME eval:pdf_is_encrypted() |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
pdf_is_empty_body() |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
body RULENAME eval:pdf_is_empty_body(<bytes>) |
131
|
|
|
|
|
|
|
bytes: maximum byte count to allow and still consider it empty |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
NOTE: See the ruleset for more examples that are not documented here. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=back |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=cut |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# ------------------------------------------------------- |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
use Mail::SpamAssassin::Plugin; |
143
|
20
|
|
|
20
|
|
138
|
use Mail::SpamAssassin::Logger; |
|
20
|
|
|
|
|
40
|
|
|
20
|
|
|
|
|
630
|
|
144
|
20
|
|
|
20
|
|
114
|
use Mail::SpamAssassin::Util qw(compile_regexp); |
|
20
|
|
|
|
|
69
|
|
|
20
|
|
|
|
|
1324
|
|
145
|
20
|
|
|
20
|
|
139
|
use strict; |
|
20
|
|
|
|
|
35
|
|
|
20
|
|
|
|
|
879
|
|
146
|
20
|
|
|
20
|
|
124
|
use warnings; |
|
20
|
|
|
|
|
47
|
|
|
20
|
|
|
|
|
506
|
|
147
|
20
|
|
|
20
|
|
107
|
# use bytes; |
|
20
|
|
|
|
|
45
|
|
|
20
|
|
|
|
|
669
|
|
148
|
|
|
|
|
|
|
use Digest::MD5 qw(md5_hex); |
149
|
20
|
|
|
20
|
|
107
|
use MIME::QuotedPrint; |
|
20
|
|
|
|
|
39
|
|
|
20
|
|
|
|
|
1162
|
|
150
|
20
|
|
|
20
|
|
8353
|
|
|
20
|
|
|
|
|
4927
|
|
|
20
|
|
|
|
|
66099
|
|
151
|
|
|
|
|
|
|
our @ISA = qw(Mail::SpamAssassin::Plugin); |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# constructor: register the eval rule |
154
|
|
|
|
|
|
|
my $class = shift; |
155
|
|
|
|
|
|
|
my $mailsaobject = shift; |
156
|
61
|
|
|
61
|
1
|
201
|
|
157
|
61
|
|
|
|
|
177
|
# some boilerplate... |
158
|
|
|
|
|
|
|
$class = ref($class) || $class; |
159
|
|
|
|
|
|
|
my $self = $class->SUPER::new($mailsaobject); |
160
|
61
|
|
33
|
|
|
350
|
bless ($self, $class); |
161
|
61
|
|
|
|
|
323
|
|
162
|
61
|
|
|
|
|
140
|
$self->register_eval_rule ("pdf_count"); |
163
|
|
|
|
|
|
|
$self->register_eval_rule ("pdf_image_count"); |
164
|
61
|
|
|
|
|
250
|
$self->register_eval_rule ("pdf_pixel_coverage"); |
165
|
61
|
|
|
|
|
209
|
$self->register_eval_rule ("pdf_image_size_exact"); |
166
|
61
|
|
|
|
|
193
|
$self->register_eval_rule ("pdf_image_size_range"); |
167
|
61
|
|
|
|
|
199
|
$self->register_eval_rule ("pdf_named"); |
168
|
61
|
|
|
|
|
204
|
$self->register_eval_rule ("pdf_name_regex"); |
169
|
61
|
|
|
|
|
204
|
$self->register_eval_rule ("pdf_image_to_text_ratio"); |
170
|
61
|
|
|
|
|
209
|
$self->register_eval_rule ("pdf_match_md5"); |
171
|
61
|
|
|
|
|
185
|
$self->register_eval_rule ("pdf_match_fuzzy_md5"); |
172
|
61
|
|
|
|
|
177
|
$self->register_eval_rule ("pdf_match_details"); |
173
|
61
|
|
|
|
|
183
|
$self->register_eval_rule ("pdf_is_encrypted"); |
174
|
61
|
|
|
|
|
182
|
$self->register_eval_rule ("pdf_is_empty_body"); |
175
|
61
|
|
|
|
|
189
|
|
176
|
61
|
|
|
|
|
171
|
return $self; |
177
|
|
|
|
|
|
|
} |
178
|
61
|
|
|
|
|
513
|
|
179
|
|
|
|
|
|
|
# ----------------------------------------- |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# Dispatch table of per-type detail extractors, keyed by the short type name
# ('pdf' is the only entry).  Each handler is called as
#   $get_details{$type}->($self, $pms, $part)
# and records what it finds under $pms->{pdfinfo} and, via _set_tag(), as
# template tags.
my %get_details = (
  'pdf' => sub {
    my ($self, $pms, $part) = @_;

    my $type = $part->{'type'} || 'base64';
    my $data = '';

    if ($type eq 'quoted-printable') {
      # NOTE(review): $data is still '' at this point, so this decodes an
      # empty string, and the branch is only reached when the part's 'type'
      # is literally 'quoted-printable'.  Left as-is; confirm whether
      # $part->decode() already copes with quoted-printable parts and, if
      # so, drop this branch.
      $data = decode_qp($data); # use QuotedPrint->decode_qp
    }
    else {
      $data = $part->decode();  # just use built in base64 decoder
    }

    # the PDF magic ("%PDF-x.y") must appear within the first 8 bytes
    my $index = substr($data, 0, 8);

    return unless ($index =~ /.PDF\-(\d\.\d)/);
    my $version = $1;
    $self->_set_tag($pms, 'PDFVERSION', $version);
    # dbg("pdfinfo: pdf version = $version");

    my ($height, $width, $fuzzy_data, $pdf_tags);
    my ($producer, $created, $modified, $title, $creator, $author) = ('unknown','0','0','untitled','unknown','unknown');
    my ($md5, $fuzzy_md5) = ('', '');
    my ($total_height, $total_width, $total_area, $line_count) = (0,0,0,0);

    my $name = $part->{'name'} || '';
    $self->_set_tag($pms, 'PDFNAME', $name);

    my $no_more_fuzzy = 0;
    my $got_image = 0;
    my $encrypted = 0;

    # walk the decoded data one non-empty line at a time
    while($data =~ /([^\n]+)/g) {
      # dbg("pdfinfo: line=$1");
      my $line = $1;

      $line_count++;

      # lines containing high bytes will have no data we need, so save some cycles
      next if ($line =~ /[\x80-\xff]/);

      # fuzzy md5 #1: the first lines of the file (line counter below 70,
      # before the first stream/image), minus comments and size-dependent lines
      if (!$no_more_fuzzy && $line_count < 70) {
        if ($line !~ m/^\%/ && $line !~ m/^\/(?:Height|Width|(?:(?:Media|Crop)Box))/ && $line !~ m/^\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+\d+\s+cm$/) {
          $line =~ s/\s+$//;  # strip off whitespace at end.
          $fuzzy_data .= $line;
        }
      }

      # fuzzy md5 #2: the sequence of tag names that start a line
      if ($line =~ m/^\/([A-Za-z]+)/) {
        $pdf_tags .= $1;
      }

      $got_image=1 if ($line =~ m/\/Image/);
      $encrypted=1 if ($line =~ m/^\/Encrypt/);

      # once we hit the first stream, we stop collecting data for fuzzy md5
      $no_more_fuzzy = 1  if ($line =~ m/stream/);

      # From a v1.3 pdf
      # [12234] dbg: pdfinfo: line=630 0 0 149 0 0 cm
      # [12234] dbg: pdfinfo: line=/Width 630
      # [12234] dbg: pdfinfo: line=/Height 149
      if ($got_image) {
        if ($line =~ /^(\d+)\s+\d+\s+\d+\s+(\d+)\s+\d+\s+\d+\s+cm$/) {
          $width = $1;
          $height = $2;
        }
        elsif ($line =~ /^\/Width\s(\d+)/) {
          $width = $1;
        }
        elsif ($line =~ /^\/Height\s(\d+)/) {
          $height = $1;
        }
        elsif ($line =~ m/\/Width\s(\d+)\/Height\s(\d+)/) {
          $width = $1;
          $height = $2;
        }
      }

      # did pdf contain image data?
      if ($got_image && $width && $height) {
        $no_more_fuzzy = 1;
        my $area = $width * $height;
        $total_height += $height;
        $total_width += $width;
        $total_area += $area;
        $pms->{pdfinfo}->{dems_pdf}->{"${height}x${width}"} = 1;
        $pms->{'pdfinfo'}->{"count_pdf_images"} ++;
        dbg("pdfinfo: Found image in PDF ".($name ? $name : '')." - $height x $width pixels ($area pixels sq.)");
        $self->_set_tag($pms, 'PDFIMGDIM', "${height}x${width}");
        $height=0; $width=0;  # reset and check for next image
        $got_image = 0;
      }

      # [5310] dbg: pdfinfo: line=<</Producer(GPL Ghostscript 8.15)
      # [5310] dbg: pdfinfo: line=/CreationDate(D:20070703144220)
      # [5310] dbg: pdfinfo: line=/ModDate(D:20070703144220)
      # [5310] dbg: pdfinfo: line=/Title(Microsoft Word - Document1)
      # [5310] dbg: pdfinfo: line=/Creator(PScript5.dll Version 5.2)
      # [5310] dbg: pdfinfo: line=/Author(colet)>>endobj
      # or all on same line inside xml - v1.6+
      # <</CreationDate(D:20070226165054-06'00')/Creator( Adobe Photoshop CS2 Windows)/Producer(Adobe Photoshop for Windows -- Image Conversion Plug-in)/ModDate(D:20070226165100-06'00')>>

      if ($line =~ /\/Producer\s?\(([^\)\\]+)/) {
        $producer = $1;
      }
      if ($line =~ /\/CreationDate\s?\(D\:(\d+)/) {
        $created = $1;
      }
      if ($line =~ /\/ModDate\s?\(D\:(\d+)/) {
        $modified = $1;
      }
      if ($line =~ /\/Title\s?\(([^\)\\]+)/) {
        $title = $1;
        # Title=\376\377\000w\000w\000n\000g
        # Title=wwng
        $title =~ s/\\\d{3}//g;
      }
      if ($line =~ /\/Creator\s?\(([^\)\\]+)/) {
        $creator = $1;
      }
      if ($line =~ /\/Author\s?\(([^\)]+)/) {
        $author = $1;
        # Author=\376\377\000H\000P\000_\000A\000d\000m\000i\000n\000i\000s\000t\000r\000a\000t\000o\000r
        # Author=HP_Administrator
        $author =~ s/\\\d{3}//g;
      }
    }

    # store the file name so we can check pdf_named() or pdf_name_regex() later.
    $pms->{pdfinfo}->{names_pdf}->{$name} = 1 if $name;

    # store encrypted flag.
    $pms->{pdfinfo}->{encrypted} = $encrypted;

    # if we had multiple images in the pdf, we need to store the total HxW as well.
    # If it was a single Image PDF, then this value will already be in the hash.
    $pms->{pdfinfo}->{dems_pdf}->{"${total_height}x${total_width}"} = 1 if ($total_height && $total_width);

    if ($total_area) {
      $pms->{pdfinfo}->{pc_pdf} = $total_area;
      $self->_set_tag($pms, 'PDFIMGAREA', $total_area);
      dbg("pdfinfo: Filename=$name Total HxW: $total_height x $total_width ($total_area area)");
    }

    dbg("pdfinfo: Filename=$name Title=$title Author=$author Producer=$producer Created=$created Modified=$modified");

    # exact md5 over the whole decoded attachment, plus the two fuzzy digests
    $md5 = uc(md5_hex($data)) if $data;
    $fuzzy_md5 = uc(md5_hex($fuzzy_data)) if $fuzzy_data;
    my $tags_md5;
    $tags_md5 = uc(md5_hex($pdf_tags)) if $pdf_tags;

    dbg("pdfinfo: MD5 results for ".($name ? $name : '')." - md5=".($md5 ? $md5 : '')." fuzzy1=".($fuzzy_md5 ? $fuzzy_md5 : '')." fuzzy2=".($tags_md5 ? $tags_md5 : ''));

    # we dont need tags for these.
    $pms->{pdfinfo}->{details}->{created} = $created if $created;
    $pms->{pdfinfo}->{details}->{modified} = $modified if $modified;

    if ($producer) {
      $pms->{pdfinfo}->{details}->{producer} = $producer;
      $self->_set_tag($pms, 'PDFPRODUCER', $producer);
    }
    if ($title) {
      $pms->{pdfinfo}->{details}->{title} = $title;
      $self->_set_tag($pms, 'PDFTITLE', $title);
    }
    if ($creator) {
      $pms->{pdfinfo}->{details}->{creator} = $creator;
      $self->_set_tag($pms, 'PDFCREATOR', $creator);
    }
    if ($author) {
      $pms->{pdfinfo}->{details}->{author} = $author;
      $self->_set_tag($pms, 'PDFAUTHOR', $author);
    }
    if ($md5) {
      $pms->{pdfinfo}->{md5}->{$md5} = 1;
      # the PDFMD5 tag carries the exact digest, not the fuzzy one
      $self->_set_tag($pms, 'PDFMD5', $md5);
    }
    if ($fuzzy_md5) {
      $pms->{pdfinfo}->{fuzzy_md5}->{$fuzzy_md5} = 1;
      $self->_set_tag($pms, 'PDFMD5FUZZY1', $fuzzy_md5);
    }
    if ($tags_md5) {
      # both fuzzy digests share the same lookup hash
      $pms->{pdfinfo}->{fuzzy_md5}->{$tags_md5} = 1;
      $self->_set_tag($pms, 'PDFMD5FUZZY2', $tags_md5);
    }
  },

);
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
# ---------------------------------------- |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
# Record $value under $tag in $pms->{tag_data}.
#
# When the tag already holds a value, the new value is appended after a
# single space; otherwise the tag is created.  Nothing is stored when either
# $tag or $value is false (so '' and 0 are never recorded).
sub _set_tag {
  my ($self, $pms, $tag, $value) = @_;

  dbg("pdfinfo: set_tag called for $tag $value");
  return if (!$tag || !$value);

  unless (exists $pms->{tag_data}->{$tag}) {
    # first value seen for this tag
    return $pms->{tag_data}->{$tag} = $value;
  }

  # tag already present: append value
  return $pms->{tag_data}->{$tag} .= " $value";
}
387
|
0
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# ---------------------------------------- |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
# Find the MIME parts that may hold a PDF, run the 'pdf' extractor from
# %get_details on each accepted part, and cache the results under
# $pms->{pdfinfo}.
#
# Returns 0 immediately when an earlier call already recorded that the
# message has no candidate parts ($pms->{pdfinfo}->{no_parts}).  Otherwise
# the counters pc_pdf / count_pdf / count_pdf_images are reset and filled in,
# and the PDFCOUNT and PDFIMGCOUNT tags are set.
sub _find_pdf_mime_parts {
  my ($self,$pms) = @_;

  # bail early if message does not have pdf parts
  return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'});

  # initialize
  $pms->{'pdfinfo'}->{"pc_pdf"} = 0;
  $pms->{'pdfinfo'}->{"count_pdf"} = 0;
  $pms->{'pdfinfo'}->{"count_pdf_images"} = 0;

  my @parts = $pms->{msg}->find_parts(qr@^(image|application)/(pdf|octet\-stream)$@, 1);
  my $part_count = scalar @parts;

  dbg("pdfinfo: Identified $part_count possible mime parts that need checked for PDF content");

  # cache this so we can easily bail
  $pms->{'pdfinfo'}->{'no_parts'} = 1 unless $part_count;

  foreach my $p (@parts) {
    # Take the MIME subtype (the text after the last '/').  The match has to
    # be tested and $1 read explicitly: assigning the match straight to a
    # scalar would store the success flag (1 or ''), not the captured
    # subtype, and the "$type eq 'pdf'" test below could then never succeed.
    my $type = ($p->{'type'} =~ m@/([\w\-]+)$@) ? $1 : '';
    my $name = $p->{'name'} || '';

    my $cte = lc( $p->get_header('content-transfer-encoding') || '' );

    dbg("pdfinfo: found part, type=".($type ? $type : '')." file=".($name ? $name : '')." cte=".($cte ? $cte : '')."");

    # make sure its a cte we support
    next unless ($cte =~ /^(?:base64|quoted\-printable)$/);

    # filename must end with .pdf, or application type can be pdf
    # sometimes windows muas will wrap a pdf up inside a .dat file
    # v0.8 - Added .fdf phoney PDF detection
    next unless ($name =~ /\.[fp]df$/ || $type eq 'pdf');

    # if we get this far, make sure type is pdf for sure (not octet-stream or anything else)
    $type='pdf';

    if ($type && exists $get_details{$type}) {
       $get_details{$type}->($self, $pms, $p);
       $pms->{'pdfinfo'}->{"count_$type"} ++;
    }
  }

  $self->_set_tag($pms, 'PDFCOUNT',  $pms->{'pdfinfo'}->{"count_pdf"});
  $self->_set_tag($pms, 'PDFIMGCOUNT', $pms->{'pdfinfo'}->{"count_pdf_images"});

}
437
|
0
|
|
|
|
|
|
|
438
|
0
|
|
|
|
|
|
# ---------------------------------------- |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
my ($self,$pms,$body,$name) = @_; |
441
|
|
|
|
|
|
|
return unless (defined $name); |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
# make sure we have image data read in. |
444
|
|
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
445
|
0
|
|
|
0
|
0
|
|
$self->_find_pdf_mime_parts($pms); |
446
|
0
|
0
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
449
|
0
|
0
|
|
|
|
|
|
450
|
0
|
|
|
|
|
|
return 0 unless (exists $pms->{'pdfinfo'}->{"names_pdf"}); |
451
|
|
|
|
|
|
|
return 1 if (exists $pms->{'pdfinfo'}->{"names_pdf"}->{$name}); |
452
|
|
|
|
|
|
|
return 0; |
453
|
0
|
0
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
0
|
0
|
|
|
|
|
# ----------------------------------------- |
456
|
0
|
0
|
|
|
|
|
|
457
|
0
|
|
|
|
|
|
my ($self,$pms,$body,$re) = @_; |
458
|
|
|
|
|
|
|
return unless (defined $re); |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# make sure we have image data read in. |
461
|
|
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
462
|
|
|
|
|
|
|
$self->_find_pdf_mime_parts($pms); |
463
|
0
|
|
|
0
|
0
|
|
} |
464
|
0
|
0
|
|
|
|
|
|
465
|
|
|
|
|
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
466
|
|
|
|
|
|
|
return 0 unless (exists $pms->{'pdfinfo'}->{"names_pdf"}); |
467
|
0
|
0
|
|
|
|
|
|
468
|
0
|
|
|
|
|
|
my ($rec, $err) = compile_regexp($re, 2); |
469
|
|
|
|
|
|
|
if (!$rec) { |
470
|
|
|
|
|
|
|
info("pdfinfo: invalid regexp '$re': $err"); |
471
|
0
|
0
|
|
|
|
|
return 0; |
472
|
0
|
0
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
0
|
|
|
|
|
|
my $hit = 0; |
475
|
0
|
0
|
|
|
|
|
foreach my $name (keys %{$pms->{'pdfinfo'}->{"names_pdf"}}) { |
476
|
0
|
|
|
|
|
|
if ($name =~ $rec) { |
477
|
0
|
|
|
|
|
|
dbg("pdfinfo: pdf_name_regex hit on $name"); |
478
|
|
|
|
|
|
|
return 1; |
479
|
|
|
|
|
|
|
} |
480
|
0
|
|
|
|
|
|
} |
481
|
0
|
|
|
|
|
|
return 0; |
|
0
|
|
|
|
|
|
|
482
|
0
|
0
|
|
|
|
|
|
483
|
0
|
|
|
|
|
|
} |
484
|
0
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
# ----------------------------------------- |
486
|
|
|
|
|
|
|
|
487
|
0
|
|
|
|
|
|
my ($self,$pms,$body) = @_; |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
# make sure we have image data read in. |
490
|
|
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
491
|
|
|
|
|
|
|
$self->_find_pdf_mime_parts($pms); |
492
|
|
|
|
|
|
|
} |
493
|
|
|
|
|
|
|
|
494
|
0
|
|
|
0
|
0
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
495
|
|
|
|
|
|
|
return $pms->{'pdfinfo'}->{'encrypted'}; |
496
|
|
|
|
|
|
|
} |
497
|
0
|
0
|
|
|
|
|
|
498
|
0
|
|
|
|
|
|
# ----------------------------------------- |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
my ($self,$pms,$body,$min,$max) = @_; |
501
|
0
|
0
|
|
|
|
|
return unless defined $min; |
502
|
0
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# make sure we have image data read in. |
504
|
|
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
505
|
|
|
|
|
|
|
$self->_find_pdf_mime_parts($pms); |
506
|
|
|
|
|
|
|
} |
507
|
|
|
|
|
|
|
|
508
|
0
|
|
|
0
|
0
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
509
|
0
|
0
|
|
|
|
|
return 0 unless (exists $pms->{'pdfinfo'}->{"count_pdf"}); |
510
|
|
|
|
|
|
|
return result_check($min, $max, $pms->{'pdfinfo'}->{"count_pdf"}); |
511
|
|
|
|
|
|
|
|
512
|
0
|
0
|
|
|
|
|
} |
513
|
0
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
# ----------------------------------------- |
515
|
|
|
|
|
|
|
|
516
|
0
|
0
|
|
|
|
|
my ($self,$pms,$body,$min,$max) = @_; |
517
|
0
|
0
|
|
|
|
|
return unless defined $min; |
518
|
0
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
# make sure we have image data read in. |
520
|
|
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
521
|
|
|
|
|
|
|
$self->_find_pdf_mime_parts($pms); |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
525
|
0
|
|
|
0
|
0
|
|
return 0 unless (exists $pms->{'pdfinfo'}->{"count_pdf_images"}); |
526
|
0
|
0
|
|
|
|
|
return result_check($min, $max, $pms->{'pdfinfo'}->{"count_pdf_images"}); |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
} |
529
|
0
|
0
|
|
|
|
|
|
530
|
0
|
|
|
|
|
|
# ----------------------------------------- |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
my ($self,$pms,$body,$min,$max) = @_; |
533
|
0
|
0
|
|
|
|
|
return unless (defined $min); |
534
|
0
|
0
|
|
|
|
|
|
535
|
0
|
|
|
|
|
|
# make sure we have image data read in. |
536
|
|
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
537
|
|
|
|
|
|
|
$self->_find_pdf_mime_parts($pms); |
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
541
|
|
|
|
|
|
|
return 0 unless (exists $pms->{'pdfinfo'}->{"pc_pdf"}); |
542
|
0
|
|
|
0
|
0
|
|
|
543
|
0
|
0
|
|
|
|
|
# dbg("pdfinfo: pc_$type: $min, ".($max ? $max:'').", $type, ".$pms->{'pdfinfo'}->{"pc_pdf"}); |
544
|
|
|
|
|
|
|
return result_check($min, $max, $pms->{'pdfinfo'}->{"pc_pdf"}); |
545
|
|
|
|
|
|
|
} |
546
|
0
|
0
|
|
|
|
|
|
547
|
0
|
|
|
|
|
|
# ----------------------------------------- |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
my ($self,$pms,$body,$min,$max) = @_; |
550
|
0
|
0
|
|
|
|
|
return unless (defined $min && defined $max); |
551
|
0
|
0
|
|
|
|
|
|
552
|
|
|
|
|
|
|
# make sure we have image data read in. |
553
|
|
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
554
|
0
|
|
|
|
|
|
$self->_find_pdf_mime_parts($pms); |
555
|
|
|
|
|
|
|
} |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
558
|
|
|
|
|
|
|
return 0 unless (exists $pms->{'pdfinfo'}->{"pc_pdf"}); |
559
|
|
|
|
|
|
|
|
560
|
0
|
|
|
0
|
0
|
|
# depending on how you call this eval (body vs rawbody), |
561
|
0
|
0
|
0
|
|
|
|
# the $textlen will differ. |
562
|
|
|
|
|
|
|
my $textlen = length(join('',@$body)); |
563
|
|
|
|
|
|
|
|
564
|
0
|
0
|
|
|
|
|
return 0 unless ( $textlen > 0 && exists $pms->{'pdfinfo'}->{"pc_pdf"} && $pms->{'pdfinfo'}->{"pc_pdf"} > 0); |
565
|
0
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
my $ratio = $textlen / $pms->{'pdfinfo'}->{"pc_pdf"}; |
567
|
|
|
|
|
|
|
dbg("pdfinfo: image ratio=$ratio, min=$min max=$max"); |
568
|
0
|
0
|
|
|
|
|
return result_check($min, $max, $ratio, 1); |
569
|
0
|
0
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
# ----------------------------------------- |
572
|
|
|
|
|
|
|
|
573
|
0
|
|
|
|
|
|
my ($self,$pms,$body,$min) = @_; |
574
|
|
|
|
|
|
|
|
575
|
0
|
0
|
0
|
|
|
|
$min ||= 0; # default to 0 bytes |
|
|
|
0
|
|
|
|
|
576
|
|
|
|
|
|
|
|
577
|
0
|
|
|
|
|
|
# make sure we have image data read in. |
578
|
0
|
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
579
|
0
|
|
|
|
|
|
$self->_find_pdf_mime_parts($pms); |
580
|
|
|
|
|
|
|
} |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
583
|
|
|
|
|
|
|
return 0 unless $pms->{'pdfinfo'}->{"count_pdf"}; |
584
|
|
|
|
|
|
|
|
585
|
0
|
|
|
0
|
0
|
|
# check for cached result |
586
|
|
|
|
|
|
|
return 1 if $pms->{'pdfinfo'}->{"no_body_text"}; |
587
|
0
|
|
0
|
|
|
|
|
588
|
|
|
|
|
|
|
shift @$body; # shift body array removes line #1 -> subject line. |
589
|
|
|
|
|
|
|
|
590
|
0
|
0
|
|
|
|
|
my $bytes = 0; |
591
|
0
|
|
|
|
|
|
my $textlen = length(join('',@$body)); |
592
|
|
|
|
|
|
|
foreach my $line (@$body) { |
593
|
|
|
|
|
|
|
next unless ($line =~ m/\S/); |
594
|
0
|
0
|
|
|
|
|
next if ($line =~ m/^Subject/); |
595
|
0
|
0
|
|
|
|
|
$bytes += length($line); |
596
|
|
|
|
|
|
|
} |
597
|
|
|
|
|
|
|
|
598
|
0
|
0
|
|
|
|
|
dbg("pdfinfo: is_empty_body = $bytes bytes"); |
599
|
|
|
|
|
|
|
|
600
|
0
|
|
|
|
|
|
if ($bytes == 0 || ($bytes <= $min)) { |
601
|
|
|
|
|
|
|
$pms->{'pdfinfo'}->{"no_body_text"} = 1; |
602
|
0
|
|
|
|
|
|
return 1; |
603
|
0
|
|
|
|
|
|
} |
604
|
0
|
|
|
|
|
|
|
605
|
0
|
0
|
|
|
|
|
# cache it and return 0 |
606
|
0
|
0
|
|
|
|
|
$pms->{'pdfinfo'}->{"no_body_text"} = 0; |
607
|
0
|
|
|
|
|
|
return 0; |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
|
610
|
0
|
|
|
|
|
|
# ----------------------------------------- |
611
|
|
|
|
|
|
|
|
612
|
0
|
0
|
0
|
|
|
|
my ($self,$pms,$body,$height,$width) = @_; |
613
|
0
|
|
|
|
|
|
return unless (defined $height && defined $width); |
614
|
0
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# make sure we have image data read in. |
616
|
|
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
617
|
|
|
|
|
|
|
$self->_find_pdf_mime_parts($pms); |
618
|
0
|
|
|
|
|
|
} |
619
|
0
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
621
|
|
|
|
|
|
|
return 0 unless (exists $pms->{'pdfinfo'}->{"dems_pdf"}); |
622
|
|
|
|
|
|
|
return 1 if (exists $pms->{'pdfinfo'}->{"dems_pdf"}->{"${height}x${width}"}); |
623
|
|
|
|
|
|
|
return 0; |
624
|
|
|
|
|
|
|
} |
625
|
0
|
|
|
0
|
0
|
|
|
626
|
0
|
0
|
0
|
|
|
|
# ----------------------------------------- |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
my ($self,$pms,$body,$minh,$minw,$maxh,$maxw) = @_; |
629
|
0
|
0
|
|
|
|
|
return unless (defined $minh && defined $minw); |
630
|
0
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
# make sure we have image data read in. |
632
|
|
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
633
|
0
|
0
|
|
|
|
|
$self->_find_pdf_mime_parts($pms); |
634
|
0
|
0
|
|
|
|
|
} |
635
|
0
|
0
|
|
|
|
|
|
636
|
0
|
|
|
|
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
637
|
|
|
|
|
|
|
return 0 unless (exists $pms->{'pdfinfo'}->{"dems_pdf"}); |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
foreach my $dem ( keys %{$pms->{'pdfinfo'}->{"dems_pdf"}}) { |
640
|
|
|
|
|
|
|
my ($h,$w) = split(/x/,$dem); |
641
|
|
|
|
|
|
|
next if ($h < $minh); # height less than min height |
642
|
0
|
|
|
0
|
0
|
|
next if ($w < $minw); # width less than min width |
643
|
0
|
0
|
0
|
|
|
|
next if (defined $maxh && $h > $maxh); # height more than max height |
644
|
|
|
|
|
|
|
next if (defined $maxw && $w > $maxw); # width more than max width |
645
|
|
|
|
|
|
|
|
646
|
0
|
0
|
|
|
|
|
# if we make it here, we have a match |
647
|
0
|
|
|
|
|
|
return 1; |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
|
650
|
0
|
0
|
|
|
|
|
return 0; |
651
|
0
|
0
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
|
653
|
0
|
|
|
|
|
|
# ----------------------------------------- |
|
0
|
|
|
|
|
|
|
654
|
0
|
|
|
|
|
|
|
655
|
0
|
0
|
|
|
|
|
|
656
|
0
|
0
|
|
|
|
|
my ($self,$pms,$body,$md5) = @_; |
657
|
0
|
0
|
0
|
|
|
|
return unless defined $md5; |
658
|
0
|
0
|
0
|
|
|
|
|
659
|
|
|
|
|
|
|
my $uc_md5 = uc($md5); # uppercase matches only |
660
|
|
|
|
|
|
|
|
661
|
0
|
|
|
|
|
|
# make sure we have pdf data read in. |
662
|
|
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
663
|
|
|
|
|
|
|
$self->_find_pdf_mime_parts($pms); |
664
|
0
|
|
|
|
|
|
} |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
667
|
|
|
|
|
|
|
return 0 unless (exists $pms->{'pdfinfo'}->{"md5"}); |
668
|
|
|
|
|
|
|
return 1 if (exists $pms->{'pdfinfo'}->{"md5"}->{$uc_md5}); |
669
|
|
|
|
|
|
|
return 0; |
670
|
|
|
|
|
|
|
} |
671
|
0
|
|
|
0
|
0
|
|
|
672
|
0
|
0
|
|
|
|
|
# ----------------------------------------- |
673
|
|
|
|
|
|
|
|
674
|
0
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
my ($self,$pms,$body,$md5) = @_; |
676
|
|
|
|
|
|
|
return unless defined $md5; |
677
|
0
|
0
|
|
|
|
|
|
678
|
0
|
|
|
|
|
|
my $uc_md5 = uc($md5); # uppercase matches only |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
# make sure we have pdf data read in. |
681
|
0
|
0
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
682
|
0
|
0
|
|
|
|
|
$self->_find_pdf_mime_parts($pms); |
683
|
0
|
0
|
|
|
|
|
} |
684
|
0
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
686
|
|
|
|
|
|
|
return 0 unless (exists $pms->{'pdfinfo'}->{"fuzzy_md5"}); |
687
|
|
|
|
|
|
|
return 1 if (exists $pms->{'pdfinfo'}->{"fuzzy_md5"}->{$uc_md5}); |
688
|
|
|
|
|
|
|
return 0; |
689
|
|
|
|
|
|
|
} |
690
|
|
|
|
|
|
|
|
691
|
0
|
|
|
0
|
0
|
|
# ----------------------------------------- |
692
|
0
|
0
|
|
|
|
|
|
693
|
|
|
|
|
|
|
my ($self, $pms, $body, $detail, $regex) = @_; |
694
|
0
|
|
|
|
|
|
return unless ($detail && $regex); |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
# make sure we have pdf data read in. |
697
|
0
|
0
|
|
|
|
|
if (!exists $pms->{'pdfinfo'}) { |
698
|
0
|
|
|
|
|
|
$self->_find_pdf_mime_parts($pms); |
699
|
|
|
|
|
|
|
} |
700
|
|
|
|
|
|
|
|
701
|
0
|
0
|
|
|
|
|
return 0 if (exists $pms->{'pdfinfo'}->{'no_parts'}); |
702
|
0
|
0
|
|
|
|
|
return 0 unless (exists $pms->{'pdfinfo'}->{'details'}); |
703
|
0
|
0
|
|
|
|
|
|
704
|
0
|
|
|
|
|
|
my $check_value = $pms->{pdfinfo}->{details}->{$detail}; |
705
|
|
|
|
|
|
|
return unless $check_value; |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
my ($rec, $err) = compile_regexp($regex, 2); |
708
|
|
|
|
|
|
|
if (!$rec) { |
709
|
|
|
|
|
|
|
info("pdfinfo: invalid regexp '$regex': $err"); |
710
|
0
|
|
|
0
|
0
|
|
return 0; |
711
|
0
|
0
|
0
|
|
|
|
} |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
if ($check_value =~ $rec) { |
714
|
0
|
0
|
|
|
|
|
dbg("pdfinfo: pdf_match_details $detail $regex matches $check_value"); |
715
|
0
|
|
|
|
|
|
return 1; |
716
|
|
|
|
|
|
|
} |
717
|
|
|
|
|
|
|
return 0; |
718
|
0
|
0
|
|
|
|
|
} |
719
|
0
|
0
|
|
|
|
|
|
720
|
|
|
|
|
|
|
# ----------------------------------------- |
721
|
0
|
|
|
|
|
|
|
722
|
0
|
0
|
|
|
|
|
my ($min, $max, $value, $nomaxequal) = @_; |
723
|
|
|
|
|
|
|
return 0 unless defined $value; |
724
|
0
|
|
|
|
|
|
return 0 if ($value < $min); |
725
|
0
|
0
|
|
|
|
|
return 0 if (defined $max && $value > $max); |
726
|
0
|
|
|
|
|
|
return 0 if (defined $nomaxequal && $nomaxequal && $value == $max); |
727
|
0
|
|
|
|
|
|
return 1; |
728
|
|
|
|
|
|
|
} |
729
|
|
|
|
|
|
|
|
730
|
0
|
0
|
|
|
|
|
# ----------------------------------------- |
731
|
0
|
|
|
|
|
|
|
732
|
0
|
|
|
|
|
|
1; |
733
|
|
|
|
|
|
|
|