| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* |
|
2
|
|
|
|
|
|
|
* pdfmake_redact_rewrite.c — Content-stream rewriter for true redaction. |
|
3
|
|
|
|
|
|
|
* |
|
4
|
|
|
|
|
|
|
* Given an existing PDF content stream plus a list of redaction rects, |
|
5
|
|
|
|
|
|
|
* produce a new stream with every text-showing operator whose baseline |
|
6
|
|
|
|
|
|
|
* origin falls inside a rect omitted. Everything else is copied |
|
7
|
|
|
|
|
|
|
* verbatim. |
|
8
|
|
|
|
|
|
|
* |
|
9
|
|
|
|
|
|
|
* Scope (phase 1): tuned for streams produced by PDF::Make::Builder, |
|
10
|
|
|
|
|
|
|
* which emit each `add_text` as a self-contained BT..ET block: |
|
11
|
|
|
|
|
|
|
* |
|
12
|
|
|
|
|
|
|
* BT |
|
13
|
|
|
|
|
|
|
* 0 0 0 rg |
|
14
|
|
|
|
|
|
|
* /F_Helvetica_normal 9 Tf |
|
15
|
|
|
|
|
|
|
* 1 0 0 1 <x> <y> Tm |
|
16
|
|
|
|
|
|
|
* (text) Tj |
|
17
|
|
|
|
|
|
|
* ET |
|
18
|
|
|
|
|
|
|
* |
|
19
|
|
|
|
|
|
|
* A block is dropped when its Tm origin (the last two of the six Tm |
|
20
|
|
|
|
|
|
|
* operands) lies inside any redaction rect. All non-text operators |
|
21
|
|
|
|
|
|
|
* (graphics state, paths, shape fills used for the black rectangles |
|
22
|
|
|
|
|
|
|
* painted by mark_redaction) are preserved. |
|
23
|
|
|
|
|
|
|
* |
|
24
|
|
|
|
|
|
|
* Limitations: |
|
25
|
|
|
|
|
|
|
* - Does not handle CTM transformations (cm operator) – assumes |
|
26
|
|
|
|
|
|
|
* identity CTM, which matches Builder output. |
|
27
|
|
|
|
|
|
|
* - Does not parse the TJ, ' or " operators individually; a block containing |
|
28
|
|
|
|
|
|
|
* those inside a rect would still be dropped via its Tm origin. |
|
29
|
|
|
|
|
|
|
* - Non-Builder streams with multiple Tj ops inside one BT..ET block |
|
30
|
|
|
|
|
|
|
* are handled at block granularity (all or nothing). |
|
31
|
|
|
|
|
|
|
*/ |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
#include "pdfmake_redact.h" |
|
34
|
|
|
|
|
|
|
#include "pdfmake_buf.h" |
|
35
|
|
|
|
|
|
|
#include |
|
36
|
|
|
|
|
|
|
#include |
|
37
|
|
|
|
|
|
|
#include |
|
38
|
|
|
|
|
|
|
|
|
39
|
10
|
|
|
|
|
|
/*
 * Test whether the point (x, y) lies inside rectangle `r`, edges inclusive.
 *
 * `r` holds two diagonally opposite corners as {x0, y0, x1, y1}.  The
 * corners are put in ascending order on each axis before comparing, so a
 * rectangle supplied as (upper-right, lower-left), or flipped on a single
 * axis, matches the same points as its normalised form.  Without this an
 * inverted rectangle would contain no point at all and the text it was
 * meant to cover would be left in the stream.
 *
 * Returns 1 when the point is inside or on the boundary, 0 otherwise.
 */
static int point_in_rect(double x, double y, const double r[4]) {
    double x_lo = r[0];
    double x_hi = r[2];
    double y_lo = r[1];
    double y_hi = r[3];

    if (x_lo > x_hi) {
        const double swap = x_lo;
        x_lo = x_hi;
        x_hi = swap;
    }
    if (y_lo > y_hi) {
        const double swap = y_lo;
        y_lo = y_hi;
        y_hi = swap;
    }
    return x >= x_lo && x <= x_hi && y >= y_lo && y <= y_hi;
}
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
/* strstr on byte buffers, honouring the given length. */ |
|
44
|
19
|
|
|
|
|
|
static const uint8_t *find_bytes(const uint8_t *hay, size_t hay_len, |
|
45
|
|
|
|
|
|
|
const char *needle) { |
|
46
|
19
|
|
|
|
|
|
size_t n = strlen(needle); |
|
47
|
|
|
|
|
|
|
size_t i; |
|
48
|
19
|
50
|
|
|
|
|
if (n == 0 || n > hay_len) return NULL; |
|
|
|
50
|
|
|
|
|
|
|
49
|
855
|
100
|
|
|
|
|
for (i = 0; i + n <= hay_len; i++) { |
|
50
|
854
|
100
|
|
|
|
|
if (memcmp(hay + i, needle, n) == 0) return hay + i; |
|
51
|
|
|
|
|
|
|
} |
|
52
|
1
|
|
|
|
|
|
return NULL; |
|
53
|
|
|
|
|
|
|
} |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
/* True iff `op` at position (op, op+op_len) is a standalone operator: |
|
56
|
|
|
|
|
|
|
* preceded by whitespace/boundary and followed by whitespace/boundary. */ |
|
57
|
18
|
|
|
|
|
|
static int is_standalone_op(const uint8_t *buf, size_t buf_len, |
|
58
|
|
|
|
|
|
|
const uint8_t *op, size_t op_len) { |
|
59
|
|
|
|
|
|
|
const uint8_t *after; |
|
60
|
18
|
50
|
|
|
|
|
if (op < buf) return 0; |
|
61
|
18
|
100
|
|
|
|
|
if (op > buf && !isspace((unsigned char)op[-1])) return 0; |
|
|
|
50
|
|
|
|
|
|
|
62
|
18
|
|
|
|
|
|
after = op + op_len; |
|
63
|
18
|
50
|
|
|
|
|
if (after >= buf + buf_len) return 1; |
|
64
|
18
|
|
|
|
|
|
return isspace((unsigned char)*after); |
|
65
|
|
|
|
|
|
|
} |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
/* Find the next standalone occurrence of `tag` within [buf, buf+buf_len). */ |
|
68
|
19
|
|
|
|
|
|
static const uint8_t *find_op(const uint8_t *buf, size_t buf_len, |
|
69
|
|
|
|
|
|
|
const char *tag) { |
|
70
|
19
|
|
|
|
|
|
size_t tag_len = strlen(tag); |
|
71
|
19
|
|
|
|
|
|
const uint8_t *p = buf; |
|
72
|
19
|
|
|
|
|
|
size_t remaining = buf_len; |
|
73
|
|
|
|
|
|
|
size_t advanced; |
|
74
|
0
|
|
|
|
|
|
while (1) { |
|
75
|
19
|
|
|
|
|
|
const uint8_t *hit = find_bytes(p, remaining, tag); |
|
76
|
19
|
100
|
|
|
|
|
if (!hit) return NULL; |
|
77
|
18
|
50
|
|
|
|
|
if (is_standalone_op(buf, buf_len, hit, tag_len)) return hit; |
|
78
|
0
|
|
|
|
|
|
advanced = (hit - p) + 1; |
|
79
|
0
|
0
|
|
|
|
|
if (advanced >= remaining) return NULL; |
|
80
|
0
|
|
|
|
|
|
p = hit + 1; |
|
81
|
0
|
|
|
|
|
|
remaining -= advanced; |
|
82
|
|
|
|
|
|
|
} |
|
83
|
|
|
|
|
|
|
} |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
/* Parse the 6 numbers preceding a Tm op. Numbers are space-separated. |
|
86
|
|
|
|
|
|
|
* Returns 1 on success with e/f filled from the last two. */ |
|
87
|
6
|
|
|
|
|
|
static int parse_tm_origin(const uint8_t *block_start, const uint8_t *tm_op, |
|
88
|
|
|
|
|
|
|
double *ex, double *ey) { |
|
89
|
|
|
|
|
|
|
/* Walk forward from block_start collecting numbers; reset the window |
|
90
|
|
|
|
|
|
|
* whenever a non-number token appears. The final six numbers before |
|
91
|
|
|
|
|
|
|
* Tm are its operands. */ |
|
92
|
|
|
|
|
|
|
double nums[6]; |
|
93
|
6
|
|
|
|
|
|
int count = 0; |
|
94
|
6
|
|
|
|
|
|
const uint8_t *p = block_start; |
|
95
|
|
|
|
|
|
|
const uint8_t *tok; |
|
96
|
|
|
|
|
|
|
size_t tok_len; |
|
97
|
|
|
|
|
|
|
char tmp[64]; |
|
98
|
|
|
|
|
|
|
char *endp; |
|
99
|
|
|
|
|
|
|
double v; |
|
100
|
|
|
|
|
|
|
int i; |
|
101
|
84
|
50
|
|
|
|
|
while (p < tm_op) { |
|
102
|
168
|
100
|
|
|
|
|
while (p < tm_op && isspace((unsigned char)*p)) p++; |
|
|
|
100
|
|
|
|
|
|
|
103
|
84
|
100
|
|
|
|
|
if (p >= tm_op) break; |
|
104
|
78
|
|
|
|
|
|
tok = p; |
|
105
|
302
|
50
|
|
|
|
|
while (p < tm_op && !isspace((unsigned char)*p)) p++; |
|
|
|
100
|
|
|
|
|
|
|
106
|
78
|
|
|
|
|
|
tok_len = (size_t)(p - tok); |
|
107
|
78
|
50
|
|
|
|
|
if (tok_len == 0 || tok_len >= 64) { count = 0; continue; } |
|
|
|
50
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
|
|
109
|
78
|
|
|
|
|
|
memcpy(tmp, tok, tok_len); |
|
110
|
78
|
|
|
|
|
|
tmp[tok_len] = '\0'; |
|
111
|
78
|
|
|
|
|
|
endp = NULL; |
|
112
|
78
|
|
|
|
|
|
v = strtod(tmp, &endp); |
|
113
|
78
|
100
|
|
|
|
|
if (endp != tmp + tok_len) { |
|
114
|
18
|
|
|
|
|
|
count = 0; /* non-number → reset window */ |
|
115
|
18
|
|
|
|
|
|
continue; |
|
116
|
|
|
|
|
|
|
} |
|
117
|
60
|
50
|
|
|
|
|
if (count < 6) { |
|
118
|
60
|
|
|
|
|
|
nums[count++] = v; |
|
119
|
|
|
|
|
|
|
} else { |
|
120
|
0
|
0
|
|
|
|
|
for (i = 0; i < 5; i++) nums[i] = nums[i + 1]; |
|
121
|
0
|
|
|
|
|
|
nums[5] = v; |
|
122
|
|
|
|
|
|
|
} |
|
123
|
|
|
|
|
|
|
} |
|
124
|
6
|
50
|
|
|
|
|
if (count < 6) return 0; |
|
125
|
6
|
|
|
|
|
|
*ex = nums[4]; |
|
126
|
6
|
|
|
|
|
|
*ey = nums[5]; |
|
127
|
6
|
|
|
|
|
|
return 1; |
|
128
|
|
|
|
|
|
|
} |
|
129
|
|
|
|
|
|
|
|
|
130
|
1
|
|
|
|
|
|
pdfmake_err_t pdfmake_redact_rewrite_stream( |
|
131
|
|
|
|
|
|
|
const uint8_t *in, size_t in_len, |
|
132
|
|
|
|
|
|
|
const pdfmake_redact_t *redactions, size_t n_redactions, |
|
133
|
|
|
|
|
|
|
pdfmake_buf_t *out) |
|
134
|
|
|
|
|
|
|
{ |
|
135
|
|
|
|
|
|
|
const uint8_t *p; |
|
136
|
|
|
|
|
|
|
size_t remaining; |
|
137
|
1
|
50
|
|
|
|
|
if (!in || !out) return PDFMAKE_EINVAL; |
|
|
|
50
|
|
|
|
|
|
|
138
|
1
|
50
|
|
|
|
|
if (n_redactions == 0) { |
|
139
|
0
|
|
|
|
|
|
return pdfmake_buf_append(out, in, in_len); |
|
140
|
|
|
|
|
|
|
} |
|
141
|
|
|
|
|
|
|
|
|
142
|
1
|
|
|
|
|
|
p = in; |
|
143
|
1
|
|
|
|
|
|
remaining = in_len; |
|
144
|
|
|
|
|
|
|
|
|
145
|
7
|
50
|
|
|
|
|
while (remaining > 0) { |
|
146
|
7
|
|
|
|
|
|
const uint8_t *bt = find_op(p, remaining, "BT"); |
|
147
|
|
|
|
|
|
|
pdfmake_err_t err; |
|
148
|
|
|
|
|
|
|
size_t after_bt_off; |
|
149
|
|
|
|
|
|
|
const uint8_t *et; |
|
150
|
|
|
|
|
|
|
const uint8_t *block_end; |
|
151
|
|
|
|
|
|
|
size_t block_len; |
|
152
|
|
|
|
|
|
|
int keep; |
|
153
|
|
|
|
|
|
|
const uint8_t *tm; |
|
154
|
|
|
|
|
|
|
size_t consumed; |
|
155
|
|
|
|
|
|
|
|
|
156
|
7
|
100
|
|
|
|
|
if (!bt) { |
|
157
|
1
|
|
|
|
|
|
err = pdfmake_buf_append(out, p, remaining); |
|
158
|
1
|
|
|
|
|
|
return err; |
|
159
|
|
|
|
|
|
|
} |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
/* Copy everything up to (but not including) BT. */ |
|
162
|
6
|
|
|
|
|
|
err = pdfmake_buf_append(out, p, (size_t)(bt - p)); |
|
163
|
6
|
50
|
|
|
|
|
if (err != PDFMAKE_OK) return err; |
|
164
|
|
|
|
|
|
|
|
|
165
|
6
|
|
|
|
|
|
after_bt_off = (size_t)(bt - p) + 2; |
|
166
|
6
|
|
|
|
|
|
et = find_op(bt + 2, remaining - after_bt_off, "ET"); |
|
167
|
6
|
50
|
|
|
|
|
if (!et) { |
|
168
|
0
|
|
|
|
|
|
err = pdfmake_buf_append(out, bt, remaining - (size_t)(bt - p)); |
|
169
|
0
|
|
|
|
|
|
return err; |
|
170
|
|
|
|
|
|
|
} |
|
171
|
6
|
|
|
|
|
|
block_end = et + 2; /* include "ET" */ |
|
172
|
6
|
|
|
|
|
|
block_len = (size_t)(block_end - bt); |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
/* Decide whether to keep this block. */ |
|
175
|
6
|
|
|
|
|
|
keep = 1; |
|
176
|
6
|
|
|
|
|
|
tm = find_op(bt + 2, (size_t)(et - (bt + 2)), "Tm"); |
|
177
|
6
|
50
|
|
|
|
|
if (tm) { |
|
178
|
|
|
|
|
|
|
double ex, ey; |
|
179
|
6
|
50
|
|
|
|
|
if (parse_tm_origin(bt + 2, tm, &ex, &ey)) { |
|
180
|
|
|
|
|
|
|
size_t i; |
|
181
|
12
|
100
|
|
|
|
|
for (i = 0; i < n_redactions; i++) { |
|
182
|
10
|
100
|
|
|
|
|
if (point_in_rect(ex, ey, redactions[i].rect)) { |
|
183
|
4
|
|
|
|
|
|
keep = 0; |
|
184
|
4
|
|
|
|
|
|
break; |
|
185
|
|
|
|
|
|
|
} |
|
186
|
|
|
|
|
|
|
} |
|
187
|
|
|
|
|
|
|
} |
|
188
|
|
|
|
|
|
|
} |
|
189
|
|
|
|
|
|
|
|
|
190
|
6
|
100
|
|
|
|
|
if (keep) { |
|
191
|
2
|
|
|
|
|
|
err = pdfmake_buf_append(out, bt, block_len); |
|
192
|
2
|
50
|
|
|
|
|
if (err != PDFMAKE_OK) return err; |
|
193
|
|
|
|
|
|
|
} |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
/* Advance past the block. */ |
|
196
|
6
|
|
|
|
|
|
consumed = (size_t)(bt - p) + block_len; |
|
197
|
6
|
|
|
|
|
|
p = bt + block_len; |
|
198
|
6
|
|
|
|
|
|
remaining -= consumed; |
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
|
|
201
|
0
|
|
|
|
|
|
return PDFMAKE_OK; |
|
202
|
|
|
|
|
|
|
} |