File Coverage

src/pdfmake_redact_rewrite.c
Criterion Covered Total %
statement 75 86 87.2
branch 53 80 66.2
condition n/a
subroutine n/a
pod n/a
total 128 166 77.1


line stmt bran cond sub pod time code
1             /*
2             * pdfmake_redact_rewrite.c — Content-stream rewriter for true redaction.
3             *
4             * Given an existing PDF content stream plus a list of redaction rects,
5             * produce a new stream with every text-showing operator whose baseline
6             * origin falls inside a rect omitted. Everything else is copied
7             * verbatim.
8             *
9             * Scope (phase 1): tuned for streams produced by PDF::Make::Builder,
10             * which emit each `add_text` as a self-contained BT..ET block:
11             *
12             * BT
13             * 0 0 0 rg
14             * /F_Helvetica_normal 9 Tf
15             * 1 0 0 1 Tm
16             * (text) Tj
17             * ET
18             *
19             * A block is dropped when its Tm origin (the last two of the six Tm
20             * operands) lies inside any redaction rect. All non-text operators
21             * (graphics state, paths, shape fills used for the black rectangles
22             * painted by mark_redaction) are preserved.
23             *
24             * Limitations:
25             * - Does not handle CTM transformations (cm operator) – assumes
26             * identity CTM, which matches Builder output.
27             * - Does not parse TJ/'/\" operators individually; a block containing
28             * those inside a rect would still be dropped via its Tm origin.
29             * - Non-Builder streams with multiple Tj ops inside one BT..ET block
30             * are handled at block granularity (all or nothing).
31             */
32              
33             #include "pdfmake_redact.h"
34             #include "pdfmake_buf.h"
35             #include <ctype.h>
36             #include <stdlib.h>
37             #include <string.h>
38              
/*
 * Test whether the point (x, y) lies inside rectangle r, edges included.
 *
 * r holds two diagonally opposite corners as { x0, y0, x1, y1 }. The corners
 * may be supplied in either order: each axis is normalised before comparing,
 * so a rect stored "upper-right first" still matches instead of silently
 * matching nothing (which would leave text unredacted).
 *
 * Returns 1 when the point is inside or on the boundary, 0 otherwise. Any NaN
 * coordinate makes every comparison false, so the result is 0.
 */
static int point_in_rect(double x, double y, const double r[4]) {
    double x_lo = r[0];
    double x_hi = r[2];
    double y_lo = r[1];
    double y_hi = r[3];
    double swap;

    /* Swap only on a strict "greater than": NaN bounds are left in place so
     * the range checks below still reject them. */
    if (x_lo > x_hi) {
        swap = x_lo;
        x_lo = x_hi;
        x_hi = swap;
    }
    if (y_lo > y_hi) {
        swap = y_lo;
        y_lo = y_hi;
        y_hi = swap;
    }

    return x >= x_lo && x <= x_hi && y >= y_lo && y <= y_hi;
}
42              
/*
 * Length-bounded substring search over raw bytes.
 *
 * Looks for the first occurrence of the NUL-terminated string `needle` inside
 * [hay, hay + hay_len). The haystack is compared byte-for-byte with memcmp,
 * so embedded NUL bytes in it do not end the search.
 *
 * Returns a pointer to the first byte of the match, or NULL when the needle
 * is empty, longer than the haystack, or not present.
 */
static const uint8_t *find_bytes(const uint8_t *hay, size_t hay_len,
                                 const char *needle) {
    const size_t needle_len = strlen(needle);
    const uint8_t *cursor;
    const uint8_t *last_start;

    if (needle_len == 0 || needle_len > hay_len) {
        return NULL;
    }

    /* Final position at which a full-length match can still begin. */
    last_start = hay + (hay_len - needle_len);

    for (cursor = hay; cursor <= last_start; cursor++) {
        if (memcmp(cursor, needle, needle_len) == 0) {
            return cursor;
        }
    }
    return NULL;
}
54              
/*
 * Decide whether the op_len bytes starting at `op` form a free-standing token
 * inside [buf, buf + buf_len).
 *
 * The left side is acceptable when `op` is the very first byte of the buffer
 * or the preceding byte is whitespace; the right side is acceptable when the
 * token runs up to (or past) the end of the buffer or the next byte is
 * whitespace. Whitespace is whatever isspace() reports.
 *
 * Returns non-zero for a standalone token, 0 otherwise (including when `op`
 * points before `buf`).
 */
static int is_standalone_op(const uint8_t *buf, size_t buf_len,
                            const uint8_t *op, size_t op_len) {
    const uint8_t *buf_end;
    const uint8_t *op_end;
    int left_is_boundary;

    if (op < buf) {
        return 0;
    }

    left_is_boundary = (op == buf) || isspace((unsigned char)op[-1]);
    if (!left_is_boundary) {
        return 0;
    }

    buf_end = buf + buf_len;
    op_end = op + op_len;
    if (op_end >= buf_end) {
        return 1; /* token touches the end of the buffer */
    }
    return isspace((unsigned char)*op_end);
}
66              
/*
 * Return the first position in [buf, buf + buf_len) where `tag` occurs as a
 * standalone operator, or NULL when there is none.
 *
 * Each raw byte match from find_bytes() is checked with is_standalone_op()
 * against the whole buffer (so a match at the very start or end counts as
 * being on a boundary). Rejected matches are skipped by resuming the search
 * one byte after the rejected position.
 */
static const uint8_t *find_op(const uint8_t *buf, size_t buf_len,
                              const char *tag) {
    const size_t tag_len = strlen(tag);
    size_t offset = 0;

    while (offset < buf_len) {
        const uint8_t *candidate =
            find_bytes(buf + offset, buf_len - offset, tag);

        if (candidate == NULL) {
            return NULL;
        }
        if (is_standalone_op(buf, buf_len, candidate, tag_len)) {
            return candidate;
        }
        /* Embedded in a longer token: retry just past this occurrence. */
        offset = (size_t)(candidate - buf) + 1;
    }
    return NULL;
}
84              
/*
 * Parse one whole token as a decimal number without consulting the C locale.
 *
 * Accepted form: optional sign, digits with at most one '.', at least one
 * digit overall, and an optional e/E exponent with at least one digit. The
 * '.' is always the decimal separator, so the result does not change when the
 * host process has switched LC_NUMERIC to a comma-decimal locale. Tokens such
 * as "nan", "inf" or hexadecimal forms are rejected.
 *
 * The first 15 significant digits are folded into the mantissa (exactly
 * representable in a double); further digits only adjust the magnitude.
 *
 * Returns 1 and stores the value in *out when the entire token is a number,
 * 0 otherwise (*out is then left untouched).
 */
static int parse_pdf_number(const uint8_t *s, size_t len, double *out) {
    enum { MAX_SIG_DIGITS = 15, MAX_POW10 = 400, MAX_EXP_VALUE = 10000 };
    size_t i = 0;
    int negative = 0;
    int have_digit = 0;
    int sig_digits = 0;
    long dec_exp = 0; /* power of ten still to be applied to mantissa */
    double mantissa = 0.0;
    double value;

    if (i < len && (s[i] == '+' || s[i] == '-')) {
        negative = (s[i] == '-');
        i++;
    }

    /* Integer part. */
    for (; i < len && s[i] >= '0' && s[i] <= '9'; i++) {
        have_digit = 1;
        if (sig_digits < MAX_SIG_DIGITS) {
            mantissa = mantissa * 10.0 + (double)(s[i] - '0');
            if (mantissa != 0.0) sig_digits++; /* leading zeros are free */
        } else if (dec_exp < MAX_POW10) {
            dec_exp++; /* digit dropped, magnitude kept */
        }
    }

    /* Fractional part. */
    if (i < len && s[i] == '.') {
        i++;
        for (; i < len && s[i] >= '0' && s[i] <= '9'; i++) {
            have_digit = 1;
            if (sig_digits < MAX_SIG_DIGITS && dec_exp > -MAX_POW10) {
                mantissa = mantissa * 10.0 + (double)(s[i] - '0');
                if (mantissa != 0.0) sig_digits++;
                dec_exp--;
            }
        }
    }

    if (!have_digit) return 0;

    /* Optional exponent: still accepted because the previous strtod()-based
     * parser accepted it. */
    if (i < len && (s[i] == 'e' || s[i] == 'E')) {
        int exp_negative = 0;
        long exp_value = 0;

        i++;
        if (i < len && (s[i] == '+' || s[i] == '-')) {
            exp_negative = (s[i] == '-');
            i++;
        }
        if (i >= len || s[i] < '0' || s[i] > '9') return 0;
        for (; i < len && s[i] >= '0' && s[i] <= '9'; i++) {
            if (exp_value < MAX_EXP_VALUE) {
                exp_value = exp_value * 10 + (long)(s[i] - '0');
            }
        }
        dec_exp += exp_negative ? -exp_value : exp_value;
    }

    if (i != len) return 0; /* trailing bytes: not a number */

    value = mantissa;
    if (mantissa != 0.0 && dec_exp != 0) {
        long steps = dec_exp < 0 ? -dec_exp : dec_exp;
        double scale = 1.0;

        if (steps > MAX_POW10) steps = MAX_POW10;
        while (steps-- > 0) scale *= 10.0;
        /* A single multiply/divide keeps the result correctly rounded for
         * the small exponents that real coordinates use. */
        value = dec_exp < 0 ? mantissa / scale : mantissa * scale;
    }

    *out = negative ? -value : value;
    return 1;
}

/*
 * Extract the translation (e, f) of a Tm operator from the text that
 * precedes it.
 *
 * block_start .. tm_op is split into whitespace-separated tokens. Numeric
 * tokens are pushed into a sliding window holding the most recent six; any
 * non-numeric token empties the window. When the scan reaches tm_op the
 * window therefore holds the operands written directly before Tm.
 *
 * Numbers are parsed by parse_pdf_number(), which is locale-independent: a
 * comma-decimal LC_NUMERIC can no longer make "72.5" look like a non-number
 * and cause the block to be kept unredacted.
 *
 * Returns 1 and fills *ex / *ey with the fifth and sixth operand when six
 * consecutive numbers precede tm_op; returns 0 otherwise, leaving *ex / *ey
 * untouched.
 */
static int parse_tm_origin(const uint8_t *block_start, const uint8_t *tm_op,
                           double *ex, double *ey) {
    double window[6];
    int filled = 0;
    const uint8_t *p = block_start;

    while (p < tm_op) {
        const uint8_t *tok;
        double v;

        while (p < tm_op && isspace((unsigned char)*p)) p++;
        if (p >= tm_op) break;

        tok = p;
        while (p < tm_op && !isspace((unsigned char)*p)) p++;

        if (!parse_pdf_number(tok, (size_t)(p - tok), &v)) {
            filled = 0; /* operator, name, string, ...: start over */
            continue;
        }

        if (filled == 6) {
            /* Window full: discard the oldest value. */
            memmove(window, window + 1, 5 * sizeof window[0]);
            filled = 5;
        }
        window[filled++] = v;
    }

    if (filled < 6) return 0;
    *ex = window[4];
    *ey = window[5];
    return 1;
}
129              
130 1           pdfmake_err_t pdfmake_redact_rewrite_stream(
131             const uint8_t *in, size_t in_len,
132             const pdfmake_redact_t *redactions, size_t n_redactions,
133             pdfmake_buf_t *out)
134             {
135             const uint8_t *p;
136             size_t remaining;
137 1 50         if (!in || !out) return PDFMAKE_EINVAL;
    50          
138 1 50         if (n_redactions == 0) {
139 0           return pdfmake_buf_append(out, in, in_len);
140             }
141              
142 1           p = in;
143 1           remaining = in_len;
144              
145 7 50         while (remaining > 0) {
146 7           const uint8_t *bt = find_op(p, remaining, "BT");
147             pdfmake_err_t err;
148             size_t after_bt_off;
149             const uint8_t *et;
150             const uint8_t *block_end;
151             size_t block_len;
152             int keep;
153             const uint8_t *tm;
154             size_t consumed;
155              
156 7 100         if (!bt) {
157 1           err = pdfmake_buf_append(out, p, remaining);
158 1           return err;
159             }
160              
161             /* Copy everything up to (but not including) BT. */
162 6           err = pdfmake_buf_append(out, p, (size_t)(bt - p));
163 6 50         if (err != PDFMAKE_OK) return err;
164              
165 6           after_bt_off = (size_t)(bt - p) + 2;
166 6           et = find_op(bt + 2, remaining - after_bt_off, "ET");
167 6 50         if (!et) {
168 0           err = pdfmake_buf_append(out, bt, remaining - (size_t)(bt - p));
169 0           return err;
170             }
171 6           block_end = et + 2; /* include "ET" */
172 6           block_len = (size_t)(block_end - bt);
173              
174             /* Decide whether to keep this block. */
175 6           keep = 1;
176 6           tm = find_op(bt + 2, (size_t)(et - (bt + 2)), "Tm");
177 6 50         if (tm) {
178             double ex, ey;
179 6 50         if (parse_tm_origin(bt + 2, tm, &ex, &ey)) {
180             size_t i;
181 12 100         for (i = 0; i < n_redactions; i++) {
182 10 100         if (point_in_rect(ex, ey, redactions[i].rect)) {
183 4           keep = 0;
184 4           break;
185             }
186             }
187             }
188             }
189              
190 6 100         if (keep) {
191 2           err = pdfmake_buf_append(out, bt, block_len);
192 2 50         if (err != PDFMAKE_OK) return err;
193             }
194              
195             /* Advance past the block. */
196 6           consumed = (size_t)(bt - p) + block_len;
197 6           p = bt + block_len;
198 6           remaining -= consumed;
199             }
200              
201 0           return PDFMAKE_OK;
202             }