Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/pdf/pdf-clean.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2025 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 #include "mupdf/fitz.h" | |
| 24 #include "pdf-annot-imp.h" | |
| 25 | |
| 26 #include <string.h> | |
| 27 #include <assert.h> | |
| 28 | |
| 29 static void | |
| 30 pdf_filter_xobject(fz_context *ctx, pdf_document *doc, pdf_obj *xobj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up); | |
| 31 | |
| 32 static void | |
| 33 pdf_filter_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up); | |
| 34 | |
| 35 static void | |
| 36 pdf_filter_resources(fz_context *ctx, pdf_document *doc, pdf_obj *in_res, pdf_obj *res, pdf_filter_options *options, pdf_cycle_list *cycle_up) | |
| 37 { | |
| 38 pdf_obj *obj; | |
| 39 int i, n; | |
| 40 | |
| 41 if (!options->recurse) | |
| 42 return; | |
| 43 | |
| 44 /* ExtGState */ | |
| 45 obj = pdf_dict_get(ctx, res, PDF_NAME(ExtGState)); | |
| 46 if (obj) | |
| 47 { | |
| 48 n = pdf_dict_len(ctx, obj); | |
| 49 for (i = 0; i < n; i++) | |
| 50 { | |
| 51 pdf_obj *smask = pdf_dict_get(ctx, pdf_dict_get_val(ctx, obj, i), PDF_NAME(SMask)); | |
| 52 if (smask) | |
| 53 { | |
| 54 pdf_obj *g = pdf_dict_get(ctx, smask, PDF_NAME(G)); | |
| 55 if (g) | |
| 56 { | |
| 57 /* Transparency group XObject */ | |
| 58 pdf_filter_xobject(ctx, doc, g, in_res, options, cycle_up); | |
| 59 } | |
| 60 } | |
| 61 } | |
| 62 } | |
| 63 | |
| 64 /* Pattern */ | |
| 65 obj = pdf_dict_get(ctx, res, PDF_NAME(Pattern)); | |
| 66 if (obj) | |
| 67 { | |
| 68 n = pdf_dict_len(ctx, obj); | |
| 69 for (i = 0; i < n; i++) | |
| 70 { | |
| 71 pdf_obj *pat = pdf_dict_get_val(ctx, obj, i); | |
| 72 if (pat && pdf_dict_get_int(ctx, pat, PDF_NAME(PatternType)) == 1) | |
| 73 { | |
| 74 pdf_filter_xobject(ctx, doc, pat, in_res, options, cycle_up); | |
| 75 } | |
| 76 } | |
| 77 } | |
| 78 | |
| 79 /* XObject */ | |
| 80 if (!options->instance_forms) | |
| 81 { | |
| 82 obj = pdf_dict_get(ctx, res, PDF_NAME(XObject)); | |
| 83 if (obj) | |
| 84 { | |
| 85 n = pdf_dict_len(ctx, obj); | |
| 86 for (i = 0; i < n; i++) | |
| 87 { | |
| 88 pdf_obj *xobj = pdf_dict_get_val(ctx, obj, i); | |
| 89 if (xobj && pdf_dict_get(ctx, xobj, PDF_NAME(Subtype)) == PDF_NAME(Form)) | |
| 90 { | |
| 91 pdf_filter_xobject(ctx, doc, xobj, in_res, options, cycle_up); | |
| 92 } | |
| 93 } | |
| 94 } | |
| 95 } | |
| 96 | |
| 97 /* Font */ | |
| 98 obj = pdf_dict_get(ctx, res, PDF_NAME(Font)); | |
| 99 if (obj) | |
| 100 { | |
| 101 n = pdf_dict_len(ctx, obj); | |
| 102 for (i = 0; i < n; i++) | |
| 103 { | |
| 104 pdf_obj *font = pdf_dict_get_val(ctx, obj, i); | |
| 105 if (font && pdf_dict_get(ctx, font, PDF_NAME(Subtype)) == PDF_NAME(Type3)) | |
| 106 { | |
| 107 pdf_filter_type3(ctx, doc, font, in_res, options, cycle_up); | |
| 108 } | |
| 109 } | |
| 110 } | |
| 111 | |
| 112 } | |
| 113 | |
| 114 /* | |
| 115 Clean a content stream's rendering operations, with an optional post | |
| 116 processing step. | |
| 117 | |
| 118 Firstly, this filters the PDF operators used to avoid (some cases of) | |
| 119 repetition, and leaves the content stream in a balanced state with an | |
| 120 unchanged top level matrix etc. At the same time, the resources actually | |
| 121 used are collected into a new resource dictionary. | |
| 122 | |
| 123 Next, the resources themselves are recursively cleaned (as appropriate) | |
| 124 in the same way, if the 'recurse' flag is set. | |
| 125 */ | |
| 126 static void | |
| 127 pdf_filter_content_stream( | |
| 128 fz_context *ctx, | |
| 129 pdf_document *doc, | |
| 130 pdf_obj *in_stm, | |
| 131 pdf_obj *in_res, | |
| 132 fz_matrix transform, | |
| 133 pdf_filter_options *options, | |
| 134 int struct_parents, | |
| 135 fz_buffer **out_buf, | |
| 136 pdf_obj **out_res, | |
| 137 pdf_cycle_list *cycle_up) | |
| 138 { | |
| 139 pdf_processor *proc_buffer = NULL; | |
| 140 pdf_processor *top = NULL; | |
| 141 pdf_processor **list = NULL; | |
| 142 int num_filters = 0; | |
| 143 int i; | |
| 144 | |
| 145 fz_var(proc_buffer); | |
| 146 | |
| 147 *out_buf = NULL; | |
| 148 *out_res = NULL; | |
| 149 | |
| 150 if (options->filters) | |
| 151 for (; options->filters[num_filters].filter != NULL; num_filters++); | |
| 152 | |
| 153 if (num_filters > 0) | |
| 154 list = fz_calloc(ctx, num_filters, sizeof(pdf_processor *)); | |
| 155 | |
| 156 fz_try(ctx) | |
| 157 { | |
| 158 *out_buf = fz_new_buffer(ctx, 1024); | |
| 159 top = proc_buffer = pdf_new_buffer_processor(ctx, *out_buf, options->ascii, options->newlines); | |
| 160 if (num_filters > 0) | |
| 161 { | |
| 162 for (i = num_filters - 1; i >= 0; i--) | |
| 163 top = list[i] = options->filters[i].filter(ctx, doc, top, struct_parents, transform, options, options->filters[i].options); | |
| 164 } | |
| 165 | |
| 166 pdf_process_contents(ctx, top, doc, in_res, in_stm, NULL, out_res); | |
| 167 pdf_close_processor(ctx, top); | |
| 168 | |
| 169 pdf_filter_resources(ctx, doc, in_res, *out_res, options, cycle_up); | |
| 170 } | |
| 171 fz_always(ctx) | |
| 172 { | |
| 173 for (i = 0; i < num_filters; i++) | |
| 174 pdf_drop_processor(ctx, list[i]); | |
| 175 pdf_drop_processor(ctx, proc_buffer); | |
| 176 fz_free(ctx, list); | |
| 177 } | |
| 178 fz_catch(ctx) | |
| 179 { | |
| 180 fz_drop_buffer(ctx, *out_buf); | |
| 181 *out_buf = NULL; | |
| 182 pdf_drop_obj(ctx, *out_res); | |
| 183 *out_res = NULL; | |
| 184 fz_rethrow(ctx); | |
| 185 } | |
| 186 } | |
| 187 | |
| 188 /* | |
| 189 Clean a Type 3 font's CharProcs content streams. This works almost | |
| 190 exactly like pdf_filter_content_stream, but the resource dictionary is | |
| 191 shared between all off the CharProcs. | |
| 192 */ | |
| 193 static void | |
| 194 pdf_filter_type3(fz_context *ctx, pdf_document *doc, pdf_obj *obj, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up) | |
| 195 { | |
| 196 pdf_cycle_list cycle; | |
| 197 pdf_processor *proc_buffer = NULL; | |
| 198 pdf_processor *proc_filter = NULL; | |
| 199 pdf_obj *in_res; | |
| 200 pdf_obj *out_res = NULL; | |
| 201 pdf_obj *charprocs; | |
| 202 int i, n; | |
| 203 int num_filters = 0; | |
| 204 pdf_processor **list = NULL; | |
| 205 fz_buffer *buffer = NULL; | |
| 206 pdf_processor *top = NULL; | |
| 207 pdf_obj *res = NULL; | |
| 208 fz_buffer *new_buf = NULL; | |
| 209 | |
| 210 fz_var(out_res); | |
| 211 fz_var(proc_buffer); | |
| 212 fz_var(proc_filter); | |
| 213 fz_var(buffer); | |
| 214 fz_var(res); | |
| 215 fz_var(new_buf); | |
| 216 | |
| 217 /* We cannot combine instancing with type3 fonts. The new names for | |
| 218 * instanced form/image resources would clash, since they start over for | |
| 219 * each content stream. This is not a problem for now, because we only | |
| 220 * use instancing with redaction, and redaction doesn't clean type3 | |
| 221 * fonts. | |
| 222 */ | |
| 223 assert(!options->instance_forms); | |
| 224 | |
| 225 /* Avoid recursive cycles! */ | |
| 226 if (pdf_cycle(ctx, &cycle, cycle_up, obj)) | |
| 227 return; | |
| 228 | |
| 229 if (options->filters) | |
| 230 for (; options->filters[num_filters].filter != NULL; num_filters++); | |
| 231 | |
| 232 if (num_filters > 0) | |
| 233 list = fz_calloc(ctx, num_filters, sizeof(pdf_processor *)); | |
| 234 | |
| 235 fz_try(ctx) | |
| 236 { | |
| 237 in_res = pdf_dict_get(ctx, obj, PDF_NAME(Resources)); | |
| 238 if (!in_res) | |
| 239 in_res = page_res; | |
| 240 | |
| 241 buffer = fz_new_buffer(ctx, 1024); | |
| 242 top = proc_buffer = pdf_new_buffer_processor(ctx, buffer, options->ascii, options->newlines); | |
| 243 if (num_filters > 0) | |
| 244 { | |
| 245 for (i = num_filters - 1; i >= 0; i--) | |
| 246 top = list[i] = options->filters[i].filter(ctx, doc, top, -1, fz_identity, options, options->filters[i].options); | |
| 247 } | |
| 248 | |
| 249 pdf_processor_push_resources(ctx, top, in_res); | |
| 250 charprocs = pdf_dict_get(ctx, obj, PDF_NAME(CharProcs)); | |
| 251 n = pdf_dict_len(ctx, charprocs); | |
| 252 for (i = 0; i < n; i++) | |
| 253 { | |
| 254 pdf_obj *val = pdf_dict_get_val(ctx, charprocs, i); | |
| 255 | |
| 256 if (i > 0) | |
| 257 { | |
| 258 pdf_reset_processor(ctx, top); | |
| 259 fz_clear_buffer(ctx, buffer); | |
| 260 } | |
| 261 pdf_process_raw_contents(ctx, top, doc, in_res, val, NULL); | |
| 262 | |
| 263 pdf_close_processor(ctx, top); | |
| 264 | |
| 265 if (!options->no_update) | |
| 266 { | |
| 267 new_buf = fz_clone_buffer(ctx, buffer); | |
| 268 pdf_update_stream(ctx, doc, val, new_buf, 0); | |
| 269 fz_drop_buffer(ctx, new_buf); | |
| 270 new_buf = NULL; | |
| 271 } | |
| 272 } | |
| 273 | |
| 274 } | |
| 275 fz_always(ctx) | |
| 276 { | |
| 277 res = pdf_processor_pop_resources(ctx, top); | |
| 278 for (i = 0; i < num_filters; i++) | |
| 279 pdf_drop_processor(ctx, list[i]); | |
| 280 pdf_drop_processor(ctx, proc_buffer); | |
| 281 fz_free(ctx, list); | |
| 282 fz_drop_buffer(ctx, new_buf); | |
| 283 fz_drop_buffer(ctx, buffer); | |
| 284 } | |
| 285 fz_catch(ctx) | |
| 286 { | |
| 287 pdf_drop_obj(ctx, res); | |
| 288 fz_rethrow(ctx); | |
| 289 } | |
| 290 pdf_dict_put_drop(ctx, obj, PDF_NAME(Resources), res); | |
| 291 } | |
| 292 | |
| 293 static void | |
| 294 pdf_filter_xobject(fz_context *ctx, pdf_document *doc, pdf_obj *stm, pdf_obj *page_res, pdf_filter_options *options, pdf_cycle_list *cycle_up) | |
| 295 { | |
| 296 pdf_cycle_list cycle; | |
| 297 int struct_parents; | |
| 298 pdf_obj *new_res = NULL; | |
| 299 fz_buffer *new_buf = NULL; | |
| 300 pdf_obj *old_res; | |
| 301 | |
| 302 fz_var(new_buf); | |
| 303 fz_var(new_res); | |
| 304 | |
| 305 // TODO for RJW: XObject can also be a StructParent; how do we handle that case? | |
| 306 | |
| 307 struct_parents = pdf_dict_get_int_default(ctx, stm, PDF_NAME(StructParents), -1); | |
| 308 | |
| 309 old_res = pdf_dict_get(ctx, stm, PDF_NAME(Resources)); | |
| 310 if (!old_res) | |
| 311 old_res = page_res; | |
| 312 | |
| 313 // TODO: don't clean objects more than once. | |
| 314 | |
| 315 /* Avoid recursive cycles! */ | |
| 316 if (pdf_cycle(ctx, &cycle, cycle_up, stm)) | |
| 317 return; | |
| 318 fz_try(ctx) | |
| 319 { | |
| 320 pdf_filter_content_stream(ctx, doc, stm, old_res, fz_identity, options, struct_parents, &new_buf, &new_res, &cycle); | |
| 321 if (!options->no_update) | |
| 322 { | |
| 323 pdf_update_stream(ctx, doc, stm, new_buf, 0); | |
| 324 pdf_dict_put(ctx, stm, PDF_NAME(Resources), new_res); | |
| 325 } | |
| 326 } | |
| 327 fz_always(ctx) | |
| 328 { | |
| 329 fz_drop_buffer(ctx, new_buf); | |
| 330 pdf_drop_obj(ctx, new_res); | |
| 331 } | |
| 332 fz_catch(ctx) | |
| 333 fz_rethrow(ctx); | |
| 334 } | |
| 335 | |
| 336 pdf_obj * | |
| 337 pdf_filter_xobject_instance(fz_context *ctx, pdf_obj *old_xobj, pdf_obj *page_res, fz_matrix transform, pdf_filter_options *options, pdf_cycle_list *cycle_up) | |
| 338 { | |
| 339 pdf_cycle_list cycle; | |
| 340 pdf_document *doc = pdf_get_bound_document(ctx, old_xobj); | |
| 341 pdf_obj *new_xobj; | |
| 342 pdf_obj *new_res, *old_res; | |
| 343 fz_buffer *new_buf; | |
| 344 int struct_parents; | |
| 345 fz_matrix matrix; | |
| 346 | |
| 347 fz_var(new_xobj); | |
| 348 fz_var(new_buf); | |
| 349 fz_var(new_res); | |
| 350 | |
| 351 // TODO for RJW: XObject can also be a StructParent; how do we handle that case? | |
| 352 // TODO for RJW: will we run into trouble by duplicating StructParents stuff? | |
| 353 | |
| 354 struct_parents = pdf_dict_get_int_default(ctx, old_xobj, PDF_NAME(StructParents), -1); | |
| 355 | |
| 356 old_res = pdf_dict_get(ctx, old_xobj, PDF_NAME(Resources)); | |
| 357 if (!old_res) | |
| 358 old_res = page_res; | |
| 359 | |
| 360 if (pdf_cycle(ctx, &cycle, cycle_up, old_xobj)) | |
| 361 return pdf_keep_obj(ctx, old_xobj); | |
| 362 | |
| 363 matrix = pdf_dict_get_matrix(ctx, old_xobj, PDF_NAME(Matrix)); | |
| 364 transform = fz_concat(matrix, transform); | |
| 365 | |
| 366 fz_try(ctx) | |
| 367 { | |
| 368 new_xobj = pdf_add_object_drop(ctx, doc, pdf_copy_dict(ctx, old_xobj)); | |
| 369 pdf_filter_content_stream(ctx, doc, old_xobj, old_res, transform, options, struct_parents, &new_buf, &new_res, &cycle); | |
| 370 if (!options->no_update) | |
| 371 { | |
| 372 pdf_update_stream(ctx, doc, new_xobj, new_buf, 0); | |
| 373 pdf_dict_put(ctx, new_xobj, PDF_NAME(Resources), new_res); | |
| 374 } | |
| 375 } | |
| 376 fz_always(ctx) | |
| 377 { | |
| 378 fz_drop_buffer(ctx, new_buf); | |
| 379 pdf_drop_obj(ctx, new_res); | |
| 380 } | |
| 381 fz_catch(ctx) | |
| 382 { | |
| 383 pdf_drop_obj(ctx, new_xobj); | |
| 384 fz_rethrow(ctx); | |
| 385 } | |
| 386 | |
| 387 return new_xobj; | |
| 388 } | |
| 389 | |
| 390 void pdf_filter_page_contents(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_filter_options *options) | |
| 391 { | |
| 392 pdf_obj *contents, *old_res; | |
| 393 pdf_obj *new_res; | |
| 394 fz_buffer *buffer; | |
| 395 int struct_parents; | |
| 396 | |
| 397 struct_parents = pdf_dict_get_int_default(ctx, page->obj, PDF_NAME(StructParents), -1); | |
| 398 | |
| 399 contents = pdf_page_contents(ctx, page); | |
| 400 old_res = pdf_page_resources(ctx, page); | |
| 401 | |
| 402 pdf_filter_content_stream(ctx, doc, contents, old_res, fz_identity, options, struct_parents, &buffer, &new_res, NULL); | |
| 403 | |
| 404 fz_try(ctx) | |
| 405 { | |
| 406 if (options->complete) | |
| 407 options->complete(ctx, buffer, options->opaque); | |
| 408 if (!options->no_update) | |
| 409 { | |
| 410 /* Always create a new stream object to replace the page contents. This is useful | |
| 411 both if the contents is an array of streams, is entirely missing or if the contents | |
| 412 are shared between pages. */ | |
| 413 contents = pdf_add_object_drop(ctx, doc, pdf_new_dict(ctx, doc, 1)); | |
| 414 pdf_dict_put_drop(ctx, page->obj, PDF_NAME(Contents), contents); | |
| 415 pdf_update_stream(ctx, doc, contents, buffer, 0); | |
| 416 pdf_dict_put(ctx, page->obj, PDF_NAME(Resources), new_res); | |
| 417 } | |
| 418 } | |
| 419 fz_always(ctx) | |
| 420 { | |
| 421 fz_drop_buffer(ctx, buffer); | |
| 422 pdf_drop_obj(ctx, new_res); | |
| 423 } | |
| 424 fz_catch(ctx) | |
| 425 fz_rethrow(ctx); | |
| 426 } | |
| 427 | |
| 428 void pdf_filter_annot_contents(fz_context *ctx, pdf_document *doc, pdf_annot *annot, pdf_filter_options *options) | |
| 429 { | |
| 430 pdf_obj *ap = pdf_dict_get(ctx, annot->obj, PDF_NAME(AP)); | |
| 431 if (pdf_is_dict(ctx, ap)) | |
| 432 { | |
| 433 int i, n = pdf_dict_len(ctx, ap); | |
| 434 for (i = 0; i < n; i++) | |
| 435 { | |
| 436 pdf_obj *stm = pdf_dict_get_val(ctx, ap, i); | |
| 437 if (pdf_is_stream(ctx, stm)) | |
| 438 { | |
| 439 pdf_filter_xobject(ctx, doc, stm, NULL, options, NULL); | |
| 440 } | |
| 441 } | |
| 442 } | |
| 443 } | |
| 444 | |
| 445 /* REDACTIONS */ | |
| 446 | |
| 447 struct redact_filter_state { | |
| 448 pdf_filter_options filter_opts; | |
| 449 pdf_sanitize_filter_options sanitize_opts; | |
| 450 pdf_filter_factory filter_list[2]; | |
| 451 pdf_page *page; | |
| 452 pdf_annot *target; // NULL if all | |
| 453 int line_art; | |
| 454 int text; | |
| 455 }; | |
| 456 | |
| 457 | |
| 458 static void pdf_run_obj_to_buf(fz_context *ctx, fz_buffer *buffer, pdf_obj *obj, pdf_page *page) | |
| 459 { | |
| 460 pdf_processor *proc = pdf_new_buffer_processor(ctx, buffer, 0, 0); | |
| 461 pdf_obj *res; | |
| 462 | |
| 463 | |
| 464 fz_try(ctx) | |
| 465 { | |
| 466 res = pdf_xobject_resources(ctx, obj); | |
| 467 if (res == NULL) | |
| 468 res = pdf_page_resources(ctx, page); | |
| 469 | |
| 470 pdf_process_contents(ctx, proc, page->doc, res, obj, NULL, NULL); | |
| 471 pdf_close_processor(ctx, proc); | |
| 472 } | |
| 473 fz_always(ctx) | |
| 474 pdf_drop_processor(ctx, proc); | |
| 475 fz_catch(ctx) | |
| 476 fz_rethrow(ctx); | |
| 477 } | |
| 478 | |
| 479 static void | |
| 480 pdf_redact_end_page(fz_context *ctx, fz_buffer *buf, void *opaque) | |
| 481 { | |
| 482 struct redact_filter_state *red = opaque; | |
| 483 pdf_page *page = red->page; | |
| 484 pdf_annot *annot; | |
| 485 pdf_obj *qp; | |
| 486 int i, n; | |
| 487 | |
| 488 fz_append_string(ctx, buf, " 0 g\n"); | |
| 489 | |
| 490 for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) | |
| 491 { | |
| 492 if (red->target != NULL && red->target != annot) | |
| 493 continue; | |
| 494 if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) | |
| 495 { | |
| 496 pdf_obj *ro = pdf_dict_get(ctx, annot->obj, PDF_NAME(RO)); | |
| 497 if (ro) | |
| 498 { | |
| 499 pdf_run_obj_to_buf(ctx, buf, ro, page); | |
| 500 } | |
| 501 else | |
| 502 { | |
| 503 qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); | |
| 504 n = pdf_array_len(ctx, qp); | |
| 505 if (n > 0) | |
| 506 { | |
| 507 for (i = 0; i < n; i += 8) | |
| 508 { | |
| 509 fz_quad q = pdf_to_quad(ctx, qp, i); | |
| 510 fz_append_printf(ctx, buf, "%g %g m\n", q.ll.x, q.ll.y); | |
| 511 fz_append_printf(ctx, buf, "%g %g l\n", q.lr.x, q.lr.y); | |
| 512 fz_append_printf(ctx, buf, "%g %g l\n", q.ur.x, q.ur.y); | |
| 513 fz_append_printf(ctx, buf, "%g %g l\n", q.ul.x, q.ul.y); | |
| 514 fz_append_string(ctx, buf, "f\n"); | |
| 515 } | |
| 516 } | |
| 517 else | |
| 518 { | |
| 519 fz_rect r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); | |
| 520 fz_append_printf(ctx, buf, "%g %g m\n", r.x0, r.y0); | |
| 521 fz_append_printf(ctx, buf, "%g %g l\n", r.x1, r.y0); | |
| 522 fz_append_printf(ctx, buf, "%g %g l\n", r.x1, r.y1); | |
| 523 fz_append_printf(ctx, buf, "%g %g l\n", r.x0, r.y1); | |
| 524 fz_append_string(ctx, buf, "f\n"); | |
| 525 } | |
| 526 } | |
| 527 } | |
| 528 } | |
| 529 } | |
| 530 | |
| 531 static int | |
| 532 pdf_redact_text_filter(fz_context *ctx, void *opaque, int *ucsbuf, int ucslen, fz_matrix trm, fz_matrix ctm, fz_rect bbox) | |
| 533 { | |
| 534 struct redact_filter_state *red = opaque; | |
| 535 pdf_page *page = red->page; | |
| 536 pdf_annot *annot; | |
| 537 pdf_obj *qp; | |
| 538 fz_rect r; | |
| 539 fz_quad q; | |
| 540 int i, n; | |
| 541 float w, h; | |
| 542 | |
| 543 trm = fz_concat(trm, ctm); | |
| 544 bbox = fz_transform_rect(bbox, trm); | |
| 545 | |
| 546 /* Shrink character bbox a bit */ | |
| 547 w = bbox.x1 - bbox.x0; | |
| 548 h = bbox.y1 - bbox.y0; | |
| 549 bbox.x0 += w / 10; | |
| 550 bbox.x1 -= w / 10; | |
| 551 bbox.y0 += h / 10; | |
| 552 bbox.y1 -= h / 10; | |
| 553 | |
| 554 for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) | |
| 555 { | |
| 556 if (red->target != NULL && red->target != annot) | |
| 557 continue; | |
| 558 if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) | |
| 559 { | |
| 560 qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); | |
| 561 n = pdf_array_len(ctx, qp); | |
| 562 /* Note, we test for the intersection being a valid rectangle, NOT | |
| 563 * a non-empty one. This is because we can have 'empty' character | |
| 564 * boxes (say for diacritics), that while 0 width, do have a defined | |
| 565 * position on the plane, and hence inclusion makes sense. */ | |
| 566 if (n > 0) | |
| 567 { | |
| 568 for (i = 0; i < n; i += 8) | |
| 569 { | |
| 570 q = pdf_to_quad(ctx, qp, i); | |
| 571 r = fz_rect_from_quad(q); | |
| 572 if (fz_is_valid_rect(fz_intersect_rect(bbox, r))) | |
| 573 return 1; | |
| 574 } | |
| 575 } | |
| 576 else | |
| 577 { | |
| 578 r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); | |
| 579 if (fz_is_valid_rect(fz_intersect_rect(bbox, r))) | |
| 580 return 1; | |
| 581 } | |
| 582 } | |
| 583 } | |
| 584 | |
| 585 return 0; | |
| 586 } | |
| 587 | |
| 588 static fz_pixmap * | |
| 589 pdf_redact_image_imp(fz_context *ctx, fz_matrix ctm, fz_image *image, fz_pixmap *pixmap, fz_pixmap **pmask, fz_quad q) | |
| 590 { | |
| 591 fz_matrix inv_ctm; | |
| 592 fz_irect r; | |
| 593 int x, y, k, n, bpp; | |
| 594 unsigned char white; | |
| 595 fz_pixmap *mask = *pmask; | |
| 596 int pixmap_cloned = 0; | |
| 597 | |
| 598 if (!pixmap) | |
| 599 { | |
| 600 fz_pixmap *original = fz_get_pixmap_from_image(ctx, image, NULL, NULL, NULL, NULL); | |
| 601 int imagemask = image->imagemask; | |
| 602 | |
| 603 fz_try(ctx) | |
| 604 { | |
| 605 pixmap = fz_clone_pixmap(ctx, original); | |
| 606 if (imagemask) | |
| 607 fz_invert_pixmap_alpha(ctx, pixmap); | |
| 608 } | |
| 609 fz_always(ctx) | |
| 610 fz_drop_pixmap(ctx, original); | |
| 611 fz_catch(ctx) | |
| 612 fz_rethrow(ctx); | |
| 613 pixmap_cloned = 1; | |
| 614 } | |
| 615 | |
| 616 if (!mask && image->mask) | |
| 617 { | |
| 618 fz_pixmap *original = fz_get_pixmap_from_image(ctx, image->mask, NULL, NULL, NULL, NULL); | |
| 619 | |
| 620 fz_try(ctx) | |
| 621 { | |
| 622 mask = fz_clone_pixmap(ctx, original); | |
| 623 *pmask = mask; | |
| 624 } | |
| 625 fz_always(ctx) | |
| 626 { | |
| 627 fz_drop_pixmap(ctx, original); | |
| 628 } | |
| 629 fz_catch(ctx) | |
| 630 { | |
| 631 if (pixmap_cloned) | |
| 632 fz_drop_pixmap(ctx, pixmap); | |
| 633 fz_rethrow(ctx); | |
| 634 } | |
| 635 } | |
| 636 | |
| 637 /* If we have a 1x1 image, to which a mask is being applied | |
| 638 * then it's the mask we really want to change, not the | |
| 639 * image. We might have just a small section of the image | |
| 640 * being covered, and setting the whole thing to white | |
| 641 * will blank stuff outside the desired area. */ | |
| 642 if (!mask || pixmap->w > 1 || pixmap->h > 1) | |
| 643 { | |
| 644 n = pixmap->n - pixmap->alpha; | |
| 645 bpp = pixmap->n; | |
| 646 if (fz_colorspace_is_subtractive(ctx, pixmap->colorspace)) | |
| 647 white = 0; | |
| 648 else | |
| 649 white = 255; | |
| 650 | |
| 651 inv_ctm = fz_post_scale(fz_invert_matrix(ctm), pixmap->w, pixmap->h); | |
| 652 r = fz_round_rect(fz_transform_rect(fz_rect_from_quad(q), inv_ctm)); | |
| 653 r.x0 = fz_clampi(r.x0, 0, pixmap->w); | |
| 654 r.x1 = fz_clampi(r.x1, 0, pixmap->w); | |
| 655 r.y1 = fz_clampi(pixmap->h - r.y1, 0, pixmap->h); | |
| 656 r.y0 = fz_clampi(pixmap->h - r.y0, 0, pixmap->h); | |
| 657 for (y = r.y1; y < r.y0; ++y) | |
| 658 { | |
| 659 for (x = r.x0; x < r.x1; ++x) | |
| 660 { | |
| 661 unsigned char *s = &pixmap->samples[(size_t)y * pixmap->stride + (size_t)x * bpp]; | |
| 662 for (k = 0; k < n; ++k) | |
| 663 s[k] = white; | |
| 664 if (pixmap->alpha) | |
| 665 s[k] = 255; | |
| 666 } | |
| 667 } | |
| 668 } | |
| 669 | |
| 670 if (mask) | |
| 671 { | |
| 672 inv_ctm = fz_post_scale(fz_invert_matrix(ctm), mask->w, mask->h); | |
| 673 r = fz_round_rect(fz_transform_rect(fz_rect_from_quad(q), inv_ctm)); | |
| 674 r.x0 = fz_clampi(r.x0, 0, mask->w); | |
| 675 r.x1 = fz_clampi(r.x1, 0, mask->w); | |
| 676 r.y1 = fz_clampi(mask->h - r.y1, 0, mask->h); | |
| 677 r.y0 = fz_clampi(mask->h - r.y0, 0, mask->h); | |
| 678 for (y = r.y1; y < r.y0; ++y) | |
| 679 { | |
| 680 unsigned char *s = &mask->samples[(size_t)y * mask->stride + (size_t)r.x0]; | |
| 681 memset(s, 0xff, r.x1-r.x0); | |
| 682 } | |
| 683 } | |
| 684 | |
| 685 return pixmap; | |
| 686 } | |
| 687 | |
| 688 static fz_image * | |
| 689 pdf_redact_image_filter_remove(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip) | |
| 690 { | |
| 691 fz_pixmap *redacted = NULL; | |
| 692 struct redact_filter_state *red = opaque; | |
| 693 pdf_page *page = red->page; | |
| 694 pdf_annot *annot; | |
| 695 pdf_obj *qp; | |
| 696 fz_rect area; | |
| 697 fz_rect r; | |
| 698 int i, n; | |
| 699 | |
| 700 fz_var(redacted); | |
| 701 | |
| 702 area = fz_transform_rect(fz_unit_rect, ctm); | |
| 703 | |
| 704 for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) | |
| 705 { | |
| 706 if (red->target != NULL && red->target != annot) | |
| 707 continue; | |
| 708 if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) | |
| 709 { | |
| 710 qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); | |
| 711 n = pdf_array_len(ctx, qp); | |
| 712 if (n > 0) | |
| 713 { | |
| 714 for (i = 0; i < n; i += 8) | |
| 715 { | |
| 716 r = fz_rect_from_quad(pdf_to_quad(ctx, qp, i)); | |
| 717 r = fz_intersect_rect(r, area); | |
| 718 if (!fz_is_empty_rect(r)) | |
| 719 return NULL; | |
| 720 } | |
| 721 } | |
| 722 else | |
| 723 { | |
| 724 r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); | |
| 725 r = fz_intersect_rect(r, area); | |
| 726 if (!fz_is_empty_rect(r)) | |
| 727 return NULL; | |
| 728 } | |
| 729 } | |
| 730 } | |
| 731 | |
| 732 return fz_keep_image(ctx, image); | |
| 733 } | |
| 734 | |
| 735 static fz_image * | |
| 736 pdf_redact_image_filter_remove_invisible(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip) | |
| 737 { | |
| 738 fz_pixmap *redacted = NULL; | |
| 739 struct redact_filter_state *red = opaque; | |
| 740 pdf_page *page = red->page; | |
| 741 pdf_annot *annot; | |
| 742 pdf_obj *qp; | |
| 743 fz_rect area; | |
| 744 fz_rect r; | |
| 745 int i, n; | |
| 746 | |
| 747 fz_var(redacted); | |
| 748 | |
| 749 area = fz_transform_rect(fz_unit_rect, ctm); | |
| 750 | |
| 751 /* Restrict the are of the image to that which can actually be seen. */ | |
| 752 area = fz_intersect_rect(area, clip); | |
| 753 | |
| 754 for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) | |
| 755 { | |
| 756 if (red->target != NULL && red->target != annot) | |
| 757 continue; | |
| 758 if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) | |
| 759 { | |
| 760 qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); | |
| 761 n = pdf_array_len(ctx, qp); | |
| 762 if (n > 0) | |
| 763 { | |
| 764 for (i = 0; i < n; i += 8) | |
| 765 { | |
| 766 r = fz_rect_from_quad(pdf_to_quad(ctx, qp, i)); | |
| 767 r = fz_intersect_rect(r, area); | |
| 768 if (!fz_is_empty_rect(r)) | |
| 769 return NULL; | |
| 770 } | |
| 771 } | |
| 772 else | |
| 773 { | |
| 774 r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); | |
| 775 r = fz_intersect_rect(r, area); | |
| 776 if (!fz_is_empty_rect(r)) | |
| 777 return NULL; | |
| 778 } | |
| 779 } | |
| 780 } | |
| 781 | |
| 782 return fz_keep_image(ctx, image); | |
| 783 } | |
| 784 | |
| 785 static fz_image * | |
| 786 pdf_redact_image_filter_pixels(fz_context *ctx, void *opaque, fz_matrix ctm, const char *name, fz_image *image, fz_rect clip) | |
| 787 { | |
| 788 fz_pixmap *redacted = NULL; | |
| 789 fz_pixmap *mask = NULL; | |
| 790 struct redact_filter_state *red = opaque; | |
| 791 pdf_page *page = red->page; | |
| 792 pdf_annot *annot; | |
| 793 pdf_obj *qp; | |
| 794 fz_quad area, q; | |
| 795 fz_rect r; | |
| 796 int i, n; | |
| 797 | |
| 798 fz_var(redacted); | |
| 799 fz_var(mask); | |
| 800 | |
| 801 area = fz_transform_quad(fz_quad_from_rect(fz_unit_rect), ctm); | |
| 802 | |
| 803 /* First see if we can redact the image completely */ | |
| 804 for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) | |
| 805 { | |
| 806 if (red->target != NULL && red->target != annot) | |
| 807 continue; | |
| 808 if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) | |
| 809 { | |
| 810 qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); | |
| 811 n = pdf_array_len(ctx, qp); | |
| 812 if (n > 0) | |
| 813 { | |
| 814 for (i = 0; i < n; i += 8) | |
| 815 { | |
| 816 q = pdf_to_quad(ctx, qp, i); | |
| 817 if (fz_is_quad_inside_quad(area, q)) | |
| 818 return NULL; | |
| 819 } | |
| 820 } | |
| 821 else | |
| 822 { | |
| 823 r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); | |
| 824 q = fz_quad_from_rect(r); | |
| 825 if (fz_is_quad_inside_quad(area, q)) | |
| 826 return NULL; | |
| 827 } | |
| 828 } | |
| 829 } | |
| 830 | |
| 831 /* Blank out redacted parts of the image if necessary */ | |
| 832 fz_try(ctx) | |
| 833 { | |
| 834 for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) | |
| 835 { | |
| 836 if (red->target != NULL && red->target != annot) | |
| 837 continue; | |
| 838 if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) | |
| 839 { | |
| 840 qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); | |
| 841 n = pdf_array_len(ctx, qp); | |
| 842 if (n > 0) | |
| 843 { | |
| 844 for (i = 0; i < n; i += 8) | |
| 845 { | |
| 846 q = pdf_to_quad(ctx, qp, i); | |
| 847 if (fz_is_quad_intersecting_quad(area, q)) | |
| 848 redacted = pdf_redact_image_imp(ctx, ctm, image, redacted, &mask, q); | |
| 849 } | |
| 850 } | |
| 851 else | |
| 852 { | |
| 853 r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); | |
| 854 q = fz_quad_from_rect(r); | |
| 855 if (fz_is_quad_intersecting_quad(area, q)) | |
| 856 redacted = pdf_redact_image_imp(ctx, ctm, image, redacted, &mask, q); | |
| 857 } | |
| 858 } | |
| 859 } | |
| 860 } | |
| 861 fz_catch(ctx) | |
| 862 { | |
| 863 fz_drop_pixmap(ctx, redacted); | |
| 864 fz_drop_pixmap(ctx, mask); | |
| 865 fz_rethrow(ctx); | |
| 866 } | |
| 867 | |
| 868 if (redacted) | |
| 869 { | |
| 870 int imagemask = image->imagemask; | |
| 871 fz_image *imask = fz_keep_image(ctx, image->mask); | |
| 872 | |
| 873 fz_var(imask); | |
| 874 | |
| 875 fz_try(ctx) | |
| 876 { | |
| 877 if (mask) | |
| 878 { | |
| 879 fz_drop_image(ctx, imask); | |
| 880 imask = NULL; | |
| 881 imask = fz_new_image_from_pixmap(ctx, mask, NULL); | |
| 882 } | |
| 883 image = fz_new_image_from_pixmap(ctx, redacted, NULL); | |
| 884 image->imagemask = imagemask; | |
| 885 image->mask = imask; | |
| 886 imask = NULL; | |
| 887 } | |
| 888 fz_always(ctx) | |
| 889 { | |
| 890 fz_drop_pixmap(ctx, redacted); | |
| 891 fz_drop_pixmap(ctx, mask); | |
| 892 fz_drop_image(ctx, imask); | |
| 893 } | |
| 894 fz_catch(ctx) | |
| 895 fz_rethrow(ctx); | |
| 896 return image; | |
| 897 } | |
| 898 | |
| 899 return fz_keep_image(ctx, image); | |
| 900 } | |
| 901 | |
| 902 /* Returns 0 if area does not intersect with any of our redactions. | |
| 903 * Returns 2 if area is completely included within one of our redactions. | |
| 904 * Returns 1 otherwise. */ | |
| 905 static int | |
| 906 rect_touches_redactions(fz_context *ctx, fz_rect area, struct redact_filter_state *red) | |
| 907 { | |
| 908 pdf_annot *annot; | |
| 909 pdf_obj *qp; | |
| 910 fz_quad q; | |
| 911 fz_rect r, s; | |
| 912 int i, n; | |
| 913 pdf_page *page = red->page; | |
| 914 | |
| 915 for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) | |
| 916 { | |
| 917 if (red->target != NULL && red->target != annot) | |
| 918 continue; | |
| 919 if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) | |
| 920 { | |
| 921 qp = pdf_dict_get(ctx, annot->obj, PDF_NAME(QuadPoints)); | |
| 922 n = pdf_array_len(ctx, qp); | |
| 923 if (n > 0) | |
| 924 { | |
| 925 for (i = 0; i < n; i += 8) | |
| 926 { | |
| 927 q = pdf_to_quad(ctx, qp, i); | |
| 928 r = fz_rect_from_quad(q); | |
| 929 s = fz_intersect_rect(r, area); | |
| 930 if (!fz_is_empty_rect(s)) | |
| 931 { | |
| 932 if (fz_contains_rect(r, area)) | |
| 933 return 2; | |
| 934 return 1; | |
| 935 } | |
| 936 } | |
| 937 } | |
| 938 else | |
| 939 { | |
| 940 r = pdf_dict_get_rect(ctx, annot->obj, PDF_NAME(Rect)); | |
| 941 s = fz_intersect_rect(r, area); | |
| 942 if (!fz_is_empty_rect(s)) | |
| 943 { | |
| 944 if (fz_contains_rect(r, area)) | |
| 945 return 2; | |
| 946 return 1; | |
| 947 } | |
| 948 } | |
| 949 } | |
| 950 } | |
| 951 return 0; | |
| 952 } | |
| 953 | |
| 954 static void | |
| 955 pdf_redact_page_links(fz_context *ctx, struct redact_filter_state *red) | |
| 956 { | |
| 957 pdf_obj *annots; | |
| 958 pdf_obj *link; | |
| 959 fz_rect area; | |
| 960 int k; | |
| 961 | |
| 962 annots = pdf_dict_get(ctx, red->page->obj, PDF_NAME(Annots)); | |
| 963 k = 0; | |
| 964 while (k < pdf_array_len(ctx, annots)) | |
| 965 { | |
| 966 link = pdf_array_get(ctx, annots, k); | |
| 967 if (pdf_dict_get(ctx, link, PDF_NAME(Subtype)) == PDF_NAME(Link)) | |
| 968 { | |
| 969 area = pdf_dict_get_rect(ctx, link, PDF_NAME(Rect)); | |
| 970 if (rect_touches_redactions(ctx, area, red)) | |
| 971 { | |
| 972 pdf_array_delete(ctx, annots, k); | |
| 973 continue; | |
| 974 } | |
| 975 } | |
| 976 ++k; | |
| 977 } | |
| 978 } | |
| 979 | |
| 980 static void | |
| 981 pdf_redact_page_annotations(fz_context *ctx, struct redact_filter_state *red) | |
| 982 { | |
| 983 pdf_annot *annot; | |
| 984 fz_rect area; | |
| 985 | |
| 986 restart: | |
| 987 for (annot = pdf_first_annot(ctx, red->page); annot; annot = pdf_next_annot(ctx, annot)) | |
| 988 { | |
| 989 if (pdf_annot_type(ctx, annot) == PDF_ANNOT_FREE_TEXT) | |
| 990 { | |
| 991 area = pdf_dict_get_rect(ctx, pdf_annot_obj(ctx, annot), PDF_NAME(Rect)); | |
| 992 if (rect_touches_redactions(ctx, area, red)) | |
| 993 { | |
| 994 pdf_delete_annot(ctx, red->page, annot); | |
| 995 goto restart; | |
| 996 } | |
| 997 } | |
| 998 } | |
| 999 } | |
| 1000 | |
| 1001 static int culler(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type) | |
| 1002 { | |
| 1003 struct redact_filter_state *red = opaque; | |
| 1004 | |
| 1005 switch (type) | |
| 1006 { | |
| 1007 case FZ_CULL_PATH_FILL: | |
| 1008 case FZ_CULL_PATH_STROKE: | |
| 1009 case FZ_CULL_PATH_FILL_STROKE: | |
| 1010 case FZ_CULL_CLIP_PATH_FILL: | |
| 1011 case FZ_CULL_CLIP_PATH_STROKE: | |
| 1012 case FZ_CULL_CLIP_PATH_FILL_STROKE: | |
| 1013 if (red->line_art == PDF_REDACT_LINE_ART_REMOVE_IF_COVERED) | |
| 1014 return (rect_touches_redactions(ctx, bbox, red) == 2); | |
| 1015 else if (red->line_art == PDF_REDACT_LINE_ART_REMOVE_IF_TOUCHED) | |
| 1016 return (rect_touches_redactions(ctx, bbox, red) != 0); | |
| 1017 return 0; | |
| 1018 default: | |
| 1019 return 0; | |
| 1020 } | |
| 1021 } | |
| 1022 | |
| 1023 static | |
| 1024 void init_redact_filter(fz_context *ctx, pdf_redact_options *redact_opts, struct redact_filter_state *red, pdf_page *page, pdf_annot *target) | |
| 1025 { | |
| 1026 int black_boxes = redact_opts ? redact_opts->black_boxes : 0; | |
| 1027 int image_method = redact_opts ? redact_opts->image_method : PDF_REDACT_IMAGE_PIXELS; | |
| 1028 int line_art = redact_opts ? redact_opts->line_art : PDF_REDACT_LINE_ART_NONE; | |
| 1029 int text = redact_opts ? redact_opts->text : PDF_REDACT_TEXT_REMOVE; | |
| 1030 | |
| 1031 memset(&red->filter_opts, 0, sizeof red->filter_opts); | |
| 1032 memset(&red->sanitize_opts, 0, sizeof red->sanitize_opts); | |
| 1033 | |
| 1034 red->filter_opts.recurse = 0; /* don't redact patterns, softmasks, and type3 fonts */ | |
| 1035 red->filter_opts.instance_forms = 1; /* redact xobjects with instancing */ | |
| 1036 red->filter_opts.ascii = 1; | |
| 1037 red->filter_opts.opaque = red; | |
| 1038 red->filter_opts.filters = red->filter_list; | |
| 1039 if (black_boxes) | |
| 1040 red->filter_opts.complete = pdf_redact_end_page; | |
| 1041 red->line_art = line_art; | |
| 1042 red->text = text; | |
| 1043 | |
| 1044 red->sanitize_opts.opaque = red; | |
| 1045 if (text == PDF_REDACT_TEXT_REMOVE) | |
| 1046 red->sanitize_opts.text_filter = pdf_redact_text_filter; | |
| 1047 if (image_method == PDF_REDACT_IMAGE_PIXELS) | |
| 1048 red->sanitize_opts.image_filter = pdf_redact_image_filter_pixels; | |
| 1049 if (image_method == PDF_REDACT_IMAGE_REMOVE) | |
| 1050 red->sanitize_opts.image_filter = pdf_redact_image_filter_remove; | |
| 1051 if (image_method == PDF_REDACT_IMAGE_REMOVE_UNLESS_INVISIBLE) | |
| 1052 red->sanitize_opts.image_filter = pdf_redact_image_filter_remove_invisible; | |
| 1053 red->sanitize_opts.culler = culler; | |
| 1054 | |
| 1055 red->filter_list[0].filter = pdf_new_sanitize_filter; | |
| 1056 red->filter_list[0].options = &red->sanitize_opts; | |
| 1057 red->filter_list[1].filter = NULL; | |
| 1058 red->filter_list[1].options = NULL; | |
| 1059 | |
| 1060 red->page = page; | |
| 1061 red->target = target; | |
| 1062 } | |
| 1063 | |
| 1064 static int | |
| 1065 pdf_apply_redaction_imp(fz_context *ctx, pdf_page *page, pdf_annot *target, pdf_redact_options *redact_opts) | |
| 1066 { | |
| 1067 pdf_annot *annot; | |
| 1068 int has_redactions = 0; | |
| 1069 struct redact_filter_state red; | |
| 1070 pdf_document *doc = page->doc; | |
| 1071 | |
| 1072 for (annot = pdf_first_annot(ctx, page); annot; annot = pdf_next_annot(ctx, annot)) { | |
| 1073 if (target != NULL && target != annot) | |
| 1074 continue; | |
| 1075 if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) | |
| 1076 has_redactions = 1; | |
| 1077 } | |
| 1078 | |
| 1079 if (!has_redactions) | |
| 1080 return 0; | |
| 1081 | |
| 1082 init_redact_filter(ctx, redact_opts, &red, page, target); | |
| 1083 | |
| 1084 if (target) | |
| 1085 pdf_begin_operation(ctx, doc, "Apply redaction"); | |
| 1086 else | |
| 1087 pdf_begin_operation(ctx, doc, "Apply redactions on page"); | |
| 1088 fz_try(ctx) | |
| 1089 { | |
| 1090 pdf_filter_page_contents(ctx, doc, page, &red.filter_opts); | |
| 1091 pdf_redact_page_links(ctx, &red); | |
| 1092 pdf_redact_page_annotations(ctx, &red); | |
| 1093 | |
| 1094 annot = pdf_first_annot(ctx, page); | |
| 1095 while (annot) | |
| 1096 { | |
| 1097 if (target == NULL || annot == target) | |
| 1098 { | |
| 1099 if (pdf_dict_get(ctx, annot->obj, PDF_NAME(Subtype)) == PDF_NAME(Redact)) | |
| 1100 { | |
| 1101 pdf_delete_annot(ctx, page, annot); | |
| 1102 annot = pdf_first_annot(ctx, page); | |
| 1103 continue; | |
| 1104 } | |
| 1105 } | |
| 1106 annot = pdf_next_annot(ctx, annot); | |
| 1107 } | |
| 1108 | |
| 1109 doc->redacted = 1; | |
| 1110 pdf_end_operation(ctx, doc); | |
| 1111 } | |
| 1112 fz_catch(ctx) | |
| 1113 { | |
| 1114 pdf_abandon_operation(ctx, doc); | |
| 1115 fz_rethrow(ctx); | |
| 1116 } | |
| 1117 | |
| 1118 return 1; | |
| 1119 } | |
| 1120 | |
| 1121 int | |
| 1122 pdf_redact_page(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_redact_options *redact_opts) | |
| 1123 { | |
| 1124 if (page == NULL || page->doc != doc) | |
| 1125 fz_throw(ctx, FZ_ERROR_ARGUMENT, "Can't redact a page not from the doc"); | |
| 1126 return pdf_apply_redaction_imp(ctx, page, NULL, redact_opts); | |
| 1127 } | |
| 1128 | |
| 1129 int | |
| 1130 pdf_apply_redaction(fz_context *ctx, pdf_annot *annot, pdf_redact_options *redact_opts) | |
| 1131 { | |
| 1132 return pdf_apply_redaction_imp(ctx, annot->page, annot, redact_opts); | |
| 1133 } | |
| 1134 | |
| 1135 /* Hard clipping of pages */ | |
| 1136 | |
| 1137 struct clip_filter_state { | |
| 1138 pdf_filter_options filter_opts; | |
| 1139 pdf_sanitize_filter_options sanitize_opts; | |
| 1140 pdf_filter_factory filter_list[2]; | |
| 1141 pdf_page *page; | |
| 1142 fz_rect clip; | |
| 1143 }; | |
| 1144 | |
| 1145 static int clip_culler(fz_context *ctx, void *opaque, fz_rect bbox, fz_cull_type type) | |
| 1146 { | |
| 1147 struct clip_filter_state *hc = opaque; | |
| 1148 | |
| 1149 switch (type) | |
| 1150 { | |
| 1151 case FZ_CULL_PATH_FILL: | |
| 1152 case FZ_CULL_PATH_STROKE: | |
| 1153 case FZ_CULL_PATH_FILL_STROKE: | |
| 1154 case FZ_CULL_CLIP_PATH_FILL: | |
| 1155 case FZ_CULL_CLIP_PATH_STROKE: | |
| 1156 case FZ_CULL_CLIP_PATH_FILL_STROKE: | |
| 1157 case FZ_CULL_GLYPH: | |
| 1158 case FZ_CULL_IMAGE: | |
| 1159 case FZ_CULL_SHADING: | |
| 1160 return (fz_is_empty_rect(fz_intersect_rect(bbox, hc->clip))); | |
| 1161 default: | |
| 1162 return 0; | |
| 1163 } | |
| 1164 } | |
| 1165 | |
| 1166 static | |
| 1167 void init_clip_filter(fz_context *ctx, struct clip_filter_state *hc, pdf_page *page, fz_rect *clip) | |
| 1168 { | |
| 1169 memset(&hc->filter_opts, 0, sizeof hc->filter_opts); | |
| 1170 memset(&hc->sanitize_opts, 0, sizeof hc->sanitize_opts); | |
| 1171 | |
| 1172 hc->filter_opts.recurse = 0; /* don't redact patterns, softmasks, and type3 fonts */ | |
| 1173 hc->filter_opts.instance_forms = 1; /* redact xobjects with instancing */ | |
| 1174 hc->filter_opts.ascii = 0; | |
| 1175 hc->filter_opts.opaque = hc; | |
| 1176 hc->filter_opts.filters = hc->filter_list; | |
| 1177 hc->clip = *clip; | |
| 1178 | |
| 1179 hc->sanitize_opts.opaque = hc; | |
| 1180 hc->sanitize_opts.culler = clip_culler; | |
| 1181 | |
| 1182 hc->filter_list[0].filter = pdf_new_sanitize_filter; | |
| 1183 hc->filter_list[0].options = &hc->sanitize_opts; | |
| 1184 hc->filter_list[1].filter = NULL; | |
| 1185 hc->filter_list[1].options = NULL; | |
| 1186 | |
| 1187 hc->page = page; | |
| 1188 } | |
| 1189 | |
| 1190 static void | |
| 1191 pdf_clip_page_links(fz_context *ctx, struct clip_filter_state *hc) | |
| 1192 { | |
| 1193 pdf_obj *annots; | |
| 1194 pdf_obj *link; | |
| 1195 fz_rect area; | |
| 1196 int k; | |
| 1197 | |
| 1198 annots = pdf_dict_get(ctx, hc->page->obj, PDF_NAME(Annots)); | |
| 1199 k = 0; | |
| 1200 while (k < pdf_array_len(ctx, annots)) | |
| 1201 { | |
| 1202 link = pdf_array_get(ctx, annots, k); | |
| 1203 if (pdf_dict_get(ctx, link, PDF_NAME(Subtype)) == PDF_NAME(Link)) | |
| 1204 { | |
| 1205 area = pdf_dict_get_rect(ctx, link, PDF_NAME(Rect)); | |
| 1206 if (fz_is_empty_rect(fz_intersect_rect(area, hc->clip))) | |
| 1207 { | |
| 1208 pdf_array_delete(ctx, annots, k); | |
| 1209 continue; | |
| 1210 } | |
| 1211 } | |
| 1212 ++k; | |
| 1213 } | |
| 1214 } | |
| 1215 | |
| 1216 static void | |
| 1217 pdf_clip_page_annotations(fz_context *ctx, struct clip_filter_state *hc) | |
| 1218 { | |
| 1219 pdf_annot *annot; | |
| 1220 fz_rect area; | |
| 1221 | |
| 1222 restart: | |
| 1223 for (annot = pdf_first_annot(ctx, hc->page); annot; annot = pdf_next_annot(ctx, annot)) | |
| 1224 { | |
| 1225 if (pdf_annot_type(ctx, annot) == PDF_ANNOT_FREE_TEXT) | |
| 1226 { | |
| 1227 area = pdf_dict_get_rect(ctx, pdf_annot_obj(ctx, annot), PDF_NAME(Rect)); | |
| 1228 if (fz_is_empty_rect(fz_intersect_rect(area, hc->clip))) | |
| 1229 { | |
| 1230 pdf_delete_annot(ctx, hc->page, annot); | |
| 1231 goto restart; | |
| 1232 } | |
| 1233 } | |
| 1234 } | |
| 1235 } | |
| 1236 | |
| 1237 void | |
| 1238 pdf_clip_page(fz_context *ctx, pdf_page *page, fz_rect *clip) | |
| 1239 { | |
| 1240 pdf_document *doc; | |
| 1241 struct clip_filter_state hc; | |
| 1242 | |
| 1243 if (page == NULL) | |
| 1244 return; | |
| 1245 | |
| 1246 doc = page->doc; | |
| 1247 | |
| 1248 init_clip_filter(ctx, &hc, page, clip); | |
| 1249 | |
| 1250 pdf_begin_operation(ctx, doc, "Apply hard clip to page"); | |
| 1251 fz_try(ctx) | |
| 1252 { | |
| 1253 pdf_filter_page_contents(ctx, doc, page, &hc.filter_opts); | |
| 1254 pdf_clip_page_links(ctx, &hc); | |
| 1255 pdf_clip_page_annotations(ctx, &hc); | |
| 1256 pdf_end_operation(ctx, doc); | |
| 1257 } | |
| 1258 fz_catch(ctx) | |
| 1259 { | |
| 1260 pdf_abandon_operation(ctx, doc); | |
| 1261 fz_rethrow(ctx); | |
| 1262 } | |
| 1263 } |
