comparison mupdf-source/source/tools/pdftrim.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2023 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 /* PDF content trimming tool. */
24
25 #include "mupdf/fitz.h"
26 #include "mupdf/pdf.h"
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <ctype.h>
32
33 typedef struct
34 {
35 fz_rect cullbox;
36 int exclude;
37 } culler_data_t;
38
39 static int
40 culler(fz_context *ctx, void *opaque, fz_rect r, fz_cull_type type)
41 {
42 culler_data_t *cd = (culler_data_t *)opaque;
43
44 r = fz_intersect_rect(r, cd->cullbox);
45 if (cd->exclude)
46 {
47 if (!fz_is_empty_rect(r))
48 return 1;
49 }
50 else
51 {
52 if (fz_is_empty_rect(r))
53 return 1;
54 }
55
56 return 0;
57 }
58
59 static void
60 rewrite_page_streams(fz_context *ctx, pdf_document *doc, int page_num, fz_box_type box, float *margins, int exclude, int fallback)
61 {
62 pdf_page *page = pdf_load_page(ctx, doc, page_num);
63 pdf_filter_options options = { 0 };
64 pdf_filter_factory list[2] = { 0 };
65 pdf_sanitize_filter_options sopts = { 0 };
66 pdf_annot *annot;
67 culler_data_t cd;
68
69 cd.exclude = exclude;
70 sopts.opaque = &cd;
71 sopts.culler = culler;
72 options.filters = list;
73 options.recurse = 1;
74 list[0].filter = pdf_new_sanitize_filter;
75 list[0].options = &sopts;
76
77 fz_try(ctx)
78 {
79 switch (box)
80 {
81 default:
82 case FZ_MEDIA_BOX:
83 cd.cullbox = pdf_dict_get_rect(ctx, page->obj, PDF_NAME(MediaBox));
84 break;
85 case FZ_BLEED_BOX:
86 cd.cullbox = pdf_dict_get_rect(ctx, page->obj, PDF_NAME(BleedBox));
87 break;
88 case FZ_CROP_BOX:
89 cd.cullbox = pdf_dict_get_rect(ctx, page->obj, PDF_NAME(CropBox));
90 break;
91 case FZ_TRIM_BOX:
92 cd.cullbox = pdf_dict_get_rect(ctx, page->obj, PDF_NAME(TrimBox));
93 break;
94 case FZ_ART_BOX:
95 cd.cullbox = pdf_dict_get_rect(ctx, page->obj, PDF_NAME(ArtBox));
96 break;
97 }
98
99 cd.cullbox.x0 += margins[3];
100 cd.cullbox.y0 += margins[2];
101 cd.cullbox.x1 -= margins[1];
102 cd.cullbox.y1 -= margins[0];
103
104 if (fz_is_empty_rect(cd.cullbox) && fallback && box != FZ_MEDIA_BOX)
105 {
106 fprintf(stderr, "Falling back to Mediabox for page %d\n", page_num);
107 cd.cullbox = pdf_dict_get_rect(ctx, page->obj, PDF_NAME(MediaBox));
108 }
109 if (fz_is_empty_rect(cd.cullbox))
110 {
111 fprintf(stderr, "No box found for page %d\n", page_num);
112 break;
113 }
114
115 pdf_filter_page_contents(ctx, doc, page, &options);
116
117 for (annot = pdf_first_annot(ctx, page); annot != NULL; annot = pdf_next_annot(ctx, annot))
118 pdf_filter_annot_contents(ctx, doc, annot, &options);
119 }
120 fz_always(ctx)
121 fz_drop_page(ctx, &page->super);
122 fz_catch(ctx)
123 fz_rethrow(ctx);
124 }
125
/*
 * Advance past optional whitespace, at most one comma, and any whitespace
 * that follows it.
 *
 * s must be a NUL-terminated string. Returns a pointer into the same
 * string at the first character after the skipped separator (possibly the
 * terminating NUL, or s itself when nothing was skipped).
 */
static char *
skip_comma(char *s)
{
	/* <ctype.h> functions require an unsigned char value (or EOF); a plain
	 * char may be negative for bytes >= 0x80, which is undefined. */
	while (isspace((unsigned char)*s))
		s++;
	if (*s == ',')
		s++;
	while (isspace((unsigned char)*s))
		s++;
	return s;
}
137
/*
 * Parse a margin specification into margin[0..3] (T, R, B, L order).
 *
 * Accepted shapes, with entries separated as skip_comma() allows:
 *   one value        -> all four margins
 *   two values V,H   -> [0] and [2] take V, [1] and [3] take H
 *   three values     -> [0], [1], [2] as given, [3] is set to 0
 *   four values      -> T, R, B, L
 * Anything after the fourth value is ignored.
 */
static void
read_margins(float *margin, char *arg)
{
	char *rest;
	float all = fz_strtof(arg, &rest);

	/* Start from the single-value interpretation. */
	margin[0] = all;
	margin[1] = all;
	margin[2] = all;
	margin[3] = all;

	rest = skip_comma(rest);
	if (*rest != 0)
	{
		/* Second value: horizontal margin, used for right and left. */
		margin[1] = fz_strtof(rest, &rest);
		margin[3] = margin[1];

		rest = skip_comma(rest);
		if (*rest != 0)
		{
			/* Third value: bottom; left is reset until a fourth is seen. */
			margin[2] = fz_strtof(rest, &rest);
			margin[3] = 0;

			rest = skip_comma(rest);
			if (*rest != 0)
				margin[3] = fz_strtof(rest, &rest);
		}
	}
}
163
/*
 * Print the command line help for "mutool trim" to stderr.
 *
 * Always returns 1 so callers can write "return usage();".
 *
 * The default named for -b must match the initial value of the box
 * variable in pdftrim_main, which is FZ_CROP_BOX.
 */
static int
usage(void)
{
	fprintf(stderr, "usage: mutool trim [options] <input filename>\n");
	fprintf(stderr, "\t-b -\tWhich box to trim to (MediaBox, CropBox(default), BleedBox, TrimBox, ArtBox)\n");
	fprintf(stderr, "\t-m -\tAdd margins to box (+ve for inwards, -ve outwards).\n");
	fprintf(stderr, "\t\t\t<All> or <V>,<H> or <T>,<R>,<B>,<L>\n");
	fprintf(stderr, "\t-e\tExclude contents of box, rather than include them\n");
	fprintf(stderr, "\t-f\tFallback to mediabox if specified box not available\n");
	fprintf(stderr, "\t-o -\tOutput file\n");
	return 1;
}
176
177 int pdftrim_main(int argc, char **argv)
178 {
179 fz_context *ctx = NULL;
180 pdf_document *pdf = NULL;
181 fz_document *doc = NULL;
182 pdf_write_options opts = pdf_default_write_options;
183 int n, i;
184 char *infile = NULL;
185 char *outputfile = NULL;
186 int code = EXIT_SUCCESS;
187 int exclude = 0;
188 const char *boxname = NULL;
189 fz_box_type box = FZ_CROP_BOX;
190 int fallback = 0;
191 float margins[4] = { 0 };
192 int c;
193
194 while ((c = fz_getopt(argc, argv, "b:o:efm:")) != -1)
195 {
196 switch (c)
197 {
198 default: return usage();
199
200 case 'b': boxname = fz_optarg; break;
201 case 'o': outputfile = fz_optpath(fz_optarg); break;
202 case 'e': exclude = 1; break;
203 case 'f': fallback = 1; break;
204 case 'm': read_margins(margins, fz_optarg); break;
205 }
206 }
207
208 if (fz_optind == argc)
209 return usage();
210
211 infile = argv[fz_optind];
212
213 if (boxname)
214 {
215 box = fz_box_type_from_string(boxname);
216 if (box == FZ_UNKNOWN_BOX)
217 {
218 fprintf(stderr, "Unknown box %s specified!\n", boxname);
219 return 1;
220 }
221 }
222
223 /* Set up the options for the file saving. */
224 #if 1
225 opts.do_compress = 1;
226 opts.do_compress_images = 1;
227 opts.do_compress_fonts = 1;
228 opts.do_garbage = 3;
229 #else
230 opts.do_compress = 0;
231 opts.do_pretty = 1;
232 opts.do_compress = 0;
233 opts.do_compress_images = 1;
234 opts.do_compress_fonts = 0;
235 opts.do_garbage = 0;
236 opts.do_clean = 1;
237 #endif
238
239 /* Create a MuPDF library context. */
240 ctx = fz_new_context(NULL, NULL, FZ_STORE_DEFAULT);
241 if (!ctx)
242 {
243 fprintf(stderr, "Could not create global context.\n");
244 return EXIT_FAILURE;
245 }
246
247 /* Register the document handlers (only really need PDF, but this is
248 * the simplest way. */
249 fz_register_document_handlers(ctx);
250
251 fz_try(ctx)
252 {
253 /* Load the input document. */
254 doc = fz_open_document(ctx, infile);
255
256 /* Get a PDF specific pointer, and count the pages. */
257 pdf = pdf_document_from_fz_document(ctx, doc);
258 n = fz_count_pages(ctx, doc);
259
260 for (i = 0; i < n; i++)
261 rewrite_page_streams(ctx, pdf, i, box, margins, exclude, fallback);
262
263 pdf_save_document(ctx, pdf, outputfile, &opts);
264 }
265 fz_always(ctx)
266 {
267 fz_drop_document(ctx, doc);
268 }
269 fz_catch(ctx)
270 {
271 fz_report_error(ctx);
272 code = EXIT_FAILURE;
273 }
274 fz_drop_context(ctx);
275
276 return code;
277 }