comparison mupdf-source/source/tools/pdfmerge.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2021 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 /*
24 * PDF merge tool: Tool for merging pdf content.
25 *
26 * Simple test bed to work with merging pages from multiple PDFs into a single PDF.
27 */
28
29 #include "mupdf/fitz.h"
30 #include "mupdf/pdf.h"
31
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <string.h>
35
36 static int usage(void)
37 {
38 fprintf(stderr,
39 "usage: mutool merge [-o output.pdf] [-O options] input.pdf [pages] [input2.pdf] [pages2] ...\n"
40 "\t-o -\tname of PDF file to create\n"
41 "\t-O -\tcomma separated list of output options\n"
42 "\tinput.pdf\tname of input file from which to copy pages\n"
43 "\tpages\tcomma separated list of page numbers and ranges\n\n"
44 );
45 fputs(fz_pdf_write_options_usage, stderr);
46 return 1;
47 }
48
49 static pdf_document *doc_des = NULL;
50 static pdf_document *doc_src = NULL;
51 int output_page_count = 0;
52
53 static void page_merge(fz_context *ctx, int page_from, int page_to, pdf_graft_map *graft_map)
54 {
55 pdf_graft_mapped_page(ctx, graft_map, page_to - 1, doc_src, page_from - 1);
56 }
57
58 /*
59 While we are processing, it_src tracks the current position we are copying from.
60
61 items is the list of things we have stepped through to get to the current position.
62 A prefix of these items may have already been copied across. copied_to_depth is
63 the length of that prefix. 0 < = copied_to_depth <= len.
64 */
65 typedef struct
66 {
67 fz_context *ctx;
68 fz_outline_iterator *it_dst;
69 fz_outline_iterator *it_src;
70 const char *range;
71 int page_count;
72 int max;
73 int len;
74 fz_outline_item *items;
75 int copied_to_depth;
76 int page_output_base;
77 } cor_state;
78
79 /* Given a range, and a page in the range 1 to count, return the position
80 * which the page occupies in the output range (or 0 for not in range).
81 * So page 12 within 10-20 would return 3.
82 */
83 static int
84 position_in_range(fz_context *ctx, const char *range, int count, int page)
85 {
86 int start, end;
87 int n = 0;
88
89 while ((range = fz_parse_page_range(ctx, range, &start, &end, count)))
90 {
91 if (start < end)
92 {
93 if (start <= page && page <= end)
94 return n + page - start + 1;
95 n += end - start + 1;
96 }
97 else
98 {
99 if (end <= page && page <= start)
100 return n + page - end + 1;
101 n += start - end + 1;
102 }
103 }
104
105 return 0;
106 }
107
108 static void
109 copy_item(cor_state *cor)
110 {
111 fz_context *ctx = cor->ctx;
112
113 while (cor->copied_to_depth < cor->len)
114 {
115 /* All items copied in a run get the same uri - that of the last one. */
116 fz_outline_item item = cor->items[cor->copied_to_depth];
117 item.uri = cor->items[cor->len-1].uri;
118 fz_outline_iterator_insert(ctx, cor->it_dst, &item);
119 cor->copied_to_depth++;
120 fz_outline_iterator_prev(ctx, cor->it_dst);
121 fz_outline_iterator_down(ctx, cor->it_dst);
122 }
123 }
124
125 static char *
126 rewrite_page(fz_context *ctx, const char *uri, int n)
127 {
128 const char *p;
129
130 if (uri == NULL)
131 return NULL;
132
133 if (strncmp(uri, "#page=", 6) != 0)
134 return fz_strdup(ctx, uri);
135 p = strchr(uri+6, '&');
136 if (p == NULL)
137 return fz_asprintf(ctx, "#page=%d", n);
138
139 return fz_asprintf(ctx, "#page=%d%s", n, p);
140 }
141
142 static void
143 do_copy_outline_range(cor_state *cor)
144 {
145 fz_context *ctx = cor->ctx;
146
147 do
148 {
149 int has_children;
150 float x, y;
151 fz_outline_item *item = fz_outline_iterator_item(ctx, cor->it_src);
152 int page_num = fz_page_number_from_location(ctx, (fz_document *)doc_src, fz_resolve_link(ctx, (fz_document *)doc_src, item->uri, &x, &y));
153 int page_in_range = position_in_range(ctx, cor->range, cor->page_count, page_num+1);
154 int new_page_number = page_in_range + cor->page_output_base;
155
156 if (cor->len == cor->max)
157 {
158 int newmax = cor->max ? cor->max * 2 : 8;
159 cor->items = fz_realloc_array(ctx, cor->items, newmax, fz_outline_item);
160 cor->max = newmax;
161 }
162 cor->len++;
163 cor->items[cor->len-1].title = NULL;
164 cor->items[cor->len-1].uri = NULL;
165 cor->items[cor->len-1].is_open = item->is_open;
166 cor->items[cor->len-1].title = item->title ? fz_strdup(ctx, item->title) : NULL;
167 cor->items[cor->len-1].uri = rewrite_page(ctx, item->uri, new_page_number);
168
169 if (page_in_range != 0)
170 copy_item(cor);
171
172 has_children = fz_outline_iterator_down(ctx, cor->it_src);
173 if (has_children == 0)
174 do_copy_outline_range(cor);
175 if (has_children >= 0)
176 fz_outline_iterator_up(ctx, cor->it_src);
177
178 cor->len--;
179 if (cor->copied_to_depth > cor->len)
180 {
181 cor->copied_to_depth = cor->len;
182 fz_outline_iterator_up(ctx, cor->it_dst);
183 }
184 fz_outline_iterator_next(ctx, cor->it_dst);
185 fz_free(ctx, cor->items[cor->len].title);
186 fz_free(ctx, cor->items[cor->len].uri);
187 }
188 while (fz_outline_iterator_next(ctx, cor->it_src) == 0);
189 }
190
191 static void
192 copy_outline_range(fz_context *ctx, fz_outline_iterator *it_dst, fz_outline_iterator *it_src, const char *range, int page_count, int page_output_base)
193 {
194 cor_state cor;
195
196 cor.ctx = ctx;
197 cor.it_dst = it_dst;
198 cor.it_src = it_src;
199 cor.max = 0;
200 cor.len = 0;
201 cor.copied_to_depth = 0;
202 cor.range = range;
203 cor.items = NULL;
204 cor.page_count = page_count;
205 cor.page_output_base = page_output_base;
206
207 fz_try(ctx)
208 do_copy_outline_range(&cor);
209 fz_always(ctx)
210 {
211 int i;
212
213 for (i = 0; i < cor.len; i++)
214 {
215 fz_free(ctx, cor.items[i].title);
216 fz_free(ctx, cor.items[i].uri);
217 }
218 fz_free(ctx, cor.items);
219 }
220 fz_catch(ctx)
221 fz_rethrow(ctx);
222 }
223
224
225 static void merge_range(fz_context *ctx, const char *range)
226 {
227 int start, end, i, count;
228 pdf_graft_map *graft_map;
229 const char *r;
230 fz_outline_iterator *it_src = NULL;
231 fz_outline_iterator *it_dst = NULL;
232 int pages_merged = 0;
233
234 count = pdf_count_pages(ctx, doc_src);
235 graft_map = pdf_new_graft_map(ctx, doc_des);
236
237 fz_var(it_src);
238 fz_var(it_dst);
239
240 fz_try(ctx)
241 {
242 r = range;
243 while ((r = fz_parse_page_range(ctx, r, &start, &end, count)))
244 {
245 if (start < end)
246 for (i = start; i <= end; ++i)
247 {
248 page_merge(ctx, i, 0, graft_map);
249 pages_merged++;
250 }
251 else
252 for (i = start; i >= end; --i)
253 {
254 page_merge(ctx, i, 0, graft_map);
255 pages_merged++;
256 }
257 }
258
259 it_src = fz_new_outline_iterator(ctx, (fz_document *)doc_src);
260 if (it_src == NULL)
261 break; /* Should never happen */
262 it_dst = fz_new_outline_iterator(ctx, (fz_document *)doc_des);
263 if (it_dst == NULL)
264 break; /* Should never happen */
265
266 /* Run to the end of it_dst. */
267 if (fz_outline_iterator_item(ctx, it_dst) != NULL)
268 {
269 while (fz_outline_iterator_next(ctx, it_dst) == 0);
270 }
271
272 if (fz_outline_iterator_item(ctx, it_src) != NULL)
273 copy_outline_range(ctx, it_dst, it_src, range, count, output_page_count);
274
275 output_page_count += pages_merged;
276 }
277 fz_always(ctx)
278 {
279 fz_drop_outline_iterator(ctx, it_src);
280 fz_drop_outline_iterator(ctx, it_dst);
281 pdf_drop_graft_map(ctx, graft_map);
282 }
283 fz_catch(ctx)
284 {
285 fz_rethrow(ctx);
286 }
287 }
288
289 int pdfmerge_main(int argc, char **argv)
290 {
291 pdf_write_options opts = pdf_default_write_options;
292 char *output = "out.pdf";
293 char *flags = "";
294 char *input;
295 int c;
296 fz_context *ctx;
297
298 while ((c = fz_getopt(argc, argv, "o:O:")) != -1)
299 {
300 switch (c)
301 {
302 case 'o': output = fz_optpath(fz_optarg); break;
303 case 'O': flags = fz_optarg; break;
304 default: return usage();
305 }
306 }
307
308 if (fz_optind == argc)
309 return usage();
310
311 ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
312 if (!ctx)
313 {
314 fprintf(stderr, "error: Cannot initialize MuPDF context.\n");
315 exit(1);
316 }
317
318 pdf_parse_write_options(ctx, &opts, flags);
319
320 fz_try(ctx)
321 {
322 doc_des = pdf_create_document(ctx);
323 }
324 fz_catch(ctx)
325 {
326 fz_report_error(ctx);
327 fz_log_error(ctx, "Cannot create destination document.");
328 fz_flush_warnings(ctx);
329 fz_drop_context(ctx);
330 exit(1);
331 }
332
333 /* Step through the source files */
334 while (fz_optind < argc)
335 {
336 doc_src = NULL;
337 input = argv[fz_optind++];
338
339 fz_try(ctx)
340 {
341 doc_src = pdf_open_document(ctx, input);
342 if (fz_optind == argc || !fz_is_page_range(ctx, argv[fz_optind]))
343 merge_range(ctx, "1-N");
344 else
345 merge_range(ctx, argv[fz_optind++]);
346 }
347 fz_always(ctx)
348 pdf_drop_document(ctx, doc_src);
349 fz_catch(ctx)
350 {
351 fz_report_error(ctx);
352 fz_log_error_printf(ctx, "Cannot merge document '%s'.", input);
353 }
354 }
355
356 if (fz_optind == argc)
357 {
358 fz_try(ctx)
359 pdf_save_document(ctx, doc_des, output, &opts);
360 fz_catch(ctx)
361 {
362 fz_report_error(ctx);
363 fz_log_error_printf(ctx, "Cannot save output file: '%s'.", output);
364 }
365 }
366
367 pdf_drop_document(ctx, doc_des);
368 fz_flush_warnings(ctx);
369 fz_drop_context(ctx);
370 return 0;
371 }