Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/source/tools/pdfmerge.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 // Copyright (C) 2004-2021 Artifex Software, Inc. | |
| 2 // | |
| 3 // This file is part of MuPDF. | |
| 4 // | |
| 5 // MuPDF is free software: you can redistribute it and/or modify it under the | |
| 6 // terms of the GNU Affero General Public License as published by the Free | |
| 7 // Software Foundation, either version 3 of the License, or (at your option) | |
| 8 // any later version. | |
| 9 // | |
| 10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY | |
| 11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
| 12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more | |
| 13 // details. | |
| 14 // | |
| 15 // You should have received a copy of the GNU Affero General Public License | |
| 16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> | |
| 17 // | |
| 18 // Alternative licensing terms are available from the licensor. | |
| 19 // For commercial licensing, see <https://www.artifex.com/> or contact | |
| 20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, | |
| 21 // CA 94129, USA, for further information. | |
| 22 | |
| 23 /* | |
| 24 * PDF merge tool: Tool for merging pdf content. | |
| 25 * | |
| 26 * Simple test bed to work with merging pages from multiple PDFs into a single PDF. | |
| 27 */ | |
| 28 | |
| 29 #include "mupdf/fitz.h" | |
| 30 #include "mupdf/pdf.h" | |
| 31 | |
| 32 #include <stdlib.h> | |
| 33 #include <stdio.h> | |
| 34 #include <string.h> | |
| 35 | |
| 36 static int usage(void) | |
| 37 { | |
| 38 fprintf(stderr, | |
| 39 "usage: mutool merge [-o output.pdf] [-O options] input.pdf [pages] [input2.pdf] [pages2] ...\n" | |
| 40 "\t-o -\tname of PDF file to create\n" | |
| 41 "\t-O -\tcomma separated list of output options\n" | |
| 42 "\tinput.pdf\tname of input file from which to copy pages\n" | |
| 43 "\tpages\tcomma separated list of page numbers and ranges\n\n" | |
| 44 ); | |
| 45 fputs(fz_pdf_write_options_usage, stderr); | |
| 46 return 1; | |
| 47 } | |
| 48 | |
| 49 static pdf_document *doc_des = NULL; | |
| 50 static pdf_document *doc_src = NULL; | |
| 51 int output_page_count = 0; | |
| 52 | |
| 53 static void page_merge(fz_context *ctx, int page_from, int page_to, pdf_graft_map *graft_map) | |
| 54 { | |
| 55 pdf_graft_mapped_page(ctx, graft_map, page_to - 1, doc_src, page_from - 1); | |
| 56 } | |
| 57 | |
| 58 /* | |
| 59 While we are processing, it_src tracks the current position we are copying from. | |
| 60 | |
| 61 items is the list of things we have stepped through to get to the current position. | |
| 62 A prefix of these items may have already been copied across. copied_to_depth is | |
| 63 the length of that prefix. 0 <= copied_to_depth <= len. | |
| 64 */ | |
| 65 typedef struct | |
| 66 { | |
| 67 fz_context *ctx; | |
| 68 fz_outline_iterator *it_dst; | |
| 69 fz_outline_iterator *it_src; | |
| 70 const char *range; | |
| 71 int page_count; | |
| 72 int max; | |
| 73 int len; | |
| 74 fz_outline_item *items; | |
| 75 int copied_to_depth; | |
| 76 int page_output_base; | |
| 77 } cor_state; | |
| 78 | |
| 79 /* Given a range, and a page in the range 1 to count, return the position | |
| 80 * which the page occupies in the output range (or 0 for not in range). | |
| 81 * So page 12 within 10-20 would return 3. | |
| 82 */ | |
| 83 static int | |
| 84 position_in_range(fz_context *ctx, const char *range, int count, int page) | |
| 85 { | |
| 86 int start, end; | |
| 87 int n = 0; | |
| 88 | |
| 89 while ((range = fz_parse_page_range(ctx, range, &start, &end, count))) | |
| 90 { | |
| 91 if (start < end) | |
| 92 { | |
| 93 if (start <= page && page <= end) | |
| 94 return n + page - start + 1; | |
| 95 n += end - start + 1; | |
| 96 } | |
| 97 else | |
| 98 { | |
| 99 if (end <= page && page <= start) | |
| 100 return n + page - end + 1; | |
| 101 n += start - end + 1; | |
| 102 } | |
| 103 } | |
| 104 | |
| 105 return 0; | |
| 106 } | |
| 107 | |
| 108 static void | |
| 109 copy_item(cor_state *cor) | |
| 110 { | |
| 111 fz_context *ctx = cor->ctx; | |
| 112 | |
| 113 while (cor->copied_to_depth < cor->len) | |
| 114 { | |
| 115 /* All items copied in a run get the same uri - that of the last one. */ | |
| 116 fz_outline_item item = cor->items[cor->copied_to_depth]; | |
| 117 item.uri = cor->items[cor->len-1].uri; | |
| 118 fz_outline_iterator_insert(ctx, cor->it_dst, &item); | |
| 119 cor->copied_to_depth++; | |
| 120 fz_outline_iterator_prev(ctx, cor->it_dst); | |
| 121 fz_outline_iterator_down(ctx, cor->it_dst); | |
| 122 } | |
| 123 } | |
| 124 | |
| 125 static char * | |
| 126 rewrite_page(fz_context *ctx, const char *uri, int n) | |
| 127 { | |
| 128 const char *p; | |
| 129 | |
| 130 if (uri == NULL) | |
| 131 return NULL; | |
| 132 | |
| 133 if (strncmp(uri, "#page=", 6) != 0) | |
| 134 return fz_strdup(ctx, uri); | |
| 135 p = strchr(uri+6, '&'); | |
| 136 if (p == NULL) | |
| 137 return fz_asprintf(ctx, "#page=%d", n); | |
| 138 | |
| 139 return fz_asprintf(ctx, "#page=%d%s", n, p); | |
| 140 } | |
| 141 | |
| 142 static void | |
| 143 do_copy_outline_range(cor_state *cor) | |
| 144 { | |
| 145 fz_context *ctx = cor->ctx; | |
| 146 | |
| 147 do | |
| 148 { | |
| 149 int has_children; | |
| 150 float x, y; | |
| 151 fz_outline_item *item = fz_outline_iterator_item(ctx, cor->it_src); | |
| 152 int page_num = fz_page_number_from_location(ctx, (fz_document *)doc_src, fz_resolve_link(ctx, (fz_document *)doc_src, item->uri, &x, &y)); | |
| 153 int page_in_range = position_in_range(ctx, cor->range, cor->page_count, page_num+1); | |
| 154 int new_page_number = page_in_range + cor->page_output_base; | |
| 155 | |
| 156 if (cor->len == cor->max) | |
| 157 { | |
| 158 int newmax = cor->max ? cor->max * 2 : 8; | |
| 159 cor->items = fz_realloc_array(ctx, cor->items, newmax, fz_outline_item); | |
| 160 cor->max = newmax; | |
| 161 } | |
| 162 cor->len++; | |
| 163 cor->items[cor->len-1].title = NULL; | |
| 164 cor->items[cor->len-1].uri = NULL; | |
| 165 cor->items[cor->len-1].is_open = item->is_open; | |
| 166 cor->items[cor->len-1].title = item->title ? fz_strdup(ctx, item->title) : NULL; | |
| 167 cor->items[cor->len-1].uri = rewrite_page(ctx, item->uri, new_page_number); | |
| 168 | |
| 169 if (page_in_range != 0) | |
| 170 copy_item(cor); | |
| 171 | |
| 172 has_children = fz_outline_iterator_down(ctx, cor->it_src); | |
| 173 if (has_children == 0) | |
| 174 do_copy_outline_range(cor); | |
| 175 if (has_children >= 0) | |
| 176 fz_outline_iterator_up(ctx, cor->it_src); | |
| 177 | |
| 178 cor->len--; | |
| 179 if (cor->copied_to_depth > cor->len) | |
| 180 { | |
| 181 cor->copied_to_depth = cor->len; | |
| 182 fz_outline_iterator_up(ctx, cor->it_dst); | |
| 183 } | |
| 184 fz_outline_iterator_next(ctx, cor->it_dst); | |
| 185 fz_free(ctx, cor->items[cor->len].title); | |
| 186 fz_free(ctx, cor->items[cor->len].uri); | |
| 187 } | |
| 188 while (fz_outline_iterator_next(ctx, cor->it_src) == 0); | |
| 189 } | |
| 190 | |
| 191 static void | |
| 192 copy_outline_range(fz_context *ctx, fz_outline_iterator *it_dst, fz_outline_iterator *it_src, const char *range, int page_count, int page_output_base) | |
| 193 { | |
| 194 cor_state cor; | |
| 195 | |
| 196 cor.ctx = ctx; | |
| 197 cor.it_dst = it_dst; | |
| 198 cor.it_src = it_src; | |
| 199 cor.max = 0; | |
| 200 cor.len = 0; | |
| 201 cor.copied_to_depth = 0; | |
| 202 cor.range = range; | |
| 203 cor.items = NULL; | |
| 204 cor.page_count = page_count; | |
| 205 cor.page_output_base = page_output_base; | |
| 206 | |
| 207 fz_try(ctx) | |
| 208 do_copy_outline_range(&cor); | |
| 209 fz_always(ctx) | |
| 210 { | |
| 211 int i; | |
| 212 | |
| 213 for (i = 0; i < cor.len; i++) | |
| 214 { | |
| 215 fz_free(ctx, cor.items[i].title); | |
| 216 fz_free(ctx, cor.items[i].uri); | |
| 217 } | |
| 218 fz_free(ctx, cor.items); | |
| 219 } | |
| 220 fz_catch(ctx) | |
| 221 fz_rethrow(ctx); | |
| 222 } | |
| 223 | |
| 224 | |
| 225 static void merge_range(fz_context *ctx, const char *range) | |
| 226 { | |
| 227 int start, end, i, count; | |
| 228 pdf_graft_map *graft_map; | |
| 229 const char *r; | |
| 230 fz_outline_iterator *it_src = NULL; | |
| 231 fz_outline_iterator *it_dst = NULL; | |
| 232 int pages_merged = 0; | |
| 233 | |
| 234 count = pdf_count_pages(ctx, doc_src); | |
| 235 graft_map = pdf_new_graft_map(ctx, doc_des); | |
| 236 | |
| 237 fz_var(it_src); | |
| 238 fz_var(it_dst); | |
| 239 | |
| 240 fz_try(ctx) | |
| 241 { | |
| 242 r = range; | |
| 243 while ((r = fz_parse_page_range(ctx, r, &start, &end, count))) | |
| 244 { | |
| 245 if (start < end) | |
| 246 for (i = start; i <= end; ++i) | |
| 247 { | |
| 248 page_merge(ctx, i, 0, graft_map); | |
| 249 pages_merged++; | |
| 250 } | |
| 251 else | |
| 252 for (i = start; i >= end; --i) | |
| 253 { | |
| 254 page_merge(ctx, i, 0, graft_map); | |
| 255 pages_merged++; | |
| 256 } | |
| 257 } | |
| 258 | |
| 259 it_src = fz_new_outline_iterator(ctx, (fz_document *)doc_src); | |
| 260 if (it_src == NULL) | |
| 261 break; /* Should never happen */ | |
| 262 it_dst = fz_new_outline_iterator(ctx, (fz_document *)doc_des); | |
| 263 if (it_dst == NULL) | |
| 264 break; /* Should never happen */ | |
| 265 | |
| 266 /* Run to the end of it_dst. */ | |
| 267 if (fz_outline_iterator_item(ctx, it_dst) != NULL) | |
| 268 { | |
| 269 while (fz_outline_iterator_next(ctx, it_dst) == 0); | |
| 270 } | |
| 271 | |
| 272 if (fz_outline_iterator_item(ctx, it_src) != NULL) | |
| 273 copy_outline_range(ctx, it_dst, it_src, range, count, output_page_count); | |
| 274 | |
| 275 output_page_count += pages_merged; | |
| 276 } | |
| 277 fz_always(ctx) | |
| 278 { | |
| 279 fz_drop_outline_iterator(ctx, it_src); | |
| 280 fz_drop_outline_iterator(ctx, it_dst); | |
| 281 pdf_drop_graft_map(ctx, graft_map); | |
| 282 } | |
| 283 fz_catch(ctx) | |
| 284 { | |
| 285 fz_rethrow(ctx); | |
| 286 } | |
| 287 } | |
| 288 | |
| 289 int pdfmerge_main(int argc, char **argv) | |
| 290 { | |
| 291 pdf_write_options opts = pdf_default_write_options; | |
| 292 char *output = "out.pdf"; | |
| 293 char *flags = ""; | |
| 294 char *input; | |
| 295 int c; | |
| 296 fz_context *ctx; | |
| 297 | |
| 298 while ((c = fz_getopt(argc, argv, "o:O:")) != -1) | |
| 299 { | |
| 300 switch (c) | |
| 301 { | |
| 302 case 'o': output = fz_optpath(fz_optarg); break; | |
| 303 case 'O': flags = fz_optarg; break; | |
| 304 default: return usage(); | |
| 305 } | |
| 306 } | |
| 307 | |
| 308 if (fz_optind == argc) | |
| 309 return usage(); | |
| 310 | |
| 311 ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); | |
| 312 if (!ctx) | |
| 313 { | |
| 314 fprintf(stderr, "error: Cannot initialize MuPDF context.\n"); | |
| 315 exit(1); | |
| 316 } | |
| 317 | |
| 318 pdf_parse_write_options(ctx, &opts, flags); | |
| 319 | |
| 320 fz_try(ctx) | |
| 321 { | |
| 322 doc_des = pdf_create_document(ctx); | |
| 323 } | |
| 324 fz_catch(ctx) | |
| 325 { | |
| 326 fz_report_error(ctx); | |
| 327 fz_log_error(ctx, "Cannot create destination document."); | |
| 328 fz_flush_warnings(ctx); | |
| 329 fz_drop_context(ctx); | |
| 330 exit(1); | |
| 331 } | |
| 332 | |
| 333 /* Step through the source files */ | |
| 334 while (fz_optind < argc) | |
| 335 { | |
| 336 doc_src = NULL; | |
| 337 input = argv[fz_optind++]; | |
| 338 | |
| 339 fz_try(ctx) | |
| 340 { | |
| 341 doc_src = pdf_open_document(ctx, input); | |
| 342 if (fz_optind == argc || !fz_is_page_range(ctx, argv[fz_optind])) | |
| 343 merge_range(ctx, "1-N"); | |
| 344 else | |
| 345 merge_range(ctx, argv[fz_optind++]); | |
| 346 } | |
| 347 fz_always(ctx) | |
| 348 pdf_drop_document(ctx, doc_src); | |
| 349 fz_catch(ctx) | |
| 350 { | |
| 351 fz_report_error(ctx); | |
| 352 fz_log_error_printf(ctx, "Cannot merge document '%s'.", input); | |
| 353 } | |
| 354 } | |
| 355 | |
| 356 if (fz_optind == argc) | |
| 357 { | |
| 358 fz_try(ctx) | |
| 359 pdf_save_document(ctx, doc_des, output, &opts); | |
| 360 fz_catch(ctx) | |
| 361 { | |
| 362 fz_report_error(ctx); | |
| 363 fz_log_error_printf(ctx, "Cannot save output file: '%s'.", output); | |
| 364 } | |
| 365 } | |
| 366 | |
| 367 pdf_drop_document(ctx, doc_des); | |
| 368 fz_flush_warnings(ctx); | |
| 369 fz_drop_context(ctx); | |
| 370 return 0; | |
| 371 } |
