Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/tools/pdfmerge.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/tools/pdfmerge.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,371 @@ +// Copyright (C) 2004-2021 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +/* + * PDF merge tool: Tool for merging pdf content. + * + * Simple test bed to work with merging pages from multiple PDFs into a single PDF. + */ + +#include "mupdf/fitz.h" +#include "mupdf/pdf.h" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +static int usage(void) +{ + fprintf(stderr, + "usage: mutool merge [-o output.pdf] [-O options] input.pdf [pages] [input2.pdf] [pages2] ...\n" + "\t-o -\tname of PDF file to create\n" + "\t-O -\tcomma separated list of output options\n" + "\tinput.pdf\tname of input file from which to copy pages\n" + "\tpages\tcomma separated list of page numbers and ranges\n\n" + ); + fputs(fz_pdf_write_options_usage, stderr); + return 1; +} + +static pdf_document *doc_des = NULL; +static pdf_document *doc_src = NULL; +int output_page_count = 0; + +static void page_merge(fz_context *ctx, int page_from, int page_to, pdf_graft_map *graft_map) +{ + pdf_graft_mapped_page(ctx, graft_map, page_to - 1, doc_src, page_from - 1); +} + +/* + While we are processing, it_src tracks the current position we are copying from. + + items is the list of things we have stepped through to get to the current position. + A prefix of these items may have already been copied across. copied_to_depth is + the length of that prefix. 0 < = copied_to_depth <= len. +*/ +typedef struct +{ + fz_context *ctx; + fz_outline_iterator *it_dst; + fz_outline_iterator *it_src; + const char *range; + int page_count; + int max; + int len; + fz_outline_item *items; + int copied_to_depth; + int page_output_base; +} cor_state; + +/* Given a range, and a page in the range 1 to count, return the position + * which the page occupies in the output range (or 0 for not in range). + * So page 12 within 10-20 would return 3. + */ +static int +position_in_range(fz_context *ctx, const char *range, int count, int page) +{ + int start, end; + int n = 0; + + while ((range = fz_parse_page_range(ctx, range, &start, &end, count))) + { + if (start < end) + { + if (start <= page && page <= end) + return n + page - start + 1; + n += end - start + 1; + } + else + { + if (end <= page && page <= start) + return n + page - end + 1; + n += start - end + 1; + } + } + + return 0; +} + +static void +copy_item(cor_state *cor) +{ + fz_context *ctx = cor->ctx; + + while (cor->copied_to_depth < cor->len) + { + /* All items copied in a run get the same uri - that of the last one. */ + fz_outline_item item = cor->items[cor->copied_to_depth]; + item.uri = cor->items[cor->len-1].uri; + fz_outline_iterator_insert(ctx, cor->it_dst, &item); + cor->copied_to_depth++; + fz_outline_iterator_prev(ctx, cor->it_dst); + fz_outline_iterator_down(ctx, cor->it_dst); + } +} + +static char * +rewrite_page(fz_context *ctx, const char *uri, int n) +{ + const char *p; + + if (uri == NULL) + return NULL; + + if (strncmp(uri, "#page=", 6) != 0) + return fz_strdup(ctx, uri); + p = strchr(uri+6, '&'); + if (p == NULL) + return fz_asprintf(ctx, "#page=%d", n); + + return fz_asprintf(ctx, "#page=%d%s", n, p); +} + +static void +do_copy_outline_range(cor_state *cor) +{ + fz_context *ctx = cor->ctx; + + do + { + int has_children; + float x, y; + fz_outline_item *item = fz_outline_iterator_item(ctx, cor->it_src); + int page_num = fz_page_number_from_location(ctx, (fz_document *)doc_src, fz_resolve_link(ctx, (fz_document *)doc_src, item->uri, &x, &y)); + int page_in_range = position_in_range(ctx, cor->range, cor->page_count, page_num+1); + int new_page_number = page_in_range + cor->page_output_base; + + if (cor->len == cor->max) + { + int newmax = cor->max ? cor->max * 2 : 8; + cor->items = fz_realloc_array(ctx, cor->items, newmax, fz_outline_item); + cor->max = newmax; + } + cor->len++; + cor->items[cor->len-1].title = NULL; + cor->items[cor->len-1].uri = NULL; + cor->items[cor->len-1].is_open = item->is_open; + cor->items[cor->len-1].title = item->title ? fz_strdup(ctx, item->title) : NULL; + cor->items[cor->len-1].uri = rewrite_page(ctx, item->uri, new_page_number); + + if (page_in_range != 0) + copy_item(cor); + + has_children = fz_outline_iterator_down(ctx, cor->it_src); + if (has_children == 0) + do_copy_outline_range(cor); + if (has_children >= 0) + fz_outline_iterator_up(ctx, cor->it_src); + + cor->len--; + if (cor->copied_to_depth > cor->len) + { + cor->copied_to_depth = cor->len; + fz_outline_iterator_up(ctx, cor->it_dst); + } + fz_outline_iterator_next(ctx, cor->it_dst); + fz_free(ctx, cor->items[cor->len].title); + fz_free(ctx, cor->items[cor->len].uri); + } + while (fz_outline_iterator_next(ctx, cor->it_src) == 0); +} + +static void +copy_outline_range(fz_context *ctx, fz_outline_iterator *it_dst, fz_outline_iterator *it_src, const char *range, int page_count, int page_output_base) +{ + cor_state cor; + + cor.ctx = ctx; + cor.it_dst = it_dst; + cor.it_src = it_src; + cor.max = 0; + cor.len = 0; + cor.copied_to_depth = 0; + cor.range = range; + cor.items = NULL; + cor.page_count = page_count; + cor.page_output_base = page_output_base; + + fz_try(ctx) + do_copy_outline_range(&cor); + fz_always(ctx) + { + int i; + + for (i = 0; i < cor.len; i++) + { + fz_free(ctx, cor.items[i].title); + fz_free(ctx, cor.items[i].uri); + } + fz_free(ctx, cor.items); + } + fz_catch(ctx) + fz_rethrow(ctx); +} + + +static void merge_range(fz_context *ctx, const char *range) +{ + int start, end, i, count; + pdf_graft_map *graft_map; + const char *r; + fz_outline_iterator *it_src = NULL; + fz_outline_iterator *it_dst = NULL; + int pages_merged = 0; + + count = pdf_count_pages(ctx, doc_src); + graft_map = pdf_new_graft_map(ctx, doc_des); + + fz_var(it_src); + fz_var(it_dst); + + fz_try(ctx) + { + r = range; + while ((r = fz_parse_page_range(ctx, r, &start, &end, count))) + { + if (start < end) + for (i = start; i <= end; ++i) + { + page_merge(ctx, i, 0, graft_map); + pages_merged++; + } + else + for (i = start; i >= end; --i) + { + page_merge(ctx, i, 0, graft_map); + pages_merged++; + } + } + + it_src = fz_new_outline_iterator(ctx, (fz_document *)doc_src); + if (it_src == NULL) + break; /* Should never happen */ + it_dst = fz_new_outline_iterator(ctx, (fz_document *)doc_des); + if (it_dst == NULL) + break; /* Should never happen */ + + /* Run to the end of it_dst. */ + if (fz_outline_iterator_item(ctx, it_dst) != NULL) + { + while (fz_outline_iterator_next(ctx, it_dst) == 0); + } + + if (fz_outline_iterator_item(ctx, it_src) != NULL) + copy_outline_range(ctx, it_dst, it_src, range, count, output_page_count); + + output_page_count += pages_merged; + } + fz_always(ctx) + { + fz_drop_outline_iterator(ctx, it_src); + fz_drop_outline_iterator(ctx, it_dst); + pdf_drop_graft_map(ctx, graft_map); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +int pdfmerge_main(int argc, char **argv) +{ + pdf_write_options opts = pdf_default_write_options; + char *output = "out.pdf"; + char *flags = ""; + char *input; + int c; + fz_context *ctx; + + while ((c = fz_getopt(argc, argv, "o:O:")) != -1) + { + switch (c) + { + case 'o': output = fz_optpath(fz_optarg); break; + case 'O': flags = fz_optarg; break; + default: return usage(); + } + } + + if (fz_optind == argc) + return usage(); + + ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); + if (!ctx) + { + fprintf(stderr, "error: Cannot initialize MuPDF context.\n"); + exit(1); + } + + pdf_parse_write_options(ctx, &opts, flags); + + fz_try(ctx) + { + doc_des = pdf_create_document(ctx); + } + fz_catch(ctx) + { + fz_report_error(ctx); + fz_log_error(ctx, "Cannot create destination document."); + fz_flush_warnings(ctx); + fz_drop_context(ctx); + exit(1); + } + + /* Step through the source files */ + while (fz_optind < argc) + { + doc_src = NULL; + input = argv[fz_optind++]; + + fz_try(ctx) + { + doc_src = pdf_open_document(ctx, input); + if (fz_optind == argc || !fz_is_page_range(ctx, argv[fz_optind])) + merge_range(ctx, "1-N"); + else + merge_range(ctx, argv[fz_optind++]); + } + fz_always(ctx) + pdf_drop_document(ctx, doc_src); + fz_catch(ctx) + { + fz_report_error(ctx); + fz_log_error_printf(ctx, "Cannot merge document '%s'.", input); + } + } + + if (fz_optind == argc) + { + fz_try(ctx) + pdf_save_document(ctx, doc_des, output, &opts); + fz_catch(ctx) + { + fz_report_error(ctx); + fz_log_error_printf(ctx, "Cannot save output file: '%s'.", output); + } + } + + pdf_drop_document(ctx, doc_des); + fz_flush_warnings(ctx); + fz_drop_context(ctx); + return 0; +}
