Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/xps/xps-doc.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/xps/xps-doc.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,622 @@ +// Copyright (C) 2004-2024 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" +#include "xps-imp.h" + +#include <string.h> +#include <stdlib.h> + +#define REL_START_PART \ + "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation" +#define REL_DOC_STRUCTURE \ + "http://schemas.microsoft.com/xps/2005/06/documentstructure" +#define REL_REQUIRED_RESOURCE \ + "http://schemas.microsoft.com/xps/2005/06/required-resource" +#define REL_REQUIRED_RESOURCE_RECURSIVE \ + "http://schemas.microsoft.com/xps/2005/06/required-resource#recursive" + +#define REL_START_PART_OXPS \ + "http://schemas.openxps.org/oxps/v1.0/fixedrepresentation" +#define REL_DOC_STRUCTURE_OXPS \ + "http://schemas.openxps.org/oxps/v1.0/documentstructure" + +static void +xps_rels_for_part(fz_context *ctx, xps_document *doc, char *buf, char *name, int buflen) +{ + char *p, *basename; + p = strrchr(name, '/'); + basename = p ? p + 1 : name; + fz_strlcpy(buf, name, buflen); + p = strrchr(buf, '/'); + if (p) *p = 0; + fz_strlcat(buf, "/_rels/", buflen); + fz_strlcat(buf, basename, buflen); + fz_strlcat(buf, ".rels", buflen); +} + +/* + * The FixedDocumentSequence and FixedDocument parts determine + * which parts correspond to actual pages, and the page order. + */ + +static void +xps_add_fixed_document(fz_context *ctx, xps_document *doc, char *name) +{ + xps_fixdoc *fixdoc; + + /* Check for duplicates first */ + for (fixdoc = doc->first_fixdoc; fixdoc; fixdoc = fixdoc->next) + if (!strcmp(fixdoc->name, name)) + return; + + fixdoc = fz_malloc_struct(ctx, xps_fixdoc); + fz_try(ctx) + { + fixdoc->name = fz_strdup(ctx, name); + fixdoc->outline = NULL; + fixdoc->next = NULL; + } + fz_catch(ctx) + { + fz_free(ctx, fixdoc); + fz_rethrow(ctx); + } + + if (!doc->first_fixdoc) + { + doc->first_fixdoc = fixdoc; + doc->last_fixdoc = fixdoc; + } + else + { + doc->last_fixdoc->next = fixdoc; + doc->last_fixdoc = fixdoc; + } +} + +static void +xps_add_fixed_page(fz_context *ctx, xps_document *doc, char *name, int width, int height) +{ + xps_fixpage *page; + + /* Check for duplicates first */ + for (page = doc->first_page; page; page = page->next) + if (!strcmp(page->name, name)) + return; + + page = fz_malloc_struct(ctx, xps_fixpage); + page->name = NULL; + + fz_try(ctx) + { + page->name = fz_strdup(ctx, name); + page->number = doc->page_count++; + page->width = width; + page->height = height; + page->next = NULL; + } + fz_catch(ctx) + { + fz_free(ctx, page->name); + fz_free(ctx, page); + fz_rethrow(ctx); + } + + if (!doc->first_page) + { + doc->first_page = page; + doc->last_page = page; + } + else + { + doc->last_page->next = page; + doc->last_page = page; + } +} + +static void +xps_add_link_target(fz_context *ctx, xps_document *doc, char *name) +{ + xps_fixpage *page = doc->last_page; + xps_target *target; + + if (page == NULL) + { + fz_warn(ctx, "Dropping link target with no page"); + return; + } + + target = fz_malloc_struct(ctx, xps_target); + + fz_try(ctx) + { + target->name = fz_strdup(ctx, name); + target->page = page->number; + target->next = doc->target; + } + fz_catch(ctx) + { + fz_free(ctx, target); + fz_rethrow(ctx); + } + + doc->target = target; +} + +fz_link_dest +xps_lookup_link_target(fz_context *ctx, fz_document *doc_, const char *target_uri) +{ + xps_document *doc = (xps_document*)doc_; + xps_target *target; + const char *needle = strrchr(target_uri, '#'); + needle = needle ? needle + 1 : target_uri; + for (target = doc->target; target; target = target->next) + if (!strcmp(target->name, needle)) + return fz_make_link_dest_xyz(0, target->page, 0, 0, 0); + return fz_make_link_dest_xyz(0, fz_atoi(needle) - 1, 0, 0, 0); +} + +static void +xps_drop_link_targets(fz_context *ctx, xps_document *doc) +{ + xps_target *target = doc->target, *next; + while (target) + { + next = target->next; + fz_free(ctx, target->name); + fz_free(ctx, target); + target = next; + } +} + +static void +xps_drop_fixed_pages(fz_context *ctx, xps_document *doc) +{ + xps_fixpage *page = doc->first_page; + while (page) + { + xps_fixpage *next = page->next; + fz_free(ctx, page->name); + fz_free(ctx, page); + page = next; + } + doc->first_page = NULL; + doc->last_page = NULL; +} + +static void +xps_drop_fixed_documents(fz_context *ctx, xps_document *doc) +{ + xps_fixdoc *fixdoc = doc->first_fixdoc; + while (fixdoc) + { + xps_fixdoc *next = fixdoc->next; + fz_free(ctx, fixdoc->name); + fz_free(ctx, fixdoc->outline); + fz_free(ctx, fixdoc); + fixdoc = next; + } + doc->first_fixdoc = NULL; + doc->last_fixdoc = NULL; +} + +void +xps_drop_page_list(fz_context *ctx, xps_document *doc) +{ + xps_drop_fixed_documents(ctx, doc); + xps_drop_fixed_pages(ctx, doc); + xps_drop_link_targets(ctx, doc); +} + +/* + * Parse the fixed document sequence structure and _rels/.rels to find the start part. + */ + +static void +xps_parse_metadata_imp(fz_context *ctx, xps_document *doc, fz_xml *item, xps_fixdoc *fixdoc) +{ + while (item) + { + if (fz_xml_is_tag(item, "Relationship")) + { + char *target = fz_xml_att(item, "Target"); + char *type = fz_xml_att(item, "Type"); + if (target && type) + { + char tgtbuf[1024]; + xps_resolve_url(ctx, doc, tgtbuf, doc->base_uri, target, sizeof tgtbuf); + if (!strcmp(type, REL_START_PART) || !strcmp(type, REL_START_PART_OXPS)) + { + fz_free(ctx, doc->start_part); + doc->start_part = fz_strdup(ctx, tgtbuf); + } + if ((!strcmp(type, REL_DOC_STRUCTURE) || !strcmp(type, REL_DOC_STRUCTURE_OXPS)) && fixdoc) + fixdoc->outline = fz_strdup(ctx, tgtbuf); + if (!fz_xml_att(item, "Id")) + fz_warn(ctx, "missing relationship id for %s", target); + } + } + + if (fz_xml_is_tag(item, "DocumentReference")) + { + char *source = fz_xml_att(item, "Source"); + if (source) + { + char srcbuf[1024]; + xps_resolve_url(ctx, doc, srcbuf, doc->base_uri, source, sizeof srcbuf); + xps_add_fixed_document(ctx, doc, srcbuf); + } + } + + if (fz_xml_is_tag(item, "PageContent")) + { + char *source = fz_xml_att(item, "Source"); + char *width_att = fz_xml_att(item, "Width"); + char *height_att = fz_xml_att(item, "Height"); + int width = width_att ? atoi(width_att) : 0; + int height = height_att ? atoi(height_att) : 0; + if (source) + { + char srcbuf[1024]; + xps_resolve_url(ctx, doc, srcbuf, doc->base_uri, source, sizeof srcbuf); + xps_add_fixed_page(ctx, doc, srcbuf, width, height); + } + } + + if (fz_xml_is_tag(item, "LinkTarget")) + { + char *name = fz_xml_att(item, "Name"); + if (name) + xps_add_link_target(ctx, doc, name); + } + + xps_parse_metadata_imp(ctx, doc, fz_xml_down(item), fixdoc); + + item = fz_xml_next(item); + } +} + +static void +xps_parse_metadata(fz_context *ctx, xps_document *doc, xps_part *part, xps_fixdoc *fixdoc) +{ + fz_xml_doc *xml; + char buf[1024]; + char *s; + + /* Save directory name part */ + fz_strlcpy(buf, part->name, sizeof buf); + s = strrchr(buf, '/'); + if (s) + s[0] = 0; + + /* _rels parts are voodoo: their URI references are from + * the part they are associated with, not the actual _rels + * part being parsed. + */ + s = strstr(buf, "/_rels"); + if (s) + *s = 0; + + doc->base_uri = buf; + doc->part_uri = part->name; + + xml = fz_parse_xml(ctx, part->data, 0); + fz_try(ctx) + { + xps_parse_metadata_imp(ctx, doc, fz_xml_root(xml), fixdoc); + } + fz_always(ctx) + { + fz_drop_xml(ctx, xml); + doc->base_uri = NULL; + doc->part_uri = NULL; + } + fz_catch(ctx) + fz_rethrow(ctx); +} + +static void +xps_read_and_process_metadata_part(fz_context *ctx, xps_document *doc, char *name, xps_fixdoc *fixdoc) +{ + xps_part *part; + + if (!xps_has_part(ctx, doc, name)) + return; + + part = xps_read_part(ctx, doc, name); + fz_try(ctx) + { + xps_parse_metadata(ctx, doc, part, fixdoc); + } + fz_always(ctx) + { + xps_drop_part(ctx, doc, part); + } + fz_catch(ctx) + { + fz_rethrow(ctx); + } +} + +void +xps_read_page_list(fz_context *ctx, xps_document *doc) +{ + xps_fixdoc *fixdoc; + + xps_read_and_process_metadata_part(ctx, doc, "/_rels/.rels", NULL); + + if (!doc->start_part) + fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find fixed document sequence start part"); + + xps_read_and_process_metadata_part(ctx, doc, doc->start_part, NULL); + + for (fixdoc = doc->first_fixdoc; fixdoc; fixdoc = fixdoc->next) + { + char relbuf[1024]; + fz_try(ctx) + { + xps_rels_for_part(ctx, doc, relbuf, fixdoc->name, sizeof relbuf); + xps_read_and_process_metadata_part(ctx, doc, relbuf, fixdoc); + } + fz_catch(ctx) + { + fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); + fz_rethrow_if(ctx, FZ_ERROR_SYSTEM); + fz_report_error(ctx); + fz_warn(ctx, "cannot process FixedDocument rels part"); + } + xps_read_and_process_metadata_part(ctx, doc, fixdoc->name, fixdoc); + } +} + +int +xps_count_pages(fz_context *ctx, fz_document *doc_, int chapter) +{ + xps_document *doc = (xps_document*)doc_; + return doc->page_count; +} + +static fz_xml_doc * +xps_load_fixed_page(fz_context *ctx, xps_document *doc, xps_fixpage *page) +{ + xps_part *part; + fz_xml_doc *xml = NULL; + fz_xml *root; + char *width_att; + char *height_att; + + part = xps_read_part(ctx, doc, page->name); + fz_try(ctx) + { + xml = fz_parse_xml(ctx, part->data, 0); + + root = fz_xml_root(xml); + if (!root) + fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing root element"); + + if (fz_xml_is_tag(root, "AlternateContent")) + { + fz_xml *node = xps_lookup_alternate_content(ctx, doc, root); + if (!node) + fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing alternate root element"); + fz_detach_xml(ctx, node); + root = node; + } + + if (!fz_xml_is_tag(root, "FixedPage")) + fz_throw(ctx, FZ_ERROR_FORMAT, "expected FixedPage element"); + width_att = fz_xml_att(root, "Width"); + if (!width_att) + fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing required attribute: Width"); + height_att = fz_xml_att(root, "Height"); + if (!height_att) + fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing required attribute: Height"); + + page->width = atoi(width_att); + page->height = atoi(height_att); + } + fz_always(ctx) + { + xps_drop_part(ctx, doc, part); + } + fz_catch(ctx) + { + fz_drop_xml(ctx, xml); + fz_rethrow(ctx); + } + + return xml; +} + +static fz_rect +xps_bound_page(fz_context *ctx, fz_page *page_, fz_box_type box) +{ + xps_page *page = (xps_page*)page_; + fz_rect bounds; + bounds.x0 = bounds.y0 = 0; + bounds.x1 = page->fix->width * 72.0f / 96.0f; + bounds.y1 = page->fix->height * 72.0f / 96.0f; + return bounds; +} + +static void +xps_drop_page_imp(fz_context *ctx, fz_page *page_) +{ + xps_page *page = (xps_page*)page_; + fz_drop_xml(ctx, page->xml); +} + +fz_page * +xps_load_page(fz_context *ctx, fz_document *doc_, int chapter, int number) +{ + xps_document *doc = (xps_document*)doc_; + xps_page *page = NULL; + xps_fixpage *fix; + fz_xml_doc *xml; + int n = 0; + + fz_var(page); + + for (fix = doc->first_page; fix; fix = fix->next) + { + if (n == number) + { + xml = xps_load_fixed_page(ctx, doc, fix); + fz_try(ctx) + { + page = fz_new_derived_page(ctx, xps_page, doc_); + page->super.load_links = xps_load_links; + page->super.bound_page = xps_bound_page; + page->super.run_page_contents = xps_run_page; + page->super.drop_page = xps_drop_page_imp; + + page->fix = fix; + page->xml = xml; + } + fz_catch(ctx) + { + fz_drop_xml(ctx, xml); + fz_rethrow(ctx); + } + return (fz_page*)page; + } + n ++; + } + + fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot find page %d", number + 1); +} + +static const char *xps_extensions[] = +{ + "oxps", + "xps", + NULL +}; + +static const char *xps_mimetypes[] = +{ + "application/oxps", + "application/vnd.ms-xpsdocument", + "application/xps", + NULL +}; + +static int +xps_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state) +{ + fz_archive *arch = NULL; + int ret = 0; + fz_xml *xml = NULL; + fz_xml *pos; + + if (state) + *state = NULL; + if (free_state) + *free_state = NULL; + + fz_var(arch); + fz_var(ret); + fz_var(xml); + + fz_try(ctx) + { + int i, count; + const char *name; + + if (stream == NULL) + arch = fz_keep_archive(ctx, dir); + else + { + arch = fz_try_open_archive_with_stream(ctx, stream); + if (arch == NULL) + break; + } + + xml = fz_try_parse_xml_archive_entry(ctx, arch, "/_rels/.rels", 0); + if (xml == NULL) + xml = fz_try_parse_xml_archive_entry(ctx, arch, "\\_rels\\.rels", 0); + + if (xml) + { + pos = fz_xml_find_dfs(xml, "Relationship", "Type", "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation"); + if (pos) + ret = 100; + break; + } + + /* Cope with tricksy XPS's have the rels in multiple bits. */ + count = fz_count_archive_entries(ctx, arch); + + for (i = 0; i < count; i++) + { + name = fz_list_archive_entry(ctx, arch, i); + if (!name) + continue; + if (strncmp(name, "/_rels/.rels/", 13) == 0 || + strncmp(name, "_rels/.rels/", 12) == 0 || + strncmp(name, "\\_rels\\.rels\\", 13) == 0 || + strncmp(name, "_rels\\.rels\\", 12) == 0) + { + xml = fz_try_parse_xml_archive_entry(ctx, arch, name, 0); + if (xml) + { + pos = fz_xml_find_dfs(xml, "Relationship", "Type", "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation"); + if (pos) + { + ret = 100; + break; + } + fz_drop_xml(ctx, xml); + xml = NULL; + } + } + } + } + fz_always(ctx) + { + fz_drop_xml(ctx, xml); + fz_drop_archive(ctx, arch); + } + fz_catch(ctx) + fz_rethrow(ctx); + + return ret; +} + +static fz_document * +xps_open(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state) +{ + if (file) + return xps_open_document_with_stream(ctx, file); + else + return xps_open_document_with_directory(ctx, dir); +} + +fz_document_handler xps_document_handler = +{ + NULL, + xps_open, + xps_extensions, + xps_mimetypes, + xps_recognize_doc_content +};
