Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/pdf/pdf-zugferd.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/pdf/pdf-zugferd.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,277 @@ +// Copyright (C) 2024-2025 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" +#include "mupdf/pdf.h" + +static const char * +tag_or_text(fz_xml *x, const char *find) +{ + const char *text; + const char *f = strchr(find, ':'); + + /* If we find a : we have a namespace. Search for both with and + * without the namespace. */ + if (f) + f++; + + text = fz_xml_att(x, find); + if (text == NULL && f) + text = fz_xml_att(x, f); + if (text == NULL) + text = fz_xml_text(fz_xml_down(fz_xml_find_down(x, find))); + if (text == NULL && f) + text = fz_xml_text(fz_xml_down(fz_xml_find_down(x, f))); + + return text; +} + +static enum pdf_zugferd_profile +do_zugferd_profile(fz_context *ctx, pdf_document *doc, float *version, char **fname) +{ + pdf_obj *metadata = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Metadata), NULL); + fz_buffer *buf; + fz_xml *xml = NULL; + fz_xml *x; + enum pdf_zugferd_profile ret = PDF_NOT_ZUGFERD; + + if (version) + *version = 0; + if (fname) + *fname = NULL; + + if (metadata == NULL) + return PDF_NOT_ZUGFERD; + + buf = pdf_load_stream(ctx, metadata); + + fz_var(xml); + + fz_try(ctx) + { + xml = fz_parse_xml(ctx, buf, 0); + + /* Version 1. */ + x = fz_xml_find_dfs(xml, "Description", "xmlns:zf", "urn:ferd:pdfa:CrossIndustryDocument:invoice:1p0#"); + if (x) + { + while (x) + { + /* The Version tag in the document appears to always be 1.0 */ + const char *v = tag_or_text(x, "zf:Version"); + const char *cl = tag_or_text(x, "zf:ConformanceLevel"); + const char *df = tag_or_text(x, "zf:DocumentFileName"); + const char *dt = tag_or_text(x, "zf:DocumentType"); + if (v && dt && !strcmp(dt, "INVOICE")) + { + if (!cl) + fz_warn(ctx, "No conformance level specified"); + else if (!strcmp(cl, "COMFORT")) + ret = PDF_ZUGFERD_COMFORT; + else if (!strcmp(cl, "BASIC")) + ret = PDF_ZUGFERD_BASIC; + else if (!strcmp(cl, "EXTENDED")) + ret = PDF_ZUGFERD_EXTENDED; + + if (version) + *version = fz_atof(v); + + if (!df) + fz_warn(ctx, "ZUGFeRD doc is missing filename"); + else if (strcmp(df, "ZUGFeRD-invoice.xml")) + fz_warn(ctx, "ZUGFeRD doc has non-standard filename"); + if (fname && df) + *fname = fz_strdup(ctx, df); /* Nothing can throw after this */ + break; + } + + x = fz_xml_find_next_dfs(x, "Description", "xmlns:zf", "urn:ferd:pdfa:CrossIndustryDocument:invoice:1p0#"); + } + break; + } + + /* Version 2. */ + x = fz_xml_find_dfs(xml, "Description", "xmlns:fx", "urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0#"); + if (x) + { + while (x) + { + const char *v = tag_or_text(x, "fx:Version"); + const char *cl = tag_or_text(x, "fx:ConformanceLevel"); + const char *df = tag_or_text(x, "fx:DocumentFileName"); + const char *dt = tag_or_text(x, "fx:DocumentType"); + if (v && dt && !strcmp(dt, "INVOICE")) + { + if (!cl) + fz_warn(ctx, "No conformance level specified"); + else if (!strcmp(cl, "EN 16931")) + ret = PDF_ZUGFERD_COMFORT; + else if (!strcmp(cl, "BASIC")) + ret = PDF_ZUGFERD_BASIC; + else if (!strcmp(cl, "EXTENDED")) + ret = PDF_ZUGFERD_EXTENDED; + else if (!strcmp(cl, "BASIC WL")) + ret = PDF_ZUGFERD_BASIC_WL; + else if (!strcmp(cl, "MINIMUM")) + ret = PDF_ZUGFERD_MINIMUM; + + if (version) + *version = fz_atof(v); + + if (!df) + fz_warn(ctx, "ZUGFeRD doc is missing filename"); + else if (strcmp(df, "zugferd-invoice.xml")) + fz_warn(ctx, "ZUGFeRD doc has non-standard filename"); + if (fname && df) + *fname = fz_strdup(ctx, df); /* Nothing can throw after this */ + break; + } + + x = fz_xml_find_next_dfs(x, "Description", "xmlns:fx", "urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0#"); + } + break; + } + + /* Version 2.1 + 2.11 */ + x = fz_xml_find_dfs(xml, "Description", "xmlns:fx", "urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#"); + if (x) + { + while (x) + { + const char *v = tag_or_text(x, "fx:Version"); + const char *cl = tag_or_text(x, "fx:ConformanceLevel"); + const char *df = tag_or_text(x, "fx:DocumentFileName"); + const char *dt = tag_or_text(x, "fx:DocumentType"); + if (v && dt && !strcmp(dt, "INVOICE")) + { + if (!cl) + fz_warn(ctx, "No conformance level specified"); + else if (!strcmp(cl, "EN 16931")) + ret = PDF_ZUGFERD_COMFORT; + else if (!strcmp(cl, "BASIC")) + ret = PDF_ZUGFERD_BASIC; + else if (!strcmp(cl, "EXTENDED")) + ret = PDF_ZUGFERD_EXTENDED; + else if (!strcmp(cl, "BASIC WL")) + ret = PDF_ZUGFERD_BASIC_WL; + else if (!strcmp(cl, "MINIMUM")) + ret = PDF_ZUGFERD_MINIMUM; + else if (!strcmp(cl, "XRECHNUNG")) + ret = PDF_ZUGFERD_XRECHNUNG; + + if (version) + *version = fz_atof(v); + + if (!df) + fz_warn(ctx, "ZUGFeRD doc is missing filename"); + else if (ret == PDF_ZUGFERD_XRECHNUNG && strcmp(df, "xrechnung.xml")) + fz_warn(ctx, "ZUGFeRD doc has non-standard filename"); + else if (ret != PDF_ZUGFERD_XRECHNUNG && strcmp(df, "factur-x.xml")) + fz_warn(ctx, "ZUGFeRD doc has non-standard filename"); + if (fname && df) + *fname = fz_strdup(ctx, df); /* Nothing can throw after this */ + break; + } + + x = fz_xml_find_next_dfs(x, "Description", "xmlns:fx", "urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#"); + } + break; + } + } + fz_always(ctx) + { + fz_drop_xml(ctx, xml); + fz_drop_buffer(ctx, buf); + } + fz_catch(ctx) + fz_rethrow(ctx); + + return ret; +} + +enum pdf_zugferd_profile pdf_zugferd_profile(fz_context *ctx, pdf_document *doc, float *version) +{ + return do_zugferd_profile(ctx, doc, version, NULL); +} + +fz_buffer *pdf_zugferd_xml(fz_context *ctx, pdf_document *doc) +{ + char *fname; + float version; + enum pdf_zugferd_profile p = do_zugferd_profile(ctx, doc, &version, &fname); + int count, i; + fz_buffer *buf = NULL; + + if (p == PDF_NOT_ZUGFERD) + { + fz_free(ctx, fname); + return NULL; + } + + fz_try(ctx) + { + count = pdf_count_document_associated_files(ctx, doc); + for (i = 0; i < count; i++) + { + pdf_obj *fs = pdf_document_associated_file(ctx, doc, i); + pdf_filespec_params params; + + pdf_get_filespec_params(ctx, fs, ¶ms); + + if (!strcmp(fname, params.filename)) + { + if (!pdf_is_embedded_file(ctx, fs)) + fz_throw(ctx, FZ_ERROR_FORMAT, "ZUGFeRD XML was not embedded"); + + buf = pdf_load_embedded_file_contents(ctx, fs); + break; + } + } + } + fz_always(ctx) + fz_free(ctx, fname); + fz_catch(ctx) + fz_rethrow(ctx); + + return buf; +} + +const char * +pdf_zugferd_profile_to_string(fz_context *ctx, enum pdf_zugferd_profile profile) +{ + static const char *strings[] = + { + "NOT ZUGFERD", + "COMFORT", + "BASIC", + "EXTENDED", + "BASIC WL", + "MINIMUM", + "XRECHNUNG", + "UNKNOWN" + }; + + if (profile < PDF_NOT_ZUGFERD || profile > PDF_ZUGFERD_UNKNOWN) + profile = PDF_ZUGFERD_UNKNOWN; + + return strings[profile]; +}
