comparison mupdf-source/source/pdf/pdf-zugferd.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2024-2025 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "mupdf/pdf.h"
25
26 static const char *
27 tag_or_text(fz_xml *x, const char *find)
28 {
29 const char *text;
30 const char *f = strchr(find, ':');
31
32 /* If we find a : we have a namespace. Search for both with and
33 * without the namespace. */
34 if (f)
35 f++;
36
37 text = fz_xml_att(x, find);
38 if (text == NULL && f)
39 text = fz_xml_att(x, f);
40 if (text == NULL)
41 text = fz_xml_text(fz_xml_down(fz_xml_find_down(x, find)));
42 if (text == NULL && f)
43 text = fz_xml_text(fz_xml_down(fz_xml_find_down(x, f)));
44
45 return text;
46 }
47
48 static enum pdf_zugferd_profile
49 do_zugferd_profile(fz_context *ctx, pdf_document *doc, float *version, char **fname)
50 {
51 pdf_obj *metadata = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Metadata), NULL);
52 fz_buffer *buf;
53 fz_xml *xml = NULL;
54 fz_xml *x;
55 enum pdf_zugferd_profile ret = PDF_NOT_ZUGFERD;
56
57 if (version)
58 *version = 0;
59 if (fname)
60 *fname = NULL;
61
62 if (metadata == NULL)
63 return PDF_NOT_ZUGFERD;
64
65 buf = pdf_load_stream(ctx, metadata);
66
67 fz_var(xml);
68
69 fz_try(ctx)
70 {
71 xml = fz_parse_xml(ctx, buf, 0);
72
73 /* Version 1. */
74 x = fz_xml_find_dfs(xml, "Description", "xmlns:zf", "urn:ferd:pdfa:CrossIndustryDocument:invoice:1p0#");
75 if (x)
76 {
77 while (x)
78 {
79 /* The Version tag in the document appears to always be 1.0 */
80 const char *v = tag_or_text(x, "zf:Version");
81 const char *cl = tag_or_text(x, "zf:ConformanceLevel");
82 const char *df = tag_or_text(x, "zf:DocumentFileName");
83 const char *dt = tag_or_text(x, "zf:DocumentType");
84 if (v && dt && !strcmp(dt, "INVOICE"))
85 {
86 if (!cl)
87 fz_warn(ctx, "No conformance level specified");
88 else if (!strcmp(cl, "COMFORT"))
89 ret = PDF_ZUGFERD_COMFORT;
90 else if (!strcmp(cl, "BASIC"))
91 ret = PDF_ZUGFERD_BASIC;
92 else if (!strcmp(cl, "EXTENDED"))
93 ret = PDF_ZUGFERD_EXTENDED;
94
95 if (version)
96 *version = fz_atof(v);
97
98 if (!df)
99 fz_warn(ctx, "ZUGFeRD doc is missing filename");
100 else if (strcmp(df, "ZUGFeRD-invoice.xml"))
101 fz_warn(ctx, "ZUGFeRD doc has non-standard filename");
102 if (fname && df)
103 *fname = fz_strdup(ctx, df); /* Nothing can throw after this */
104 break;
105 }
106
107 x = fz_xml_find_next_dfs(x, "Description", "xmlns:zf", "urn:ferd:pdfa:CrossIndustryDocument:invoice:1p0#");
108 }
109 break;
110 }
111
112 /* Version 2. */
113 x = fz_xml_find_dfs(xml, "Description", "xmlns:fx", "urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0#");
114 if (x)
115 {
116 while (x)
117 {
118 const char *v = tag_or_text(x, "fx:Version");
119 const char *cl = tag_or_text(x, "fx:ConformanceLevel");
120 const char *df = tag_or_text(x, "fx:DocumentFileName");
121 const char *dt = tag_or_text(x, "fx:DocumentType");
122 if (v && dt && !strcmp(dt, "INVOICE"))
123 {
124 if (!cl)
125 fz_warn(ctx, "No conformance level specified");
126 else if (!strcmp(cl, "EN 16931"))
127 ret = PDF_ZUGFERD_COMFORT;
128 else if (!strcmp(cl, "BASIC"))
129 ret = PDF_ZUGFERD_BASIC;
130 else if (!strcmp(cl, "EXTENDED"))
131 ret = PDF_ZUGFERD_EXTENDED;
132 else if (!strcmp(cl, "BASIC WL"))
133 ret = PDF_ZUGFERD_BASIC_WL;
134 else if (!strcmp(cl, "MINIMUM"))
135 ret = PDF_ZUGFERD_MINIMUM;
136
137 if (version)
138 *version = fz_atof(v);
139
140 if (!df)
141 fz_warn(ctx, "ZUGFeRD doc is missing filename");
142 else if (strcmp(df, "zugferd-invoice.xml"))
143 fz_warn(ctx, "ZUGFeRD doc has non-standard filename");
144 if (fname && df)
145 *fname = fz_strdup(ctx, df); /* Nothing can throw after this */
146 break;
147 }
148
149 x = fz_xml_find_next_dfs(x, "Description", "xmlns:fx", "urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0#");
150 }
151 break;
152 }
153
154 /* Version 2.1 + 2.11 */
155 x = fz_xml_find_dfs(xml, "Description", "xmlns:fx", "urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#");
156 if (x)
157 {
158 while (x)
159 {
160 const char *v = tag_or_text(x, "fx:Version");
161 const char *cl = tag_or_text(x, "fx:ConformanceLevel");
162 const char *df = tag_or_text(x, "fx:DocumentFileName");
163 const char *dt = tag_or_text(x, "fx:DocumentType");
164 if (v && dt && !strcmp(dt, "INVOICE"))
165 {
166 if (!cl)
167 fz_warn(ctx, "No conformance level specified");
168 else if (!strcmp(cl, "EN 16931"))
169 ret = PDF_ZUGFERD_COMFORT;
170 else if (!strcmp(cl, "BASIC"))
171 ret = PDF_ZUGFERD_BASIC;
172 else if (!strcmp(cl, "EXTENDED"))
173 ret = PDF_ZUGFERD_EXTENDED;
174 else if (!strcmp(cl, "BASIC WL"))
175 ret = PDF_ZUGFERD_BASIC_WL;
176 else if (!strcmp(cl, "MINIMUM"))
177 ret = PDF_ZUGFERD_MINIMUM;
178 else if (!strcmp(cl, "XRECHNUNG"))
179 ret = PDF_ZUGFERD_XRECHNUNG;
180
181 if (version)
182 *version = fz_atof(v);
183
184 if (!df)
185 fz_warn(ctx, "ZUGFeRD doc is missing filename");
186 else if (ret == PDF_ZUGFERD_XRECHNUNG && strcmp(df, "xrechnung.xml"))
187 fz_warn(ctx, "ZUGFeRD doc has non-standard filename");
188 else if (ret != PDF_ZUGFERD_XRECHNUNG && strcmp(df, "factur-x.xml"))
189 fz_warn(ctx, "ZUGFeRD doc has non-standard filename");
190 if (fname && df)
191 *fname = fz_strdup(ctx, df); /* Nothing can throw after this */
192 break;
193 }
194
195 x = fz_xml_find_next_dfs(x, "Description", "xmlns:fx", "urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#");
196 }
197 break;
198 }
199 }
200 fz_always(ctx)
201 {
202 fz_drop_xml(ctx, xml);
203 fz_drop_buffer(ctx, buf);
204 }
205 fz_catch(ctx)
206 fz_rethrow(ctx);
207
208 return ret;
209 }
210
211 enum pdf_zugferd_profile pdf_zugferd_profile(fz_context *ctx, pdf_document *doc, float *version)
212 {
213 return do_zugferd_profile(ctx, doc, version, NULL);
214 }
215
216 fz_buffer *pdf_zugferd_xml(fz_context *ctx, pdf_document *doc)
217 {
218 char *fname;
219 float version;
220 enum pdf_zugferd_profile p = do_zugferd_profile(ctx, doc, &version, &fname);
221 int count, i;
222 fz_buffer *buf = NULL;
223
224 if (p == PDF_NOT_ZUGFERD)
225 {
226 fz_free(ctx, fname);
227 return NULL;
228 }
229
230 fz_try(ctx)
231 {
232 count = pdf_count_document_associated_files(ctx, doc);
233 for (i = 0; i < count; i++)
234 {
235 pdf_obj *fs = pdf_document_associated_file(ctx, doc, i);
236 pdf_filespec_params params;
237
238 pdf_get_filespec_params(ctx, fs, &params);
239
240 if (!strcmp(fname, params.filename))
241 {
242 if (!pdf_is_embedded_file(ctx, fs))
243 fz_throw(ctx, FZ_ERROR_FORMAT, "ZUGFeRD XML was not embedded");
244
245 buf = pdf_load_embedded_file_contents(ctx, fs);
246 break;
247 }
248 }
249 }
250 fz_always(ctx)
251 fz_free(ctx, fname);
252 fz_catch(ctx)
253 fz_rethrow(ctx);
254
255 return buf;
256 }
257
258 const char *
259 pdf_zugferd_profile_to_string(fz_context *ctx, enum pdf_zugferd_profile profile)
260 {
261 static const char *strings[] =
262 {
263 "NOT ZUGFERD",
264 "COMFORT",
265 "BASIC",
266 "EXTENDED",
267 "BASIC WL",
268 "MINIMUM",
269 "XRECHNUNG",
270 "UNKNOWN"
271 };
272
273 if (profile < PDF_NOT_ZUGFERD || profile > PDF_ZUGFERD_UNKNOWN)
274 profile = PDF_ZUGFERD_UNKNOWN;
275
276 return strings[profile];
277 }