comparison mupdf-source/source/xps/xps-doc.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2024 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 #include "mupdf/fitz.h"
24 #include "xps-imp.h"
25
26 #include <string.h>
27 #include <stdlib.h>
28
/* Relationship type URIs from the Microsoft XPS (2005/06) schema. */
#define REL_START_PART "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation"
#define REL_DOC_STRUCTURE "http://schemas.microsoft.com/xps/2005/06/documentstructure"
#define REL_REQUIRED_RESOURCE "http://schemas.microsoft.com/xps/2005/06/required-resource"
#define REL_REQUIRED_RESOURCE_RECURSIVE "http://schemas.microsoft.com/xps/2005/06/required-resource#recursive"

/* Relationship type URIs from the OpenXPS v1.0 schema. */
#define REL_START_PART_OXPS "http://schemas.openxps.org/oxps/v1.0/fixedrepresentation"
#define REL_DOC_STRUCTURE_OXPS "http://schemas.openxps.org/oxps/v1.0/documentstructure"
42
43 static void
44 xps_rels_for_part(fz_context *ctx, xps_document *doc, char *buf, char *name, int buflen)
45 {
46 char *p, *basename;
47 p = strrchr(name, '/');
48 basename = p ? p + 1 : name;
49 fz_strlcpy(buf, name, buflen);
50 p = strrchr(buf, '/');
51 if (p) *p = 0;
52 fz_strlcat(buf, "/_rels/", buflen);
53 fz_strlcat(buf, basename, buflen);
54 fz_strlcat(buf, ".rels", buflen);
55 }
56
57 /*
58 * The FixedDocumentSequence and FixedDocument parts determine
59 * which parts correspond to actual pages, and the page order.
60 */
61
62 static void
63 xps_add_fixed_document(fz_context *ctx, xps_document *doc, char *name)
64 {
65 xps_fixdoc *fixdoc;
66
67 /* Check for duplicates first */
68 for (fixdoc = doc->first_fixdoc; fixdoc; fixdoc = fixdoc->next)
69 if (!strcmp(fixdoc->name, name))
70 return;
71
72 fixdoc = fz_malloc_struct(ctx, xps_fixdoc);
73 fz_try(ctx)
74 {
75 fixdoc->name = fz_strdup(ctx, name);
76 fixdoc->outline = NULL;
77 fixdoc->next = NULL;
78 }
79 fz_catch(ctx)
80 {
81 fz_free(ctx, fixdoc);
82 fz_rethrow(ctx);
83 }
84
85 if (!doc->first_fixdoc)
86 {
87 doc->first_fixdoc = fixdoc;
88 doc->last_fixdoc = fixdoc;
89 }
90 else
91 {
92 doc->last_fixdoc->next = fixdoc;
93 doc->last_fixdoc = fixdoc;
94 }
95 }
96
97 static void
98 xps_add_fixed_page(fz_context *ctx, xps_document *doc, char *name, int width, int height)
99 {
100 xps_fixpage *page;
101
102 /* Check for duplicates first */
103 for (page = doc->first_page; page; page = page->next)
104 if (!strcmp(page->name, name))
105 return;
106
107 page = fz_malloc_struct(ctx, xps_fixpage);
108 page->name = NULL;
109
110 fz_try(ctx)
111 {
112 page->name = fz_strdup(ctx, name);
113 page->number = doc->page_count++;
114 page->width = width;
115 page->height = height;
116 page->next = NULL;
117 }
118 fz_catch(ctx)
119 {
120 fz_free(ctx, page->name);
121 fz_free(ctx, page);
122 fz_rethrow(ctx);
123 }
124
125 if (!doc->first_page)
126 {
127 doc->first_page = page;
128 doc->last_page = page;
129 }
130 else
131 {
132 doc->last_page->next = page;
133 doc->last_page = page;
134 }
135 }
136
137 static void
138 xps_add_link_target(fz_context *ctx, xps_document *doc, char *name)
139 {
140 xps_fixpage *page = doc->last_page;
141 xps_target *target;
142
143 if (page == NULL)
144 {
145 fz_warn(ctx, "Dropping link target with no page");
146 return;
147 }
148
149 target = fz_malloc_struct(ctx, xps_target);
150
151 fz_try(ctx)
152 {
153 target->name = fz_strdup(ctx, name);
154 target->page = page->number;
155 target->next = doc->target;
156 }
157 fz_catch(ctx)
158 {
159 fz_free(ctx, target);
160 fz_rethrow(ctx);
161 }
162
163 doc->target = target;
164 }
165
166 fz_link_dest
167 xps_lookup_link_target(fz_context *ctx, fz_document *doc_, const char *target_uri)
168 {
169 xps_document *doc = (xps_document*)doc_;
170 xps_target *target;
171 const char *needle = strrchr(target_uri, '#');
172 needle = needle ? needle + 1 : target_uri;
173 for (target = doc->target; target; target = target->next)
174 if (!strcmp(target->name, needle))
175 return fz_make_link_dest_xyz(0, target->page, 0, 0, 0);
176 return fz_make_link_dest_xyz(0, fz_atoi(needle) - 1, 0, 0, 0);
177 }
178
179 static void
180 xps_drop_link_targets(fz_context *ctx, xps_document *doc)
181 {
182 xps_target *target = doc->target, *next;
183 while (target)
184 {
185 next = target->next;
186 fz_free(ctx, target->name);
187 fz_free(ctx, target);
188 target = next;
189 }
190 }
191
192 static void
193 xps_drop_fixed_pages(fz_context *ctx, xps_document *doc)
194 {
195 xps_fixpage *page = doc->first_page;
196 while (page)
197 {
198 xps_fixpage *next = page->next;
199 fz_free(ctx, page->name);
200 fz_free(ctx, page);
201 page = next;
202 }
203 doc->first_page = NULL;
204 doc->last_page = NULL;
205 }
206
207 static void
208 xps_drop_fixed_documents(fz_context *ctx, xps_document *doc)
209 {
210 xps_fixdoc *fixdoc = doc->first_fixdoc;
211 while (fixdoc)
212 {
213 xps_fixdoc *next = fixdoc->next;
214 fz_free(ctx, fixdoc->name);
215 fz_free(ctx, fixdoc->outline);
216 fz_free(ctx, fixdoc);
217 fixdoc = next;
218 }
219 doc->first_fixdoc = NULL;
220 doc->last_fixdoc = NULL;
221 }
222
223 void
224 xps_drop_page_list(fz_context *ctx, xps_document *doc)
225 {
226 xps_drop_fixed_documents(ctx, doc);
227 xps_drop_fixed_pages(ctx, doc);
228 xps_drop_link_targets(ctx, doc);
229 }
230
231 /*
232 * Parse the fixed document sequence structure and _rels/.rels to find the start part.
233 */
234
235 static void
236 xps_parse_metadata_imp(fz_context *ctx, xps_document *doc, fz_xml *item, xps_fixdoc *fixdoc)
237 {
238 while (item)
239 {
240 if (fz_xml_is_tag(item, "Relationship"))
241 {
242 char *target = fz_xml_att(item, "Target");
243 char *type = fz_xml_att(item, "Type");
244 if (target && type)
245 {
246 char tgtbuf[1024];
247 xps_resolve_url(ctx, doc, tgtbuf, doc->base_uri, target, sizeof tgtbuf);
248 if (!strcmp(type, REL_START_PART) || !strcmp(type, REL_START_PART_OXPS))
249 {
250 fz_free(ctx, doc->start_part);
251 doc->start_part = fz_strdup(ctx, tgtbuf);
252 }
253 if ((!strcmp(type, REL_DOC_STRUCTURE) || !strcmp(type, REL_DOC_STRUCTURE_OXPS)) && fixdoc)
254 fixdoc->outline = fz_strdup(ctx, tgtbuf);
255 if (!fz_xml_att(item, "Id"))
256 fz_warn(ctx, "missing relationship id for %s", target);
257 }
258 }
259
260 if (fz_xml_is_tag(item, "DocumentReference"))
261 {
262 char *source = fz_xml_att(item, "Source");
263 if (source)
264 {
265 char srcbuf[1024];
266 xps_resolve_url(ctx, doc, srcbuf, doc->base_uri, source, sizeof srcbuf);
267 xps_add_fixed_document(ctx, doc, srcbuf);
268 }
269 }
270
271 if (fz_xml_is_tag(item, "PageContent"))
272 {
273 char *source = fz_xml_att(item, "Source");
274 char *width_att = fz_xml_att(item, "Width");
275 char *height_att = fz_xml_att(item, "Height");
276 int width = width_att ? atoi(width_att) : 0;
277 int height = height_att ? atoi(height_att) : 0;
278 if (source)
279 {
280 char srcbuf[1024];
281 xps_resolve_url(ctx, doc, srcbuf, doc->base_uri, source, sizeof srcbuf);
282 xps_add_fixed_page(ctx, doc, srcbuf, width, height);
283 }
284 }
285
286 if (fz_xml_is_tag(item, "LinkTarget"))
287 {
288 char *name = fz_xml_att(item, "Name");
289 if (name)
290 xps_add_link_target(ctx, doc, name);
291 }
292
293 xps_parse_metadata_imp(ctx, doc, fz_xml_down(item), fixdoc);
294
295 item = fz_xml_next(item);
296 }
297 }
298
299 static void
300 xps_parse_metadata(fz_context *ctx, xps_document *doc, xps_part *part, xps_fixdoc *fixdoc)
301 {
302 fz_xml_doc *xml;
303 char buf[1024];
304 char *s;
305
306 /* Save directory name part */
307 fz_strlcpy(buf, part->name, sizeof buf);
308 s = strrchr(buf, '/');
309 if (s)
310 s[0] = 0;
311
312 /* _rels parts are voodoo: their URI references are from
313 * the part they are associated with, not the actual _rels
314 * part being parsed.
315 */
316 s = strstr(buf, "/_rels");
317 if (s)
318 *s = 0;
319
320 doc->base_uri = buf;
321 doc->part_uri = part->name;
322
323 xml = fz_parse_xml(ctx, part->data, 0);
324 fz_try(ctx)
325 {
326 xps_parse_metadata_imp(ctx, doc, fz_xml_root(xml), fixdoc);
327 }
328 fz_always(ctx)
329 {
330 fz_drop_xml(ctx, xml);
331 doc->base_uri = NULL;
332 doc->part_uri = NULL;
333 }
334 fz_catch(ctx)
335 fz_rethrow(ctx);
336 }
337
338 static void
339 xps_read_and_process_metadata_part(fz_context *ctx, xps_document *doc, char *name, xps_fixdoc *fixdoc)
340 {
341 xps_part *part;
342
343 if (!xps_has_part(ctx, doc, name))
344 return;
345
346 part = xps_read_part(ctx, doc, name);
347 fz_try(ctx)
348 {
349 xps_parse_metadata(ctx, doc, part, fixdoc);
350 }
351 fz_always(ctx)
352 {
353 xps_drop_part(ctx, doc, part);
354 }
355 fz_catch(ctx)
356 {
357 fz_rethrow(ctx);
358 }
359 }
360
361 void
362 xps_read_page_list(fz_context *ctx, xps_document *doc)
363 {
364 xps_fixdoc *fixdoc;
365
366 xps_read_and_process_metadata_part(ctx, doc, "/_rels/.rels", NULL);
367
368 if (!doc->start_part)
369 fz_throw(ctx, FZ_ERROR_FORMAT, "cannot find fixed document sequence start part");
370
371 xps_read_and_process_metadata_part(ctx, doc, doc->start_part, NULL);
372
373 for (fixdoc = doc->first_fixdoc; fixdoc; fixdoc = fixdoc->next)
374 {
375 char relbuf[1024];
376 fz_try(ctx)
377 {
378 xps_rels_for_part(ctx, doc, relbuf, fixdoc->name, sizeof relbuf);
379 xps_read_and_process_metadata_part(ctx, doc, relbuf, fixdoc);
380 }
381 fz_catch(ctx)
382 {
383 fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
384 fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
385 fz_report_error(ctx);
386 fz_warn(ctx, "cannot process FixedDocument rels part");
387 }
388 xps_read_and_process_metadata_part(ctx, doc, fixdoc->name, fixdoc);
389 }
390 }
391
392 int
393 xps_count_pages(fz_context *ctx, fz_document *doc_, int chapter)
394 {
395 xps_document *doc = (xps_document*)doc_;
396 return doc->page_count;
397 }
398
399 static fz_xml_doc *
400 xps_load_fixed_page(fz_context *ctx, xps_document *doc, xps_fixpage *page)
401 {
402 xps_part *part;
403 fz_xml_doc *xml = NULL;
404 fz_xml *root;
405 char *width_att;
406 char *height_att;
407
408 part = xps_read_part(ctx, doc, page->name);
409 fz_try(ctx)
410 {
411 xml = fz_parse_xml(ctx, part->data, 0);
412
413 root = fz_xml_root(xml);
414 if (!root)
415 fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing root element");
416
417 if (fz_xml_is_tag(root, "AlternateContent"))
418 {
419 fz_xml *node = xps_lookup_alternate_content(ctx, doc, root);
420 if (!node)
421 fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing alternate root element");
422 fz_detach_xml(ctx, node);
423 root = node;
424 }
425
426 if (!fz_xml_is_tag(root, "FixedPage"))
427 fz_throw(ctx, FZ_ERROR_FORMAT, "expected FixedPage element");
428 width_att = fz_xml_att(root, "Width");
429 if (!width_att)
430 fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing required attribute: Width");
431 height_att = fz_xml_att(root, "Height");
432 if (!height_att)
433 fz_throw(ctx, FZ_ERROR_FORMAT, "FixedPage missing required attribute: Height");
434
435 page->width = atoi(width_att);
436 page->height = atoi(height_att);
437 }
438 fz_always(ctx)
439 {
440 xps_drop_part(ctx, doc, part);
441 }
442 fz_catch(ctx)
443 {
444 fz_drop_xml(ctx, xml);
445 fz_rethrow(ctx);
446 }
447
448 return xml;
449 }
450
451 static fz_rect
452 xps_bound_page(fz_context *ctx, fz_page *page_, fz_box_type box)
453 {
454 xps_page *page = (xps_page*)page_;
455 fz_rect bounds;
456 bounds.x0 = bounds.y0 = 0;
457 bounds.x1 = page->fix->width * 72.0f / 96.0f;
458 bounds.y1 = page->fix->height * 72.0f / 96.0f;
459 return bounds;
460 }
461
462 static void
463 xps_drop_page_imp(fz_context *ctx, fz_page *page_)
464 {
465 xps_page *page = (xps_page*)page_;
466 fz_drop_xml(ctx, page->xml);
467 }
468
469 fz_page *
470 xps_load_page(fz_context *ctx, fz_document *doc_, int chapter, int number)
471 {
472 xps_document *doc = (xps_document*)doc_;
473 xps_page *page = NULL;
474 xps_fixpage *fix;
475 fz_xml_doc *xml;
476 int n = 0;
477
478 fz_var(page);
479
480 for (fix = doc->first_page; fix; fix = fix->next)
481 {
482 if (n == number)
483 {
484 xml = xps_load_fixed_page(ctx, doc, fix);
485 fz_try(ctx)
486 {
487 page = fz_new_derived_page(ctx, xps_page, doc_);
488 page->super.load_links = xps_load_links;
489 page->super.bound_page = xps_bound_page;
490 page->super.run_page_contents = xps_run_page;
491 page->super.drop_page = xps_drop_page_imp;
492
493 page->fix = fix;
494 page->xml = xml;
495 }
496 fz_catch(ctx)
497 {
498 fz_drop_xml(ctx, xml);
499 fz_rethrow(ctx);
500 }
501 return (fz_page*)page;
502 }
503 n ++;
504 }
505
506 fz_throw(ctx, FZ_ERROR_ARGUMENT, "cannot find page %d", number + 1);
507 }
508
/* NULL-terminated list of file extensions registered for this handler. */
static const char *xps_extensions[] = { "oxps", "xps", NULL };
515
/* NULL-terminated list of MIME types registered for this handler. */
static const char *xps_mimetypes[] = {
	"application/oxps",
	"application/vnd.ms-xpsdocument",
	"application/xps",
	NULL
};
523
524 static int
525 xps_recognize_doc_content(fz_context *ctx, const fz_document_handler *handler, fz_stream *stream, fz_archive *dir, void **state, fz_document_recognize_state_free_fn **free_state)
526 {
527 fz_archive *arch = NULL;
528 int ret = 0;
529 fz_xml *xml = NULL;
530 fz_xml *pos;
531
532 if (state)
533 *state = NULL;
534 if (free_state)
535 *free_state = NULL;
536
537 fz_var(arch);
538 fz_var(ret);
539 fz_var(xml);
540
541 fz_try(ctx)
542 {
543 int i, count;
544 const char *name;
545
546 if (stream == NULL)
547 arch = fz_keep_archive(ctx, dir);
548 else
549 {
550 arch = fz_try_open_archive_with_stream(ctx, stream);
551 if (arch == NULL)
552 break;
553 }
554
555 xml = fz_try_parse_xml_archive_entry(ctx, arch, "/_rels/.rels", 0);
556 if (xml == NULL)
557 xml = fz_try_parse_xml_archive_entry(ctx, arch, "\\_rels\\.rels", 0);
558
559 if (xml)
560 {
561 pos = fz_xml_find_dfs(xml, "Relationship", "Type", "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation");
562 if (pos)
563 ret = 100;
564 break;
565 }
566
567 /* Cope with tricksy XPS's have the rels in multiple bits. */
568 count = fz_count_archive_entries(ctx, arch);
569
570 for (i = 0; i < count; i++)
571 {
572 name = fz_list_archive_entry(ctx, arch, i);
573 if (!name)
574 continue;
575 if (strncmp(name, "/_rels/.rels/", 13) == 0 ||
576 strncmp(name, "_rels/.rels/", 12) == 0 ||
577 strncmp(name, "\\_rels\\.rels\\", 13) == 0 ||
578 strncmp(name, "_rels\\.rels\\", 12) == 0)
579 {
580 xml = fz_try_parse_xml_archive_entry(ctx, arch, name, 0);
581 if (xml)
582 {
583 pos = fz_xml_find_dfs(xml, "Relationship", "Type", "http://schemas.microsoft.com/xps/2005/06/fixedrepresentation");
584 if (pos)
585 {
586 ret = 100;
587 break;
588 }
589 fz_drop_xml(ctx, xml);
590 xml = NULL;
591 }
592 }
593 }
594 }
595 fz_always(ctx)
596 {
597 fz_drop_xml(ctx, xml);
598 fz_drop_archive(ctx, arch);
599 }
600 fz_catch(ctx)
601 fz_rethrow(ctx);
602
603 return ret;
604 }
605
606 static fz_document *
607 xps_open(fz_context *ctx, const fz_document_handler *handler, fz_stream *file, fz_stream *accel, fz_archive *dir, void *state)
608 {
609 if (file)
610 return xps_open_document_with_stream(ctx, file);
611 else
612 return xps_open_document_with_directory(ctx, dir);
613 }
614
615 fz_document_handler xps_document_handler =
616 {
617 NULL,
618 xps_open,
619 xps_extensions,
620 xps_mimetypes,
621 xps_recognize_doc_content
622 };