comparison mupdf-source/source/tools/pdfinfo.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 // Copyright (C) 2004-2021 Artifex Software, Inc.
2 //
3 // This file is part of MuPDF.
4 //
5 // MuPDF is free software: you can redistribute it and/or modify it under the
6 // terms of the GNU Affero General Public License as published by the Free
7 // Software Foundation, either version 3 of the License, or (at your option)
8 // any later version.
9 //
10 // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13 // details.
14 //
15 // You should have received a copy of the GNU Affero General Public License
16 // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17 //
18 // Alternative licensing terms are available from the licensor.
19 // For commercial licensing, see <https://www.artifex.com/> or contact
20 // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21 // CA 94129, USA, for further information.
22
23 /*
24 * Information tool.
25 * Print information about the input pdf.
26 */
27
28 #include "mupdf/fitz.h"
29 #include "mupdf/pdf.h"
30
31 #include <string.h>
32 #include <stdlib.h>
33 #include <stdio.h>
34
/*
 * Bit flags selecting which categories of information are gathered
 * and printed. ALL is the union of every individual flag.
 */
enum
{
	DIMENSIONS = 1 << 0,
	FONTS = 1 << 1,
	IMAGES = 1 << 2,
	SHADINGS = 1 << 3,
	PATTERNS = 1 << 4,
	XOBJS = 1 << 5,
	ZUGFERD = 1 << 6,
	ALL = DIMENSIONS | FONTS | IMAGES | SHADINGS | PATTERNS | XOBJS | ZUGFERD
};
46
/*
 * One gathered item (a media box, font, image, shading, pattern or
 * xobject). Which member of the union is meaningful depends on the
 * list in 'globals' that the entry is stored in.
 */
struct info
{
	/* Page number handed to the gather function that created the entry
	 * (gatherpageinfo looks the page up with page-1, so it is 1-based). */
	int page;
	/* Page object for that page. */
	pdf_obj *pageref;
	union {
		/* Not referenced by the code in this file. */
		struct {
			pdf_obj *obj;
		} info;
		/* Not referenced by the code in this file. */
		struct {
			pdf_obj *obj;
		} crypt;
		/* Entry of the 'dim' list. */
		struct {
			/* Not assigned by gatherdimensions. */
			pdf_obj *obj;
			/* Heap-allocated media box scaled by UserUnit; freed by clearinfo. */
			fz_rect *bbox;
		} dim;
		/* Entry of the 'font' list. */
		struct {
			/* The font dictionary itself (used for duplicate detection). */
			pdf_obj *obj;
			/* Value of the Subtype key. */
			pdf_obj *subtype;
			/* BaseFont, or the Name key when BaseFont is absent or null. */
			pdf_obj *name;
			/* Encoding, or its BaseEncoding when Encoding is a dictionary. */
			pdf_obj *encoding;
		} font;
		/* Entry of the 'image' list. */
		struct {
			/* The image dictionary itself (used for duplicate detection). */
			pdf_obj *obj;
			/* Value of the Width key. */
			pdf_obj *width;
			/* Value of the Height key. */
			pdf_obj *height;
			/* Value of the BitsPerComponent key; may be NULL. */
			pdf_obj *bpc;
			/* Value of the Filter key (a name or an array); may be NULL. */
			pdf_obj *filter;
			/* ColorSpace, or its first element when ColorSpace is an array. */
			pdf_obj *cs;
			/* Alternate colorspace of a DeviceN/Separation space, else NULL. */
			pdf_obj *altcs;
		} image;
		/* Entry of the 'shading' list. */
		struct {
			/* The shading dictionary itself. */
			pdf_obj *obj;
			/* ShadingType if it is an integer in 1..7, otherwise NULL. */
			pdf_obj *type;
		} shading;
		/* Entry of the 'pattern' list. */
		struct {
			/* The pattern dictionary itself. */
			pdf_obj *obj;
			/* PatternType if it is an integer in 1..2, otherwise NULL. */
			pdf_obj *type;
			/* PaintType (1..2) of a tiling pattern, otherwise NULL. */
			pdf_obj *paint;
			/* TilingType (1..3) of a tiling pattern, otherwise NULL. */
			pdf_obj *tiling;
			/* Shading key of a non-tiling pattern, otherwise NULL. */
			pdf_obj *shading;
		} pattern;
		/* Entry of the 'form' list; 'psobj' entries use only 'obj'. */
		struct {
			/* The xobject dictionary itself. */
			pdf_obj *obj;
			/* S key of the xobject's Group dictionary; may be NULL. */
			pdf_obj *groupsubtype;
			/* Value of the Ref key; may be NULL. */
			pdf_obj *reference;
		} form;
	} u;
};
95
/*
 * State shared by all functions of the tool: the open document, the
 * output stream and one growable array (pointer plus element count)
 * per category of gathered information.
 */
typedef struct
{
	/* Currently open document, or NULL; dropped by closexref. */
	pdf_document *doc;
	/* Context stored by pdfinfo_info. */
	fz_context *ctx;
	/* Destination for all report output. */
	fz_output *out;
	/* Page count of 'doc', filled in after the document is opened. */
	int pagecount;
	/* Distinct media boxes and their count. */
	struct info *dim;
	int dims;
	/* Distinct fonts and their count. */
	struct info *font;
	int fonts;
	/* Distinct images and their count. */
	struct info *image;
	int images;
	/* Distinct shadings and their count. */
	struct info *shading;
	int shadings;
	/* Distinct patterns and their count. */
	struct info *pattern;
	int patterns;
	/* Distinct form xobjects and their count. */
	struct info *form;
	int forms;
	/* Distinct PostScript xobjects and their count. */
	struct info *psobj;
	int psobjs;
} globals;
117
118 static void clearinfo(fz_context *ctx, globals *glo)
119 {
120 int i;
121
122 if (glo->dim)
123 {
124 for (i = 0; i < glo->dims; i++)
125 fz_free(ctx, glo->dim[i].u.dim.bbox);
126 fz_free(ctx, glo->dim);
127 glo->dim = NULL;
128 glo->dims = 0;
129 }
130
131 if (glo->font)
132 {
133 fz_free(ctx, glo->font);
134 glo->font = NULL;
135 glo->fonts = 0;
136 }
137
138 if (glo->image)
139 {
140 fz_free(ctx, glo->image);
141 glo->image = NULL;
142 glo->images = 0;
143 }
144
145 if (glo->shading)
146 {
147 fz_free(ctx, glo->shading);
148 glo->shading = NULL;
149 glo->shadings = 0;
150 }
151
152 if (glo->pattern)
153 {
154 fz_free(ctx, glo->pattern);
155 glo->pattern = NULL;
156 glo->patterns = 0;
157 }
158
159 if (glo->form)
160 {
161 fz_free(ctx, glo->form);
162 glo->form = NULL;
163 glo->forms = 0;
164 }
165
166 if (glo->psobj)
167 {
168 fz_free(ctx, glo->psobj);
169 glo->psobj = NULL;
170 glo->psobjs = 0;
171 }
172 }
173
174 static void closexref(fz_context *ctx, globals *glo)
175 {
176 if (glo->doc)
177 {
178 pdf_drop_document(ctx, glo->doc);
179 glo->doc = NULL;
180 }
181
182 clearinfo(ctx, glo);
183 }
184
/* Print the command line synopsis of "mutool info" to stderr. */
static void
infousage(void)
{
	static const char usage[] =
		"usage: mutool info [options] file.pdf [pages]\n"
		"\t-p -\tpassword for decryption\n"
		"\t-F\tlist fonts\n"
		"\t-I\tlist images\n"
		"\t-M\tlist dimensions\n"
		"\t-P\tlist patterns\n"
		"\t-S\tlist shadings\n"
		"\t-X\tlist form and postscript xobjects\n"
		"\t-Z\tlist ZUGFeRD info\n"
		"\tpages\tcomma separated list of page numbers and ranges\n";

	fputs(usage, stderr);
}
201
202 static void
203 showglobalinfo(fz_context *ctx, globals *glo)
204 {
205 pdf_obj *obj;
206 fz_output *out = glo->out;
207 pdf_document *doc = glo->doc;
208 int version = pdf_version(ctx, doc);
209
210 fz_write_printf(ctx, out, "\nPDF-%d.%d\n", version / 10, version % 10);
211
212 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
213 if (obj)
214 {
215 fz_write_printf(ctx, out, "Info object (%d 0 R):\n", pdf_to_num(ctx, obj));
216 pdf_print_obj(ctx, out, pdf_resolve_indirect(ctx, obj), 1, 1);
217 }
218
219 obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
220 if (obj)
221 {
222 fz_write_printf(ctx, out, "\nEncryption object (%d 0 R):\n", pdf_to_num(ctx, obj));
223 pdf_print_obj(ctx, out, pdf_resolve_indirect(ctx, obj), 1, 1);
224 }
225
226 fz_write_printf(ctx, out, "\nPages: %d\n\n", glo->pagecount);
227 }
228
229 static void
230 gatherdimensions(fz_context *ctx, globals *glo, int page, pdf_obj *pageref)
231 {
232 fz_rect bbox;
233 pdf_obj *obj;
234 float unit;
235 int j;
236
237 obj = pdf_dict_get(ctx, pageref, PDF_NAME(MediaBox));
238 if (!pdf_is_array(ctx, obj))
239 return;
240
241 bbox = pdf_to_rect(ctx, obj);
242
243 unit = pdf_dict_get_real_default(ctx, pageref, PDF_NAME(UserUnit), 1);
244 bbox.x0 *= unit;
245 bbox.y0 *= unit;
246 bbox.x1 *= unit;
247 bbox.y1 *= unit;
248
249 for (j = 0; j < glo->dims; j++)
250 if (!memcmp(glo->dim[j].u.dim.bbox, &bbox, sizeof (fz_rect)))
251 break;
252
253 if (j < glo->dims)
254 return;
255
256 glo->dim = fz_realloc_array(ctx, glo->dim, glo->dims+1, struct info);
257 glo->dims++;
258
259 glo->dim[glo->dims - 1].page = page;
260 glo->dim[glo->dims - 1].pageref = pageref;
261 glo->dim[glo->dims - 1].u.dim.bbox = NULL;
262 glo->dim[glo->dims - 1].u.dim.bbox = fz_malloc(ctx, sizeof(fz_rect));
263 memcpy(glo->dim[glo->dims - 1].u.dim.bbox, &bbox, sizeof (fz_rect));
264
265 return;
266 }
267
268 static void
269 gatherfonts(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
270 {
271 int i, n;
272
273 n = pdf_dict_len(ctx, dict);
274 for (i = 0; i < n; i++)
275 {
276 pdf_obj *fontdict = NULL;
277 pdf_obj *subtype = NULL;
278 pdf_obj *basefont = NULL;
279 pdf_obj *name = NULL;
280 pdf_obj *encoding = NULL;
281 int k;
282
283 fontdict = pdf_dict_get_val(ctx, dict, i);
284 if (!pdf_is_dict(ctx, fontdict))
285 {
286 fz_warn(ctx, "not a font dict (%d 0 R)", pdf_to_num(ctx, fontdict));
287 continue;
288 }
289
290 subtype = pdf_dict_get(ctx, fontdict, PDF_NAME(Subtype));
291 basefont = pdf_dict_get(ctx, fontdict, PDF_NAME(BaseFont));
292 if (!basefont || pdf_is_null(ctx, basefont))
293 name = pdf_dict_get(ctx, fontdict, PDF_NAME(Name));
294 encoding = pdf_dict_get(ctx, fontdict, PDF_NAME(Encoding));
295 if (pdf_is_dict(ctx, encoding))
296 encoding = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding));
297
298 for (k = 0; k < glo->fonts; k++)
299 if (!pdf_objcmp(ctx, glo->font[k].u.font.obj, fontdict))
300 break;
301
302 if (k < glo->fonts)
303 continue;
304
305 glo->font = fz_realloc_array(ctx, glo->font, glo->fonts+1, struct info);
306 glo->fonts++;
307
308 glo->font[glo->fonts - 1].page = page;
309 glo->font[glo->fonts - 1].pageref = pageref;
310 glo->font[glo->fonts - 1].u.font.obj = fontdict;
311 glo->font[glo->fonts - 1].u.font.subtype = subtype;
312 glo->font[glo->fonts - 1].u.font.name = basefont ? basefont : name;
313 glo->font[glo->fonts - 1].u.font.encoding = encoding;
314 }
315 }
316
317 static void
318 gatherimages(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
319 {
320 int i, n;
321
322 n = pdf_dict_len(ctx, dict);
323 for (i = 0; i < n; i++)
324 {
325 pdf_obj *imagedict;
326 pdf_obj *type;
327 pdf_obj *width;
328 pdf_obj *height;
329 pdf_obj *bpc = NULL;
330 pdf_obj *filter = NULL;
331 pdf_obj *cs = NULL;
332 pdf_obj *altcs;
333 int k;
334
335 imagedict = pdf_dict_get_val(ctx, dict, i);
336 if (!pdf_is_dict(ctx, imagedict))
337 {
338 fz_warn(ctx, "not an image dict (%d 0 R)", pdf_to_num(ctx, imagedict));
339 continue;
340 }
341
342 type = pdf_dict_get(ctx, imagedict, PDF_NAME(Subtype));
343 if (!pdf_name_eq(ctx, type, PDF_NAME(Image)))
344 continue;
345
346 filter = pdf_dict_get(ctx, imagedict, PDF_NAME(Filter));
347
348 altcs = NULL;
349 cs = pdf_dict_get(ctx, imagedict, PDF_NAME(ColorSpace));
350 if (pdf_is_array(ctx, cs))
351 {
352 pdf_obj *cses = cs;
353
354 cs = pdf_array_get(ctx, cses, 0);
355 if (pdf_name_eq(ctx, cs, PDF_NAME(DeviceN)) || pdf_name_eq(ctx, cs, PDF_NAME(Separation)))
356 {
357 altcs = pdf_array_get(ctx, cses, 2);
358 if (pdf_is_array(ctx, altcs))
359 altcs = pdf_array_get(ctx, altcs, 0);
360 }
361 }
362
363 width = pdf_dict_get(ctx, imagedict, PDF_NAME(Width));
364 height = pdf_dict_get(ctx, imagedict, PDF_NAME(Height));
365 bpc = pdf_dict_get(ctx, imagedict, PDF_NAME(BitsPerComponent));
366
367 for (k = 0; k < glo->images; k++)
368 if (!pdf_objcmp(ctx, glo->image[k].u.image.obj, imagedict))
369 break;
370
371 if (k < glo->images)
372 continue;
373
374 glo->image = fz_realloc_array(ctx, glo->image, glo->images+1, struct info);
375 glo->images++;
376
377 glo->image[glo->images - 1].page = page;
378 glo->image[glo->images - 1].pageref = pageref;
379 glo->image[glo->images - 1].u.image.obj = imagedict;
380 glo->image[glo->images - 1].u.image.width = width;
381 glo->image[glo->images - 1].u.image.height = height;
382 glo->image[glo->images - 1].u.image.bpc = bpc;
383 glo->image[glo->images - 1].u.image.filter = filter;
384 glo->image[glo->images - 1].u.image.cs = cs;
385 glo->image[glo->images - 1].u.image.altcs = altcs;
386 }
387 }
388
389 static void
390 gatherforms(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
391 {
392 int i, n;
393
394 n = pdf_dict_len(ctx, dict);
395 for (i = 0; i < n; i++)
396 {
397 pdf_obj *xobjdict;
398 pdf_obj *type;
399 pdf_obj *subtype;
400 pdf_obj *group;
401 pdf_obj *groupsubtype;
402 pdf_obj *reference;
403 int k;
404
405 xobjdict = pdf_dict_get_val(ctx, dict, i);
406 if (!pdf_is_dict(ctx, xobjdict))
407 {
408 fz_warn(ctx, "not a xobject dict (%d 0 R)", pdf_to_num(ctx, xobjdict));
409 continue;
410 }
411
412 type = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype));
413 if (!pdf_name_eq(ctx, type, PDF_NAME(Form)))
414 continue;
415
416 subtype = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype2));
417 if (!pdf_name_eq(ctx, subtype, PDF_NAME(PS)))
418 continue;
419
420 group = pdf_dict_get(ctx, xobjdict, PDF_NAME(Group));
421 groupsubtype = pdf_dict_get(ctx, group, PDF_NAME(S));
422 reference = pdf_dict_get(ctx, xobjdict, PDF_NAME(Ref));
423
424 for (k = 0; k < glo->forms; k++)
425 if (!pdf_objcmp(ctx, glo->form[k].u.form.obj, xobjdict))
426 break;
427
428 if (k < glo->forms)
429 continue;
430
431 glo->form = fz_realloc_array(ctx, glo->form, glo->forms+1, struct info);
432 glo->forms++;
433
434 glo->form[glo->forms - 1].page = page;
435 glo->form[glo->forms - 1].pageref = pageref;
436 glo->form[glo->forms - 1].u.form.obj = xobjdict;
437 glo->form[glo->forms - 1].u.form.groupsubtype = groupsubtype;
438 glo->form[glo->forms - 1].u.form.reference = reference;
439 }
440 }
441
442 static void
443 gatherpsobjs(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
444 {
445 int i, n;
446
447 n = pdf_dict_len(ctx, dict);
448 for (i = 0; i < n; i++)
449 {
450 pdf_obj *xobjdict;
451 pdf_obj *type;
452 pdf_obj *subtype;
453 int k;
454
455 xobjdict = pdf_dict_get_val(ctx, dict, i);
456 if (!pdf_is_dict(ctx, xobjdict))
457 {
458 fz_warn(ctx, "not a xobject dict (%d 0 R)", pdf_to_num(ctx, xobjdict));
459 continue;
460 }
461
462 type = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype));
463 subtype = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype2));
464 if (!pdf_name_eq(ctx, type, PDF_NAME(PS)) &&
465 (!pdf_name_eq(ctx, type, PDF_NAME(Form)) || !pdf_name_eq(ctx, subtype, PDF_NAME(PS))))
466 continue;
467
468 for (k = 0; k < glo->psobjs; k++)
469 if (!pdf_objcmp(ctx, glo->psobj[k].u.form.obj, xobjdict))
470 break;
471
472 if (k < glo->psobjs)
473 continue;
474
475 glo->psobj = fz_realloc_array(ctx, glo->psobj, glo->psobjs+1, struct info);
476 glo->psobjs++;
477
478 glo->psobj[glo->psobjs - 1].page = page;
479 glo->psobj[glo->psobjs - 1].pageref = pageref;
480 glo->psobj[glo->psobjs - 1].u.form.obj = xobjdict;
481 }
482 }
483
484 static void
485 gathershadings(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
486 {
487 int i, n;
488
489 n = pdf_dict_len(ctx, dict);
490 for (i = 0; i < n; i++)
491 {
492 pdf_obj *shade;
493 pdf_obj *type;
494 int k;
495
496 shade = pdf_dict_get_val(ctx, dict, i);
497 if (!pdf_is_dict(ctx, shade))
498 {
499 fz_warn(ctx, "not a shading dict (%d 0 R)", pdf_to_num(ctx, shade));
500 continue;
501 }
502
503 type = pdf_dict_get(ctx, shade, PDF_NAME(ShadingType));
504 if (!pdf_is_int(ctx, type) || pdf_to_int(ctx, type) < 1 || pdf_to_int(ctx, type) > 7)
505 {
506 fz_warn(ctx, "not a shading type (%d 0 R)", pdf_to_num(ctx, shade));
507 type = NULL;
508 }
509
510 for (k = 0; k < glo->shadings; k++)
511 if (!pdf_objcmp(ctx, glo->shading[k].u.shading.obj, shade))
512 break;
513
514 if (k < glo->shadings)
515 continue;
516
517 glo->shading = fz_realloc_array(ctx, glo->shading, glo->shadings+1, struct info);
518 glo->shadings++;
519
520 glo->shading[glo->shadings - 1].page = page;
521 glo->shading[glo->shadings - 1].pageref = pageref;
522 glo->shading[glo->shadings - 1].u.shading.obj = shade;
523 glo->shading[glo->shadings - 1].u.shading.type = type;
524 }
525 }
526
527 static void
528 gatherpatterns(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
529 {
530 int i, n;
531
532 n = pdf_dict_len(ctx, dict);
533 for (i = 0; i < n; i++)
534 {
535 pdf_obj *patterndict;
536 pdf_obj *type;
537 pdf_obj *paint = NULL;
538 pdf_obj *tiling = NULL;
539 pdf_obj *shading = NULL;
540 int k;
541
542 patterndict = pdf_dict_get_val(ctx, dict, i);
543 if (!pdf_is_dict(ctx, patterndict))
544 {
545 fz_warn(ctx, "not a pattern dict (%d 0 R)", pdf_to_num(ctx, patterndict));
546 continue;
547 }
548
549 type = pdf_dict_get(ctx, patterndict, PDF_NAME(PatternType));
550 if (!pdf_is_int(ctx, type) || pdf_to_int(ctx, type) < 1 || pdf_to_int(ctx, type) > 2)
551 {
552 fz_warn(ctx, "not a pattern type (%d 0 R)", pdf_to_num(ctx, patterndict));
553 type = NULL;
554 }
555
556 if (pdf_to_int(ctx, type) == 1)
557 {
558 paint = pdf_dict_get(ctx, patterndict, PDF_NAME(PaintType));
559 if (!pdf_is_int(ctx, paint) || pdf_to_int(ctx, paint) < 1 || pdf_to_int(ctx, paint) > 2)
560 {
561 fz_warn(ctx, "not a pattern paint type (%d 0 R)", pdf_to_num(ctx, patterndict));
562 paint = NULL;
563 }
564
565 tiling = pdf_dict_get(ctx, patterndict, PDF_NAME(TilingType));
566 if (!pdf_is_int(ctx, tiling) || pdf_to_int(ctx, tiling) < 1 || pdf_to_int(ctx, tiling) > 3)
567 {
568 fz_warn(ctx, "not a pattern tiling type (%d 0 R)", pdf_to_num(ctx, patterndict));
569 tiling = NULL;
570 }
571 }
572 else
573 {
574 shading = pdf_dict_get(ctx, patterndict, PDF_NAME(Shading));
575 }
576
577 for (k = 0; k < glo->patterns; k++)
578 if (!pdf_objcmp(ctx, glo->pattern[k].u.pattern.obj, patterndict))
579 break;
580
581 if (k < glo->patterns)
582 continue;
583
584 glo->pattern = fz_realloc_array(ctx, glo->pattern, glo->patterns+1, struct info);
585 glo->patterns++;
586
587 glo->pattern[glo->patterns - 1].page = page;
588 glo->pattern[glo->patterns - 1].pageref = pageref;
589 glo->pattern[glo->patterns - 1].u.pattern.obj = patterndict;
590 glo->pattern[glo->patterns - 1].u.pattern.type = type;
591 glo->pattern[glo->patterns - 1].u.pattern.paint = paint;
592 glo->pattern[glo->patterns - 1].u.pattern.tiling = tiling;
593 glo->pattern[glo->patterns - 1].u.pattern.shading = shading;
594 }
595 }
596
597 static void
598 gatherresourceinfo(fz_context *ctx, pdf_mark_list *mark_list, globals *glo, int page, pdf_obj *obj, int show)
599 {
600 pdf_obj *rsrc;
601 pdf_obj *pageref;
602 pdf_obj *font;
603 pdf_obj *xobj;
604 pdf_obj *shade;
605 pdf_obj *pattern;
606 int i;
607
608 /* stop on cyclic resource dependencies */
609 if (pdf_mark_list_push(ctx, mark_list, obj))
610 return;
611
612 rsrc = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
613
614 pageref = pdf_lookup_page_obj(ctx, glo->doc, page-1);
615 if (!pageref)
616 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);
617
618 font = pdf_dict_get(ctx, rsrc, PDF_NAME(Font));
619 if (show & FONTS && font && !pdf_mark_list_push(ctx, mark_list, font))
620 {
621 int n;
622
623 gatherfonts(ctx, glo, page, pageref, font);
624 n = pdf_dict_len(ctx, font);
625 for (i = 0; i < n; i++)
626 {
627 gatherresourceinfo(ctx, mark_list, glo, page, pdf_dict_get_val(ctx, font, i), show);
628 }
629 }
630
631 xobj = pdf_dict_get(ctx, rsrc, PDF_NAME(XObject));
632 if (show & (IMAGES|XOBJS) && xobj && !pdf_mark_list_push(ctx, mark_list, xobj))
633 {
634 int n;
635
636 if (show & IMAGES)
637 gatherimages(ctx, glo, page, pageref, xobj);
638 if (show & XOBJS)
639 {
640 gatherforms(ctx, glo, page, pageref, xobj);
641 gatherpsobjs(ctx, glo, page, pageref, xobj);
642 }
643 n = pdf_dict_len(ctx, xobj);
644 for (i = 0; i < n; i++)
645 {
646 gatherresourceinfo(ctx, mark_list, glo, page, pdf_dict_get_val(ctx, xobj, i), show);
647 }
648 }
649
650 shade = pdf_dict_get(ctx, rsrc, PDF_NAME(Shading));
651 if (show & SHADINGS && shade && !pdf_mark_list_push(ctx, mark_list, shade))
652 gathershadings(ctx, glo, page, pageref, shade);
653
654 pattern = pdf_dict_get(ctx, rsrc, PDF_NAME(Pattern));
655 if (show & PATTERNS && pattern && !pdf_mark_list_push(ctx, mark_list, pattern))
656 {
657 int n;
658 gatherpatterns(ctx, glo, page, pageref, pattern);
659 n = pdf_dict_len(ctx, pattern);
660 for (i = 0; i < n; i++)
661 {
662 gatherresourceinfo(ctx, mark_list, glo, page, pdf_dict_get_val(ctx, pattern, i), show);
663 }
664 }
665 }
666
667 static void
668 gatherpageinfo(fz_context *ctx, globals *glo, int page, int show)
669 {
670 pdf_mark_list mark_list;
671 pdf_obj *pageref;
672
673 pageref = pdf_lookup_page_obj(ctx, glo->doc, page-1);
674
675 if (!pageref)
676 fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);
677
678 gatherdimensions(ctx, glo, page, pageref);
679
680 pdf_mark_list_init(ctx, &mark_list);
681 fz_try(ctx)
682 gatherresourceinfo(ctx, &mark_list, glo, page, pageref, show);
683 fz_always(ctx)
684 pdf_mark_list_free(ctx, &mark_list);
685 fz_catch(ctx)
686 fz_rethrow(ctx);
687 }
688
689 static void
690 printinfo(fz_context *ctx, globals *glo, char *filename, int show, int page)
691 {
692 int i;
693 int j;
694 fz_output *out = glo->out;
695
696 #define PAGE_FMT_zu "\t%d\t(%d 0 R):\t"
697
698 if (show & DIMENSIONS && glo->dims > 0)
699 {
700 fz_write_printf(ctx, out, "Mediaboxes (%d):\n", glo->dims);
701 for (i = 0; i < glo->dims; i++)
702 {
703 fz_write_printf(ctx, out, PAGE_FMT_zu "[ %g %g %g %g ]\n",
704 glo->dim[i].page,
705 pdf_to_num(ctx, glo->dim[i].pageref),
706 glo->dim[i].u.dim.bbox->x0,
707 glo->dim[i].u.dim.bbox->y0,
708 glo->dim[i].u.dim.bbox->x1,
709 glo->dim[i].u.dim.bbox->y1);
710 }
711 fz_write_printf(ctx, out, "\n");
712 }
713
714 if (show & FONTS && glo->fonts > 0)
715 {
716 fz_write_printf(ctx, out, "Fonts (%d):\n", glo->fonts);
717 for (i = 0; i < glo->fonts; i++)
718 {
719 fz_write_printf(ctx, out, PAGE_FMT_zu "%s '%s' %s%s(%d 0 R)\n",
720 glo->font[i].page,
721 pdf_to_num(ctx, glo->font[i].pageref),
722 pdf_to_name(ctx, glo->font[i].u.font.subtype),
723 pdf_to_name(ctx, glo->font[i].u.font.name),
724 glo->font[i].u.font.encoding ? pdf_to_name(ctx, glo->font[i].u.font.encoding) : "",
725 glo->font[i].u.font.encoding ? " " : "",
726 pdf_to_num(ctx, glo->font[i].u.font.obj));
727 }
728 fz_write_printf(ctx, out, "\n");
729 }
730
731 if (show & IMAGES && glo->images > 0)
732 {
733 fz_write_printf(ctx, out, "Images (%d):\n", glo->images);
734 for (i = 0; i < glo->images; i++)
735 {
736 char *cs = NULL;
737 char *altcs = NULL;
738
739 fz_write_printf(ctx, out, PAGE_FMT_zu "[ ",
740 glo->image[i].page,
741 pdf_to_num(ctx, glo->image[i].pageref));
742
743 if (pdf_is_array(ctx, glo->image[i].u.image.filter))
744 {
745 int n = pdf_array_len(ctx, glo->image[i].u.image.filter);
746 for (j = 0; j < n; j++)
747 {
748 pdf_obj *obj = pdf_array_get(ctx, glo->image[i].u.image.filter, j);
749 char *filter = fz_strdup(ctx, pdf_to_name(ctx, obj));
750
751 if (strstr(filter, "Decode"))
752 *(strstr(filter, "Decode")) = '\0';
753
754 fz_write_printf(ctx, out, "%s%s",
755 filter,
756 j == pdf_array_len(ctx, glo->image[i].u.image.filter) - 1 ? "" : " ");
757 fz_free(ctx, filter);
758 }
759 }
760 else if (glo->image[i].u.image.filter)
761 {
762 pdf_obj *obj = glo->image[i].u.image.filter;
763 char *filter = fz_strdup(ctx, pdf_to_name(ctx, obj));
764
765 if (strstr(filter, "Decode"))
766 *(strstr(filter, "Decode")) = '\0';
767
768 fz_write_printf(ctx, out, "%s", filter);
769 fz_free(ctx, filter);
770 }
771 else
772 fz_write_printf(ctx, out, "Raw");
773
774 if (glo->image[i].u.image.cs)
775 {
776 cs = fz_strdup(ctx, pdf_to_name(ctx, glo->image[i].u.image.cs));
777
778 if (!strncmp(cs, "Device", 6))
779 {
780 size_t len = strlen(cs + 6);
781 memmove(cs + 3, cs + 6, len + 1);
782 cs[3 + len + 1] = '\0';
783 }
784 if (strstr(cs, "ICC"))
785 fz_strlcpy(cs, "ICC", 4);
786 if (strstr(cs, "Indexed"))
787 fz_strlcpy(cs, "Idx", 4);
788 if (strstr(cs, "Pattern"))
789 fz_strlcpy(cs, "Pat", 4);
790 if (strstr(cs, "Separation"))
791 fz_strlcpy(cs, "Sep", 4);
792 }
793 if (glo->image[i].u.image.altcs)
794 {
795 altcs = fz_strdup(ctx, pdf_to_name(ctx, glo->image[i].u.image.altcs));
796
797 if (!strncmp(altcs, "Device", 6))
798 {
799 size_t len = strlen(altcs + 6);
800 memmove(altcs + 3, altcs + 6, len + 1);
801 altcs[3 + len + 1] = '\0';
802 }
803 if (strstr(altcs, "ICC"))
804 fz_strlcpy(altcs, "ICC", 4);
805 if (strstr(altcs, "Indexed"))
806 fz_strlcpy(altcs, "Idx", 4);
807 if (strstr(altcs, "Pattern"))
808 fz_strlcpy(altcs, "Pat", 4);
809 if (strstr(altcs, "Separation"))
810 fz_strlcpy(altcs, "Sep", 4);
811 }
812
813 fz_write_printf(ctx, out, " ] %dx%d %dbpc %s%s%s (%d 0 R)\n",
814 pdf_to_int(ctx, glo->image[i].u.image.width),
815 pdf_to_int(ctx, glo->image[i].u.image.height),
816 glo->image[i].u.image.bpc ? pdf_to_int(ctx, glo->image[i].u.image.bpc) : 1,
817 glo->image[i].u.image.cs ? cs : "ImageMask",
818 glo->image[i].u.image.altcs ? " " : "",
819 glo->image[i].u.image.altcs ? altcs : "",
820 pdf_to_num(ctx, glo->image[i].u.image.obj));
821
822 fz_free(ctx, cs);
823 fz_free(ctx, altcs);
824 }
825 fz_write_printf(ctx, out, "\n");
826 }
827
828 if (show & SHADINGS && glo->shadings > 0)
829 {
830 fz_write_printf(ctx, out, "Shading patterns (%d):\n", glo->shadings);
831 for (i = 0; i < glo->shadings; i++)
832 {
833 char *shadingtype[] =
834 {
835 "",
836 "Function",
837 "Axial",
838 "Radial",
839 "Triangle mesh",
840 "Lattice",
841 "Coons patch",
842 "Tensor patch",
843 };
844
845 fz_write_printf(ctx, out, PAGE_FMT_zu "%s (%d 0 R)\n",
846 glo->shading[i].page,
847 pdf_to_num(ctx, glo->shading[i].pageref),
848 shadingtype[pdf_to_int(ctx, glo->shading[i].u.shading.type)],
849 pdf_to_num(ctx, glo->shading[i].u.shading.obj));
850 }
851 fz_write_printf(ctx, out, "\n");
852 }
853
854 if (show & PATTERNS && glo->patterns > 0)
855 {
856 fz_write_printf(ctx, out, "Patterns (%d):\n", glo->patterns);
857 for (i = 0; i < glo->patterns; i++)
858 {
859 if (pdf_to_int(ctx, glo->pattern[i].u.pattern.type) == 1)
860 {
861 char *painttype[] =
862 {
863 "",
864 "Colored",
865 "Uncolored",
866 };
867 char *tilingtype[] =
868 {
869 "",
870 "Constant",
871 "No distortion",
872 "Constant/fast tiling",
873 };
874
875 fz_write_printf(ctx, out, PAGE_FMT_zu "Tiling %s %s (%d 0 R)\n",
876 glo->pattern[i].page,
877 pdf_to_num(ctx, glo->pattern[i].pageref),
878 painttype[pdf_to_int(ctx, glo->pattern[i].u.pattern.paint)],
879 tilingtype[pdf_to_int(ctx, glo->pattern[i].u.pattern.tiling)],
880 pdf_to_num(ctx, glo->pattern[i].u.pattern.obj));
881 }
882 else
883 {
884 fz_write_printf(ctx, out, PAGE_FMT_zu "Shading %d 0 R (%d 0 R)\n",
885 glo->pattern[i].page,
886 pdf_to_num(ctx, glo->pattern[i].pageref),
887 pdf_to_num(ctx, glo->pattern[i].u.pattern.shading),
888 pdf_to_num(ctx, glo->pattern[i].u.pattern.obj));
889 }
890 }
891 fz_write_printf(ctx, out, "\n");
892 }
893
894 if (show & XOBJS && glo->forms > 0)
895 {
896 fz_write_printf(ctx, out, "Form xobjects (%d):\n", glo->forms);
897 for (i = 0; i < glo->forms; i++)
898 {
899 fz_write_printf(ctx, out, PAGE_FMT_zu "Form%s%s%s%s (%d 0 R)\n",
900 glo->form[i].page,
901 pdf_to_num(ctx, glo->form[i].pageref),
902 glo->form[i].u.form.groupsubtype ? " " : "",
903 glo->form[i].u.form.groupsubtype ? pdf_to_name(ctx, glo->form[i].u.form.groupsubtype) : "",
904 glo->form[i].u.form.groupsubtype ? " Group" : "",
905 glo->form[i].u.form.reference ? " Reference" : "",
906 pdf_to_num(ctx, glo->form[i].u.form.obj));
907 }
908 fz_write_printf(ctx, out, "\n");
909 }
910
911 if (show & XOBJS && glo->psobjs > 0)
912 {
913 fz_write_printf(ctx, out, "Postscript xobjects (%d):\n", glo->psobjs);
914 for (i = 0; i < glo->psobjs; i++)
915 {
916 fz_write_printf(ctx, out, PAGE_FMT_zu "(%d 0 R)\n",
917 glo->psobj[i].page,
918 pdf_to_num(ctx, glo->psobj[i].pageref),
919 pdf_to_num(ctx, glo->psobj[i].u.form.obj));
920 }
921 fz_write_printf(ctx, out, "\n");
922 }
923 }
924
925 static void
926 showinfo(fz_context *ctx, globals *glo, char *filename, int show, const char *pagelist)
927 {
928 int page, spage, epage;
929 int allpages;
930 int pagecount;
931 fz_output *out = glo->out;
932
933 if (!glo->doc)
934 {
935 infousage();
936 fz_throw(ctx, FZ_ERROR_GENERIC, "Cannot show info without document");
937 }
938
939 allpages = !strcmp(pagelist, "1-N");
940
941 pagecount = pdf_count_pages(ctx, glo->doc);
942
943 while ((pagelist = fz_parse_page_range(ctx, pagelist, &spage, &epage, pagecount)))
944 {
945 if (allpages)
946 fz_write_printf(ctx, out, "Retrieving info from pages %d-%d...\n", spage, epage);
947 for (page = spage; page <= epage; page++)
948 {
949 gatherpageinfo(ctx, glo, page, show);
950 if (!allpages)
951 {
952 fz_write_printf(ctx, out, "Page %d:\n", page);
953 printinfo(ctx, glo, filename, show, page);
954 fz_write_printf(ctx, out, "\n");
955 clearinfo(ctx, glo);
956 }
957 }
958 }
959
960 if (allpages)
961 printinfo(ctx, glo, filename, show, -1);
962 }
963
964 static void
965 showzugferd(fz_context *ctx, globals *glo)
966 {
967 float version;
968 fz_output *out = glo->out;
969 enum pdf_zugferd_profile profile = pdf_zugferd_profile(ctx, glo->doc, &version);
970 fz_buffer *buf;
971
972 if (profile == PDF_NOT_ZUGFERD)
973 {
974 fz_write_printf(ctx, out, "Not a ZUGFeRD file.\n");
975 return;
976 }
977
978 fz_write_printf(ctx, out, "ZUGFeRD version %g\n", version);
979 fz_write_printf(ctx, out, "%s profile\n", pdf_zugferd_profile_to_string(ctx, profile));
980
981 fz_write_printf(ctx, out, "Embedded XML:\n");
982 buf = pdf_zugferd_xml(ctx, glo->doc);
983 fz_write_buffer(ctx, out, buf);
984 fz_drop_buffer(ctx, buf);
985 fz_write_printf(ctx, out, "\n\n");
986 }
987
988 static void
989 pdfinfo_info(fz_context *ctx, fz_output *out, char *filename, char *password, int show, char *argv[], int argc)
990 {
991 enum { NO_FILE_OPENED, NO_INFO_GATHERED, INFO_SHOWN } state;
992 int argidx = 0;
993 globals glo = { 0 };
994
995 glo.out = out;
996 glo.ctx = ctx;
997
998 state = NO_FILE_OPENED;
999
1000 fz_try(ctx)
1001 {
1002 while (argidx < argc)
1003 {
1004 if (state == NO_FILE_OPENED || !fz_is_page_range(ctx, argv[argidx]))
1005 {
1006 if (state == NO_INFO_GATHERED)
1007 {
1008 showinfo(ctx, &glo, filename, show, "1-N");
1009 }
1010
1011 closexref(ctx, &glo);
1012
1013 filename = argv[argidx];
1014 fz_write_printf(ctx, out, "%s:\n", filename);
1015 glo.doc = pdf_open_document(glo.ctx, filename);
1016 if (pdf_needs_password(ctx, glo.doc))
1017 if (!pdf_authenticate_password(ctx, glo.doc, password))
1018 fz_throw(glo.ctx, FZ_ERROR_ARGUMENT, "cannot authenticate password: %s", filename);
1019 glo.pagecount = pdf_count_pages(ctx, glo.doc);
1020
1021 showglobalinfo(ctx, &glo);
1022 state = NO_INFO_GATHERED;
1023
1024 if (show & ZUGFERD)
1025 showzugferd(ctx, &glo);
1026 }
1027 else
1028 {
1029 showinfo(ctx, &glo, filename, show, argv[argidx]);
1030 state = INFO_SHOWN;
1031 }
1032
1033 argidx++;
1034 }
1035
1036 if (state == NO_INFO_GATHERED)
1037 showinfo(ctx, &glo, filename, show, "1-N");
1038 }
1039 fz_always(ctx)
1040 closexref(ctx, &glo);
1041 fz_catch(ctx)
1042 fz_rethrow(ctx);
1043 }
1044
1045 int pdfinfo_main(int argc, char **argv)
1046 {
1047 char *filename = "";
1048 char *password = "";
1049 int show = ALL;
1050 int c;
1051 int ret;
1052 fz_context *ctx;
1053
1054 while ((c = fz_getopt(argc, argv, "FISPXMZp:")) != -1)
1055 {
1056 switch (c)
1057 {
1058 case 'F': if (show == ALL) show = FONTS; else show |= FONTS; break;
1059 case 'I': if (show == ALL) show = IMAGES; else show |= IMAGES; break;
1060 case 'S': if (show == ALL) show = SHADINGS; else show |= SHADINGS; break;
1061 case 'P': if (show == ALL) show = PATTERNS; else show |= PATTERNS; break;
1062 case 'X': if (show == ALL) show = XOBJS; else show |= XOBJS; break;
1063 case 'M': if (show == ALL) show = DIMENSIONS; else show |= DIMENSIONS; break;
1064 case 'Z': if (show == ALL) show = ZUGFERD; else show |= ZUGFERD; break;
1065 case 'p': password = fz_optarg; break;
1066 default:
1067 infousage();
1068 return 1;
1069 }
1070 }
1071
1072 if (fz_optind == argc)
1073 {
1074 infousage();
1075 return 1;
1076 }
1077
1078 ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
1079 if (!ctx)
1080 {
1081 fprintf(stderr, "cannot initialise context\n");
1082 exit(1);
1083 }
1084
1085 ret = 0;
1086 fz_try(ctx)
1087 pdfinfo_info(ctx, fz_stdout(ctx), filename, password, show, &argv[fz_optind], argc-fz_optind);
1088 fz_catch(ctx)
1089 {
1090 fz_report_error(ctx);
1091 ret = 1;
1092 }
1093 fz_drop_context(ctx);
1094 return ret;
1095 }