diff mupdf-source/source/tools/pdfinfo.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/source/tools/pdfinfo.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,1095 @@
+// Copyright (C) 2004-2021 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+/*
+ * Information tool.
+ * Print information about the input pdf.
+ */
+
+#include "mupdf/fitz.h"
+#include "mupdf/pdf.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+enum
+{
+	DIMENSIONS = 0x01,
+	FONTS = 0x02,
+	IMAGES = 0x04,
+	SHADINGS = 0x08,
+	PATTERNS = 0x10,
+	XOBJS = 0x20,
+	ZUGFERD = 0x40,
+	ALL = DIMENSIONS | FONTS | IMAGES | SHADINGS | PATTERNS | XOBJS | ZUGFERD
+};
+
+struct info
+{
+	int page;
+	pdf_obj *pageref;
+	union {
+		struct {
+			pdf_obj *obj;
+		} info;
+		struct {
+			pdf_obj *obj;
+		} crypt;
+		struct {
+			pdf_obj *obj;
+			fz_rect *bbox;
+		} dim;
+		struct {
+			pdf_obj *obj;
+			pdf_obj *subtype;
+			pdf_obj *name;
+			pdf_obj *encoding;
+		} font;
+		struct {
+			pdf_obj *obj;
+			pdf_obj *width;
+			pdf_obj *height;
+			pdf_obj *bpc;
+			pdf_obj *filter;
+			pdf_obj *cs;
+			pdf_obj *altcs;
+		} image;
+		struct {
+			pdf_obj *obj;
+			pdf_obj *type;
+		} shading;
+		struct {
+			pdf_obj *obj;
+			pdf_obj *type;
+			pdf_obj *paint;
+			pdf_obj *tiling;
+			pdf_obj *shading;
+		} pattern;
+		struct {
+			pdf_obj *obj;
+			pdf_obj *groupsubtype;
+			pdf_obj *reference;
+		} form;
+	} u;
+};
+
+typedef struct
+{
+	pdf_document *doc;
+	fz_context *ctx;
+	fz_output *out;
+	int pagecount;
+	struct info *dim;
+	int dims;
+	struct info *font;
+	int fonts;
+	struct info *image;
+	int images;
+	struct info *shading;
+	int shadings;
+	struct info *pattern;
+	int patterns;
+	struct info *form;
+	int forms;
+	struct info *psobj;
+	int psobjs;
+} globals;
+
+static void clearinfo(fz_context *ctx, globals *glo)
+{
+	int i;
+
+	if (glo->dim)
+	{
+		for (i = 0; i < glo->dims; i++)
+			fz_free(ctx, glo->dim[i].u.dim.bbox);
+		fz_free(ctx, glo->dim);
+		glo->dim = NULL;
+		glo->dims = 0;
+	}
+
+	if (glo->font)
+	{
+		fz_free(ctx, glo->font);
+		glo->font = NULL;
+		glo->fonts = 0;
+	}
+
+	if (glo->image)
+	{
+		fz_free(ctx, glo->image);
+		glo->image = NULL;
+		glo->images = 0;
+	}
+
+	if (glo->shading)
+	{
+		fz_free(ctx, glo->shading);
+		glo->shading = NULL;
+		glo->shadings = 0;
+	}
+
+	if (glo->pattern)
+	{
+		fz_free(ctx, glo->pattern);
+		glo->pattern = NULL;
+		glo->patterns = 0;
+	}
+
+	if (glo->form)
+	{
+		fz_free(ctx, glo->form);
+		glo->form = NULL;
+		glo->forms = 0;
+	}
+
+	if (glo->psobj)
+	{
+		fz_free(ctx, glo->psobj);
+		glo->psobj = NULL;
+		glo->psobjs = 0;
+	}
+}
+
+static void closexref(fz_context *ctx, globals *glo)
+{
+	if (glo->doc)
+	{
+		pdf_drop_document(ctx, glo->doc);
+		glo->doc = NULL;
+	}
+
+	clearinfo(ctx, glo);
+}
+
+static void
+infousage(void)
+{
+	fprintf(stderr,
+		"usage: mutool info [options] file.pdf [pages]\n"
+		"\t-p -\tpassword for decryption\n"
+		"\t-F\tlist fonts\n"
+		"\t-I\tlist images\n"
+		"\t-M\tlist dimensions\n"
+		"\t-P\tlist patterns\n"
+		"\t-S\tlist shadings\n"
+		"\t-X\tlist form and postscript xobjects\n"
+		"\t-Z\tlist ZUGFeRD info\n"
+		"\tpages\tcomma separated list of page numbers and ranges\n"
+		);
+}
+
+static void
+showglobalinfo(fz_context *ctx, globals *glo)
+{
+	pdf_obj *obj;
+	fz_output *out = glo->out;
+	pdf_document *doc = glo->doc;
+	int version = pdf_version(ctx, doc);
+
+	fz_write_printf(ctx, out, "\nPDF-%d.%d\n", version / 10, version % 10);
+
+	obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Info));
+	if (obj)
+	{
+		fz_write_printf(ctx, out, "Info object (%d 0 R):\n", pdf_to_num(ctx, obj));
+		pdf_print_obj(ctx, out, pdf_resolve_indirect(ctx, obj), 1, 1);
+	}
+
+	obj = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Encrypt));
+	if (obj)
+	{
+		fz_write_printf(ctx, out, "\nEncryption object (%d 0 R):\n", pdf_to_num(ctx, obj));
+		pdf_print_obj(ctx, out, pdf_resolve_indirect(ctx, obj), 1, 1);
+	}
+
+	fz_write_printf(ctx, out, "\nPages: %d\n\n", glo->pagecount);
+}
+
+static void
+gatherdimensions(fz_context *ctx, globals *glo, int page, pdf_obj *pageref)
+{
+	fz_rect bbox;
+	pdf_obj *obj;
+	float unit;
+	int j;
+
+	obj = pdf_dict_get(ctx, pageref, PDF_NAME(MediaBox));
+	if (!pdf_is_array(ctx, obj))
+		return;
+
+	bbox = pdf_to_rect(ctx, obj);
+
+	unit = pdf_dict_get_real_default(ctx, pageref, PDF_NAME(UserUnit), 1);
+	bbox.x0 *= unit;
+	bbox.y0 *= unit;
+	bbox.x1 *= unit;
+	bbox.y1 *= unit;
+
+	for (j = 0; j < glo->dims; j++)
+		if (!memcmp(glo->dim[j].u.dim.bbox, &bbox, sizeof (fz_rect)))
+			break;
+
+	if (j < glo->dims)
+		return;
+
+	glo->dim = fz_realloc_array(ctx, glo->dim, glo->dims+1, struct info);
+	glo->dims++;
+
+	glo->dim[glo->dims - 1].page = page;
+	glo->dim[glo->dims - 1].pageref = pageref;
+	glo->dim[glo->dims - 1].u.dim.bbox = NULL;
+	glo->dim[glo->dims - 1].u.dim.bbox = fz_malloc(ctx, sizeof(fz_rect));
+	memcpy(glo->dim[glo->dims - 1].u.dim.bbox, &bbox, sizeof (fz_rect));
+
+	return;
+}
+
+static void
+gatherfonts(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
+{
+	int i, n;
+
+	n = pdf_dict_len(ctx, dict);
+	for (i = 0; i < n; i++)
+	{
+		pdf_obj *fontdict = NULL;
+		pdf_obj *subtype = NULL;
+		pdf_obj *basefont = NULL;
+		pdf_obj *name = NULL;
+		pdf_obj *encoding = NULL;
+		int k;
+
+		fontdict = pdf_dict_get_val(ctx, dict, i);
+		if (!pdf_is_dict(ctx, fontdict))
+		{
+			fz_warn(ctx, "not a font dict (%d 0 R)", pdf_to_num(ctx, fontdict));
+			continue;
+		}
+
+		subtype = pdf_dict_get(ctx, fontdict, PDF_NAME(Subtype));
+		basefont = pdf_dict_get(ctx, fontdict, PDF_NAME(BaseFont));
+		if (!basefont || pdf_is_null(ctx, basefont))
+			name = pdf_dict_get(ctx, fontdict, PDF_NAME(Name));
+		encoding = pdf_dict_get(ctx, fontdict, PDF_NAME(Encoding));
+		if (pdf_is_dict(ctx, encoding))
+			encoding = pdf_dict_get(ctx, encoding, PDF_NAME(BaseEncoding));
+
+		for (k = 0; k < glo->fonts; k++)
+			if (!pdf_objcmp(ctx, glo->font[k].u.font.obj, fontdict))
+				break;
+
+		if (k < glo->fonts)
+			continue;
+
+		glo->font = fz_realloc_array(ctx, glo->font, glo->fonts+1, struct info);
+		glo->fonts++;
+
+		glo->font[glo->fonts - 1].page = page;
+		glo->font[glo->fonts - 1].pageref = pageref;
+		glo->font[glo->fonts - 1].u.font.obj = fontdict;
+		glo->font[glo->fonts - 1].u.font.subtype = subtype;
+		glo->font[glo->fonts - 1].u.font.name = basefont ? basefont : name;
+		glo->font[glo->fonts - 1].u.font.encoding = encoding;
+	}
+}
+
+static void
+gatherimages(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
+{
+	int i, n;
+
+	n = pdf_dict_len(ctx, dict);
+	for (i = 0; i < n; i++)
+	{
+		pdf_obj *imagedict;
+		pdf_obj *type;
+		pdf_obj *width;
+		pdf_obj *height;
+		pdf_obj *bpc = NULL;
+		pdf_obj *filter = NULL;
+		pdf_obj *cs = NULL;
+		pdf_obj *altcs;
+		int k;
+
+		imagedict = pdf_dict_get_val(ctx, dict, i);
+		if (!pdf_is_dict(ctx, imagedict))
+		{
+			fz_warn(ctx, "not an image dict (%d 0 R)", pdf_to_num(ctx, imagedict));
+			continue;
+		}
+
+		type = pdf_dict_get(ctx, imagedict, PDF_NAME(Subtype));
+		if (!pdf_name_eq(ctx, type, PDF_NAME(Image)))
+			continue;
+
+		filter = pdf_dict_get(ctx, imagedict, PDF_NAME(Filter));
+
+		altcs = NULL;
+		cs = pdf_dict_get(ctx, imagedict, PDF_NAME(ColorSpace));
+		if (pdf_is_array(ctx, cs))
+		{
+			pdf_obj *cses = cs;
+
+			cs = pdf_array_get(ctx, cses, 0);
+			if (pdf_name_eq(ctx, cs, PDF_NAME(DeviceN)) || pdf_name_eq(ctx, cs, PDF_NAME(Separation)))
+			{
+				altcs = pdf_array_get(ctx, cses, 2);
+				if (pdf_is_array(ctx, altcs))
+					altcs = pdf_array_get(ctx, altcs, 0);
+			}
+		}
+
+		width = pdf_dict_get(ctx, imagedict, PDF_NAME(Width));
+		height = pdf_dict_get(ctx, imagedict, PDF_NAME(Height));
+		bpc = pdf_dict_get(ctx, imagedict, PDF_NAME(BitsPerComponent));
+
+		for (k = 0; k < glo->images; k++)
+			if (!pdf_objcmp(ctx, glo->image[k].u.image.obj, imagedict))
+				break;
+
+		if (k < glo->images)
+			continue;
+
+		glo->image = fz_realloc_array(ctx, glo->image, glo->images+1, struct info);
+		glo->images++;
+
+		glo->image[glo->images - 1].page = page;
+		glo->image[glo->images - 1].pageref = pageref;
+		glo->image[glo->images - 1].u.image.obj = imagedict;
+		glo->image[glo->images - 1].u.image.width = width;
+		glo->image[glo->images - 1].u.image.height = height;
+		glo->image[glo->images - 1].u.image.bpc = bpc;
+		glo->image[glo->images - 1].u.image.filter = filter;
+		glo->image[glo->images - 1].u.image.cs = cs;
+		glo->image[glo->images - 1].u.image.altcs = altcs;
+	}
+}
+
+static void
+gatherforms(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
+{
+	int i, n;
+
+	n = pdf_dict_len(ctx, dict);
+	for (i = 0; i < n; i++)
+	{
+		pdf_obj *xobjdict;
+		pdf_obj *type;
+		pdf_obj *subtype;
+		pdf_obj *group;
+		pdf_obj *groupsubtype;
+		pdf_obj *reference;
+		int k;
+
+		xobjdict = pdf_dict_get_val(ctx, dict, i);
+		if (!pdf_is_dict(ctx, xobjdict))
+		{
+			fz_warn(ctx, "not a xobject dict (%d 0 R)", pdf_to_num(ctx, xobjdict));
+			continue;
+		}
+
+		type = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype));
+		if (!pdf_name_eq(ctx, type, PDF_NAME(Form)))
+			continue;
+
+		subtype = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype2));
+		if (!pdf_name_eq(ctx, subtype, PDF_NAME(PS)))
+			continue;
+
+		group = pdf_dict_get(ctx, xobjdict, PDF_NAME(Group));
+		groupsubtype = pdf_dict_get(ctx, group, PDF_NAME(S));
+		reference = pdf_dict_get(ctx, xobjdict, PDF_NAME(Ref));
+
+		for (k = 0; k < glo->forms; k++)
+			if (!pdf_objcmp(ctx, glo->form[k].u.form.obj, xobjdict))
+				break;
+
+		if (k < glo->forms)
+			continue;
+
+		glo->form = fz_realloc_array(ctx, glo->form, glo->forms+1, struct info);
+		glo->forms++;
+
+		glo->form[glo->forms - 1].page = page;
+		glo->form[glo->forms - 1].pageref = pageref;
+		glo->form[glo->forms - 1].u.form.obj = xobjdict;
+		glo->form[glo->forms - 1].u.form.groupsubtype = groupsubtype;
+		glo->form[glo->forms - 1].u.form.reference = reference;
+	}
+}
+
+static void
+gatherpsobjs(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
+{
+	int i, n;
+
+	n = pdf_dict_len(ctx, dict);
+	for (i = 0; i < n; i++)
+	{
+		pdf_obj *xobjdict;
+		pdf_obj *type;
+		pdf_obj *subtype;
+		int k;
+
+		xobjdict = pdf_dict_get_val(ctx, dict, i);
+		if (!pdf_is_dict(ctx, xobjdict))
+		{
+			fz_warn(ctx, "not a xobject dict (%d 0 R)", pdf_to_num(ctx, xobjdict));
+			continue;
+		}
+
+		type = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype));
+		subtype = pdf_dict_get(ctx, xobjdict, PDF_NAME(Subtype2));
+		if (!pdf_name_eq(ctx, type, PDF_NAME(PS)) &&
+			(!pdf_name_eq(ctx, type, PDF_NAME(Form)) || !pdf_name_eq(ctx, subtype, PDF_NAME(PS))))
+			continue;
+
+		for (k = 0; k < glo->psobjs; k++)
+			if (!pdf_objcmp(ctx, glo->psobj[k].u.form.obj, xobjdict))
+				break;
+
+		if (k < glo->psobjs)
+			continue;
+
+		glo->psobj = fz_realloc_array(ctx, glo->psobj, glo->psobjs+1, struct info);
+		glo->psobjs++;
+
+		glo->psobj[glo->psobjs - 1].page = page;
+		glo->psobj[glo->psobjs - 1].pageref = pageref;
+		glo->psobj[glo->psobjs - 1].u.form.obj = xobjdict;
+	}
+}
+
+static void
+gathershadings(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
+{
+	int i, n;
+
+	n = pdf_dict_len(ctx, dict);
+	for (i = 0; i < n; i++)
+	{
+		pdf_obj *shade;
+		pdf_obj *type;
+		int k;
+
+		shade = pdf_dict_get_val(ctx, dict, i);
+		if (!pdf_is_dict(ctx, shade))
+		{
+			fz_warn(ctx, "not a shading dict (%d 0 R)", pdf_to_num(ctx, shade));
+			continue;
+		}
+
+		type = pdf_dict_get(ctx, shade, PDF_NAME(ShadingType));
+		if (!pdf_is_int(ctx, type) || pdf_to_int(ctx, type) < 1 || pdf_to_int(ctx, type) > 7)
+		{
+			fz_warn(ctx, "not a shading type (%d 0 R)", pdf_to_num(ctx, shade));
+			type = NULL;
+		}
+
+		for (k = 0; k < glo->shadings; k++)
+			if (!pdf_objcmp(ctx, glo->shading[k].u.shading.obj, shade))
+				break;
+
+		if (k < glo->shadings)
+			continue;
+
+		glo->shading = fz_realloc_array(ctx, glo->shading, glo->shadings+1, struct info);
+		glo->shadings++;
+
+		glo->shading[glo->shadings - 1].page = page;
+		glo->shading[glo->shadings - 1].pageref = pageref;
+		glo->shading[glo->shadings - 1].u.shading.obj = shade;
+		glo->shading[glo->shadings - 1].u.shading.type = type;
+	}
+}
+
+static void
+gatherpatterns(fz_context *ctx, globals *glo, int page, pdf_obj *pageref, pdf_obj *dict)
+{
+	int i, n;
+
+	n = pdf_dict_len(ctx, dict);
+	for (i = 0; i < n; i++)
+	{
+		pdf_obj *patterndict;
+		pdf_obj *type;
+		pdf_obj *paint = NULL;
+		pdf_obj *tiling = NULL;
+		pdf_obj *shading = NULL;
+		int k;
+
+		patterndict = pdf_dict_get_val(ctx, dict, i);
+		if (!pdf_is_dict(ctx, patterndict))
+		{
+			fz_warn(ctx, "not a pattern dict (%d 0 R)", pdf_to_num(ctx, patterndict));
+			continue;
+		}
+
+		type = pdf_dict_get(ctx, patterndict, PDF_NAME(PatternType));
+		if (!pdf_is_int(ctx, type) || pdf_to_int(ctx, type) < 1 || pdf_to_int(ctx, type) > 2)
+		{
+			fz_warn(ctx, "not a pattern type (%d 0 R)", pdf_to_num(ctx, patterndict));
+			type = NULL;
+		}
+
+		if (pdf_to_int(ctx, type) == 1)
+		{
+			paint = pdf_dict_get(ctx, patterndict, PDF_NAME(PaintType));
+			if (!pdf_is_int(ctx, paint) || pdf_to_int(ctx, paint) < 1 || pdf_to_int(ctx, paint) > 2)
+			{
+				fz_warn(ctx, "not a pattern paint type (%d 0 R)", pdf_to_num(ctx, patterndict));
+				paint = NULL;
+			}
+
+			tiling = pdf_dict_get(ctx, patterndict, PDF_NAME(TilingType));
+			if (!pdf_is_int(ctx, tiling) || pdf_to_int(ctx, tiling) < 1 || pdf_to_int(ctx, tiling) > 3)
+			{
+				fz_warn(ctx, "not a pattern tiling type (%d 0 R)", pdf_to_num(ctx, patterndict));
+				tiling = NULL;
+			}
+		}
+		else
+		{
+			shading = pdf_dict_get(ctx, patterndict, PDF_NAME(Shading));
+		}
+
+		for (k = 0; k < glo->patterns; k++)
+			if (!pdf_objcmp(ctx, glo->pattern[k].u.pattern.obj, patterndict))
+				break;
+
+		if (k < glo->patterns)
+			continue;
+
+		glo->pattern = fz_realloc_array(ctx, glo->pattern, glo->patterns+1, struct info);
+		glo->patterns++;
+
+		glo->pattern[glo->patterns - 1].page = page;
+		glo->pattern[glo->patterns - 1].pageref = pageref;
+		glo->pattern[glo->patterns - 1].u.pattern.obj = patterndict;
+		glo->pattern[glo->patterns - 1].u.pattern.type = type;
+		glo->pattern[glo->patterns - 1].u.pattern.paint = paint;
+		glo->pattern[glo->patterns - 1].u.pattern.tiling = tiling;
+		glo->pattern[glo->patterns - 1].u.pattern.shading = shading;
+	}
+}
+
+static void
+gatherresourceinfo(fz_context *ctx, pdf_mark_list *mark_list, globals *glo, int page, pdf_obj *obj, int show)
+{
+	pdf_obj *rsrc;
+	pdf_obj *pageref;
+	pdf_obj *font;
+	pdf_obj *xobj;
+	pdf_obj *shade;
+	pdf_obj *pattern;
+	int i;
+
+	/* stop on cyclic resource dependencies */
+	if (pdf_mark_list_push(ctx, mark_list, obj))
+		return;
+
+	rsrc = pdf_dict_get(ctx, obj, PDF_NAME(Resources));
+
+	pageref = pdf_lookup_page_obj(ctx, glo->doc, page-1);
+	if (!pageref)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);
+
+	font = pdf_dict_get(ctx, rsrc, PDF_NAME(Font));
+	if (show & FONTS && font && !pdf_mark_list_push(ctx, mark_list, font))
+	{
+		int n;
+
+		gatherfonts(ctx, glo, page, pageref, font);
+		n = pdf_dict_len(ctx, font);
+		for (i = 0; i < n; i++)
+		{
+			gatherresourceinfo(ctx, mark_list, glo, page, pdf_dict_get_val(ctx, font, i), show);
+		}
+	}
+
+	xobj = pdf_dict_get(ctx, rsrc, PDF_NAME(XObject));
+	if (show & (IMAGES|XOBJS) && xobj && !pdf_mark_list_push(ctx, mark_list, xobj))
+	{
+		int n;
+
+		if (show & IMAGES)
+			gatherimages(ctx, glo, page, pageref, xobj);
+		if (show & XOBJS)
+		{
+			gatherforms(ctx, glo, page, pageref, xobj);
+			gatherpsobjs(ctx, glo, page, pageref, xobj);
+		}
+		n = pdf_dict_len(ctx, xobj);
+		for (i = 0; i < n; i++)
+		{
+			gatherresourceinfo(ctx, mark_list, glo, page, pdf_dict_get_val(ctx, xobj, i), show);
+		}
+	}
+
+	shade = pdf_dict_get(ctx, rsrc, PDF_NAME(Shading));
+	if (show & SHADINGS && shade && !pdf_mark_list_push(ctx, mark_list, shade))
+		gathershadings(ctx, glo, page, pageref, shade);
+
+	pattern = pdf_dict_get(ctx, rsrc, PDF_NAME(Pattern));
+	if (show & PATTERNS && pattern && !pdf_mark_list_push(ctx, mark_list, pattern))
+	{
+		int n;
+		gatherpatterns(ctx, glo, page, pageref, pattern);
+		n = pdf_dict_len(ctx, pattern);
+		for (i = 0; i < n; i++)
+		{
+			gatherresourceinfo(ctx, mark_list, glo, page, pdf_dict_get_val(ctx, pattern, i), show);
+		}
+	}
+}
+
+static void
+gatherpageinfo(fz_context *ctx, globals *glo, int page, int show)
+{
+	pdf_mark_list mark_list;
+	pdf_obj *pageref;
+
+	pageref = pdf_lookup_page_obj(ctx, glo->doc, page-1);
+
+	if (!pageref)
+		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot retrieve info from page %d", page);
+
+	gatherdimensions(ctx, glo, page, pageref);
+
+	pdf_mark_list_init(ctx, &mark_list);
+	fz_try(ctx)
+		gatherresourceinfo(ctx, &mark_list, glo, page, pageref, show);
+	fz_always(ctx)
+		pdf_mark_list_free(ctx, &mark_list);
+	fz_catch(ctx)
+		fz_rethrow(ctx);
+}
+
+static void
+printinfo(fz_context *ctx, globals *glo, char *filename, int show, int page)
+{
+	int i;
+	int j;
+	fz_output *out = glo->out;
+
+#define PAGE_FMT_zu "\t%d\t(%d 0 R):\t"
+
+	if (show & DIMENSIONS && glo->dims > 0)
+	{
+		fz_write_printf(ctx, out, "Mediaboxes (%d):\n", glo->dims);
+		for (i = 0; i < glo->dims; i++)
+		{
+			fz_write_printf(ctx, out, PAGE_FMT_zu "[ %g %g %g %g ]\n",
+				glo->dim[i].page,
+				pdf_to_num(ctx, glo->dim[i].pageref),
+				glo->dim[i].u.dim.bbox->x0,
+				glo->dim[i].u.dim.bbox->y0,
+				glo->dim[i].u.dim.bbox->x1,
+				glo->dim[i].u.dim.bbox->y1);
+		}
+		fz_write_printf(ctx, out, "\n");
+	}
+
+	if (show & FONTS && glo->fonts > 0)
+	{
+		fz_write_printf(ctx, out, "Fonts (%d):\n", glo->fonts);
+		for (i = 0; i < glo->fonts; i++)
+		{
+			fz_write_printf(ctx, out, PAGE_FMT_zu "%s '%s' %s%s(%d 0 R)\n",
+				glo->font[i].page,
+				pdf_to_num(ctx, glo->font[i].pageref),
+				pdf_to_name(ctx, glo->font[i].u.font.subtype),
+				pdf_to_name(ctx, glo->font[i].u.font.name),
+				glo->font[i].u.font.encoding ? pdf_to_name(ctx, glo->font[i].u.font.encoding) : "",
+				glo->font[i].u.font.encoding ? " " : "",
+				pdf_to_num(ctx, glo->font[i].u.font.obj));
+		}
+		fz_write_printf(ctx, out, "\n");
+	}
+
+	if (show & IMAGES && glo->images > 0)
+	{
+		fz_write_printf(ctx, out, "Images (%d):\n", glo->images);
+		for (i = 0; i < glo->images; i++)
+		{
+			char *cs = NULL;
+			char *altcs = NULL;
+
+			fz_write_printf(ctx, out, PAGE_FMT_zu "[ ",
+				glo->image[i].page,
+				pdf_to_num(ctx, glo->image[i].pageref));
+
+			if (pdf_is_array(ctx, glo->image[i].u.image.filter))
+			{
+				int n = pdf_array_len(ctx, glo->image[i].u.image.filter);
+				for (j = 0; j < n; j++)
+				{
+					pdf_obj *obj = pdf_array_get(ctx, glo->image[i].u.image.filter, j);
+					char *filter = fz_strdup(ctx, pdf_to_name(ctx, obj));
+
+					if (strstr(filter, "Decode"))
+						*(strstr(filter, "Decode")) = '\0';
+
+					fz_write_printf(ctx, out, "%s%s",
+						filter,
+						j == pdf_array_len(ctx, glo->image[i].u.image.filter) - 1 ? "" : " ");
+					fz_free(ctx, filter);
+				}
+			}
+			else if (glo->image[i].u.image.filter)
+			{
+				pdf_obj *obj = glo->image[i].u.image.filter;
+				char *filter = fz_strdup(ctx, pdf_to_name(ctx, obj));
+
+				if (strstr(filter, "Decode"))
+					*(strstr(filter, "Decode")) = '\0';
+
+				fz_write_printf(ctx, out, "%s", filter);
+				fz_free(ctx, filter);
+			}
+			else
+				fz_write_printf(ctx, out, "Raw");
+
+			if (glo->image[i].u.image.cs)
+			{
+				cs = fz_strdup(ctx, pdf_to_name(ctx, glo->image[i].u.image.cs));
+
+				if (!strncmp(cs, "Device", 6))
+				{
+					size_t len = strlen(cs + 6);
+					memmove(cs + 3, cs + 6, len + 1);
+					cs[3 + len + 1] = '\0';
+				}
+				if (strstr(cs, "ICC"))
+					fz_strlcpy(cs, "ICC", 4);
+				if (strstr(cs, "Indexed"))
+					fz_strlcpy(cs, "Idx", 4);
+				if (strstr(cs, "Pattern"))
+					fz_strlcpy(cs, "Pat", 4);
+				if (strstr(cs, "Separation"))
+					fz_strlcpy(cs, "Sep", 4);
+			}
+			if (glo->image[i].u.image.altcs)
+			{
+				altcs = fz_strdup(ctx, pdf_to_name(ctx, glo->image[i].u.image.altcs));
+
+				if (!strncmp(altcs, "Device", 6))
+				{
+					size_t len = strlen(altcs + 6);
+					memmove(altcs + 3, altcs + 6, len + 1);
+					altcs[3 + len + 1] = '\0';
+				}
+				if (strstr(altcs, "ICC"))
+					fz_strlcpy(altcs, "ICC", 4);
+				if (strstr(altcs, "Indexed"))
+					fz_strlcpy(altcs, "Idx", 4);
+				if (strstr(altcs, "Pattern"))
+					fz_strlcpy(altcs, "Pat", 4);
+				if (strstr(altcs, "Separation"))
+					fz_strlcpy(altcs, "Sep", 4);
+			}
+
+			fz_write_printf(ctx, out, " ] %dx%d %dbpc %s%s%s (%d 0 R)\n",
+				pdf_to_int(ctx, glo->image[i].u.image.width),
+				pdf_to_int(ctx, glo->image[i].u.image.height),
+				glo->image[i].u.image.bpc ? pdf_to_int(ctx, glo->image[i].u.image.bpc) : 1,
+				glo->image[i].u.image.cs ? cs : "ImageMask",
+				glo->image[i].u.image.altcs ? " " : "",
+				glo->image[i].u.image.altcs ? altcs : "",
+				pdf_to_num(ctx, glo->image[i].u.image.obj));
+
+			fz_free(ctx, cs);
+			fz_free(ctx, altcs);
+		}
+		fz_write_printf(ctx, out, "\n");
+	}
+
+	if (show & SHADINGS && glo->shadings > 0)
+	{
+		fz_write_printf(ctx, out, "Shading patterns (%d):\n", glo->shadings);
+		for (i = 0; i < glo->shadings; i++)
+		{
+			char *shadingtype[] =
+			{
+				"",
+				"Function",
+				"Axial",
+				"Radial",
+				"Triangle mesh",
+				"Lattice",
+				"Coons patch",
+				"Tensor patch",
+			};
+
+			fz_write_printf(ctx, out, PAGE_FMT_zu "%s (%d 0 R)\n",
+				glo->shading[i].page,
+				pdf_to_num(ctx, glo->shading[i].pageref),
+				shadingtype[pdf_to_int(ctx, glo->shading[i].u.shading.type)],
+				pdf_to_num(ctx, glo->shading[i].u.shading.obj));
+		}
+		fz_write_printf(ctx, out, "\n");
+	}
+
+	if (show & PATTERNS && glo->patterns > 0)
+	{
+		fz_write_printf(ctx, out, "Patterns (%d):\n", glo->patterns);
+		for (i = 0; i < glo->patterns; i++)
+		{
+			if (pdf_to_int(ctx, glo->pattern[i].u.pattern.type) == 1)
+			{
+				char *painttype[] =
+				{
+					"",
+					"Colored",
+					"Uncolored",
+				};
+				char *tilingtype[] =
+				{
+					"",
+					"Constant",
+					"No distortion",
+					"Constant/fast tiling",
+				};
+
+				fz_write_printf(ctx, out, PAGE_FMT_zu "Tiling %s %s (%d 0 R)\n",
+						glo->pattern[i].page,
+						pdf_to_num(ctx, glo->pattern[i].pageref),
+						painttype[pdf_to_int(ctx, glo->pattern[i].u.pattern.paint)],
+						tilingtype[pdf_to_int(ctx, glo->pattern[i].u.pattern.tiling)],
+						pdf_to_num(ctx, glo->pattern[i].u.pattern.obj));
+			}
+			else
+			{
+				fz_write_printf(ctx, out, PAGE_FMT_zu "Shading %d 0 R (%d 0 R)\n",
+						glo->pattern[i].page,
+						pdf_to_num(ctx, glo->pattern[i].pageref),
+						pdf_to_num(ctx, glo->pattern[i].u.pattern.shading),
+						pdf_to_num(ctx, glo->pattern[i].u.pattern.obj));
+			}
+		}
+		fz_write_printf(ctx, out, "\n");
+	}
+
+	if (show & XOBJS && glo->forms > 0)
+	{
+		fz_write_printf(ctx, out, "Form xobjects (%d):\n", glo->forms);
+		for (i = 0; i < glo->forms; i++)
+		{
+			fz_write_printf(ctx, out, PAGE_FMT_zu "Form%s%s%s%s (%d 0 R)\n",
+				glo->form[i].page,
+				pdf_to_num(ctx, glo->form[i].pageref),
+				glo->form[i].u.form.groupsubtype ? " " : "",
+				glo->form[i].u.form.groupsubtype ? pdf_to_name(ctx, glo->form[i].u.form.groupsubtype) : "",
+				glo->form[i].u.form.groupsubtype ? " Group" : "",
+				glo->form[i].u.form.reference ? " Reference" : "",
+				pdf_to_num(ctx, glo->form[i].u.form.obj));
+		}
+		fz_write_printf(ctx, out, "\n");
+	}
+
+	if (show & XOBJS && glo->psobjs > 0)
+	{
+		fz_write_printf(ctx, out, "Postscript xobjects (%d):\n", glo->psobjs);
+		for (i = 0; i < glo->psobjs; i++)
+		{
+			fz_write_printf(ctx, out, PAGE_FMT_zu "(%d 0 R)\n",
+				glo->psobj[i].page,
+				pdf_to_num(ctx, glo->psobj[i].pageref),
+				pdf_to_num(ctx, glo->psobj[i].u.form.obj));
+		}
+		fz_write_printf(ctx, out, "\n");
+	}
+}
+
+static void
+showinfo(fz_context *ctx, globals *glo, char *filename, int show, const char *pagelist)
+{
+	int page, spage, epage;
+	int allpages;
+	int pagecount;
+	fz_output *out = glo->out;
+
+	if (!glo->doc)
+	{
+		infousage();
+		fz_throw(ctx, FZ_ERROR_GENERIC, "Cannot show info without document");
+	}
+
+	allpages = !strcmp(pagelist, "1-N");
+
+	pagecount = pdf_count_pages(ctx, glo->doc);
+
+	while ((pagelist = fz_parse_page_range(ctx, pagelist, &spage, &epage, pagecount)))
+	{
+		if (allpages)
+			fz_write_printf(ctx, out, "Retrieving info from pages %d-%d...\n", spage, epage);
+		for (page = spage; page <= epage; page++)
+		{
+			gatherpageinfo(ctx, glo, page, show);
+			if (!allpages)
+			{
+				fz_write_printf(ctx, out, "Page %d:\n", page);
+				printinfo(ctx, glo, filename, show, page);
+				fz_write_printf(ctx, out, "\n");
+				clearinfo(ctx, glo);
+			}
+		}
+	}
+
+	if (allpages)
+		printinfo(ctx, glo, filename, show, -1);
+}
+
+static void
+showzugferd(fz_context *ctx, globals *glo)
+{
+	float version;
+	fz_output *out = glo->out;
+	enum pdf_zugferd_profile profile = pdf_zugferd_profile(ctx, glo->doc, &version);
+	fz_buffer *buf;
+
+	if (profile == PDF_NOT_ZUGFERD)
+	{
+		fz_write_printf(ctx, out, "Not a ZUGFeRD file.\n");
+		return;
+	}
+
+	fz_write_printf(ctx, out, "ZUGFeRD version %g\n", version);
+	fz_write_printf(ctx, out, "%s profile\n", pdf_zugferd_profile_to_string(ctx, profile));
+
+	fz_write_printf(ctx, out, "Embedded XML:\n");
+	buf = pdf_zugferd_xml(ctx, glo->doc);
+	fz_write_buffer(ctx, out, buf);
+	fz_drop_buffer(ctx, buf);
+	fz_write_printf(ctx, out, "\n\n");
+}
+
+static void
+pdfinfo_info(fz_context *ctx, fz_output *out, char *filename, char *password, int show, char *argv[], int argc)
+{
+	enum { NO_FILE_OPENED, NO_INFO_GATHERED, INFO_SHOWN } state;
+	int argidx = 0;
+	globals glo = { 0 };
+
+	glo.out = out;
+	glo.ctx = ctx;
+
+	state = NO_FILE_OPENED;
+
+	fz_try(ctx)
+	{
+		while (argidx < argc)
+		{
+			if (state == NO_FILE_OPENED || !fz_is_page_range(ctx, argv[argidx]))
+			{
+				if (state == NO_INFO_GATHERED)
+				{
+					showinfo(ctx, &glo, filename, show, "1-N");
+				}
+
+				closexref(ctx, &glo);
+
+				filename = argv[argidx];
+				fz_write_printf(ctx, out, "%s:\n", filename);
+				glo.doc = pdf_open_document(glo.ctx, filename);
+				if (pdf_needs_password(ctx, glo.doc))
+					if (!pdf_authenticate_password(ctx, glo.doc, password))
+						fz_throw(glo.ctx, FZ_ERROR_ARGUMENT, "cannot authenticate password: %s", filename);
+				glo.pagecount = pdf_count_pages(ctx, glo.doc);
+
+				showglobalinfo(ctx, &glo);
+				state = NO_INFO_GATHERED;
+
+				if (show & ZUGFERD)
+					showzugferd(ctx, &glo);
+			}
+			else
+			{
+				showinfo(ctx, &glo, filename, show, argv[argidx]);
+				state = INFO_SHOWN;
+			}
+
+			argidx++;
+		}
+
+		if (state == NO_INFO_GATHERED)
+			showinfo(ctx, &glo, filename, show, "1-N");
+	}
+	fz_always(ctx)
+		closexref(ctx, &glo);
+	fz_catch(ctx)
+		fz_rethrow(ctx);
+}
+
+int pdfinfo_main(int argc, char **argv)
+{
+	char *filename = "";
+	char *password = "";
+	int show = ALL;
+	int c;
+	int ret;
+	fz_context *ctx;
+
+	while ((c = fz_getopt(argc, argv, "FISPXMZp:")) != -1)
+	{
+		switch (c)
+		{
+		case 'F': if (show == ALL) show = FONTS; else show |= FONTS; break;
+		case 'I': if (show == ALL) show = IMAGES; else show |= IMAGES; break;
+		case 'S': if (show == ALL) show = SHADINGS; else show |= SHADINGS; break;
+		case 'P': if (show == ALL) show = PATTERNS; else show |= PATTERNS; break;
+		case 'X': if (show == ALL) show = XOBJS; else show |= XOBJS; break;
+		case 'M': if (show == ALL) show = DIMENSIONS; else show |= DIMENSIONS; break;
+		case 'Z': if (show == ALL) show = ZUGFERD; else show |= ZUGFERD; break;
+		case 'p': password = fz_optarg; break;
+		default:
+			infousage();
+			return 1;
+		}
+	}
+
+	if (fz_optind == argc)
+	{
+		infousage();
+		return 1;
+	}
+
+	ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
+	if (!ctx)
+	{
+		fprintf(stderr, "cannot initialise context\n");
+		exit(1);
+	}
+
+	ret = 0;
+	fz_try(ctx)
+		pdfinfo_info(ctx, fz_stdout(ctx), filename, password, show, &argv[fz_optind], argc-fz_optind);
+	fz_catch(ctx)
+	{
+		fz_report_error(ctx);
+		ret = 1;
+	}
+	fz_drop_context(ctx);
+	return ret;
+}