view mupdf-source/source/tools/pdfaudit.c @ 15:393e55ef9200

Explicitely use python3 instead of python
author Franz Glasner <fzglas.hg@dom66.de>
date Wed, 17 Sep 2025 21:11:04 +0200
parents b50eed0cc0ef
children
line wrap: on
line source

// Copyright (C) 2023-2025 Artifex Software, Inc.
//
// This file is part of MuPDF.
//
// MuPDF is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
//
// Alternative licensing terms are available from the licensor.
// For commercial licensing, see <https://www.artifex.com/> or contact
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
// CA 94129, USA, for further information.

/*
 * PDF auditing tool
 */

#include "mupdf/fitz.h"
#include "mupdf/pdf.h"

#include <string.h>
#include <stdlib.h>
#include <stdio.h>

#define SWITCH(x) switch ((intptr_t)(x))
#define CASE(x) case ((intptr_t)(x))

typedef enum
{
	AUDIT_UNKNOWN = 0,
	AUDIT_THUMBNAILS,
	AUDIT_IMAGES,
	AUDIT_BOOKMARKS,
	AUDIT_PAGE_OBJECTS,
	AUDIT_CONTENT_STREAMS,
	AUDIT_FONTS,
	AUDIT_STRUCTURE_INFO,
	AUDIT_FORMS,
	AUDIT_LINK_ANNOTATIONS,
	AUDIT_COMMENTS,
	AUDIT_3DCONTENT,
	AUDIT_NAMED_DESTINATIONS,
	//AUDIT_DOCUMENT_OVERHEAD, // FIXME
	AUDIT_COLORSPACES,
	AUDIT_FORM_XOBJ,
	AUDIT_EXTGS,
	AUDIT_PIECE_INFORMATION,
	AUDIT_EMBEDDED_FILES,
	AUDIT_TRAILER,
	AUDIT_RESOURCES,
	AUDIT_OBJSTM,
	AUDIT_METADATA,
	AUDIT__MAX
} audit_type_t;

const char *audit_type[] =
{
	"UNKNOWN",
	"THUMBNAILS",
	"IMAGES",
	"BOOKMARKS",
	"PAGE OBJECTS",
	"CONTENT_STREAMS",
	"FONTS",
	"STRUCTURE_INFO",
	"FORMS",
	"LINK_ANNOTATIONS",
	"COMMENTS",
	"3DCONTENT",
	"NAMED_DESTINATIONS",
	//"DOCUMENT_OVERHEAD",
	"COLORSPACES",
	"FORM_XOBJ",
	"EXTGS",
	"PIECE_INFORMATION",
	"EMBEDDED_FILES",
	"TRAILER",
	"RESOURCES",
	"OBJSTM",
	"METADATA"
};

typedef struct
{
	audit_type_t type;
	int is_in_objstm;
	/* The number of bytes this object will take in the file, not including any actual stream content. */
	size_t textsize;
	/* The number of bytes of overhead "1 0 R\nendobj\n" plus "stream\nendstream\n" */
	size_t overhead;
	/* Uncompressed stream size */
	size_t len;
	/* Compressed stream size (not including 'stream\nendstream\n) */
	size_t stream_len;
} obj_info_t;

enum
{
	OP_w = 0,
	OP_j,
	OP_J,
	OP_M,
	OP_d,
	OP_ri,
	OP_gs_OP,
	OP_gs_op,
	OP_gs_OPM,
	OP_gs_UseBlackPtComp,
	OP_i,
	OP_gs_begin,
	OP_gs_BM,
	OP_gs_CA,
	OP_gs_ca,
	OP_gs_SMask,
	OP_gs_end,
	OP_q,
	OP_cm,
	OP_m,
	OP_l,
	OP_c,
	OP_v,
	OP_y,
	OP_h,
	OP_re,
	OP_S,
	OP_s,
	OP_F,
	OP_f,
	OP_fstar,
	OP_B,
	OP_Bstar,
	OP_b,
	OP_bstar,
	OP_n,
	OP_W,
	OP_Wstar,
	OP_BT,
	OP_ET,
	OP_Q,
	OP_Tc,
	OP_Tw,
	OP_Tz,
	OP_TL,
	OP_Tf,
	OP_Tr,
	OP_Ts,
	OP_Td,
	OP_TD,
	OP_Tm,
	OP_Tstar,
	OP_TJ,
	OP_Tj,
	OP_squote,
	OP_dquote,
	OP_d0,
	OP_d1,
	OP_CS,
	OP_cs,
	OP_SC_pattern,
	OP_sc_pattern,
	OP_SC_shade,
	OP_sc_shade,
	OP_SC_color,
	OP_sc_color,
	OP_G,
	OP_g,
	OP_RG,
	OP_rg,
	OP_K,
	OP_k,
	OP_BI,
	OP_sh,
	OP_Do_image,
	OP_Do_form,
	OP_MP,
	OP_DP,
	OP_BMC,
	OP_BDC,
	OP_EMC,
	OP_BX,
	OP_EX,
	OP_END
};

const char *op_names[] =
{
	"w",
	"j",
	"J",
	"M",
	"d",
	"ri",
	"gs_OP",
	"gs_op",
	"gs_OPM",
	"gs_UseBlackPtComp",
	"i",
	"gs_begin",
	"gs_BM",
	"gs_CA",
	"gs_ca",
	"gs_SMask",
	"gs_end",
	"q",
	"cm",
	"m",
	"l",
	"c",
	"v",
	"y",
	"h",
	"re",
	"S",
	"s",
	"F",
	"f",
	"fstar",
	"B",
	"Bstar",
	"b",
	"bstar",
	"n",
	"W",
	"Wstar",
	"BT",
	"ET",
	"Q",
	"Tc",
	"Tw",
	"Tz",
	"TL",
	"Tf",
	"Tr",
	"Ts",
	"Td",
	"TD",
	"Tm",
	"Tstar",
	"TJ",
	"Tj",
	"squote",
	"dquote",
	"d0",
	"d1",
	"CS",
	"cs",
	"SC_pattern",
	"sc_pattern",
	"SC_shade",
	"sc_shade",
	"SC_color",
	"sc_color",
	"G",
	"g",
	"RG",
	"rg",
	"K",
	"k",
	"BI",
	"sh",
	"Do_image",
	"Do_form",
	"MP",
	"DP",
	"BMC",
	"BDC",
	"EMC",
	"BX",
	"EX",
};

typedef struct
{
	size_t len[OP_END];
} op_usage_t;

typedef struct
{
	pdf_processor super;
	pdf_document *doc;
	int structparents;
	pdf_processor *mine;
	pdf_processor *chain;
	pdf_filter_options *global_options;
	op_usage_t *op_usage;
	fz_buffer *buffer;
} pdf_opcount_processor;

/* general graphics state */

static void
pdf_opcount_w(fz_context *ctx, pdf_processor *proc, float linewidth)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_w)
		p->mine->op_w(ctx, p->mine, linewidth);

	z = p->buffer->len - z;
	p->op_usage->len[OP_w] += z;
}

static void
pdf_opcount_j(fz_context *ctx, pdf_processor *proc, int linejoin)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_j)
		p->mine->op_j(ctx, p->mine, linejoin);

	z = p->buffer->len - z;
	p->op_usage->len[OP_j] += z;
}

static void
pdf_opcount_J(fz_context *ctx, pdf_processor *proc, int linecap)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_J)
		p->mine->op_J(ctx, p->mine, linecap);

	z = p->buffer->len - z;
	p->op_usage->len[OP_J] += z;
}

static void
pdf_opcount_M(fz_context *ctx, pdf_processor *proc, float miterlimit)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_M)
		p->mine->op_M(ctx, p->mine, miterlimit);

	z = p->buffer->len - z;
	p->op_usage->len[OP_M] += z;
}

static void
pdf_opcount_d(fz_context *ctx, pdf_processor *proc, pdf_obj *array, float phase)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_d)
		p->mine->op_d(ctx, p->mine, array, phase);

	z = p->buffer->len - z;
	p->op_usage->len[OP_d] += z;
}

static void
pdf_opcount_ri(fz_context *ctx, pdf_processor *proc, const char *intent)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_ri)
		p->mine->op_ri(ctx, p->mine, intent);

	z = p->buffer->len - z;
	p->op_usage->len[OP_ri] += z;
}

static void
pdf_opcount_gs_OP(fz_context *ctx, pdf_processor *proc, int b)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_gs_OP)
		p->mine->op_gs_OP(ctx, p->mine, b);

	z = p->buffer->len - z;
	p->op_usage->len[OP_gs_OP] += z;
}

static void
pdf_opcount_gs_op(fz_context *ctx, pdf_processor *proc, int b)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_gs_op)
		p->mine->op_gs_op(ctx, p->mine, b);

	z = p->buffer->len - z;
	p->op_usage->len[OP_gs_op] += z;
}

static void
pdf_opcount_gs_OPM(fz_context *ctx, pdf_processor *proc, int i)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_gs_OPM)
		p->mine->op_gs_OPM(ctx, p->mine, i);

	z = p->buffer->len - z;
	p->op_usage->len[OP_gs_OPM] += z;
}

static void
pdf_opcount_gs_UseBlackPtComp(fz_context *ctx, pdf_processor *proc, pdf_obj *name)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_gs_UseBlackPtComp)
		p->mine->op_gs_UseBlackPtComp(ctx, p->mine, name);

	z = p->buffer->len - z;
	p->op_usage->len[OP_gs_UseBlackPtComp] += z;
}

static void
pdf_opcount_i(fz_context *ctx, pdf_processor *proc, float flatness)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_i)
		p->mine->op_i(ctx, p->mine, flatness);

	z = p->buffer->len - z;
	p->op_usage->len[OP_i] += z;
}

static void
pdf_opcount_gs_begin(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *extgstate)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_gs_begin)
		p->mine->op_gs_begin(ctx, p->mine, name, extgstate);

	z = p->buffer->len - z;
	p->op_usage->len[OP_gs_begin] += z;
}

static void
pdf_opcount_gs_BM(fz_context *ctx, pdf_processor *proc, const char *blendmode)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_gs_BM)
		p->mine->op_gs_BM(ctx, p->mine, blendmode);

	z = p->buffer->len - z;
	p->op_usage->len[OP_gs_BM] += z;
}

static void
pdf_opcount_gs_CA(fz_context *ctx, pdf_processor *proc, float alpha)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_gs_CA)
		p->mine->op_gs_CA(ctx, p->mine, alpha);

	z = p->buffer->len - z;
	p->op_usage->len[OP_gs_CA] += z;
}

static void
pdf_opcount_gs_ca(fz_context *ctx, pdf_processor *proc, float alpha)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_gs_ca)
		p->mine->op_gs_ca(ctx, p->mine, alpha);

	z = p->buffer->len - z;
	p->op_usage->len[OP_gs_ca] += z;
}

static void
pdf_opcount_gs_SMask(fz_context *ctx, pdf_processor *proc, pdf_obj *smask, fz_colorspace *smask_cs, float *bc, int luminosity, pdf_obj *obj)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_gs_SMask)
		p->mine->op_gs_SMask(ctx, p->mine, smask, smask_cs, bc, luminosity, obj);

	z = p->buffer->len - z;
	p->op_usage->len[OP_gs_SMask] += z;
}

static void
pdf_opcount_gs_end(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_gs_end)
		p->mine->op_gs_end(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_gs_end] += z;
}

/* special graphics state */

static void
pdf_opcount_q(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_q)
		p->mine->op_q(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_q] += z;
}

static void
pdf_opcount_cm(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_cm)
		p->mine->op_cm(ctx, p->mine, a, b, c, d, e, f);

	z = p->buffer->len - z;
	p->op_usage->len[OP_cm] += z;
}

/* path construction */

static void
pdf_opcount_m(fz_context *ctx, pdf_processor *proc, float x, float y)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_m)
		p->mine->op_m(ctx, p->mine, x, y);

	z = p->buffer->len - z;
	p->op_usage->len[OP_m] += z;
}

static void
pdf_opcount_l(fz_context *ctx, pdf_processor *proc, float x, float y)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_l)
		p->mine->op_l(ctx, p->mine, x, y);

	z = p->buffer->len - z;
	p->op_usage->len[OP_l] += z;
}

static void
pdf_opcount_c(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x2, float y2, float x3, float y3)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_c)
		p->mine->op_c(ctx, p->mine, x1, y1, x2, y2, x3, y3);

	z = p->buffer->len - z;
	p->op_usage->len[OP_c] += z;
}

static void
pdf_opcount_v(fz_context *ctx, pdf_processor *proc, float x2, float y2, float x3, float y3)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_v)
		p->mine->op_v(ctx, p->mine, x2, y2, x3, y3);

	z = p->buffer->len - z;
	p->op_usage->len[OP_v] += z;
}

static void
pdf_opcount_y(fz_context *ctx, pdf_processor *proc, float x1, float y1, float x3, float y3)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_y)
		p->mine->op_y(ctx, p->mine, x1, y1, x3, y3);

	z = p->buffer->len - z;
	p->op_usage->len[OP_y] += z;
}

static void
pdf_opcount_h(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_h)
		p->mine->op_h(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_h] += z;
}

static void
pdf_opcount_re(fz_context *ctx, pdf_processor *proc, float x, float y, float w, float h)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_re)
		p->mine->op_re(ctx, p->mine, x, y, w, h);

	z = p->buffer->len - z;
	p->op_usage->len[OP_re] += z;
}

/* path painting */

static void
pdf_opcount_S(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_S)
		p->mine->op_S(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_S] += z;
}

static void
pdf_opcount_s(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_s)
		p->mine->op_s(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_s] += z;
}

static void
pdf_opcount_F(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_F)
		p->mine->op_F(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_F] += z;
}

static void
pdf_opcount_f(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_f)
		p->mine->op_f(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_f] += z;
}

static void
pdf_opcount_fstar(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_fstar)
		p->mine->op_fstar(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_fstar] += z;
}

static void
pdf_opcount_B(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_B)
		p->mine->op_B(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_B] += z;
}

static void
pdf_opcount_Bstar(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Bstar)
		p->mine->op_Bstar(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Bstar] += z;
}

static void
pdf_opcount_b(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_b)
		p->mine->op_b(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_b] += z;
}

static void
pdf_opcount_bstar(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_bstar)
		p->mine->op_bstar(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_bstar] += z;
}

static void
pdf_opcount_n(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_n)
		p->mine->op_n(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_n] += z;
}

/* clipping paths */

static void
pdf_opcount_W(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_W)
		p->mine->op_W(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_W] += z;
}

static void
pdf_opcount_Wstar(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Wstar)
		p->mine->op_Wstar(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Wstar] += z;
}

/* text objects */

static void
pdf_opcount_BT(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_BT)
		p->mine->op_BT(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_BT] += z;
}

static void
pdf_opcount_ET(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_ET)
		p->mine->op_ET(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_ET] += z;
}

static void
pdf_opcount_Q(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Q)
		p->mine->op_Q(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Q] += z;
}

/* text state */

static void
pdf_opcount_Tc(fz_context *ctx, pdf_processor *proc, float charspace)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Tc)
		p->mine->op_Tc(ctx, p->mine, charspace);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Tc] += z;
}

static void
pdf_opcount_Tw(fz_context *ctx, pdf_processor *proc, float wordspace)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Tw)
		p->mine->op_Tw(ctx, p->mine, wordspace);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Tw] += z;
}

static void
pdf_opcount_Tz(fz_context *ctx, pdf_processor *proc, float scale)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Tz)
		p->mine->op_Tz(ctx, p->mine, scale);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Tz] += z;
}

static void
pdf_opcount_TL(fz_context *ctx, pdf_processor *proc, float leading)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_TL)
		p->mine->op_TL(ctx, p->mine, leading);

	z = p->buffer->len - z;
	p->op_usage->len[OP_TL] += z;
}

static void
pdf_opcount_Tf(fz_context *ctx, pdf_processor *proc, const char *name, pdf_font_desc *font, float size)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Tf)
		p->mine->op_Tf(ctx, p->mine, name, font, size);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Tf] += z;
}

static void
pdf_opcount_Tr(fz_context *ctx, pdf_processor *proc, int render)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Tr)
		p->mine->op_Tr(ctx, p->mine, render);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Tr] += z;
}

static void
pdf_opcount_Ts(fz_context *ctx, pdf_processor *proc, float rise)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Ts)
		p->mine->op_Ts(ctx, p->mine, rise);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Ts] += z;
}

/* text positioning */

static void
pdf_opcount_Td(fz_context *ctx, pdf_processor *proc, float tx, float ty)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Td)
		p->mine->op_Td(ctx, p->mine, tx, ty);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Td] += z;
}

static void
pdf_opcount_TD(fz_context *ctx, pdf_processor *proc, float tx, float ty)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_TD)
		p->mine->op_TD(ctx, p->mine, tx, ty);

	z = p->buffer->len - z;
	p->op_usage->len[OP_TD] += z;
}

static void
pdf_opcount_Tm(fz_context *ctx, pdf_processor *proc, float a, float b, float c, float d, float e, float f)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Tm)
		p->mine->op_Tm(ctx, p->mine, a, b, c, d, e, f);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Tm] += z;
}

static void
pdf_opcount_Tstar(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Tstar)
		p->mine->op_Tstar(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Tstar] += z;
}

/* text showing */

static void
pdf_opcount_TJ(fz_context *ctx, pdf_processor *proc, pdf_obj *array)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_TJ)
		p->mine->op_TJ(ctx, p->mine, array);

	z = p->buffer->len - z;
	p->op_usage->len[OP_TJ] += z;
}

static void
pdf_opcount_Tj(fz_context *ctx, pdf_processor *proc, char *str, size_t len)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Tj)
		p->mine->op_Tj(ctx, p->mine, str, len);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Tj] += z;
}

static void
pdf_opcount_squote(fz_context *ctx, pdf_processor *proc, char *str, size_t len)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_squote)
		p->mine->op_squote(ctx, p->mine, str, len);

	z = p->buffer->len - z;
	p->op_usage->len[OP_squote] += z;
}

static void
pdf_opcount_dquote(fz_context *ctx, pdf_processor *proc, float aw, float ac, char *str, size_t len)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_dquote)
		p->mine->op_dquote(ctx, p->mine, aw, ac, str, len);

	z = p->buffer->len - z;
	p->op_usage->len[OP_dquote] += z;
}

/* type 3 fonts */

static void
pdf_opcount_d0(fz_context *ctx, pdf_processor *proc, float wx, float wy)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_d0)
		p->mine->op_d0(ctx, p->mine, wx, wy);

	z = p->buffer->len - z;
	p->op_usage->len[OP_d0] += z;
}

static void
pdf_opcount_d1(fz_context *ctx, pdf_processor *proc, float wx, float wy, float llx, float lly, float urx, float ury)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_d1)
		p->mine->op_d1(ctx, p->mine, wx, wy, llx, lly, urx, ury);

	z = p->buffer->len - z;
	p->op_usage->len[OP_d1] += z;
}

/* color */

static void
pdf_opcount_CS(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs)
{
	pdf_opcount_processor *p = (pdf_opcount_processor *)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_CS)
		p->mine->op_CS(ctx, p->mine, name, cs);

	z = p->buffer->len - z;
	p->op_usage->len[OP_CS] += z;
}

static void
pdf_opcount_cs(fz_context *ctx, pdf_processor *proc, const char *name, fz_colorspace *cs)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_cs)
		p->mine->op_cs(ctx, p->mine, name, cs);

	z = p->buffer->len - z;
	p->op_usage->len[OP_cs] += z;
}

static void
pdf_opcount_SC_pattern(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_SC_pattern)
		p->mine->op_SC_pattern(ctx, p->mine, name, pat, n, color);

	z = p->buffer->len - z;
	p->op_usage->len[OP_SC_pattern] += z;
}

static void
pdf_opcount_sc_pattern(fz_context *ctx, pdf_processor *proc, const char *name, pdf_pattern *pat, int n, float *color)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_sc_pattern)
		p->mine->op_sc_pattern(ctx, p->mine, name, pat, n, color);

	z = p->buffer->len - z;
	p->op_usage->len[OP_sc_pattern] += z;
}

static void
pdf_opcount_SC_shade(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_SC_shade)
		p->mine->op_SC_shade(ctx, p->mine, name, shade);

	z = p->buffer->len - z;
	p->op_usage->len[OP_SC_shade] += z;
}

static void
pdf_opcount_sc_shade(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_sc_shade)
		p->mine->op_sc_shade(ctx, p->mine, name, shade);

	z = p->buffer->len - z;
	p->op_usage->len[OP_sc_shade] += z;
}

static void
pdf_opcount_SC_color(fz_context *ctx, pdf_processor *proc, int n, float *color)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_SC_color)
		p->mine->op_SC_color(ctx, p->mine, n, color);

	z = p->buffer->len - z;
	p->op_usage->len[OP_SC_color] += z;
}

static void
pdf_opcount_sc_color(fz_context *ctx, pdf_processor *proc, int n, float *color)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_sc_color)
		p->mine->op_sc_color(ctx, p->mine, n, color);

	z = p->buffer->len - z;
	p->op_usage->len[OP_sc_color] += z;
}

static void
pdf_opcount_G(fz_context *ctx, pdf_processor *proc, float g)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_G)
		p->mine->op_G(ctx, p->mine, g);

	z = p->buffer->len - z;
	p->op_usage->len[OP_G] += z;
}

static void
pdf_opcount_g(fz_context *ctx, pdf_processor *proc, float g)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_g)
		p->mine->op_g(ctx, p->mine, g);

	z = p->buffer->len - z;
	p->op_usage->len[OP_g] += z;
}

static void
pdf_opcount_RG(fz_context *ctx, pdf_processor *proc, float r, float g, float b)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_RG)
		p->mine->op_RG(ctx, p->mine, r, g, b);

	z = p->buffer->len - z;
	p->op_usage->len[OP_RG] += z;
}

static void
pdf_opcount_rg(fz_context *ctx, pdf_processor *proc, float r, float g, float b)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_rg)
		p->mine->op_rg(ctx, p->mine, r, g, b);

	z = p->buffer->len - z;
	p->op_usage->len[OP_rg] += z;
}

static void
pdf_opcount_K(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_K)
		p->mine->op_K(ctx, p->mine, c, m, y, k);

	z = p->buffer->len - z;
	p->op_usage->len[OP_K] += z;
}

static void
pdf_opcount_k(fz_context *ctx, pdf_processor *proc, float c, float m, float y, float k)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_k)
		p->mine->op_k(ctx, p->mine, c, m, y, k);

	z = p->buffer->len - z;
	p->op_usage->len[OP_k] += z;
}

/* shadings, images, xobjects */

static void
pdf_opcount_BI(fz_context *ctx, pdf_processor *proc, fz_image *image, const char *colorspace)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_BI)
		p->mine->op_BI(ctx, p->mine, image, colorspace);

	z = p->buffer->len - z;
	p->op_usage->len[OP_BI] += z;
}

static void
pdf_opcount_sh(fz_context *ctx, pdf_processor *proc, const char *name, fz_shade *shade)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_sh)
		p->mine->op_sh(ctx, p->mine, name, shade);

	z = p->buffer->len - z;
	p->op_usage->len[OP_sh] += z;
}

static void
pdf_opcount_Do_image(fz_context *ctx, pdf_processor *proc, const char *name, fz_image *image)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Do_image)
		p->mine->op_Do_image(ctx, p->mine, name, image);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Do_image] += z;
}

static void
pdf_opcount_Do_form(fz_context *ctx, pdf_processor *proc, const char *name, pdf_obj *xobj)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_Do_form)
		p->mine->op_Do_form(ctx, p->mine, name, xobj);

	z = p->buffer->len - z;
	p->op_usage->len[OP_Do_form] += z;
}

/* marked content */

static void
pdf_opcount_MP(fz_context *ctx, pdf_processor *proc, const char *tag)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_MP)
		p->mine->op_MP(ctx, p->mine, tag);

	z = p->buffer->len - z;
	p->op_usage->len[OP_MP] += z;
}

static void
pdf_opcount_DP(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_DP)
		p->mine->op_DP(ctx, p->mine, tag, raw, cooked);

	z = p->buffer->len - z;
	p->op_usage->len[OP_DP] += z;
}

static void
pdf_opcount_BMC(fz_context *ctx, pdf_processor *proc, const char *tag)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_BMC)
		p->mine->op_BMC(ctx, p->mine, tag);

	z = p->buffer->len - z;
	p->op_usage->len[OP_BMC] += z;
}

static void
pdf_opcount_BDC(fz_context *ctx, pdf_processor *proc, const char *tag, pdf_obj *raw, pdf_obj *cooked)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_BDC)
		p->mine->op_BDC(ctx, p->mine, tag, raw, cooked);

	z = p->buffer->len - z;
	p->op_usage->len[OP_BDC] += z;
}

static void
pdf_opcount_EMC(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_EMC)
		p->mine->op_EMC(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_EMC] += z;
}

/* compatibility */

static void
pdf_opcount_BX(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_BX)
		p->mine->op_BX(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_BX] += z;
}

static void
pdf_opcount_EX(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;
	size_t z = p->buffer->len;

	if (p->mine->op_EX)
		p->mine->op_EX(ctx, p->mine);

	z = p->buffer->len - z;
	p->op_usage->len[OP_EX] += z;
}

static void
pdf_opcount_END(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;

	if (p->mine->op_END)
		p->mine->op_END(ctx, p->mine);
}

static void
pdf_close_opcount_processor(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;

	pdf_close_processor(ctx, p->mine);
	pdf_close_processor(ctx, p->chain);
}

static void
pdf_drop_opcount_processor(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;

	fz_drop_buffer(ctx, p->buffer);
	pdf_drop_processor(ctx, p->mine);
}

static void
pdf_opcount_push_resources(fz_context *ctx, pdf_processor *proc, pdf_obj *res)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;

	pdf_processor_push_resources(ctx, p->mine, res);
}

static pdf_obj *
pdf_opcount_pop_resources(fz_context *ctx, pdf_processor *proc)
{
	pdf_opcount_processor *p = (pdf_opcount_processor*)proc;

	return pdf_processor_pop_resources(ctx, p->mine);
}

pdf_processor *
pdf_new_opcount_filter(
	fz_context *ctx,
	pdf_document *doc,
	pdf_processor *chain,
	int struct_parents,
	fz_matrix transform,
	pdf_filter_options *global_options,
	void *options_)
{
	pdf_opcount_processor *proc = pdf_new_processor(ctx, sizeof * proc);

	fz_try(ctx)
	{
		proc->buffer = fz_new_buffer(ctx, 1024);
		proc->mine = pdf_new_buffer_processor(ctx, proc->buffer, 0, 0);
	}
	fz_catch(ctx)
		fz_rethrow(ctx);

	proc->op_usage = (op_usage_t *)options_;

	proc->super.close_processor = pdf_close_opcount_processor;
	proc->super.drop_processor = pdf_drop_opcount_processor;

	proc->super.push_resources = pdf_opcount_push_resources;
	proc->super.pop_resources = pdf_opcount_pop_resources;

	/* general graphics state */
	proc->super.op_w = pdf_opcount_w;
	proc->super.op_j = pdf_opcount_j;
	proc->super.op_J = pdf_opcount_J;
	proc->super.op_M = pdf_opcount_M;
	proc->super.op_d = pdf_opcount_d;
	proc->super.op_ri = pdf_opcount_ri;
	proc->super.op_i = pdf_opcount_i;
	proc->super.op_gs_begin = pdf_opcount_gs_begin;
	proc->super.op_gs_end = pdf_opcount_gs_end;

	/* transparency graphics state */
	proc->super.op_gs_BM = pdf_opcount_gs_BM;
	proc->super.op_gs_CA = pdf_opcount_gs_CA;
	proc->super.op_gs_ca = pdf_opcount_gs_ca;
	proc->super.op_gs_SMask = pdf_opcount_gs_SMask;

	/* special graphics state */
	proc->super.op_q = pdf_opcount_q;
	proc->super.op_Q = pdf_opcount_Q;
	proc->super.op_cm = pdf_opcount_cm;

	/* path construction */
	proc->super.op_m = pdf_opcount_m;
	proc->super.op_l = pdf_opcount_l;
	proc->super.op_c = pdf_opcount_c;
	proc->super.op_v = pdf_opcount_v;
	proc->super.op_y = pdf_opcount_y;
	proc->super.op_h = pdf_opcount_h;
	proc->super.op_re = pdf_opcount_re;

	/* path painting */
	proc->super.op_S = pdf_opcount_S;
	proc->super.op_s = pdf_opcount_s;
	proc->super.op_F = pdf_opcount_F;
	proc->super.op_f = pdf_opcount_f;
	proc->super.op_fstar = pdf_opcount_fstar;
	proc->super.op_B = pdf_opcount_B;
	proc->super.op_Bstar = pdf_opcount_Bstar;
	proc->super.op_b = pdf_opcount_b;
	proc->super.op_bstar = pdf_opcount_bstar;
	proc->super.op_n = pdf_opcount_n;

	/* clipping paths */
	proc->super.op_W = pdf_opcount_W;
	proc->super.op_Wstar = pdf_opcount_Wstar;

	/* text objects */
	proc->super.op_BT = pdf_opcount_BT;
	proc->super.op_ET = pdf_opcount_ET;

	/* text state */
	proc->super.op_Tc = pdf_opcount_Tc;
	proc->super.op_Tw = pdf_opcount_Tw;
	proc->super.op_Tz = pdf_opcount_Tz;
	proc->super.op_TL = pdf_opcount_TL;
	proc->super.op_Tf = pdf_opcount_Tf;
	proc->super.op_Tr = pdf_opcount_Tr;
	proc->super.op_Ts = pdf_opcount_Ts;

	/* text positioning */
	proc->super.op_Td = pdf_opcount_Td;
	proc->super.op_TD = pdf_opcount_TD;
	proc->super.op_Tm = pdf_opcount_Tm;
	proc->super.op_Tstar = pdf_opcount_Tstar;

	/* text showing */
	proc->super.op_TJ = pdf_opcount_TJ;
	proc->super.op_Tj = pdf_opcount_Tj;
	proc->super.op_squote = pdf_opcount_squote;
	proc->super.op_dquote = pdf_opcount_dquote;

	/* type 3 fonts */
	proc->super.op_d0 = pdf_opcount_d0;
	proc->super.op_d1 = pdf_opcount_d1;

	/* color */
	proc->super.op_CS = pdf_opcount_CS;
	proc->super.op_cs = pdf_opcount_cs;
	proc->super.op_SC_color = pdf_opcount_SC_color;
	proc->super.op_sc_color = pdf_opcount_sc_color;
	proc->super.op_SC_pattern = pdf_opcount_SC_pattern;
	proc->super.op_sc_pattern = pdf_opcount_sc_pattern;
	proc->super.op_SC_shade = pdf_opcount_SC_shade;
	proc->super.op_sc_shade = pdf_opcount_sc_shade;

	proc->super.op_G = pdf_opcount_G;
	proc->super.op_g = pdf_opcount_g;
	proc->super.op_RG = pdf_opcount_RG;
	proc->super.op_rg = pdf_opcount_rg;
	proc->super.op_K = pdf_opcount_K;
	proc->super.op_k = pdf_opcount_k;

	/* shadings, images, xobjects */
	proc->super.op_BI = pdf_opcount_BI;
	proc->super.op_sh = pdf_opcount_sh;
	proc->super.op_Do_image = pdf_opcount_Do_image;
	proc->super.op_Do_form = pdf_opcount_Do_form;

	/* marked content */
	proc->super.op_MP = pdf_opcount_MP;
	proc->super.op_DP = pdf_opcount_DP;
	proc->super.op_BMC = pdf_opcount_BMC;
	proc->super.op_BDC = pdf_opcount_BDC;
	proc->super.op_EMC = pdf_opcount_EMC;

	/* compatibility */
	proc->super.op_BX = pdf_opcount_BX;
	proc->super.op_EX = pdf_opcount_EX;

	/* extgstate */
	proc->super.op_gs_OP = pdf_opcount_gs_OP;
	proc->super.op_gs_op = pdf_opcount_gs_op;
	proc->super.op_gs_OPM = pdf_opcount_gs_OPM;
	proc->super.op_gs_UseBlackPtComp = pdf_opcount_gs_UseBlackPtComp;

	proc->super.op_END = pdf_opcount_END;

	proc->global_options = global_options;
	proc->chain = chain;

	return (pdf_processor*)proc;
}

static void
filter_page(fz_context *ctx, pdf_document *doc, op_usage_t *op_usage, int page_num)
{
	pdf_page *page = pdf_load_page(ctx, doc, page_num);
	pdf_filter_options options = { 0 };
	pdf_filter_factory list[2] = { 0 };
	pdf_annot *annot;

	options.filters = list;
	options.recurse = 1;
	options.no_update = 1;
	list[0].filter = pdf_new_opcount_filter;
	list[0].options = op_usage;

	fz_try(ctx)
	{
		pdf_filter_page_contents(ctx, doc, page, &options);

		for (annot = pdf_first_annot(ctx, page); annot != NULL; annot = pdf_next_annot(ctx, annot))
			pdf_filter_annot_contents(ctx, doc, annot, &options);
	}
	fz_always(ctx)
		fz_drop_page(ctx, &page->super);
	fz_catch(ctx)
		fz_rethrow(ctx);
}

static void
filter_page_streams(fz_context *ctx, pdf_document *pdf, op_usage_t *ou)
{
	int i, n = pdf_count_pages(ctx, pdf);

	for (i= 0; i < n; i++)
	{
		filter_page(ctx, pdf, ou, i);
	}
}

static void
filter_buffer(fz_context *ctx, obj_info_t *oi, fz_buffer *buf)
{
	oi->len = buf->len;
}

static void
filter_stream(fz_context *ctx, pdf_document *pdf, int i, obj_info_t *oi)
{
	fz_buffer *buf = pdf_load_stream_number(ctx, pdf, i);

	fz_try(ctx)
		filter_buffer(ctx, oi, buf);
	fz_always(ctx)
		fz_drop_buffer(ctx, buf);
	fz_catch(ctx)
		fz_rethrow(ctx);
}

static void
filter_obj(fz_context *ctx, obj_info_t *oi, pdf_obj *obj)
{
	fz_buffer *buf = fz_new_buffer(ctx, 1024);
	fz_output *out = NULL;

	fz_var(out);

	fz_try(ctx)
	{
		out = fz_new_output_with_buffer(ctx, buf);
		pdf_print_obj(ctx, out, obj, 1, 0);
		fz_close_output(ctx, out);
	}
	fz_always(ctx)
	{
		fz_drop_output(ctx, out);
		if (buf)
			oi->textsize = buf->len;
		fz_drop_buffer(ctx, buf);
	}
	fz_catch(ctx)
		fz_rethrow(ctx);
}

typedef struct
{
	int len;
	int max;
	struct {
		pdf_obj *obj;
		int pos;
		int state;
	} *stack;
} walk_stack_t;

static void
walk(fz_context *ctx, walk_stack_t *ws, int n, obj_info_t *oi, pdf_obj *obj, audit_type_t type)
{
	int num = 0;
	do
	{
		if (pdf_is_indirect(ctx, obj))
		{
			num = pdf_to_num(ctx, obj);
			if (num < 0 || num >= n)
				fz_throw(ctx, FZ_ERROR_GENERIC, "object outside of xref range");
			if (oi[num].type != AUDIT_UNKNOWN)
				goto visited;
			if (pdf_mark_obj(ctx, obj))
			{
				/* We've already visited this one! */
				goto visited;
			}
		}
		/* Push the object onto the stack. */
		if (ws->len == ws->max)
		{
			int newmax = ws->max * 2;
			if (newmax == 0)
				newmax = 32;
			ws->stack = fz_realloc(ctx, ws->stack, sizeof(ws->stack[0]) * newmax);
			ws->max = newmax;
		}

		/* If the object we are about to stack is a dict, then check to see if
		 * we should be changing type because of it. */
		if (pdf_is_dict(ctx, obj))
		{
			pdf_obj *otype = pdf_dict_get(ctx, obj, PDF_NAME(Type));
			pdf_obj *subtype = pdf_dict_get(ctx, obj, PDF_NAME(Subtype));

			if (pdf_name_eq(ctx, otype, PDF_NAME(Annot)))
			{
				if (pdf_name_eq(ctx, subtype, PDF_NAME(Link)))
					type = AUDIT_LINK_ANNOTATIONS;
				else if (pdf_name_eq(ctx, subtype, PDF_NAME(Text)))
					type = AUDIT_COMMENTS;
				else if (pdf_name_eq(ctx, subtype, PDF_NAME(FreeText)))
					type = AUDIT_COMMENTS;
				else if (pdf_name_eq(ctx, subtype, PDF_NAME(Popup)))
					type = AUDIT_COMMENTS;
				else if (pdf_name_eq(ctx, subtype, PDF_NAME(3D)))
					type = AUDIT_3DCONTENT;
				else if (pdf_name_eq(ctx, subtype, PDF_NAME(PieceInfo)))
					type = AUDIT_PIECE_INFORMATION;
			}
			else if (pdf_name_eq(ctx, otype, PDF_NAME(Font)))
				type = AUDIT_FONTS;
			else if (pdf_name_eq(ctx, otype, PDF_NAME(FontDescriptor)))
				type = AUDIT_FONTS;
			else if (pdf_name_eq(ctx, otype, PDF_NAME(XObject)))
			{
				if (pdf_name_eq(ctx, subtype, PDF_NAME(Image)))
					type = AUDIT_IMAGES;
				else if (pdf_name_eq(ctx, subtype, PDF_NAME(Form)))
					type = AUDIT_FORM_XOBJ;
			}
			else if (pdf_name_eq(ctx, otype, PDF_NAME(Page)))
				type = AUDIT_PAGE_OBJECTS;
			else if (pdf_name_eq(ctx, otype, PDF_NAME(Pages)))
				type = AUDIT_PAGE_OBJECTS;
			else if (pdf_name_eq(ctx, otype, PDF_NAME(Metadata)))
				type = AUDIT_METADATA;
		}
		ws->stack[ws->len].obj = obj;
		ws->stack[ws->len].pos = 0;
		ws->stack[ws->len].state = type;
		ws->len++;

		/* So we have stepped successfully onto obj. */
		/* Record its type. */
		if (type != AUDIT_UNKNOWN)
		{
			num = pdf_obj_parent_num(ctx, obj);
			oi[num].type = type;
		}

		/* Step onwards to any children. */
		if (pdf_is_dict(ctx, obj))
		{
			pdf_obj *key;
			/* We've just stepped onto a dict. */
step_next_dict_child:
			if (ws->stack[ws->len-1].pos == pdf_dict_len(ctx, obj))
				goto pop;
			key = pdf_dict_get_key(ctx, ws->stack[ws->len-1].obj, ws->stack[ws->len-1].pos);
			ws->stack[ws->len-1].pos++;

			if (pdf_name_eq(ctx, key, PDF_NAME(Parent)))
				goto step_next_dict_child;

			if (pdf_name_eq(ctx, key, PDF_NAME(Thumb)))
				type = AUDIT_THUMBNAILS;
			else if (pdf_name_eq(ctx, key, PDF_NAME(Outlines)))
				type = AUDIT_BOOKMARKS;
			else if (pdf_name_eq(ctx, key, PDF_NAME(Contents)))
				type = AUDIT_CONTENT_STREAMS;
			else if (pdf_name_eq(ctx, key, PDF_NAME(StructTreeRoot)))
				type = AUDIT_STRUCTURE_INFO;
			else if (pdf_name_eq(ctx, key, PDF_NAME(AcroForm)))
				type = AUDIT_FORMS;
			else if (pdf_name_eq(ctx, key, PDF_NAME(ColorSpace)))
				type = AUDIT_COLORSPACES;
			else if (pdf_name_eq(ctx, key, PDF_NAME(CS)))
				type = AUDIT_COLORSPACES;
			else if (pdf_name_eq(ctx, key, PDF_NAME(Dests)))
				type = AUDIT_NAMED_DESTINATIONS;
			else if (pdf_name_eq(ctx, key, PDF_NAME(ExtGState)))
				type = AUDIT_EXTGS;
			else if (pdf_name_eq(ctx, key, PDF_NAME(Resources)))
				type = AUDIT_RESOURCES;
			else if (pdf_name_eq(ctx, key, PDF_NAME(EmbeddedFile)))
				type = AUDIT_EMBEDDED_FILES;
			else if (pdf_name_eq(ctx, key, PDF_NAME(Metadata)))
				type = AUDIT_METADATA;

			/* OK. step onto the val. */
			obj = pdf_dict_get_val(ctx, ws->stack[ws->len-1].obj, ws->stack[ws->len-1].pos-1);
			continue;
		}
		else if (pdf_is_array(ctx, obj))
		{
step_next_array_child:
			if (ws->stack[ws->len-1].pos == pdf_array_len(ctx, obj))
				goto pop;

			obj = pdf_array_get(ctx, ws->stack[ws->len-1].obj, ws->stack[ws->len-1].pos);
			ws->stack[ws->len-1].pos++;
			continue;
		}

		/* Nothing more to do with this object. Pop back up. */
pop:
		if (pdf_is_indirect(ctx, obj))
		{
			pdf_unmark_obj(ctx, obj);
		}
		ws->len--;
visited:
		if (ws->len > 0)
		{
			/* We should either have stepped up to a dict or an array. */
			obj = ws->stack[ws->len-1].obj;
			type = ws->stack[ws->len-1].state;
			num = pdf_obj_parent_num(ctx, obj);
			if (pdf_is_dict(ctx, obj))
				goto step_next_dict_child;
			else if (pdf_is_array(ctx, obj))
				goto step_next_array_child;
			else
				assert("Never happens" == NULL);
		}
	}
	while (ws->len > 0);
}

static void
classify_by_walking(fz_context *ctx, pdf_document *doc, int n, obj_info_t *oi)
{
	walk_stack_t ws = { 0 };

	fz_try(ctx)
		walk(ctx, &ws, n, oi, pdf_trailer(ctx, doc), AUDIT_TRAILER);
	fz_always(ctx)
		fz_free(ctx, ws.stack);
	fz_catch(ctx)
		fz_rethrow(ctx);
}

static void
output_size(fz_context *ctx, fz_output *out, uint64_t file_size, const char *str, uint64_t size, uint64_t size2)
{
	fz_write_printf(ctx, out, "<tr align=right><td align=left>%s<td>%,ld<td>%.2f%%<td>%,ld<td>%.2f%%</tr>\n", str, size, 100.0f * size/file_size, size2, 100.0f * size2/file_size);
}

static void
filter_file(fz_context *ctx, fz_output *out, const char *filename)
{
	pdf_document *pdf = NULL;
	int i, n;
	pdf_obj *obj = NULL;
	obj_info_t *oi = NULL;
	op_usage_t ou = { 0 };

	fz_var(pdf);
	fz_var(obj);
	fz_var(i);
	fz_var(oi);

	fz_try(ctx)
	{
		pdf = pdf_open_document(ctx, filename);

		n = pdf_xref_len(ctx, pdf);
		oi = fz_malloc_array(ctx, n, obj_info_t);
		memset(oi, 0, n * sizeof(obj_info_t));
		for (i = 1; i < n; i++)
		{
			fz_try(ctx)
			{
				for (; i <n; i++)
				{
					pdf_xref_entry *entry = pdf_cache_object(ctx, pdf, i);
					int is_in_objstm = entry->type == 'o';
					pdf_obj *type, *subtype;
					char text[128];

					if (entry->obj == NULL)
						continue;
					oi[i].is_in_objstm = is_in_objstm;
					if (!is_in_objstm)
					{
						sprintf(text, "%d 0 obj\nendobj\n", i);
						oi[i].overhead = strlen(text);
					}
					else
					{
						sprintf(text, "%d %zd ", i, (size_t)entry->ofs);
						oi[i].overhead = strlen(text);
					}
					type = pdf_dict_get(ctx, entry->obj, PDF_NAME(Type));
					SWITCH (type)
					{
					CASE(PDF_NAME(ObjStm)):
						oi[i].type = AUDIT_OBJSTM;
						break;

					}
					subtype = pdf_dict_get(ctx, entry->obj, PDF_NAME(Subtype));
					SWITCH (subtype)
					{
					CASE(PDF_NAME(Image)):
						oi[i].type = AUDIT_IMAGES;
						break;
					}

					filter_obj(ctx, &oi[i], entry->obj);
					if (pdf_obj_num_is_stream(ctx, pdf, i))
					{
						filter_stream(ctx, pdf, i, &oi[i]);
						oi[i].stream_len = pdf_dict_get_int64(ctx, entry->obj, PDF_NAME(Length));
					}
					pdf_drop_obj(ctx, obj);
					obj = NULL;
				}
			}
			fz_catch(ctx)
			{
				i++;
				/* Swallow error */
			}
		}

		/* Walk the doc structure. */
		classify_by_walking(ctx, pdf, n, oi);

		/* Filter the content streams to establish operator usage */
		filter_page_streams(ctx, pdf, &ou);

		fz_write_printf(ctx, out, "<html><title>PDF Audit: %s</title><body>\n", filename);
		fz_write_printf(ctx, out, "<H1>Input file: %s</H1>\n", filename);
		fz_write_printf(ctx, out, "<p>Total file size: %,zd bytes</p>", pdf->file_size);

		fz_write_printf(ctx, out, "<H3>Total file usage</H3>\n");
		/* Sum the results */
		{
			struct {
				size_t obj;
				size_t objstm;
			} counts[AUDIT__MAX] = { 0 };
			size_t total_obj = 0;
			size_t total_objstm = 0;
			size_t overhead = 0;
			size_t objstm_overhead = 0;
			size_t total_stream_uncomp = 0;
			size_t total_stream_comp = 0;
			for (i = 1; i < n; i++)
			{
				size_t z = oi[i].textsize + oi[i].overhead + oi[i].stream_len;
				total_stream_uncomp += oi[i].len;
				total_stream_comp += oi[i].stream_len;
				if (oi[i].is_in_objstm)
				{
					objstm_overhead += oi[i].overhead;
					total_objstm += oi[i].textsize;
					counts[oi[i].type].objstm += z;
				}
				else
				{
					overhead += oi[i].overhead;
					total_obj += oi[i].textsize;
					counts[oi[i].type].obj += z;
				}
			}
			fz_write_printf(ctx, out, "<table border=1><thead><th><th colspan=2>not in objstms<th colspan=2>in objstms</thead>\n");
			output_size(ctx, out, pdf->file_size, "object text", total_obj, total_objstm);
			output_size(ctx, out, pdf->file_size, "object overhead", overhead, objstm_overhead);
			fz_write_printf(ctx, out, "<thead><th><th colspan=2>uncompressed<th colspan=2>compressed</thead>\n");
			output_size(ctx, out, pdf->file_size, "streams", total_stream_uncomp, total_stream_comp);
			fz_write_printf(ctx, out, "</table>\n");
			fz_write_printf(ctx, out, "<p>NOTE: The uncompressed streams percentage figure is misleading,"
						" as it is the percentage of the complete file which typically includes compression.</p>\n");
			fz_write_printf(ctx, out, "<table border=1><thead><th><th colspan=2>not in objstms<th colspan=2>in objstms</thead>\n");
			fz_write_printf(ctx, out, "<H3>Classified file usage</H3>\n");
			for (i = 0; i < AUDIT__MAX; i++)
			{
				output_size(ctx, out, pdf->file_size, audit_type[i], counts[i].obj, counts[i].objstm);
			}
			fz_write_printf(ctx, out, "</table>\n");
			fz_write_printf(ctx, out, "<p>NOTE: The percentages are as percentages of the complete file. This again means that"
						" the percentages in the &quot;in objstms&quot; column are misleading as the objstms are"
						" typically compressed!</p>\n");
		}

		/* List some unknown objects. */
		{
			int count = 0;

			for (i = 1; i < n; i++)
			{
				if (oi[i].type != AUDIT_UNKNOWN || oi[i].textsize == 0)
					continue;

				if (count == 0)
					fz_write_printf(ctx, out, "<p>Some objects still unknown: e.g. ");
				fz_write_printf(ctx, out, "%d ", i);
				count++;
				if (count == 10)
					break;
			}
			if (count > 0)
				fz_write_printf(ctx, out, "</p>\n");
		}

		/* Write out the operator usage */
		fz_write_printf(ctx, out, "<H3>Operator usage within streams</H3>\n");
		{
			size_t total = 0;
			for (i = 0; i < OP_END; i++)
				total += ou.len[i];
			fz_write_printf(ctx, out, "<table border=1><thead><th>Op<th>bytes<th></thead>\n");
			for (i = 0; i < OP_END; i++)
			{
				fz_write_printf(ctx, out, "<tr align=right><td align=left>%s<td>%,zd<td>%.2f%%</tr>\n", op_names[i], ou.len[i], 100.f * ou.len[i] / total);
			}
			fz_write_printf(ctx, out, "</table>\n");
			fz_write_printf(ctx, out, "<p>NOTE: The percentages are of the operator stream content found.</p>\n");
		}

	}
	fz_always(ctx)
	{
		pdf_drop_obj(ctx, obj);
		pdf_drop_document(ctx, pdf);
		fz_free(ctx, oi);
	}
	fz_catch(ctx)
		fz_rethrow(ctx);
}

static int usage(void)
{
	fprintf(stderr,
		"usage: mutool audit [options] input.pdf+\n"
		"\t-o -\toutput file\n"
		);
	return 1;
}

int pdfaudit_main(int argc, char **argv)
{
	char *outfile = "-";
	int c;
	int errors = 0;
	int append = 0;
	fz_context *ctx;
	fz_output *out = NULL;
	const fz_getopt_long_options longopts[] =
	{
		{ NULL, NULL, NULL }
	};

	while ((c = fz_getopt_long(argc, argv, "o:", longopts)) != -1)
	{
		switch (c)
		{
		case 'o': outfile = fz_optpath(fz_optarg); break;
		case 0:
		{
			SWITCH(fz_optlong->opaque)
			{
			// Any future long options go here.
			default:
			case 0:
				assert(!"Never happens");
				break;
			break;
			}
		}
		default: return usage();
		}
	}

	if (argc - fz_optind < 1)
		return usage();

	ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
	if (!ctx)
	{
		fprintf(stderr, "cannot initialise context\n");
		exit(1);
	}

#ifdef _WIN32
	fz_set_stddbg(ctx, fz_stdods(ctx));
#endif

	fz_var(out);

	fz_try(ctx)
	{
		out = fz_new_output_with_path(ctx, outfile, append);
		while (fz_optind < argc)
			filter_file(ctx, out, argv[fz_optind++]);
		fz_close_output(ctx, out);
	}
	fz_always(ctx)
		fz_drop_output(ctx, out);
	fz_catch(ctx)
	{
		fz_report_error(ctx);
		errors++;
	}
	fz_drop_context(ctx);

	return errors != 0;
}