diff mupdf-source/source/pdf/pdf-cmap-load.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/source/pdf/pdf-cmap-load.c	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,338 @@
+// Copyright (C) 2004-2024 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#include "mupdf/fitz.h"
+#include "mupdf/pdf.h"
+
+#include "cmaps/TrueType-UCS2.h"
+
+#include <string.h>
+
+static pdf_cmap *
+pdf_load_embedded_cmap_imp(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj, pdf_cycle_list *cycle_up)
+{
+	pdf_cycle_list cycle;
+	fz_stream *file = NULL;
+	pdf_cmap *cmap = NULL;
+	pdf_cmap *usecmap = NULL;
+	pdf_obj *obj;
+
+	fz_var(file);
+	fz_var(cmap);
+	fz_var(usecmap);
+
+	if ((cmap = pdf_find_item(ctx, pdf_drop_cmap_imp, stmobj)) != NULL)
+		return cmap;
+
+	fz_try(ctx)
+	{
+		file = pdf_open_stream(ctx, stmobj);
+		cmap = pdf_load_cmap(ctx, file);
+
+		obj = pdf_dict_get(ctx, stmobj, PDF_NAME(WMode));
+		if (pdf_is_int(ctx, obj))
+			pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(ctx, obj));
+
+		obj = pdf_dict_get(ctx, stmobj, PDF_NAME(UseCMap));
+		if (pdf_is_name(ctx, obj))
+		{
+			fz_try(ctx)
+			{
+				usecmap = pdf_load_system_cmap(ctx, pdf_to_name(ctx, obj));
+				pdf_set_usecmap(ctx, cmap, usecmap);
+			}
+			fz_catch(ctx)
+			{
+				fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
+				fz_report_error(ctx);
+				fz_warn(ctx, "cannot load system CMap: %s", pdf_to_name(ctx, obj));
+			}
+		}
+		else if (pdf_is_indirect(ctx, obj))
+		{
+			if (pdf_cycle(ctx, &cycle, cycle_up, obj))
+				fz_throw(ctx, FZ_ERROR_FORMAT, "recursive CMap");
+			usecmap = pdf_load_embedded_cmap_imp(ctx, doc, obj, &cycle);
+			pdf_set_usecmap(ctx, cmap, usecmap);
+		}
+		else if (strlen(cmap->usecmap_name) > 0)
+		{
+			fz_try(ctx)
+			{
+				usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name);
+				pdf_set_usecmap(ctx, cmap, usecmap);
+			}
+			fz_catch(ctx)
+			{
+				fz_rethrow_if(ctx, FZ_ERROR_SYSTEM);
+				fz_report_error(ctx);
+				fz_warn(ctx, "cannot load system CMap: %s", pdf_to_name(ctx, obj));
+			}
+		}
+
+		pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap));
+	}
+	fz_always(ctx)
+	{
+		fz_drop_stream(ctx, file);
+		pdf_drop_cmap(ctx, usecmap);
+	}
+	fz_catch(ctx)
+	{
+		pdf_drop_cmap(ctx, cmap);
+		fz_rethrow(ctx);
+	}
+
+	return cmap;
+}
+
+pdf_cmap *
+pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj)
+{
+	return pdf_load_embedded_cmap_imp(ctx, doc, stmobj, NULL);
+}
+
+pdf_cmap *
+pdf_new_identity_cmap(fz_context *ctx, int wmode, int bytes)
+{
+	pdf_cmap *cmap = pdf_new_cmap(ctx);
+	fz_try(ctx)
+	{
+		unsigned int high = (1 << (bytes * 8)) - 1;
+		if (wmode)
+			fz_strlcpy(cmap->cmap_name, "Identity-V", sizeof cmap->cmap_name);
+		else
+			fz_strlcpy(cmap->cmap_name, "Identity-H", sizeof cmap->cmap_name);
+		pdf_add_codespace(ctx, cmap, 0, high, bytes);
+		pdf_map_range_to_range(ctx, cmap, 0, high, 0);
+		pdf_sort_cmap(ctx, cmap);
+		pdf_set_cmap_wmode(ctx, cmap, wmode);
+	}
+	fz_catch(ctx)
+	{
+		pdf_drop_cmap(ctx, cmap);
+		fz_rethrow(ctx);
+	}
+	return cmap;
+}
+
+#ifdef NO_CJK
+
+pdf_cmap *
+pdf_load_builtin_cmap(fz_context *ctx, const char *name)
+{
+	if (!strcmp(name, "Identity-H")) return pdf_new_identity_cmap(ctx, 0, 2);
+	if (!strcmp(name, "Identity-V")) return pdf_new_identity_cmap(ctx, 1, 2);
+	if (!strcmp(name, "TrueType-UCS2")) return &cmap_TrueType_UCS2;
+	return NULL;
+}
+
+#else
+
+/* To regenerate this list: :r !bash scripts/runcmapdump.sh */
+
+#include "cmaps/83pv-RKSJ-H.h"
+#include "cmaps/90ms-RKSJ-H.h"
+#include "cmaps/90ms-RKSJ-V.h"
+#include "cmaps/90msp-RKSJ-H.h"
+#include "cmaps/90msp-RKSJ-V.h"
+#include "cmaps/90pv-RKSJ-H.h"
+#include "cmaps/Add-RKSJ-H.h"
+#include "cmaps/Add-RKSJ-V.h"
+#include "cmaps/Adobe-CNS1-UCS2.h"
+#include "cmaps/Adobe-GB1-UCS2.h"
+#include "cmaps/Adobe-Japan1-UCS2.h"
+#include "cmaps/Adobe-Korea1-UCS2.h"
+#include "cmaps/B5pc-H.h"
+#include "cmaps/B5pc-V.h"
+#include "cmaps/CNS-EUC-H.h"
+#include "cmaps/CNS-EUC-V.h"
+#include "cmaps/ETen-B5-H.h"
+#include "cmaps/ETen-B5-V.h"
+#include "cmaps/ETenms-B5-H.h"
+#include "cmaps/ETenms-B5-V.h"
+#include "cmaps/EUC-H.h"
+#include "cmaps/EUC-V.h"
+#include "cmaps/Ext-RKSJ-H.h"
+#include "cmaps/Ext-RKSJ-V.h"
+#include "cmaps/GB-EUC-H.h"
+#include "cmaps/GB-EUC-V.h"
+#include "cmaps/GBK-EUC-H.h"
+#include "cmaps/GBK-EUC-V.h"
+#include "cmaps/GBK-X.h"
+#include "cmaps/GBK2K-H.h"
+#include "cmaps/GBK2K-V.h"
+#include "cmaps/GBKp-EUC-H.h"
+#include "cmaps/GBKp-EUC-V.h"
+#include "cmaps/GBpc-EUC-H.h"
+#include "cmaps/GBpc-EUC-V.h"
+#include "cmaps/H.h"
+#include "cmaps/HKscs-B5-H.h"
+#include "cmaps/HKscs-B5-V.h"
+#include "cmaps/Identity-H.h"
+#include "cmaps/Identity-V.h"
+#include "cmaps/KSC-EUC-H.h"
+#include "cmaps/KSC-EUC-V.h"
+#include "cmaps/KSCms-UHC-H.h"
+#include "cmaps/KSCms-UHC-HW-H.h"
+#include "cmaps/KSCms-UHC-HW-V.h"
+#include "cmaps/KSCms-UHC-V.h"
+#include "cmaps/KSCpc-EUC-H.h"
+#include "cmaps/UniCNS-UCS2-H.h"
+#include "cmaps/UniCNS-UCS2-V.h"
+#include "cmaps/UniCNS-UTF16-H.h"
+#include "cmaps/UniCNS-UTF16-V.h"
+#include "cmaps/UniCNS-X.h"
+#include "cmaps/UniGB-UCS2-H.h"
+#include "cmaps/UniGB-UCS2-V.h"
+#include "cmaps/UniGB-UTF16-H.h"
+#include "cmaps/UniGB-UTF16-V.h"
+#include "cmaps/UniGB-X.h"
+#include "cmaps/UniJIS-UCS2-H.h"
+#include "cmaps/UniJIS-UCS2-HW-H.h"
+#include "cmaps/UniJIS-UCS2-HW-V.h"
+#include "cmaps/UniJIS-UCS2-V.h"
+#include "cmaps/UniJIS-UTF16-H.h"
+#include "cmaps/UniJIS-UTF16-V.h"
+#include "cmaps/UniJIS-X.h"
+#include "cmaps/UniKS-UCS2-H.h"
+#include "cmaps/UniKS-UCS2-V.h"
+#include "cmaps/UniKS-UTF16-H.h"
+#include "cmaps/UniKS-UTF16-V.h"
+#include "cmaps/UniKS-X.h"
+#include "cmaps/V.h"
+
+static pdf_cmap *table[] = {
+	&cmap_83pv_RKSJ_H,
+	&cmap_90ms_RKSJ_H,
+	&cmap_90ms_RKSJ_V,
+	&cmap_90msp_RKSJ_H,
+	&cmap_90msp_RKSJ_V,
+	&cmap_90pv_RKSJ_H,
+	&cmap_Add_RKSJ_H,
+	&cmap_Add_RKSJ_V,
+	&cmap_Adobe_CNS1_UCS2,
+	&cmap_Adobe_GB1_UCS2,
+	&cmap_Adobe_Japan1_UCS2,
+	&cmap_Adobe_Korea1_UCS2,
+	&cmap_B5pc_H,
+	&cmap_B5pc_V,
+	&cmap_CNS_EUC_H,
+	&cmap_CNS_EUC_V,
+	&cmap_ETen_B5_H,
+	&cmap_ETen_B5_V,
+	&cmap_ETenms_B5_H,
+	&cmap_ETenms_B5_V,
+	&cmap_EUC_H,
+	&cmap_EUC_V,
+	&cmap_Ext_RKSJ_H,
+	&cmap_Ext_RKSJ_V,
+	&cmap_GB_EUC_H,
+	&cmap_GB_EUC_V,
+	&cmap_GBK_EUC_H,
+	&cmap_GBK_EUC_V,
+	&cmap_GBK_X,
+	&cmap_GBK2K_H,
+	&cmap_GBK2K_V,
+	&cmap_GBKp_EUC_H,
+	&cmap_GBKp_EUC_V,
+	&cmap_GBpc_EUC_H,
+	&cmap_GBpc_EUC_V,
+	&cmap_H,
+	&cmap_HKscs_B5_H,
+	&cmap_HKscs_B5_V,
+	&cmap_Identity_H,
+	&cmap_Identity_V,
+	&cmap_KSC_EUC_H,
+	&cmap_KSC_EUC_V,
+	&cmap_KSCms_UHC_H,
+	&cmap_KSCms_UHC_HW_H,
+	&cmap_KSCms_UHC_HW_V,
+	&cmap_KSCms_UHC_V,
+	&cmap_KSCpc_EUC_H,
+	&cmap_TrueType_UCS2,
+	&cmap_UniCNS_UCS2_H,
+	&cmap_UniCNS_UCS2_V,
+	&cmap_UniCNS_UTF16_H,
+	&cmap_UniCNS_UTF16_V,
+	&cmap_UniCNS_X,
+	&cmap_UniGB_UCS2_H,
+	&cmap_UniGB_UCS2_V,
+	&cmap_UniGB_UTF16_H,
+	&cmap_UniGB_UTF16_V,
+	&cmap_UniGB_X,
+	&cmap_UniJIS_UCS2_H,
+	&cmap_UniJIS_UCS2_HW_H,
+	&cmap_UniJIS_UCS2_HW_V,
+	&cmap_UniJIS_UCS2_V,
+	&cmap_UniJIS_UTF16_H,
+	&cmap_UniJIS_UTF16_V,
+	&cmap_UniJIS_X,
+	&cmap_UniKS_UCS2_H,
+	&cmap_UniKS_UCS2_V,
+	&cmap_UniKS_UTF16_H,
+	&cmap_UniKS_UTF16_V,
+	&cmap_UniKS_X,
+	&cmap_V,
+};
+
+pdf_cmap *
+pdf_load_builtin_cmap(fz_context *ctx, const char *name)
+{
+	int r = nelem(table)-1;
+	int l = 0;
+	while (l <= r)
+	{
+		int m = (l + r) >> 1;
+		int c = strcmp(name, table[m]->cmap_name);
+		if (c < 0)
+			r = m - 1;
+		else if (c > 0)
+			l = m + 1;
+		else
+			return table[m];
+	}
+	return NULL;
+}
+
+#endif
+
+pdf_cmap *
+pdf_load_system_cmap(fz_context *ctx, const char *cmap_name)
+{
+	pdf_cmap *usecmap;
+	pdf_cmap *cmap;
+
+	cmap = pdf_load_builtin_cmap(ctx, cmap_name);
+	if (!cmap)
+		fz_throw(ctx, FZ_ERROR_FORMAT, "no builtin cmap file: %s", cmap_name);
+
+	if (cmap->usecmap_name[0] && !cmap->usecmap)
+	{
+		usecmap = pdf_load_system_cmap(ctx, cmap->usecmap_name);
+		if (!usecmap)
+			fz_throw(ctx, FZ_ERROR_FORMAT, "no builtin cmap file: %s", cmap->usecmap_name);
+		pdf_set_usecmap(ctx, cmap, usecmap);
+	}
+
+	return cmap;
+}