Python/libs/ConfigMix: configmix/

comparison configmix/_speedups.c @ 549:84657447ab39

FIX: Properly raise a UnicodeEncodeError from C

author	Franz Glasner <fzglas.hg@dom66.de>
date	Sun, 02 Jan 2022 01:00:10 +0100
parents	1cbe8b0f2b78
children	79db28e879f8

comparison

equal deleted inserted replaced

-:325008573bc6
+:84657447ab39
 return 0;  /* success */
 }
 #if defined(Py_LIMITED_API)
+/*
+ * Set a UnicodeEncodeError for the "utf-8" codec as the pending exception.
+ *
+ * The exception instance is created by calling PyExc_UnicodeEncodeError
+ * with the arguments ("utf-8", s, start, end, reason):
+ *
+ *   s       object handed over unchanged as the second constructor argument
+ *   start   passed as Py_ssize_t (third constructor argument)
+ *   end     passed as Py_ssize_t (fourth constructor argument)
+ *   reason  C string passed as the fifth constructor argument
+ *
+ * See also: https://docs.python.org/3/c-api/exceptions.html#unicode-exception-objects
+ */
+static
+void
+_raise_utf8_encode_error(PyObject *s,
+                         Py_ssize_t start, Py_ssize_t end,
+                         const char *reason)
+{
+    PyObject *exc;
+
+    exc = PyObject_CallFunction(
+        PyExc_UnicodeEncodeError, "sOnns", "utf-8", s, start, end, reason);
+    if (exc != NULL) {
+        PyErr_SetObject(PyExc_UnicodeEncodeError, exc);
+        /* release the local reference to the freshly created instance */
+        Py_DECREF(exc);
+    }
+    /*
+     * If the instance could not be created nothing more can be done
+     * here: the function simply returns.
+     */
+}
 /*
 * Copyright 2001-2004 Unicode, Inc.
 *
 * Disclaimer
 static
 Py_ssize_t
 _convert_ucs4_to_utf8(
 Py_UCS4 ch,
+PyObject *ch_obj,  /* for error messages: the string where ch comes from */
+Py_ssize_t ch_obj_end,  /* effective length of ch_obj (error reporting) */
 unsigned char *targetStart, unsigned char *targetEnd,
 int strict)
 {
 const Py_UCS4 byteMask = 0xBF;
 const Py_UCS4 byteMark = 0x80;
 unsigned char *target = targetStart;
 if (strict) {
 /* UTF-16 surrogate values are illegal */
 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-PyErr_SetString(PyExc_UnicodeEncodeError,
+_raise_utf8_encode_error(
-"surrogate values not allowed");
+ch_obj,
+1, ch_obj_end,
+"surrogate values are illegal in UCS4");
 return -1;
 }
 }
 /*
 * Figure out how many bytes the result will require. Turn any
 } else if (ch < (Py_UCS4)0x10000) {
 bytesToWrite = 3;
 } else if (ch <= UNI_MAX_LEGAL_UTF32) {
 bytesToWrite = 4;
 } else {
-PyErr_SetString(PyExc_UnicodeEncodeError,
+_raise_utf8_encode_error(
-"max Unicode codepoint value exceeded");
+ch_obj,
+1, ch_obj_end,
+"max Unicode codepoint value exceeded");
 return -1;
 }
 target += bytesToWrite;
 if (target > targetEnd) {
-PyErr_SetString(PyExc_UnicodeEncodeError,
+_raise_utf8_encode_error(
-"target exhausted");
+ch_obj,
+1, ch_obj_end,
+"temporary target buffer exhausted");
 return -1;
 }
 switch (bytesToWrite) { /* note: everything falls through. */
 case 4: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6;
 case 3: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6;
 }
 return bytesToWrite;
 }
+/*
+* End of Copyright 2001-2004 Unicode, Inc.
+*/
 static
 PyObject *
 _hex2string(PyObject *s, Py_ssize_t end)
 {
 Py_UCS4 c;
 if (_hex2ucs4(s, end, &c) != 0)
 return NULL;
 /* Replace the combination PyUnicode_New/PyUnicode_WriteChar */
-buf_bytes = _convert_ucs4_to_utf8(c, buf, &(buf[6]), 1);
+buf_bytes = _convert_ucs4_to_utf8(c, s, end+1, buf, &(buf[6]), 1);
 if (buf_bytes < 0) {
 return NULL;
 }
 u = PyUnicode_FromStringAndSize((const char *)buf, buf_bytes);
 if (u == NULL) {

Mercurial > hgrepos > Python > libs > ConfigMix

comparison configmix/_speedups.c @ 549:84657447ab39