Mercurial > hgrepos > Python > libs > ConfigMix
diff configmix/_speedups.c @ 549:84657447ab39
FIX: Properly raise a UnicodeEncodeError from C
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sun, 02 Jan 2022 01:00:10 +0100 |
| parents | 1cbe8b0f2b78 |
| children | 79db28e879f8 |
line wrap: on
line diff
```diff
--- a/configmix/_speedups.c Sat Jan 01 21:38:46 2022 +0100
+++ b/configmix/_speedups.c Sun Jan 02 01:00:10 2022 +0100
@@ -62,6 +62,37 @@
 #if defined(Py_LIMITED_API)
+static
+void
+_raise_utf8_encode_error(PyObject *s,
+                         Py_ssize_t start, Py_ssize_t end,
+                         const char *reason)
+{
+    /*
+     * See also: https://docs.python.org/3/c-api/exceptions.html#unicode-exception-objects
+     */
+    PyObject *errobj = PyObject_CallFunction(
+        PyExc_UnicodeEncodeError,
+        "sOnns",
+        "utf-8",
+        s,
+        start,
+        end,
+        reason);
+
+    if (errobj == NULL) {
+        /* cannot do anything here */
+        return;
+    }
+    /* Make PyExc_UnicodeEncodeError owned because PyErr_Restore steals */
+    //Py_INCREF(PyExc_UnicodeEncodeError);
+    //PyErr_Restore(PyExc_UnicodeEncodeError, errobj, NULL);
+
+    PyErr_SetObject(PyExc_UnicodeEncodeError, errobj);
+    Py_DECREF(errobj);
+}
+
+
 /*
  * Copyright 2001-2004 Unicode, Inc.
  *
@@ -105,6 +136,8 @@
 Py_ssize_t
 _convert_ucs4_to_utf8(
     Py_UCS4 ch,
+    PyObject *ch_obj, /* for error messages: the string where ch comes from */
+    Py_ssize_t ch_obj_end, /* effective length of ch_obj (error reporting) */
     unsigned char *targetStart, unsigned char *targetEnd,
     int strict)
 {
@@ -117,8 +150,10 @@
     if (strict) {
         /* UTF-16 surrogate values are illegal */
         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-            PyErr_SetString(PyExc_UnicodeEncodeError,
-                            "surrogate values not allowed");
+            _raise_utf8_encode_error(
+                ch_obj,
+                1, ch_obj_end,
+                "surrogate values are illegal in UCS4");
             return -1;
         }
     }
@@ -135,15 +170,19 @@
     } else if (ch <= UNI_MAX_LEGAL_UTF32) {
         bytesToWrite = 4;
     } else {
-        PyErr_SetString(PyExc_UnicodeEncodeError,
-                        "max Unicode codepoint value exceeded");
+        _raise_utf8_encode_error(
+            ch_obj,
+            1, ch_obj_end,
+            "max Unicode codepoint value exceeded");
         return -1;
     }
     target += bytesToWrite;
     if (target > targetEnd) {
-        PyErr_SetString(PyExc_UnicodeEncodeError,
-                        "target exhausted");
+        _raise_utf8_encode_error(
+            ch_obj,
+            1, ch_obj_end,
+            "temporary target buffer exhausted");
         return -1;
     }
     switch (bytesToWrite) { /* note: everything falls through. */
@@ -156,6 +195,11 @@
 }
+/*
+ * End of Copyright 2001-2004 Unicode, Inc.
+ */
+
+
 static
 PyObject *
 _hex2string(PyObject *s, Py_ssize_t end)
@@ -169,7 +213,7 @@
         return NULL;
     /* Replace the combination PyUniode_New/PyUnicode_WriteChar */
-    buf_bytes = _convert_ucs4_to_utf8(c, buf, &(buf[6]), 1);
+    buf_bytes = _convert_ucs4_to_utf8(c, s, end+1, buf, &(buf[6]), 1);
     if (buf_bytes < 0) {
         return NULL;
     }
```
