Mercurial > hgrepos > Python > libs > ConfigMix
changeset 545:6501fe0e116c
Build the speedup C-extension against the stable API.
This is done by emulating PyUnicode_New()/PyUnicode_WriteChar(): the UCS4
character is encoded to UTF-8 and passed to PyUnicode_FromStringAndSize().
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 01 Jan 2022 20:36:46 +0100 |
| parents | db2d108e14e7 |
| children | adf65c31f8fc |
| files | configmix/_speedups.c setup.py |
| diffstat | 2 files changed, 125 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/configmix/_speedups.c Sat Jan 01 18:05:32 2022 +0100 +++ b/configmix/_speedups.c Sat Jan 01 20:36:46 2022 +0100 @@ -60,6 +60,128 @@ } +#if defined(Py_LIMITED_API) + +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + +#define UNI_MAX_LEGAL_UTF32 (Py_UCS4)0x0010FFFF +#define UNI_SUR_HIGH_START (Py_UCS4)0xD800 +#define UNI_SUR_HIGH_END (Py_UCS4)0xDBFF +#define UNI_SUR_LOW_START (Py_UCS4)0xDC00 +#define UNI_SUR_LOW_END (Py_UCS4)0xDFFF + + +/* + * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed + * into the first byte, depending on how many bytes follow. There are + * as many entries in this table as there are UTF-8 sequence types. + * (I.e., one byte sequence, two byte... etc.). Remember that sequences + * for *legal* UTF-8 will be 4 or fewer bytes total. 
+ */ +static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + + +static +Py_ssize_t +_convert_ucs4_to_utf8( + Py_UCS4 ch, + unsigned char *targetStart, unsigned char *targetEnd, + int strict) +{ + const Py_UCS4 byteMask = 0xBF; + const Py_UCS4 byteMark = 0x80; + + Py_ssize_t bytesToWrite = 0; + unsigned char *target = targetStart; + + if (strict) { + /* UTF-16 surrogate values are illegal */ + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { + PyErr_SetString(PyExc_UnicodeEncodeError, + "surrogate values not allowed"); + return -1; + } + } + /* + * Figure out how many bytes the result will require. Reject any + * illegally large UTF32 things (> Plane 17) with an error. + */ + if (ch < (Py_UCS4)0x80) { + bytesToWrite = 1; + } else if (ch < (Py_UCS4)0x800) { + bytesToWrite = 2; + } else if (ch < (Py_UCS4)0x10000) { + bytesToWrite = 3; + } else if (ch <= UNI_MAX_LEGAL_UTF32) { + bytesToWrite = 4; + } else { + PyErr_SetString(PyExc_UnicodeEncodeError, + "max Unicode codepoint value exceeded"); + return -1; + } + + target += bytesToWrite; + if (target > targetEnd) { + PyErr_SetString(PyExc_UnicodeEncodeError, + "target exhausted"); + return -1; + } + switch (bytesToWrite) { /* note: everything falls through. 
+ */ + case 4: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; + case 3: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; + case 2: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; + case 1: *--target = (unsigned char) (ch | firstByteMark[bytesToWrite]); + } + return bytesToWrite; +} + + +static +PyObject * +_hex2string(PyObject *s, Py_ssize_t end) +{ + Py_UCS4 c; + unsigned char buf[6]; + Py_ssize_t buf_bytes; + PyObject *u; + + if (_hex2ucs4(s, end, &c) != 0) + return NULL; + + /* Replace the combination PyUnicode_New/PyUnicode_WriteChar */ + buf_bytes = _convert_ucs4_to_utf8(c, buf, &(buf[6]), 1); + if (buf_bytes < 0) { + return NULL; + } + u = PyUnicode_FromStringAndSize((const char *)buf, buf_bytes); + if (u == NULL) { + return NULL; + } + return u; +} + +#else + static PyObject * _hex2string(PyObject *s, Py_ssize_t end) @@ -79,6 +201,8 @@ return u; } +#endif /* Py_LIMITED_API */ + static PyObject *
--- a/setup.py Sat Jan 01 18:05:32 2022 +0100 +++ b/setup.py Sat Jan 01 20:36:46 2022 +0100 @@ -57,7 +57,7 @@ and (sys.version_info[0] > 3 or (sys.version_info[0] == 3 and sys.version_info[1] >= 7))): - py_limited_api = False + py_limited_api = True if py_limited_api: define_macros = [("Py_LIMITED_API", "0x03070000")]
