Python/libs/ConfigMix: configmix/

comparison configmix/_speedups.c @ 545:6501fe0e116c

Build the speedup C extension against the stable API. PyUnicode_New() and PyUnicode_WriteChar() are not available there, so they are emulated by encoding the UCS4 character to UTF-8 and passing the result to PyUnicode_FromStringAndSize().

author	Franz Glasner <fzglas.hg@dom66.de>
date	Sat, 01 Jan 2022 20:36:46 +0100
parents	db2d108e14e7
children	1cbe8b0f2b78

comparison

equal deleted inserted replaced

-:db2d108e14e7
+:6501fe0e116c
 *result = r;
 return 0;  /* success */
 }
+#if defined(Py_LIMITED_API)
+/*
+* Copyright 2001-2004 Unicode, Inc.
+*
+* Disclaimer
+*
+* This source code is provided as is by Unicode, Inc. No claims are
+* made as to fitness for any particular purpose. No warranties of any
+* kind are expressed or implied. The recipient agrees to determine
+* applicability of information provided. If this file has been
+* purchased on magnetic or optical media from Unicode, Inc., the
+* sole remedy for any claim will be exchange of defective media
+* within 90 days of receipt.
+*
+* Limitations on Rights to Redistribute This Code
+*
+* Unicode, Inc. hereby grants the right to freely use the information
+* supplied in this file in the creation of products supporting the
+* Unicode Standard, and to make copies of this file in any form
+* for internal or external distribution as long as this notice
+* remains attached.
+*/
/*
 * Code point limits used by the UTF-8 encoder below.
 *
 * Every expansion is fully parenthesized so that the cast binds as one
 * unit wherever the macro is used (e.g. as the operand of `sizeof' or
 * next to a postfix operator).
 */
/* Largest code point that is accepted for encoding. */
#define UNI_MAX_LEGAL_UTF32 ((Py_UCS4)0x0010FFFF)
/* Inclusive bounds of the UTF-16 high surrogate range. */
#define UNI_SUR_HIGH_START  ((Py_UCS4)0xD800)
#define UNI_SUR_HIGH_END    ((Py_UCS4)0xDBFF)
/* Inclusive bounds of the UTF-16 low surrogate range. */
#define UNI_SUR_LOW_START   ((Py_UCS4)0xDC00)
#define UNI_SUR_LOW_END     ((Py_UCS4)0xDFFF)
/*
 * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
 * into the first byte, depending on how many bytes follow.  There are
 * as many entries in this table as there are UTF-8 sequence types.
 * (I.e., one byte sequence, two byte... etc.). Remember that sequences
 * for *legal* UTF-8 will be 4 or fewer bytes total.
 *
 * The table is indexed by the total sequence length in bytes; index 0
 * is an unused placeholder.
 */
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+static
+Py_ssize_t
+_convert_ucs4_to_utf8(
+Py_UCS4 ch,
+unsigned char *targetStart, unsigned char *targetEnd,
+int strict)
+{
+const Py_UCS4 byteMask = 0xBF;
+const Py_UCS4 byteMark = 0x80;
+Py_ssize_t bytesToWrite = 0;
+unsigned char *target = targetStart;
+if (strict) {
+/* UTF-16 surrogate values are illegal */
+if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+PyErr_SetString(PyExc_UnicodeEncodeError,
+"surrogate values not allowed");
+return -1;
+}
+}
+/*
+* Figure out how many bytes the result will require. Turn any
+* illegally large UTF32 things (> Plane 17) into replacement chars.
+*/
+if (ch < (Py_UCS4)0x80) {
+bytesToWrite = 1;
+} else if (ch < (Py_UCS4)0x800) {
+bytesToWrite = 2;
+} else if (ch < (Py_UCS4)0x10000) {
+bytesToWrite = 3;
+} else if (ch <= UNI_MAX_LEGAL_UTF32) {
+bytesToWrite = 4;
+} else {
+PyErr_SetString(PyExc_UnicodeEncodeError,
+"max Unicode codepoint value exceeded");
+return -1;
+}
+target += bytesToWrite;
+if (target > targetEnd) {
+PyErr_SetString(PyExc_UnicodeEncodeError,
+"target exhausted");
+return -1;
+}
+switch (bytesToWrite) { /* note: everything falls through. */
+case 4: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6;
+case 3: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6;
+case 2: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6;
+case 1: *--target = (unsigned char) (ch | firstByteMark[bytesToWrite]);
+}
+return bytesToWrite;
+}
 /*
  * Limited-API variant: turn the code point that _hex2ucs4() decodes
  * from `s' into a Python string holding that single character.
  *
  * `s' and `end' are forwarded unchanged to _hex2ucs4().
  *
  * This replaces the combination PyUnicode_New()/PyUnicode_WriteChar(),
  * which is not available in the stable API: the code point is encoded
  * as UTF-8 into a small stack buffer (surrogates are rejected) and the
  * bytes are handed to PyUnicode_FromStringAndSize().
  *
  * Returns the new string object, or NULL if any of the steps fails.
  */
 static
 PyObject *
 _hex2string(PyObject *s, Py_ssize_t end)
 {
     Py_UCS4 codepoint;
     unsigned char utf8[6];
     Py_ssize_t utf8_len;

     if (_hex2ucs4(s, end, &codepoint) != 0) {
         return NULL;
     }

     /* strict mode (last argument 1): surrogate code points are an error */
     utf8_len = _convert_ucs4_to_utf8(codepoint, utf8, utf8 + sizeof(utf8), 1);
     if (utf8_len < 0) {
         return NULL;
     }

     /* A NULL result is passed straight through to the caller. */
     return PyUnicode_FromStringAndSize((const char *)utf8, utf8_len);
 }
+#else
+static
+PyObject *
+_hex2string(PyObject *s, Py_ssize_t end)
+{
+Py_UCS4 c;
 PyObject *u = NULL;
 if (_hex2ucs4(s, end, &c) != 0)
 return NULL;
 u = PyUnicode_New(1, c);    /* ARGH: not  in the stable API */
 Py_DECREF(u);
 return NULL;
 }
 return u;
 }
+#endif /* Py_LIMITED_API */
 static
 PyObject *
 _fast_unquote(PyObject *self, PyObject *s, Py_ssize_t s_len, struct speedups_state *sstate)

Mercurial > hgrepos > Python > libs > ConfigMix

comparison configmix/_speedups.c @ 545:6501fe0e116c