Mercurial > hgrepos > Python > libs > ConfigMix
comparison configmix/_speedups.c @ 545:6501fe0e116c
Build the speedup C-extension against the stable API.
This is done by emulating PyUnicode_New()/PyUnicode_WriteChar(): the UCS4
character is encoded to UTF-8 and then passed to PyUnicode_FromStringAndSize().
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sat, 01 Jan 2022 20:36:46 +0100 |
| parents | db2d108e14e7 |
| children | 1cbe8b0f2b78 |
comparison
equal
deleted
inserted
replaced
| 544:db2d108e14e7 | 545:6501fe0e116c |
|---|---|
| 58 *result = r; | 58 *result = r; |
| 59 return 0; /* success */ | 59 return 0; /* success */ |
| 60 } | 60 } |
| 61 | 61 |
| 62 | 62 |
| 63 #if defined(Py_LIMITED_API) | |
| 64 | |
| 65 /* | |
| 66 * Copyright 2001-2004 Unicode, Inc. | |
| 67 * | |
| 68 * Disclaimer | |
| 69 * | |
| 70 * This source code is provided as is by Unicode, Inc. No claims are | |
| 71 * made as to fitness for any particular purpose. No warranties of any | |
| 72 * kind are expressed or implied. The recipient agrees to determine | |
| 73 * applicability of information provided. If this file has been | |
| 74 * purchased on magnetic or optical media from Unicode, Inc., the | |
| 75 * sole remedy for any claim will be exchange of defective media | |
| 76 * within 90 days of receipt. | |
| 77 * | |
| 78 * Limitations on Rights to Redistribute This Code | |
| 79 * | |
| 80 * Unicode, Inc. hereby grants the right to freely use the information | |
| 81 * supplied in this file in the creation of products supporting the | |
| 82 * Unicode Standard, and to make copies of this file in any form | |
| 83 * for internal or external distribution as long as this notice | |
| 84 * remains attached. | |
| 85 */ | |
| 86 | |
| 87 #define UNI_MAX_LEGAL_UTF32 (Py_UCS4)0x0010FFFF | |
| 88 #define UNI_SUR_HIGH_START (Py_UCS4)0xD800 | |
| 89 #define UNI_SUR_HIGH_END (Py_UCS4)0xDBFF | |
| 90 #define UNI_SUR_LOW_START (Py_UCS4)0xDC00 | |
| 91 #define UNI_SUR_LOW_END (Py_UCS4)0xDFFF | |
| 92 | |
| 93 | |
| 94 /* | |
| 95 * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed | |
| 96 * into the first byte, depending on how many bytes follow. There are | |
| 97 * as many entries in this table as there are UTF-8 sequence types. | |
| 98 * (I.e., one byte sequence, two byte... etc.). Remember that sequences | |
| 99 * for *legal* UTF-8 will be 4 or fewer bytes total. | |
| 100 */ | |
| 101 static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; | |
| 102 | |
| 103 | |
| 104 static | |
| 105 Py_ssize_t | |
| 106 _convert_ucs4_to_utf8( | |
| 107 Py_UCS4 ch, | |
| 108 unsigned char *targetStart, unsigned char *targetEnd, | |
| 109 int strict) | |
| 110 { | |
| 111 const Py_UCS4 byteMask = 0xBF; | |
| 112 const Py_UCS4 byteMark = 0x80; | |
| 113 | |
| 114 Py_ssize_t bytesToWrite = 0; | |
| 115 unsigned char *target = targetStart; | |
| 116 | |
| 117 if (strict) { | |
| 118 /* UTF-16 surrogate values are illegal */ | |
| 119 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { | |
| 120 PyErr_SetString(PyExc_UnicodeEncodeError, | |
| 121 "surrogate values not allowed"); | |
| 122 return -1; | |
| 123 } | |
| 124 } | |
| 125 /* | |
| 126 * Figure out how many bytes the result will require. Reject any | |
| 127 * illegally large UTF32 things (> Plane 17) with an error. | |
| 128 */ | |
| 129 if (ch < (Py_UCS4)0x80) { | |
| 130 bytesToWrite = 1; | |
| 131 } else if (ch < (Py_UCS4)0x800) { | |
| 132 bytesToWrite = 2; | |
| 133 } else if (ch < (Py_UCS4)0x10000) { | |
| 134 bytesToWrite = 3; | |
| 135 } else if (ch <= UNI_MAX_LEGAL_UTF32) { | |
| 136 bytesToWrite = 4; | |
| 137 } else { | |
| 138 PyErr_SetString(PyExc_UnicodeEncodeError, | |
| 139 "max Unicode codepoint value exceeded"); | |
| 140 return -1; | |
| 141 } | |
| 142 | |
| 143 target += bytesToWrite; | |
| 144 if (target > targetEnd) { | |
| 145 PyErr_SetString(PyExc_UnicodeEncodeError, | |
| 146 "target exhausted"); | |
| 147 return -1; | |
| 148 } | |
| 149 switch (bytesToWrite) { /* note: everything falls through. */ | |
| 150 case 4: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; | |
| 151 case 3: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; | |
| 152 case 2: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; | |
| 153 case 1: *--target = (unsigned char) (ch | firstByteMark[bytesToWrite]); | |
| 154 } | |
| 155 return bytesToWrite; | |
| 156 } | |
| 157 | |
| 158 | |
| 63 static | 159 static |
| 64 PyObject * | 160 PyObject * |
| 65 _hex2string(PyObject *s, Py_ssize_t end) | 161 _hex2string(PyObject *s, Py_ssize_t end) |
| 66 { | 162 { |
| 67 Py_UCS4 c; | 163 Py_UCS4 c; |
| 164 unsigned char buf[6]; | |
| 165 Py_ssize_t buf_bytes; | |
| 166 PyObject *u; | |
| 167 | |
| 168 if (_hex2ucs4(s, end, &c) != 0) | |
| 169 return NULL; | |
| 170 | |
| 171 /* Replace the combination PyUnicode_New/PyUnicode_WriteChar */ | |
| 172 buf_bytes = _convert_ucs4_to_utf8(c, buf, &(buf[6]), 1); | |
| 173 if (buf_bytes < 0) { | |
| 174 return NULL; | |
| 175 } | |
| 176 u = PyUnicode_FromStringAndSize((const char *)buf, buf_bytes); | |
| 177 if (u == NULL) { | |
| 178 return NULL; | |
| 179 } | |
| 180 return u; | |
| 181 } | |
| 182 | |
| 183 #else | |
| 184 | |
| 185 static | |
| 186 PyObject * | |
| 187 _hex2string(PyObject *s, Py_ssize_t end) | |
| 188 { | |
| 189 Py_UCS4 c; | |
| 68 PyObject *u = NULL; | 190 PyObject *u = NULL; |
| 69 | 191 |
| 70 if (_hex2ucs4(s, end, &c) != 0) | 192 if (_hex2ucs4(s, end, &c) != 0) |
| 71 return NULL; | 193 return NULL; |
| 72 u = PyUnicode_New(1, c); /* ARGH: not in the stable API */ | 194 u = PyUnicode_New(1, c); /* ARGH: not in the stable API */ |
| 76 Py_DECREF(u); | 198 Py_DECREF(u); |
| 77 return NULL; | 199 return NULL; |
| 78 } | 200 } |
| 79 return u; | 201 return u; |
| 80 } | 202 } |
| 203 | |
| 204 #endif /* Py_LIMITED_API */ | |
| 81 | 205 |
| 82 | 206 |
| 83 static | 207 static |
| 84 PyObject * | 208 PyObject * |
| 85 _fast_unquote(PyObject *self, PyObject *s, Py_ssize_t s_len, struct speedups_state *sstate) | 209 _fast_unquote(PyObject *self, PyObject *s, Py_ssize_t s_len, struct speedups_state *sstate) |
