comparison configmix/_speedups.c @ 545:6501fe0e116c

Build the speedup C-extension against the stable API. This is done by emulating PyUnicode_New()/PyUnicode_WriteChar(): the UCS4 character is encoded to UTF-8 and the result is passed to PyUnicode_FromStringAndSize().
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 01 Jan 2022 20:36:46 +0100
parents db2d108e14e7
children 1cbe8b0f2b78
comparison
equal deleted inserted replaced
544:db2d108e14e7 545:6501fe0e116c
58 *result = r; 58 *result = r;
59 return 0; /* success */ 59 return 0; /* success */
60 } 60 }
61 61
62 62
63 #if defined(Py_LIMITED_API)
64
65 /*
66 * Copyright 2001-2004 Unicode, Inc.
67 *
68 * Disclaimer
69 *
70 * This source code is provided as is by Unicode, Inc. No claims are
71 * made as to fitness for any particular purpose. No warranties of any
72 * kind are expressed or implied. The recipient agrees to determine
73 * applicability of information provided. If this file has been
74 * purchased on magnetic or optical media from Unicode, Inc., the
75 * sole remedy for any claim will be exchange of defective media
76 * within 90 days of receipt.
77 *
78 * Limitations on Rights to Redistribute This Code
79 *
80 * Unicode, Inc. hereby grants the right to freely use the information
81 * supplied in this file in the creation of products supporting the
82 * Unicode Standard, and to make copies of this file in any form
83 * for internal or external distribution as long as this notice
84 * remains attached.
85 */
86
/*
 * Code point limits used by the UTF-8 encoder below.
 * Every expansion is fully parenthesized so that the cast cannot bind
 * unexpectedly when a macro is used inside a larger expression.
 */
/* largest legal code point */
#define UNI_MAX_LEGAL_UTF32 ((Py_UCS4)0x0010FFFF)
/* first and last UTF-16 high surrogate */
#define UNI_SUR_HIGH_START  ((Py_UCS4)0xD800)
#define UNI_SUR_HIGH_END    ((Py_UCS4)0xDBFF)
/* first and last UTF-16 low surrogate */
#define UNI_SUR_LOW_START   ((Py_UCS4)0xDC00)
#define UNI_SUR_LOW_END     ((Py_UCS4)0xDFFF)


/*
 * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
 * into the first byte, depending on how many bytes follow.  There are
 * as many entries in this table as there are UTF-8 sequence types.
 * (I.e., one byte sequence, two byte... etc.). Remember that sequences
 * for *legal* UTF-8 will be 4 or fewer bytes total.
 *
 * The table is indexed by the total number of bytes in the sequence;
 * index 0 is an unused placeholder.
 */
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
102
103
104 static
105 Py_ssize_t
106 _convert_ucs4_to_utf8(
107 Py_UCS4 ch,
108 unsigned char *targetStart, unsigned char *targetEnd,
109 int strict)
110 {
111 const Py_UCS4 byteMask = 0xBF;
112 const Py_UCS4 byteMark = 0x80;
113
114 Py_ssize_t bytesToWrite = 0;
115 unsigned char *target = targetStart;
116
117 if (strict) {
118 /* UTF-16 surrogate values are illegal */
119 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
120 PyErr_SetString(PyExc_UnicodeEncodeError,
121 "surrogate values not allowed");
122 return -1;
123 }
124 }
125 /*
126 * Figure out how many bytes the result will require. Turn any
127 * illegally large UTF32 things (> Plane 17) into replacement chars.
128 */
129 if (ch < (Py_UCS4)0x80) {
130 bytesToWrite = 1;
131 } else if (ch < (Py_UCS4)0x800) {
132 bytesToWrite = 2;
133 } else if (ch < (Py_UCS4)0x10000) {
134 bytesToWrite = 3;
135 } else if (ch <= UNI_MAX_LEGAL_UTF32) {
136 bytesToWrite = 4;
137 } else {
138 PyErr_SetString(PyExc_UnicodeEncodeError,
139 "max Unicode codepoint value exceeded");
140 return -1;
141 }
142
143 target += bytesToWrite;
144 if (target > targetEnd) {
145 PyErr_SetString(PyExc_UnicodeEncodeError,
146 "target exhausted");
147 return -1;
148 }
149 switch (bytesToWrite) { /* note: everything falls through. */
150 case 4: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6;
151 case 3: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6;
152 case 2: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6;
153 case 1: *--target = (unsigned char) (ch | firstByteMark[bytesToWrite]);
154 }
155 return bytesToWrite;
156 }
157
158
63 static 159 static
64 PyObject * 160 PyObject *
65 _hex2string(PyObject *s, Py_ssize_t end) 161 _hex2string(PyObject *s, Py_ssize_t end)
66 { 162 {
67 Py_UCS4 c; 163 Py_UCS4 c;
164 unsigned char buf[6];
165 Py_ssize_t buf_bytes;
166 PyObject *u;
167
168 if (_hex2ucs4(s, end, &c) != 0)
169 return NULL;
170
171 /* Replace the combination PyUniode_New/PyUnicode_WriteChar */
172 buf_bytes = _convert_ucs4_to_utf8(c, buf, &(buf[6]), 1);
173 if (buf_bytes < 0) {
174 return NULL;
175 }
176 u = PyUnicode_FromStringAndSize((const char *)buf, buf_bytes);
177 if (u == NULL) {
178 return NULL;
179 }
180 return u;
181 }
182
183 #else
184
185 static
186 PyObject *
187 _hex2string(PyObject *s, Py_ssize_t end)
188 {
189 Py_UCS4 c;
68 PyObject *u = NULL; 190 PyObject *u = NULL;
69 191
70 if (_hex2ucs4(s, end, &c) != 0) 192 if (_hex2ucs4(s, end, &c) != 0)
71 return NULL; 193 return NULL;
72 u = PyUnicode_New(1, c); /* ARGH: not in the stable API */ 194 u = PyUnicode_New(1, c); /* ARGH: not in the stable API */
76 Py_DECREF(u); 198 Py_DECREF(u);
77 return NULL; 199 return NULL;
78 } 200 }
79 return u; 201 return u;
80 } 202 }
203
204 #endif /* Py_LIMITED_API */
81 205
82 206
83 static 207 static
84 PyObject * 208 PyObject *
85 _fast_unquote(PyObject *self, PyObject *s, Py_ssize_t s_len, struct speedups_state *sstate) 209 _fast_unquote(PyObject *self, PyObject *s, Py_ssize_t s_len, struct speedups_state *sstate)