comparison configmix/_speedups.c @ 542:f71d34dda19f

Add an optional C implementation for configmix.config.unquote and configmix.config.pathstr2path. This is currently for Python 3.5+. It is tested with Python 3.7 and Python 3.8 (FreeBSD 12.2 amd64, LLVM 10.0.1). A build for the stable API ("abi3") fails because PyUnicode_New() is currently not in the stable API. Also included are extended tests for unquote() and pathstr2path().
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 31 Dec 2021 21:24:16 +0100
parents
children 491413368c7c
comparison
equal deleted inserted replaced
541:25b61f0a1958 542:f71d34dda19f
1 /* -*- coding: utf-8 -*- */
2 /*
3 * Speedups for configmix.
4 *
5 * :Copyright: (c) 2021, Franz Glasner. All rights reserved.
6 * :License: BSD-3-Clause. See LICENSE.txt for details.
7 */
8
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11
12
/*
 * Identification string. It is deliberately not `static'.
 * NOTE(review): the "@(#)" prefix and the $Header$ keyword look like
 * what(1)/ident(1) markers -- confirm with the build tooling.
 */
const char _id[] = "@(#)configmix._speedups $Header$";

/*
 * Values exported by the module as `__release__' and `__date__'.
 * NOTE(review): the |...| placeholders are presumably substituted by the
 * release tooling -- confirm.
 */
static const char release[] = "|VCSRevision|";
static const char date[] = "|VCSJustDate|";
16
17
18 /*
19 * Module state holds pre-created some objects
20 */
21 struct speedups_state {
22 PyObject *DOT;
23 PyObject *QUOTE;
24 PyObject *EMPTY_STR;
25 };
26
27
28 static
29 int
30 _hex2ucs4(PyObject *s, Py_ssize_t end, Py_UCS4 *result)
31 {
32 Py_ssize_t i;
33 Py_UCS4 c;
34 Py_UCS4 r = 0;
35
36 for (i=1; i < end; i++) {
37 r *= 16;
38 c = PyUnicode_ReadChar(s, i);
39 if ((c >= 48) && (c <= 57)) { /* 0 - 9 */
40 r += (c - 48);
41 }
42 else {
43 if ((c >= 97) && (c <= 102)) { /* a - f */
44 r += (c - 87);
45 }
46 else {
47 if ((c >= 65) && (c <= 70)) { /* A - F */
48 r += (c - 55);
49 }
50 else {
51 PyErr_SetString(PyExc_ValueError, "invalid base-16 literal");
52 return -1;
53 }
54 }
55 }
56 }
57 *result = r;
58 return 0; /* success */
59 }
60
61
62 static
63 PyObject *
64 _hex2string(PyObject *s, Py_ssize_t end)
65 {
66 Py_UCS4 c;
67 PyObject *u = NULL;
68
69 if (_hex2ucs4(s, end, &c) != 0)
70 return NULL;
71 u = PyUnicode_New(1, c); /* ARGH: not in the stable API */
72 if (u == NULL)
73 return NULL;
74 if (PyUnicode_WriteChar(u, 0, c) != 0) {
75 Py_DECREF(u);
76 return NULL;
77 }
78 return u;
79 }
80
81
82 static
83 PyObject *
84 _fast_unquote(PyObject *self, PyObject *s, struct speedups_state *sstate)
85 {
86 Py_ssize_t find;
87 Py_ssize_t s_len;
88 Py_ssize_t parts_len;
89 PyObject *res;
90 PyObject *res_parts = NULL;
91 PyObject *parts = NULL;
92 PyObject *o;
93 PyObject *pb;
94 Py_ssize_t pb_len;
95 Py_ssize_t i;
96 Py_UCS4 c;
97
98 if (!PyUnicode_Check(s)) {
99 PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected");
100 return NULL;
101 }
102 s_len = PyUnicode_GetLength(s);
103 if (s_len < 0) {
104 return NULL;
105 }
106 if (s_len == 0) {
107 Py_INCREF(s);
108 return s;
109 }
110 find = PyUnicode_FindChar(s, '%', 0, s_len, 1);
111 if (find == -2) {
112 return NULL;
113 }
114 if (find == -1) {
115 Py_INCREF(s);
116 return s;
117 }
118
119 if (sstate == NULL) {
120 sstate = PyModule_GetState(self);
121 if (sstate == NULL) {
122 PyErr_SetString(PyExc_RuntimeError, "no module state available");
123 return NULL;
124 }
125 }
126 parts = PyUnicode_Split(s, sstate->QUOTE, -1);
127 if (parts == NULL) {
128 goto error;
129 }
130 parts_len = PyList_Size(parts);
131 if (parts_len < 0) {
132 goto error;
133 }
134 res_parts = PyTuple_New((parts_len-1)*2 + 1);
135 if (res_parts == NULL) {
136 goto error;
137 }
138
139 o = PyList_GetItem(parts, 0); /* borrowed */
140 if (o == NULL) {
141 goto error;
142 }
143 /*
144 * The first item may be also the empty string if `s' starts with
145 * a quoted character.
146 */
147 Py_INCREF(o); /* because PyTuple_SetItem steals -- and o is borrowed */
148 PyTuple_SetItem(res_parts, 0, o);
149
150 for (i=1; i<parts_len; i++) {
151 pb = PyList_GetItem(parts, i); /* borrowed */
152 pb_len = PyUnicode_GetLength(pb);
153 if (pb_len < 1) {
154 PyErr_SetString(PyExc_ValueError, "unknown quote syntax string");
155 goto error;
156 }
157 c = PyUnicode_ReadChar(pb, 0);
158 switch (c) {
159 case 0x55: /* U */
160 if (pb_len < 9) {
161 PyErr_SetString(PyExc_ValueError, "quote syntax: length too small");
162 goto error;
163 }
164 o = _hex2string(pb, 9);
165 if (o == NULL) {
166 goto error;
167 }
168 PyTuple_SetItem(res_parts, (i-1)*2 + 1, o); /* steals */
169 o = PyUnicode_Substring(pb, 9, pb_len);
170 if (o == NULL) {
171 goto error;
172 }
173 PyTuple_SetItem(res_parts, i*2, o); /* steals */
174 break;
175 case 0x75: /* u */
176 if (pb_len < 5) {
177 PyErr_SetString(PyExc_ValueError, "quote syntax: length too small");
178 goto error;
179 }
180 o = _hex2string(pb, 5);
181 if (o == NULL) {
182 goto error;
183 }
184 PyTuple_SetItem(res_parts, (i-1)*2 + 1, o); /* steals */
185 o = PyUnicode_Substring(pb, 5, pb_len);
186 if (o == NULL) {
187 goto error;
188 }
189 PyTuple_SetItem(res_parts, i*2, o); /* steals */
190 break;
191 case 0x78: /* x */
192 if (pb_len < 3) {
193 PyErr_SetString(PyExc_ValueError, "quote syntax: length too small");
194 goto error;
195 }
196 o = _hex2string(pb, 3);
197 if (o == NULL) {
198 goto error;
199 }
200 PyTuple_SetItem(res_parts, (i-1)*2 + 1, o); /* steals */
201 o = PyUnicode_Substring(pb, 3, pb_len);
202 if (o == NULL) {
203 goto error;
204 }
205 PyTuple_SetItem(res_parts, i*2, o); /* steals */
206 break;
207
208 default:
209 PyErr_SetString(PyExc_ValueError, "unknown quote syntax string");
210 goto error;
211 }
212 }
213
214 res = PyUnicode_Join(sstate->EMPTY_STR, res_parts);
215 if (res == NULL) {
216 goto error;
217 }
218 Py_DECREF(parts);
219 Py_DECREF(res_parts);
220 return res;
221
222 error:
223 Py_XDECREF(res_parts);
224 Py_XDECREF(parts);
225 return NULL;
226 }
227
228
229 static
230 PyObject *
231 fast_unquote(PyObject *self, PyObject *s)
232 {
233 return _fast_unquote(self, s, NULL);
234 }
235
236
237 static
238 PyObject *
239 fast_pathstr2path(PyObject *self, PyObject *varname)
240 {
241 Py_ssize_t varname_len;
242 PyObject *parts = NULL;
243 Py_ssize_t parts_len;
244 PyObject *res = NULL;
245 Py_ssize_t i;
246 PyObject *o;
247 PyObject *u;
248 struct speedups_state *sstate;
249
250 if (!PyUnicode_Check(varname)) {
251 PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected");
252 return NULL;
253 }
254 varname_len = PyUnicode_GetLength(varname);
255 if (varname_len < 0) {
256 return NULL;
257 }
258 if (varname_len == 0) {
259 return PyTuple_New(0);
260 }
261
262 sstate = PyModule_GetState(self);
263 if (sstate == NULL) {
264 PyErr_SetString(PyExc_RuntimeError, "no module state available");
265 return NULL;
266 }
267 parts = PyUnicode_Split(varname, sstate->DOT, -1);
268 if (parts == NULL) {
269 goto error;
270 }
271 parts_len = PyList_Size(parts);
272 if (parts_len < 0) {
273 goto error;
274 }
275 res = PyTuple_New(parts_len);
276 if (res == NULL) {
277 goto error;
278 }
279 for (i=0; i < parts_len; i++) {
280 o = PyList_GetItem(parts, i); /* borrowed */
281 u = _fast_unquote(self, o, sstate);
282 if (u == NULL) {
283 goto error;
284 }
285 PyTuple_SetItem(res, i, u); /* steals */
286 }
287
288 Py_DECREF(parts);
289 return res;
290
291 error:
292 Py_XDECREF(parts);
293 Py_XDECREF(res);
294 return NULL;
295 }
296
297
298 static struct PyMethodDef speedups_methods[] = {
299 {"fast_unquote", fast_unquote, METH_O, PyDoc_STR("C-implementation of configmix.unquote")},
300 {"fast_pathstr2path", fast_pathstr2path, METH_O, PyDoc_STR("C-implementation of configmix.pathstr2path")},
301 {NULL, NULL, 0, NULL}
302 };
303
304
305 static
306 int
307 speedups_exec(PyObject *module)
308 {
309 struct speedups_state *sstate = PyModule_GetState(module);
310
311 if (sstate == NULL) {
312 PyErr_SetString(PyExc_ImportError, "no module state available yet");
313 return -1;
314 }
315
316 PyModule_AddStringConstant(module, "__release__", release);
317 PyModule_AddStringConstant(module, "__date__", date);
318 PyModule_AddStringConstant(module, "__author__", "Franz Glasner");
319
320 sstate->DOT = PyUnicode_FromStringAndSize(".", 1);
321 if (sstate->DOT == NULL) {
322 return -1;
323 }
324 PyUnicode_InternInPlace(&(sstate->DOT));
325
326 sstate->QUOTE = PyUnicode_FromStringAndSize("%", 1);
327 if (sstate->QUOTE == NULL) {
328 return -1;
329 }
330 PyUnicode_InternInPlace(&(sstate->QUOTE));
331
332 sstate->EMPTY_STR = PyUnicode_FromStringAndSize("", 0);
333 if (sstate->EMPTY_STR == NULL) {
334 return -1;
335 }
336 PyUnicode_InternInPlace(&(sstate->EMPTY_STR));
337
338 return 0;
339 }
340
341
342 static
343 int
344 speeeupds_traverse(PyObject *module, visitproc visit, void *arg)
345 {
346 struct speedups_state *sstate = PyModule_GetState(module);
347
348 if (sstate != NULL) {
349 Py_VISIT(sstate->DOT);
350 Py_VISIT(sstate->QUOTE);
351 Py_VISIT(sstate->EMPTY_STR);
352 }
353 return 0;
354 }
355
356
357 static
358 int
359 speedups_clear(PyObject *module)
360 {
361 struct speedups_state *sstate = PyModule_GetState(module);
362
363 if (sstate != NULL) {
364 Py_CLEAR(sstate->DOT);
365 Py_CLEAR(sstate->QUOTE);
366 Py_CLEAR(sstate->EMPTY_STR);
367 }
368 return 0;
369 }
370
371
372 static struct PyModuleDef_Slot speedups_slots[] = {
373 {Py_mod_exec, speedups_exec},
374 {0, NULL}
375 };
376
377
378 static struct PyModuleDef speedups_def = {
379 PyModuleDef_HEAD_INIT, /* m_base */
380 "_speedups", /* m_name (relative) */
381 PyDoc_STR("Speedups for configmix"), /* m_doc */
382 sizeof(struct speedups_state), /* m_size */
383 speedups_methods, /* m_methods */
384 speedups_slots, /* m_slots */
385 speeeupds_traverse, /* m_traverse */
386 speedups_clear, /* m_clear */
387 NULL /* m_free */
388 };
389
390
391 PyMODINIT_FUNC
392 PyInit__speedups(void)
393 {
394 /*
395 * Use multi-phase extension module initialization (PEP 489).
396 * This is Python 3.5+.
397 */
398 return PyModuleDef_Init(&speedups_def);
399 }