Mercurial > hgrepos > Python > libs > ConfigMix
comparison configmix/_speedups.c @ 542:f71d34dda19f
Add an optional C-implementation for configmix.config.unquote and configmix.config.pathstr2path.
This is currently for Python 3.5+.
It is tested with Python 3.7 and Python 3.8 (FreeBSD 12.2 amd64, LLVM 10.0.1).
A build for the stable API ("abi3") fails because PyUnicode_New() is currently
not in the stable API.
Also included are extended tests for unquote() and pathstr2path().
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Fri, 31 Dec 2021 21:24:16 +0100 |
| parents | |
| children | 491413368c7c |
comparison
equal
deleted
inserted
replaced
| 541:25b61f0a1958 | 542:f71d34dda19f |
|---|---|
| 1 /* -*- coding: utf-8 -*- */ | |
| 2 /* | |
| 3 * Speedups for configmix. | |
| 4 * | |
| 5 * :Copyright: (c) 2021, Franz Glasner. All rights reserved. | |
| 6 * :License: BSD-3-Clause. See LICENSE.txt for details. | |
| 7 */ | |
| 8 | |
| 9 #define PY_SSIZE_T_CLEAN | |
| 10 #include "Python.h" | |
| 11 | |
| 12 | |
/*
 * Version placeholders; `release' and `date' are published by the module
 * init code as the module attributes `__release__' and `__date__'.
 */
static const char release[] = "|VCSRevision|";
static const char date[] = "|VCSJustDate|";

/*
 * Identification string with external linkage.
 * NOTE(review): presumably meant to be found by tools like what(1)/ident(1)
 * in the compiled object -- confirm before changing its linkage or name.
 */
const char _id[] = "@(#)configmix._speedups $Header$";
| 16 | |
| 17 | |
| 18 /* | |
| 19 * Module state holds pre-created some objects | |
| 20 */ | |
| 21 struct speedups_state { | |
| 22 PyObject *DOT; | |
| 23 PyObject *QUOTE; | |
| 24 PyObject *EMPTY_STR; | |
| 25 }; | |
| 26 | |
| 27 | |
| 28 static | |
| 29 int | |
| 30 _hex2ucs4(PyObject *s, Py_ssize_t end, Py_UCS4 *result) | |
| 31 { | |
| 32 Py_ssize_t i; | |
| 33 Py_UCS4 c; | |
| 34 Py_UCS4 r = 0; | |
| 35 | |
| 36 for (i=1; i < end; i++) { | |
| 37 r *= 16; | |
| 38 c = PyUnicode_ReadChar(s, i); | |
| 39 if ((c >= 48) && (c <= 57)) { /* 0 - 9 */ | |
| 40 r += (c - 48); | |
| 41 } | |
| 42 else { | |
| 43 if ((c >= 97) && (c <= 102)) { /* a - f */ | |
| 44 r += (c - 87); | |
| 45 } | |
| 46 else { | |
| 47 if ((c >= 65) && (c <= 70)) { /* A - F */ | |
| 48 r += (c - 55); | |
| 49 } | |
| 50 else { | |
| 51 PyErr_SetString(PyExc_ValueError, "invalid base-16 literal"); | |
| 52 return -1; | |
| 53 } | |
| 54 } | |
| 55 } | |
| 56 } | |
| 57 *result = r; | |
| 58 return 0; /* success */ | |
| 59 } | |
| 60 | |
| 61 | |
| 62 static | |
| 63 PyObject * | |
| 64 _hex2string(PyObject *s, Py_ssize_t end) | |
| 65 { | |
| 66 Py_UCS4 c; | |
| 67 PyObject *u = NULL; | |
| 68 | |
| 69 if (_hex2ucs4(s, end, &c) != 0) | |
| 70 return NULL; | |
| 71 u = PyUnicode_New(1, c); /* ARGH: not in the stable API */ | |
| 72 if (u == NULL) | |
| 73 return NULL; | |
| 74 if (PyUnicode_WriteChar(u, 0, c) != 0) { | |
| 75 Py_DECREF(u); | |
| 76 return NULL; | |
| 77 } | |
| 78 return u; | |
| 79 } | |
| 80 | |
| 81 | |
| 82 static | |
| 83 PyObject * | |
| 84 _fast_unquote(PyObject *self, PyObject *s, struct speedups_state *sstate) | |
| 85 { | |
| 86 Py_ssize_t find; | |
| 87 Py_ssize_t s_len; | |
| 88 Py_ssize_t parts_len; | |
| 89 PyObject *res; | |
| 90 PyObject *res_parts = NULL; | |
| 91 PyObject *parts = NULL; | |
| 92 PyObject *o; | |
| 93 PyObject *pb; | |
| 94 Py_ssize_t pb_len; | |
| 95 Py_ssize_t i; | |
| 96 Py_UCS4 c; | |
| 97 | |
| 98 if (!PyUnicode_Check(s)) { | |
| 99 PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected"); | |
| 100 return NULL; | |
| 101 } | |
| 102 s_len = PyUnicode_GetLength(s); | |
| 103 if (s_len < 0) { | |
| 104 return NULL; | |
| 105 } | |
| 106 if (s_len == 0) { | |
| 107 Py_INCREF(s); | |
| 108 return s; | |
| 109 } | |
| 110 find = PyUnicode_FindChar(s, '%', 0, s_len, 1); | |
| 111 if (find == -2) { | |
| 112 return NULL; | |
| 113 } | |
| 114 if (find == -1) { | |
| 115 Py_INCREF(s); | |
| 116 return s; | |
| 117 } | |
| 118 | |
| 119 if (sstate == NULL) { | |
| 120 sstate = PyModule_GetState(self); | |
| 121 if (sstate == NULL) { | |
| 122 PyErr_SetString(PyExc_RuntimeError, "no module state available"); | |
| 123 return NULL; | |
| 124 } | |
| 125 } | |
| 126 parts = PyUnicode_Split(s, sstate->QUOTE, -1); | |
| 127 if (parts == NULL) { | |
| 128 goto error; | |
| 129 } | |
| 130 parts_len = PyList_Size(parts); | |
| 131 if (parts_len < 0) { | |
| 132 goto error; | |
| 133 } | |
| 134 res_parts = PyTuple_New((parts_len-1)*2 + 1); | |
| 135 if (res_parts == NULL) { | |
| 136 goto error; | |
| 137 } | |
| 138 | |
| 139 o = PyList_GetItem(parts, 0); /* borrowed */ | |
| 140 if (o == NULL) { | |
| 141 goto error; | |
| 142 } | |
| 143 /* | |
| 144 * The first item may be also the empty string if `s' starts with | |
| 145 * a quoted character. | |
| 146 */ | |
| 147 Py_INCREF(o); /* because PyTuple_SetItem steals -- and o is borrowed */ | |
| 148 PyTuple_SetItem(res_parts, 0, o); | |
| 149 | |
| 150 for (i=1; i<parts_len; i++) { | |
| 151 pb = PyList_GetItem(parts, i); /* borrowed */ | |
| 152 pb_len = PyUnicode_GetLength(pb); | |
| 153 if (pb_len < 1) { | |
| 154 PyErr_SetString(PyExc_ValueError, "unknown quote syntax string"); | |
| 155 goto error; | |
| 156 } | |
| 157 c = PyUnicode_ReadChar(pb, 0); | |
| 158 switch (c) { | |
| 159 case 0x55: /* U */ | |
| 160 if (pb_len < 9) { | |
| 161 PyErr_SetString(PyExc_ValueError, "quote syntax: length too small"); | |
| 162 goto error; | |
| 163 } | |
| 164 o = _hex2string(pb, 9); | |
| 165 if (o == NULL) { | |
| 166 goto error; | |
| 167 } | |
| 168 PyTuple_SetItem(res_parts, (i-1)*2 + 1, o); /* steals */ | |
| 169 o = PyUnicode_Substring(pb, 9, pb_len); | |
| 170 if (o == NULL) { | |
| 171 goto error; | |
| 172 } | |
| 173 PyTuple_SetItem(res_parts, i*2, o); /* steals */ | |
| 174 break; | |
| 175 case 0x75: /* u */ | |
| 176 if (pb_len < 5) { | |
| 177 PyErr_SetString(PyExc_ValueError, "quote syntax: length too small"); | |
| 178 goto error; | |
| 179 } | |
| 180 o = _hex2string(pb, 5); | |
| 181 if (o == NULL) { | |
| 182 goto error; | |
| 183 } | |
| 184 PyTuple_SetItem(res_parts, (i-1)*2 + 1, o); /* steals */ | |
| 185 o = PyUnicode_Substring(pb, 5, pb_len); | |
| 186 if (o == NULL) { | |
| 187 goto error; | |
| 188 } | |
| 189 PyTuple_SetItem(res_parts, i*2, o); /* steals */ | |
| 190 break; | |
| 191 case 0x78: /* x */ | |
| 192 if (pb_len < 3) { | |
| 193 PyErr_SetString(PyExc_ValueError, "quote syntax: length too small"); | |
| 194 goto error; | |
| 195 } | |
| 196 o = _hex2string(pb, 3); | |
| 197 if (o == NULL) { | |
| 198 goto error; | |
| 199 } | |
| 200 PyTuple_SetItem(res_parts, (i-1)*2 + 1, o); /* steals */ | |
| 201 o = PyUnicode_Substring(pb, 3, pb_len); | |
| 202 if (o == NULL) { | |
| 203 goto error; | |
| 204 } | |
| 205 PyTuple_SetItem(res_parts, i*2, o); /* steals */ | |
| 206 break; | |
| 207 | |
| 208 default: | |
| 209 PyErr_SetString(PyExc_ValueError, "unknown quote syntax string"); | |
| 210 goto error; | |
| 211 } | |
| 212 } | |
| 213 | |
| 214 res = PyUnicode_Join(sstate->EMPTY_STR, res_parts); | |
| 215 if (res == NULL) { | |
| 216 goto error; | |
| 217 } | |
| 218 Py_DECREF(parts); | |
| 219 Py_DECREF(res_parts); | |
| 220 return res; | |
| 221 | |
| 222 error: | |
| 223 Py_XDECREF(res_parts); | |
| 224 Py_XDECREF(parts); | |
| 225 return NULL; | |
| 226 } | |
| 227 | |
| 228 | |
| 229 static | |
| 230 PyObject * | |
| 231 fast_unquote(PyObject *self, PyObject *s) | |
| 232 { | |
| 233 return _fast_unquote(self, s, NULL); | |
| 234 } | |
| 235 | |
| 236 | |
| 237 static | |
| 238 PyObject * | |
| 239 fast_pathstr2path(PyObject *self, PyObject *varname) | |
| 240 { | |
| 241 Py_ssize_t varname_len; | |
| 242 PyObject *parts = NULL; | |
| 243 Py_ssize_t parts_len; | |
| 244 PyObject *res = NULL; | |
| 245 Py_ssize_t i; | |
| 246 PyObject *o; | |
| 247 PyObject *u; | |
| 248 struct speedups_state *sstate; | |
| 249 | |
| 250 if (!PyUnicode_Check(varname)) { | |
| 251 PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected"); | |
| 252 return NULL; | |
| 253 } | |
| 254 varname_len = PyUnicode_GetLength(varname); | |
| 255 if (varname_len < 0) { | |
| 256 return NULL; | |
| 257 } | |
| 258 if (varname_len == 0) { | |
| 259 return PyTuple_New(0); | |
| 260 } | |
| 261 | |
| 262 sstate = PyModule_GetState(self); | |
| 263 if (sstate == NULL) { | |
| 264 PyErr_SetString(PyExc_RuntimeError, "no module state available"); | |
| 265 return NULL; | |
| 266 } | |
| 267 parts = PyUnicode_Split(varname, sstate->DOT, -1); | |
| 268 if (parts == NULL) { | |
| 269 goto error; | |
| 270 } | |
| 271 parts_len = PyList_Size(parts); | |
| 272 if (parts_len < 0) { | |
| 273 goto error; | |
| 274 } | |
| 275 res = PyTuple_New(parts_len); | |
| 276 if (res == NULL) { | |
| 277 goto error; | |
| 278 } | |
| 279 for (i=0; i < parts_len; i++) { | |
| 280 o = PyList_GetItem(parts, i); /* borrowed */ | |
| 281 u = _fast_unquote(self, o, sstate); | |
| 282 if (u == NULL) { | |
| 283 goto error; | |
| 284 } | |
| 285 PyTuple_SetItem(res, i, u); /* steals */ | |
| 286 } | |
| 287 | |
| 288 Py_DECREF(parts); | |
| 289 return res; | |
| 290 | |
| 291 error: | |
| 292 Py_XDECREF(parts); | |
| 293 Py_XDECREF(res); | |
| 294 return NULL; | |
| 295 } | |
| 296 | |
| 297 | |
| 298 static struct PyMethodDef speedups_methods[] = { | |
| 299 {"fast_unquote", fast_unquote, METH_O, PyDoc_STR("C-implementation of configmix.unquote")}, | |
| 300 {"fast_pathstr2path", fast_pathstr2path, METH_O, PyDoc_STR("C-implementation of configmix.pathstr2path")}, | |
| 301 {NULL, NULL, 0, NULL} | |
| 302 }; | |
| 303 | |
| 304 | |
| 305 static | |
| 306 int | |
| 307 speedups_exec(PyObject *module) | |
| 308 { | |
| 309 struct speedups_state *sstate = PyModule_GetState(module); | |
| 310 | |
| 311 if (sstate == NULL) { | |
| 312 PyErr_SetString(PyExc_ImportError, "no module state available yet"); | |
| 313 return -1; | |
| 314 } | |
| 315 | |
| 316 PyModule_AddStringConstant(module, "__release__", release); | |
| 317 PyModule_AddStringConstant(module, "__date__", date); | |
| 318 PyModule_AddStringConstant(module, "__author__", "Franz Glasner"); | |
| 319 | |
| 320 sstate->DOT = PyUnicode_FromStringAndSize(".", 1); | |
| 321 if (sstate->DOT == NULL) { | |
| 322 return -1; | |
| 323 } | |
| 324 PyUnicode_InternInPlace(&(sstate->DOT)); | |
| 325 | |
| 326 sstate->QUOTE = PyUnicode_FromStringAndSize("%", 1); | |
| 327 if (sstate->QUOTE == NULL) { | |
| 328 return -1; | |
| 329 } | |
| 330 PyUnicode_InternInPlace(&(sstate->QUOTE)); | |
| 331 | |
| 332 sstate->EMPTY_STR = PyUnicode_FromStringAndSize("", 0); | |
| 333 if (sstate->EMPTY_STR == NULL) { | |
| 334 return -1; | |
| 335 } | |
| 336 PyUnicode_InternInPlace(&(sstate->EMPTY_STR)); | |
| 337 | |
| 338 return 0; | |
| 339 } | |
| 340 | |
| 341 | |
| 342 static | |
| 343 int | |
| 344 speeeupds_traverse(PyObject *module, visitproc visit, void *arg) | |
| 345 { | |
| 346 struct speedups_state *sstate = PyModule_GetState(module); | |
| 347 | |
| 348 if (sstate != NULL) { | |
| 349 Py_VISIT(sstate->DOT); | |
| 350 Py_VISIT(sstate->QUOTE); | |
| 351 Py_VISIT(sstate->EMPTY_STR); | |
| 352 } | |
| 353 return 0; | |
| 354 } | |
| 355 | |
| 356 | |
| 357 static | |
| 358 int | |
| 359 speedups_clear(PyObject *module) | |
| 360 { | |
| 361 struct speedups_state *sstate = PyModule_GetState(module); | |
| 362 | |
| 363 if (sstate != NULL) { | |
| 364 Py_CLEAR(sstate->DOT); | |
| 365 Py_CLEAR(sstate->QUOTE); | |
| 366 Py_CLEAR(sstate->EMPTY_STR); | |
| 367 } | |
| 368 return 0; | |
| 369 } | |
| 370 | |
| 371 | |
| 372 static struct PyModuleDef_Slot speedups_slots[] = { | |
| 373 {Py_mod_exec, speedups_exec}, | |
| 374 {0, NULL} | |
| 375 }; | |
| 376 | |
| 377 | |
| 378 static struct PyModuleDef speedups_def = { | |
| 379 PyModuleDef_HEAD_INIT, /* m_base */ | |
| 380 "_speedups", /* m_name (relative) */ | |
| 381 PyDoc_STR("Speedups for configmix"), /* m_doc */ | |
| 382 sizeof(struct speedups_state), /* m_size */ | |
| 383 speedups_methods, /* m_methods */ | |
| 384 speedups_slots, /* m_slots */ | |
| 385 speeeupds_traverse, /* m_traverse */ | |
| 386 speedups_clear, /* m_clear */ | |
| 387 NULL /* m_free */ | |
| 388 }; | |
| 389 | |
| 390 | |
| 391 PyMODINIT_FUNC | |
| 392 PyInit__speedups(void) | |
| 393 { | |
| 394 /* | |
| 395 * Use multi-phase extension module initialization (PEP 489). | |
| 396 * This is Python 3.5+. | |
| 397 */ | |
| 398 return PyModuleDef_Init(&speedups_def); | |
| 399 } |
