Mercurial > hgrepos > Python > libs > ConfigMix
diff configmix/_speedups.c @ 542:f71d34dda19f
Add an optional C-implementation for configmix.config.unquote and configmix.config.pathstr2path.
This is currently for Python 3.5+.
It is tested with Python 3.7 and Python 3.8 (FreeBSD 12.2 amd64, LLVM 10.0.1).
A build for the stable API ("abi3") fails because PyUnicode_New() is currently
not in the stable API.
Also included are extended tests for unquote() and pathstr2path().
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Fri, 31 Dec 2021 21:24:16 +0100 |
| parents | |
| children | 491413368c7c |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/configmix/_speedups.c Fri Dec 31 21:24:16 2021 +0100 @@ -0,0 +1,399 @@ +/* -*- coding: utf-8 -*- */ +/* + * Speedups for configmix. + * + * :Copyright: (c) 2021, Franz Glasner. All rights reserved. + * :License: BSD-3-Clause. See LICENSE.txt for details. + */ + +#define PY_SSIZE_T_CLEAN +#include "Python.h" + + +const char _id[] = "@(#)configmix._speedups $Header$"; +static const char release[] = "|VCSRevision|"; +static const char date[] = "|VCSJustDate|"; + + +/* + * Module state holds pre-created some objects + */ +struct speedups_state { + PyObject *DOT; + PyObject *QUOTE; + PyObject *EMPTY_STR; +}; + + +static +int +_hex2ucs4(PyObject *s, Py_ssize_t end, Py_UCS4 *result) +{ + Py_ssize_t i; + Py_UCS4 c; + Py_UCS4 r = 0; + + for (i=1; i < end; i++) { + r *= 16; + c = PyUnicode_ReadChar(s, i); + if ((c >= 48) && (c <= 57)) { /* 0 - 9 */ + r += (c - 48); + } + else { + if ((c >= 97) && (c <= 102)) { /* a - f */ + r += (c - 87); + } + else { + if ((c >= 65) && (c <= 70)) { /* A - F */ + r += (c - 55); + } + else { + PyErr_SetString(PyExc_ValueError, "invalid base-16 literal"); + return -1; + } + } + } + } + *result = r; + return 0; /* success */ +} + + +static +PyObject * +_hex2string(PyObject *s, Py_ssize_t end) +{ + Py_UCS4 c; + PyObject *u = NULL; + + if (_hex2ucs4(s, end, &c) != 0) + return NULL; + u = PyUnicode_New(1, c); /* ARGH: not in the stable API */ + if (u == NULL) + return NULL; + if (PyUnicode_WriteChar(u, 0, c) != 0) { + Py_DECREF(u); + return NULL; + } + return u; +} + + +static +PyObject * +_fast_unquote(PyObject *self, PyObject *s, struct speedups_state *sstate) +{ + Py_ssize_t find; + Py_ssize_t s_len; + Py_ssize_t parts_len; + PyObject *res; + PyObject *res_parts = NULL; + PyObject *parts = NULL; + PyObject *o; + PyObject *pb; + Py_ssize_t pb_len; + Py_ssize_t i; + Py_UCS4 c; + + if (!PyUnicode_Check(s)) { + PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected"); + return NULL; 
+ } + s_len = PyUnicode_GetLength(s); + if (s_len < 0) { + return NULL; + } + if (s_len == 0) { + Py_INCREF(s); + return s; + } + find = PyUnicode_FindChar(s, '%', 0, s_len, 1); + if (find == -2) { + return NULL; + } + if (find == -1) { + Py_INCREF(s); + return s; + } + + if (sstate == NULL) { + sstate = PyModule_GetState(self); + if (sstate == NULL) { + PyErr_SetString(PyExc_RuntimeError, "no module state available"); + return NULL; + } + } + parts = PyUnicode_Split(s, sstate->QUOTE, -1); + if (parts == NULL) { + goto error; + } + parts_len = PyList_Size(parts); + if (parts_len < 0) { + goto error; + } + res_parts = PyTuple_New((parts_len-1)*2 + 1); + if (res_parts == NULL) { + goto error; + } + + o = PyList_GetItem(parts, 0); /* borrowed */ + if (o == NULL) { + goto error; + } + /* + * The first item may be also the empty string if `s' starts with + * a quoted character. + */ + Py_INCREF(o); /* because PyTuple_SetItem steals -- and o is borrowed */ + PyTuple_SetItem(res_parts, 0, o); + + for (i=1; i<parts_len; i++) { + pb = PyList_GetItem(parts, i); /* borrowed */ + pb_len = PyUnicode_GetLength(pb); + if (pb_len < 1) { + PyErr_SetString(PyExc_ValueError, "unknown quote syntax string"); + goto error; + } + c = PyUnicode_ReadChar(pb, 0); + switch (c) { + case 0x55: /* U */ + if (pb_len < 9) { + PyErr_SetString(PyExc_ValueError, "quote syntax: length too small"); + goto error; + } + o = _hex2string(pb, 9); + if (o == NULL) { + goto error; + } + PyTuple_SetItem(res_parts, (i-1)*2 + 1, o); /* steals */ + o = PyUnicode_Substring(pb, 9, pb_len); + if (o == NULL) { + goto error; + } + PyTuple_SetItem(res_parts, i*2, o); /* steals */ + break; + case 0x75: /* u */ + if (pb_len < 5) { + PyErr_SetString(PyExc_ValueError, "quote syntax: length too small"); + goto error; + } + o = _hex2string(pb, 5); + if (o == NULL) { + goto error; + } + PyTuple_SetItem(res_parts, (i-1)*2 + 1, o); /* steals */ + o = PyUnicode_Substring(pb, 5, pb_len); + if (o == NULL) { + goto error; + } + 
PyTuple_SetItem(res_parts, i*2, o); /* steals */ + break; + case 0x78: /* x */ + if (pb_len < 3) { + PyErr_SetString(PyExc_ValueError, "quote syntax: length too small"); + goto error; + } + o = _hex2string(pb, 3); + if (o == NULL) { + goto error; + } + PyTuple_SetItem(res_parts, (i-1)*2 + 1, o); /* steals */ + o = PyUnicode_Substring(pb, 3, pb_len); + if (o == NULL) { + goto error; + } + PyTuple_SetItem(res_parts, i*2, o); /* steals */ + break; + + default: + PyErr_SetString(PyExc_ValueError, "unknown quote syntax string"); + goto error; + } + } + + res = PyUnicode_Join(sstate->EMPTY_STR, res_parts); + if (res == NULL) { + goto error; + } + Py_DECREF(parts); + Py_DECREF(res_parts); + return res; + +error: + Py_XDECREF(res_parts); + Py_XDECREF(parts); + return NULL; +} + + +static +PyObject * +fast_unquote(PyObject *self, PyObject *s) +{ + return _fast_unquote(self, s, NULL); +} + + +static +PyObject * +fast_pathstr2path(PyObject *self, PyObject *varname) +{ + Py_ssize_t varname_len; + PyObject *parts = NULL; + Py_ssize_t parts_len; + PyObject *res = NULL; + Py_ssize_t i; + PyObject *o; + PyObject *u; + struct speedups_state *sstate; + + if (!PyUnicode_Check(varname)) { + PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected"); + return NULL; + } + varname_len = PyUnicode_GetLength(varname); + if (varname_len < 0) { + return NULL; + } + if (varname_len == 0) { + return PyTuple_New(0); + } + + sstate = PyModule_GetState(self); + if (sstate == NULL) { + PyErr_SetString(PyExc_RuntimeError, "no module state available"); + return NULL; + } + parts = PyUnicode_Split(varname, sstate->DOT, -1); + if (parts == NULL) { + goto error; + } + parts_len = PyList_Size(parts); + if (parts_len < 0) { + goto error; + } + res = PyTuple_New(parts_len); + if (res == NULL) { + goto error; + } + for (i=0; i < parts_len; i++) { + o = PyList_GetItem(parts, i); /* borrowed */ + u = _fast_unquote(self, o, sstate); + if (u == NULL) { + goto error; + } + PyTuple_SetItem(res, i, 
u); /* steals */ + } + + Py_DECREF(parts); + return res; + +error: + Py_XDECREF(parts); + Py_XDECREF(res); + return NULL; +} + + +static struct PyMethodDef speedups_methods[] = { + {"fast_unquote", fast_unquote, METH_O, PyDoc_STR("C-implementation of configmix.unquote")}, + {"fast_pathstr2path", fast_pathstr2path, METH_O, PyDoc_STR("C-implementation of configmix.pathstr2path")}, + {NULL, NULL, 0, NULL} +}; + + +static +int +speedups_exec(PyObject *module) +{ + struct speedups_state *sstate = PyModule_GetState(module); + + if (sstate == NULL) { + PyErr_SetString(PyExc_ImportError, "no module state available yet"); + return -1; + } + + PyModule_AddStringConstant(module, "__release__", release); + PyModule_AddStringConstant(module, "__date__", date); + PyModule_AddStringConstant(module, "__author__", "Franz Glasner"); + + sstate->DOT = PyUnicode_FromStringAndSize(".", 1); + if (sstate->DOT == NULL) { + return -1; + } + PyUnicode_InternInPlace(&(sstate->DOT)); + + sstate->QUOTE = PyUnicode_FromStringAndSize("%", 1); + if (sstate->QUOTE == NULL) { + return -1; + } + PyUnicode_InternInPlace(&(sstate->QUOTE)); + + sstate->EMPTY_STR = PyUnicode_FromStringAndSize("", 0); + if (sstate->EMPTY_STR == NULL) { + return -1; + } + PyUnicode_InternInPlace(&(sstate->EMPTY_STR)); + + return 0; +} + + +static +int +speeeupds_traverse(PyObject *module, visitproc visit, void *arg) +{ + struct speedups_state *sstate = PyModule_GetState(module); + + if (sstate != NULL) { + Py_VISIT(sstate->DOT); + Py_VISIT(sstate->QUOTE); + Py_VISIT(sstate->EMPTY_STR); + } + return 0; +} + + +static +int +speedups_clear(PyObject *module) +{ + struct speedups_state *sstate = PyModule_GetState(module); + + if (sstate != NULL) { + Py_CLEAR(sstate->DOT); + Py_CLEAR(sstate->QUOTE); + Py_CLEAR(sstate->EMPTY_STR); + } + return 0; +} + + +static struct PyModuleDef_Slot speedups_slots[] = { + {Py_mod_exec, speedups_exec}, + {0, NULL} +}; + + +static struct PyModuleDef speedups_def = { + PyModuleDef_HEAD_INIT, /* 
m_base */ + "_speedups", /* m_name (relative) */ + PyDoc_STR("Speedups for configmix"), /* m_doc */ + sizeof(struct speedups_state), /* m_size */ + speedups_methods, /* m_methods */ + speedups_slots, /* m_slots */ + speeeupds_traverse, /* m_traverse */ + speedups_clear, /* m_clear */ + NULL /* m_free */ +}; + + +PyMODINIT_FUNC +PyInit__speedups(void) +{ + /* + * Use multi-phase extension module initialization (PEP 489). + * This is Python 3.5+. + */ + return PyModuleDef_Init(&speedups_def); +}
