diff configmix/_speedups.c @ 542:f71d34dda19f

Add an optional C implementation for configmix.config.unquote and configmix.config.pathstr2path. This currently requires Python 3.5+. It is tested with Python 3.7 and Python 3.8 (FreeBSD 12.2 amd64, LLVM 10.0.1). A build for the stable API ("abi3") fails because PyUnicode_New() is currently not in the stable API. Also included are extended tests for unquote() and pathstr2path().
author Franz Glasner <fzglas.hg@dom66.de>
date Fri, 31 Dec 2021 21:24:16 +0100
parents
children 491413368c7c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/configmix/_speedups.c	Fri Dec 31 21:24:16 2021 +0100
@@ -0,0 +1,399 @@
+/* -*- coding: utf-8 -*- */
+/*
+ * Speedups for configmix.
+ *
+ * :Copyright: (c) 2021, Franz Glasner. All rights reserved.
+ * :License:   BSD-3-Clause. See LICENSE.txt for details.
+ */
+
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+
+
+const char _id[] = "@(#)configmix._speedups $Header$";   /* identification string; not static, so it has external linkage */
+static const char release[] = "|VCSRevision|";   /* exported as module attribute __release__; presumably a placeholder filled in at release time -- confirm */
+static const char date[] = "|VCSJustDate|";   /* exported as module attribute __date__; presumably a placeholder filled in at release time -- confirm */
+
+
+/*
+ * Per-module state: string objects that are created once when the
+ * module is executed and then reused by the conversion functions.
+ */
+struct speedups_state {
+    PyObject *DOT;          /* "." -- separator used to split a path string */
+    PyObject *QUOTE;        /* "%" -- separator used to split a quoted string */
+    PyObject *EMPTY_STR;    /* "" -- separator used to join the unquoted parts */
+};
+
+
+/*
+ * Interpret the characters s[1] .. s[end-1] as hexadecimal digits and
+ * compute the corresponding number. The character at index 0 is skipped.
+ *
+ * Returns 0 on success and stores the number into `*result'.
+ * Returns -1 with a ValueError set if a character is not a hex digit;
+ * `*result' is not written in this case.
+ */
+static
+int
+_hex2ucs4(PyObject *s, Py_ssize_t end, Py_UCS4 *result)
+{
+    Py_UCS4 value = 0;
+    Py_UCS4 ch;
+    Py_UCS4 digit;
+    Py_ssize_t pos = 1;
+
+    while (pos < end) {
+        ch = PyUnicode_ReadChar(s, pos);
+        if ((ch >= 0x30) && (ch <= 0x39)) {         /* 0 - 9 */
+            digit = ch - 0x30;
+        }
+        else if ((ch >= 0x61) && (ch <= 0x66)) {    /* a - f */
+            digit = ch - 0x61 + 10;
+        }
+        else if ((ch >= 0x41) && (ch <= 0x46)) {    /* A - F */
+            digit = ch - 0x41 + 10;
+        }
+        else {
+            PyErr_SetString(PyExc_ValueError, "invalid base-16 literal");
+            return -1;
+        }
+        value = value * 16 + digit;
+        pos++;
+    }
+    *result = value;
+    return 0;  /* success */
+}
+
+
+/*
+ * Create a new string of length 1 whose single character has the code
+ * point given by the hexadecimal digits s[1] .. s[end-1].
+ *
+ * Returns a new reference, or NULL with an exception set: a ValueError
+ * if a character is not a hex digit or if the resulting number is not
+ * a valid Unicode code point (greater than 0x10FFFF).
+ */
+static
+PyObject *
+_hex2string(PyObject *s, Py_ssize_t end)
+{
+    Py_UCS4 c;
+
+    if (_hex2ucs4(s, end, &c) != 0)
+        return NULL;
+    /*
+     * Eight hex digits can encode numbers beyond the Unicode range.
+     * Reject them here with a ValueError: this is a problem with the
+     * input and not an internal error. The check is done on the unsigned
+     * value so that the conversion to `int' below is always safe.
+     */
+    if (c > 0x10FFFF) {
+        PyErr_SetString(PyExc_ValueError,
+                        "quote syntax: code point not in range(0x110000)");
+        return NULL;
+    }
+    /* PyUnicode_FromOrdinal() is part of the stable API */
+    return PyUnicode_FromOrdinal((int)c);
+}
+
+
+/*
+ * Worker for unquoting: replace every quoted character in `s' by the
+ * character itself. The supported forms are `%xNN', `%uNNNN' and
+ * `%UNNNNNNNN' where every `N' is a hexadecimal digit.
+ *
+ * `self' is the module object; it is only used to fetch the module state
+ * if `sstate' is NULL.
+ *
+ * Returns a new reference. If `s' is empty or contains no `%' then `s'
+ * itself is returned (with a new reference).
+ * Returns NULL with an exception set on errors: a TypeError if `s' is not
+ * a string, a ValueError if a quoted character is malformed.
+ */
+static
+PyObject *
+_fast_unquote(PyObject *self, PyObject *s, struct speedups_state *sstate)
+{
+    Py_ssize_t find;
+    Py_ssize_t s_len;
+    Py_ssize_t parts_len;
+    PyObject *res;
+    PyObject *res_parts = NULL;
+    PyObject *parts = NULL;
+    PyObject *o;
+    PyObject *pb;
+    Py_ssize_t pb_len;
+    Py_ssize_t hex_end;
+    Py_ssize_t i;
+    Py_UCS4 c;
+
+    if (!PyUnicode_Check(s)) {
+        PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected");
+        return NULL;
+    }
+    s_len = PyUnicode_GetLength(s);
+    if (s_len < 0) {
+        return NULL;
+    }
+    if (s_len == 0) {
+        Py_INCREF(s);
+        return s;
+    }
+    find = PyUnicode_FindChar(s, '%', 0, s_len, 1);
+    if (find == -2) {
+        return NULL;
+    }
+    if (find == -1) {
+        /* nothing is quoted: hand back the input unchanged */
+        Py_INCREF(s);
+        return s;
+    }
+
+    if (sstate == NULL) {
+        sstate = PyModule_GetState(self);
+        if (sstate == NULL) {
+            PyErr_SetString(PyExc_RuntimeError, "no module state available");
+            return NULL;
+        }
+    }
+    parts = PyUnicode_Split(s, sstate->QUOTE, -1);
+    if (parts == NULL) {
+        goto error;
+    }
+    parts_len = PyList_Size(parts);
+    if (parts_len < 0) {
+        goto error;
+    }
+    /*
+     * The first part is taken as is. Every following part yields two
+     * items: the unquoted character and the remaining text.
+     */
+    res_parts = PyTuple_New((parts_len-1)*2 + 1);
+    if (res_parts == NULL) {
+        goto error;
+    }
+
+    o = PyList_GetItem(parts, 0);   /* borrowed */
+    if (o == NULL) {
+        goto error;
+    }
+    /*
+     * The first item may be also the empty string if `s' starts with
+     * a quoted character.
+     */
+    Py_INCREF(o);   /* because PyTuple_SetItem steals -- and o is borrowed */
+    if (PyTuple_SetItem(res_parts, 0, o) != 0) {
+        goto error;
+    }
+
+    for (i=1; i<parts_len; i++) {
+        pb = PyList_GetItem(parts, i);   /* borrowed */
+        if (pb == NULL) {
+            goto error;
+        }
+        pb_len = PyUnicode_GetLength(pb);
+        if (pb_len < 0) {
+            /* keep the exception that has been set already */
+            goto error;
+        }
+        if (pb_len < 1) {
+            PyErr_SetString(PyExc_ValueError, "unknown quote syntax string");
+            goto error;
+        }
+        /*
+         * The first character selects the number of hex digits;
+         * `hex_end' is the index just behind the last digit.
+         */
+        c = PyUnicode_ReadChar(pb, 0);
+        switch (c) {
+        case 0x55:   /* U */
+            hex_end = 9;
+            break;
+        case 0x75:   /* u */
+            hex_end = 5;
+            break;
+        case 0x78:   /* x */
+            hex_end = 3;
+            break;
+
+        default:
+            PyErr_SetString(PyExc_ValueError, "unknown quote syntax string");
+            goto error;
+        }
+        if (pb_len < hex_end) {
+            PyErr_SetString(PyExc_ValueError, "quote syntax: length too small");
+            goto error;
+        }
+        o = _hex2string(pb, hex_end);
+        if (o == NULL) {
+            goto error;
+        }
+        /* PyTuple_SetItem steals `o' -- also in the failure case */
+        if (PyTuple_SetItem(res_parts, (i-1)*2 + 1, o) != 0) {
+            goto error;
+        }
+        o = PyUnicode_Substring(pb, hex_end, pb_len);
+        if (o == NULL) {
+            goto error;
+        }
+        if (PyTuple_SetItem(res_parts, i*2, o) != 0) {   /* steals */
+            goto error;
+        }
+    }
+
+    res = PyUnicode_Join(sstate->EMPTY_STR, res_parts);
+    if (res == NULL) {
+        goto error;
+    }
+    Py_DECREF(parts);
+    Py_DECREF(res_parts);
+    return res;
+
+error:
+    Py_XDECREF(res_parts);
+    Py_XDECREF(parts);
+    return NULL;
+}
+
+
+/*
+ * Method `fast_unquote' of the module: unquote the single argument `s'.
+ *
+ * All the work is delegated to _fast_unquote(). No module state is
+ * passed along, so the worker fetches it from `self' when needed.
+ */
+static
+PyObject *
+fast_unquote(PyObject *self, PyObject *s)
+{
+    struct speedups_state *no_state = NULL;
+
+    return _fast_unquote(self, s, no_state);
+}
+
+
+/*
+ * Method `fast_pathstr2path' of the module: split `varname' at every
+ * "." and unquote each of the resulting parts.
+ *
+ * Returns a new tuple with the unquoted parts; an empty `varname' yields
+ * the empty tuple. Returns NULL with an exception set on errors: a
+ * TypeError if `varname' is not a string, a RuntimeError if the module
+ * state is not available, or whatever _fast_unquote() reports.
+ */
+static
+PyObject *
+fast_pathstr2path(PyObject *self, PyObject *varname)
+{
+    struct speedups_state *state;
+    PyObject *pieces = NULL;
+    PyObject *path = NULL;
+    PyObject *piece;
+    PyObject *unquoted;
+    Py_ssize_t name_len;
+    Py_ssize_t count;
+    Py_ssize_t idx;
+
+    if (!PyUnicode_Check(varname)) {
+        PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected");
+        return NULL;
+    }
+    name_len = PyUnicode_GetLength(varname);
+    if (name_len < 0) {
+        return NULL;
+    }
+    if (name_len == 0) {
+        return PyTuple_New(0);
+    }
+
+    state = PyModule_GetState(self);
+    if (state == NULL) {
+        PyErr_SetString(PyExc_RuntimeError, "no module state available");
+        return NULL;
+    }
+
+    pieces = PyUnicode_Split(varname, state->DOT, -1);
+    if (pieces == NULL) {
+        goto fail;
+    }
+    count = PyList_Size(pieces);
+    if (count < 0) {
+        goto fail;
+    }
+    path = PyTuple_New(count);
+    if (path == NULL) {
+        goto fail;
+    }
+
+    idx = 0;
+    while (idx < count) {
+        piece = PyList_GetItem(pieces, idx);    /* borrowed */
+        unquoted = _fast_unquote(self, piece, state);
+        if (unquoted == NULL) {
+            goto fail;
+        }
+        PyTuple_SetItem(path, idx, unquoted);   /* steals */
+        idx++;
+    }
+
+    Py_DECREF(pieces);
+    return path;
+
+fail:
+    Py_XDECREF(pieces);
+    Py_XDECREF(path);
+    return NULL;
+}
+
+
+/*
+ * Method table of the module. Both functions take exactly one
+ * argument (METH_O).
+ */
+static struct PyMethodDef speedups_methods[] = {
+    {
+        "fast_unquote",
+        fast_unquote,
+        METH_O,
+        PyDoc_STR("C-implementation of configmix.unquote")
+    },
+    {
+        "fast_pathstr2path",
+        fast_pathstr2path,
+        METH_O,
+        PyDoc_STR("C-implementation of configmix.pathstr2path")
+    },
+    {NULL, NULL, 0, NULL}    /* sentinel */
+};
+
+
+/*
+ * Execution slot of the module: add the string constants `__release__',
+ * `__date__' and `__author__' to the module and create the interned
+ * string objects that are held in the module state.
+ *
+ * Returns 0 on success and -1 with an exception set on errors.
+ */
+static
+int
+speedups_exec(PyObject *module)
+{
+    struct speedups_state *sstate = PyModule_GetState(module);
+
+    if (sstate == NULL) {
+        PyErr_SetString(PyExc_ImportError, "no module state available yet");
+        return -1;
+    }
+
+    /*
+     * PyModule_AddStringConstant() returns -1 with an exception set on
+     * errors: do not continue with a pending exception.
+     */
+    if (PyModule_AddStringConstant(module, "__release__", release) != 0) {
+        return -1;
+    }
+    if (PyModule_AddStringConstant(module, "__date__", date) != 0) {
+        return -1;
+    }
+    if (PyModule_AddStringConstant(module, "__author__", "Franz Glasner") != 0) {
+        return -1;
+    }
+
+    sstate->DOT = PyUnicode_FromStringAndSize(".", 1);
+    if (sstate->DOT == NULL) {
+        return -1;
+    }
+    PyUnicode_InternInPlace(&(sstate->DOT));
+
+    sstate->QUOTE = PyUnicode_FromStringAndSize("%", 1);
+    if (sstate->QUOTE == NULL) {
+        return -1;
+    }
+    PyUnicode_InternInPlace(&(sstate->QUOTE));
+
+    sstate->EMPTY_STR = PyUnicode_FromStringAndSize("", 0);
+    if (sstate->EMPTY_STR == NULL) {
+        return -1;
+    }
+    PyUnicode_InternInPlace(&(sstate->EMPTY_STR));
+
+    return 0;
+}
+
+
+/*
+ * Traversal function of the module: visit all objects that are held
+ * in the module state. Nothing is visited if there is no state.
+ *
+ * Note that Py_VISIT() returns from this function if the visitor
+ * yields a non-zero result.
+ */
+static
+int
+speeeupds_traverse(PyObject *module, visitproc visit, void *arg)
+{
+    struct speedups_state *state = PyModule_GetState(module);
+
+    if (state == NULL) {
+        return 0;
+    }
+    Py_VISIT(state->DOT);
+    Py_VISIT(state->QUOTE);
+    Py_VISIT(state->EMPTY_STR);
+    return 0;
+}
+
+
+/*
+ * Clear function of the module: drop all references that are held in
+ * the module state and reset the members to NULL. Does nothing if
+ * there is no state. Always returns 0.
+ */
+static
+int
+speedups_clear(PyObject *module)
+{
+    struct speedups_state *state = PyModule_GetState(module);
+
+    if (state == NULL) {
+        return 0;
+    }
+    Py_CLEAR(state->DOT);
+    Py_CLEAR(state->QUOTE);
+    Py_CLEAR(state->EMPTY_STR);
+    return 0;
+}
+
+
+static struct PyModuleDef_Slot speedups_slots[] = {
+    {Py_mod_exec, speedups_exec},   /* speedups_exec() populates the module and its state */
+    {0, NULL}                       /* sentinel */
+};
+
+
+/*
+ * Free function of the module: called when the module object is
+ * deallocated. The clear function is not guaranteed to have run before,
+ * so release the references held in the module state here. Calling
+ * speedups_clear() more than once is harmless because it resets the
+ * members to NULL.
+ */
+static
+void
+speedups_free(void *module)
+{
+    (void)speedups_clear((PyObject *)module);
+}
+
+
+/*
+ * The module definition: a module with per-module state that is
+ * initialized by the slots in `speedups_slots'.
+ */
+static struct PyModuleDef speedups_def = {
+    PyModuleDef_HEAD_INIT,                      /* m_base */
+    "_speedups",                                /* m_name  (relative) */
+    PyDoc_STR("Speedups for configmix"),        /* m_doc */
+    sizeof(struct speedups_state),              /* m_size */
+    speedups_methods,                           /* m_methods */
+    speedups_slots,                             /* m_slots */
+    speeeupds_traverse,                         /* m_traverse */
+    speedups_clear,                             /* m_clear */
+    speedups_free                               /* m_free */
+};
+
+
+/*
+ * Initialization function of the extension module.
+ *
+ * Multi-phase extension module initialization (PEP 489, Python 3.5+)
+ * is used: only the prepared module definition is handed back here.
+ */
+PyMODINIT_FUNC
+PyInit__speedups(void)
+{
+    PyObject *module_def = PyModuleDef_Init(&speedups_def);
+
+    return module_def;
+}