view configmix/_speedups.c @ 543:491413368c7c

Added also a fast C-implementation of configmix.config._split_ns
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 01 Jan 2022 18:01:32 +0100
parents f71d34dda19f
children db2d108e14e7
line wrap: on
line source

/* -*- coding: utf-8 -*- */
/*
 * Speedups for configmix.
 *
 * :Copyright: (c) 2021, Franz Glasner. All rights reserved.
 * :License:   BSD-3-Clause. See LICENSE.txt for details.
 */

#define PY_SSIZE_T_CLEAN
#include "Python.h"


/*
 * Identification strings compiled into the module.
 *
 * `_id' carries an "@(#)" prefix and a "$Header$" keyword; presumably it is
 * meant to be found by tools like what(1)/ident(1) -- TODO confirm.
 *
 * `release' and `date' are exported by speedups_exec() as the module
 * attributes `__release__' and `__date__'.  The "|...|" texts look like
 * placeholders that are substituted by the release tooling -- TODO confirm.
 */
const char _id[] = "@(#)configmix._speedups $Header$";
static const char release[] = "|VCSRevision|";
static const char date[] = "|VCSJustDate|";


/*
 * Per-module state: holds some pre-created string objects so that they
 * need not be rebuilt on every call.
 *
 * All members are filled in by speedups_exec() and released by
 * speedups_clear().
 */
struct speedups_state {
    PyObject *DOT;            /* the string "." */
    PyObject *QUOTE;          /* the string "%" */
    PyObject *NS_SEPARATOR;   /* the string ":" */
    PyObject *EMPTY_STR;      /* the empty string "" */
};


/*
 * Convert the hexadecimal digits at the indexes 1 .. end-1 of `s' into
 * a number.
 *
 * The character at index 0 is deliberately skipped: the callers in this
 * file pass strings that start with the escape type character there.
 *
 * On success the value is stored into `*result' and 0 is returned.
 * If a character is not one of 0-9, a-f or A-F a ValueError is set,
 * -1 is returned and `*result' is left untouched.
 */
static
int
_hex2ucs4(PyObject *s, Py_ssize_t end, Py_UCS4 *result)
{
    Py_UCS4 value = 0;
    Py_UCS4 ch;
    Py_UCS4 digit;
    Py_ssize_t pos = 1;

    while (pos < end) {
        ch = PyUnicode_ReadChar(s, pos);
        if ((ch >= 0x30) && (ch <= 0x39)) {         /* 0 - 9 */
            digit = ch - 0x30;
        }
        else if ((ch >= 0x61) && (ch <= 0x66)) {    /* a - f */
            digit = ch - 0x61 + 10;
        }
        else if ((ch >= 0x41) && (ch <= 0x46)) {    /* A - F */
            digit = ch - 0x41 + 10;
        }
        else {
            PyErr_SetString(PyExc_ValueError, "invalid base-16 literal");
            return -1;
        }
        /* digit is < 16, so this equals value*16 + digit */
        value = (value << 4) | digit;
        pos++;
    }
    *result = value;
    return 0;  /* success */
}


/*
 * Build a one-character string from the hexadecimal digits at the
 * indexes 1 .. end-1 of `s' (see _hex2ucs4()).
 *
 * Returns a new reference or NULL with an exception set.  A ValueError
 * is raised for non-hexadecimal digits and also for values that are not
 * valid code points (greater than 0x10FFFF): eight hex digits after
 * `%U' can encode values far outside of the Unicode range, and such a
 * value must not surface as an internal SystemError.
 */
static
PyObject *
_hex2string(PyObject *s, Py_ssize_t end)
{
    Py_UCS4 c;

    if (_hex2ucs4(s, end, &c) != 0)
        return NULL;
    if (c > 0x10FFFF) {
        PyErr_SetString(PyExc_ValueError,
                        "quote syntax: character code not in range(0x110000)");
        return NULL;
    }
    /* c is range-checked above, so the conversion to int is lossless */
    return PyUnicode_FromOrdinal((int)c);
}


/*
 * Replace the quote sequences `%xNN', `%uNNNN' and `%UNNNNNNNN' within
 * `s' by the characters they encode (N being a hexadecimal digit).
 *
 * self    -- the module object; only used to look up the module state
 *            when `sstate' is NULL
 * s       -- the string to unquote; a TypeError is raised if it is not
 *            a (unicode) string
 * s_len   -- the length of `s' if the caller knows it already, or a
 *            negative value to have it determined here
 * sstate  -- the module state if the caller has it at hand, or NULL;
 *            it is fetched lazily, i.e. only if `s' contains a `%'
 *
 * Returns a new reference.  If `s' is empty or contains no `%' this is
 * `s' itself.  On errors NULL is returned with an exception set; syntax
 * errors within quote sequences are reported as ValueError.
 */
static
PyObject *
_fast_unquote(PyObject *self, PyObject *s, Py_ssize_t s_len, struct speedups_state *sstate)
{
    Py_ssize_t find;
    Py_ssize_t parts_len;
    PyObject *res;
    PyObject *res_parts = NULL;
    PyObject *parts = NULL;
    PyObject *o;
    PyObject *pb;
    Py_ssize_t pb_len;
    Py_ssize_t esc_end;
    Py_ssize_t i;
    Py_UCS4 c;

    if (!PyUnicode_Check(s)) {
        PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected");
        return NULL;
    }
    if (s_len < 0) {
        s_len = PyUnicode_GetLength(s);
        if (s_len < 0) {
            return NULL;
        }
    }
    if (s_len == 0) {
        Py_INCREF(s);
        return s;
    }
    find = PyUnicode_FindChar(s, '%', 0, s_len, 1);
    if (find == -2) {
        return NULL;
    }
    if (find == -1) {
        /* Fast path: nothing is quoted */
        Py_INCREF(s);
        return s;
    }

    if (sstate == NULL) {
        sstate = PyModule_GetState(self);
        if (sstate == NULL) {
            PyErr_SetString(PyExc_RuntimeError, "no module state available");
            return NULL;
        }
    }
    parts = PyUnicode_Split(s, sstate->QUOTE, -1);
    if (parts == NULL) {
        goto error;
    }
    parts_len = PyList_Size(parts);
    if (parts_len < 0) {
        goto error;
    }
    /*
     * The first part is copied as is; every other part yields two items:
     * the decoded character and the rest of the part.
     */
    res_parts = PyTuple_New((parts_len-1)*2 + 1);
    if (res_parts == NULL) {
        goto error;
    }

    o = PyList_GetItem(parts, 0);   /* borrowed */
    if (o == NULL) {
        goto error;
    }
    /*
     * The first item may be also the empty string if `s' starts with
     * a quoted character.
     */
    Py_INCREF(o);   /* because PyTuple_SetItem steals -- and o is borrowed */
    if (PyTuple_SetItem(res_parts, 0, o) != 0) {
        goto error;
    }

    for (i=1; i<parts_len; i++) {
        pb = PyList_GetItem(parts, i);   /* borrowed */
        if (pb == NULL) {
            goto error;
        }
        pb_len = PyUnicode_GetLength(pb);
        if (pb_len < 0) {
            /* keep the exception that is already set */
            goto error;
        }
        if (pb_len < 1) {
            PyErr_SetString(PyExc_ValueError, "unknown quote syntax string");
            goto error;
        }
        /*
         * The type character determines the count of hex digits;
         * esc_end is the index just behind the last digit.
         */
        c = PyUnicode_ReadChar(pb, 0);
        switch (c) {
        case 0x55:   /* U */
            esc_end = 9;
            break;
        case 0x75:   /* u */
            esc_end = 5;
            break;
        case 0x78:   /* x */
            esc_end = 3;
            break;
        default:
            PyErr_SetString(PyExc_ValueError, "unknown quote syntax string");
            goto error;
        }
        if (pb_len < esc_end) {
            PyErr_SetString(PyExc_ValueError, "quote syntax: length too small");
            goto error;
        }
        o = _hex2string(pb, esc_end);
        if (o == NULL) {
            goto error;
        }
        if (PyTuple_SetItem(res_parts, (i-1)*2 + 1, o) != 0) {   /* steals */
            goto error;
        }
        o = PyUnicode_Substring(pb, esc_end, pb_len);
        if (o == NULL) {
            goto error;
        }
        if (PyTuple_SetItem(res_parts, i*2, o) != 0) {    /* steals */
            goto error;
        }
    }

    res = PyUnicode_Join(sstate->EMPTY_STR, res_parts);
    if (res == NULL) {
        goto error;
    }
    Py_DECREF(parts);
    Py_DECREF(res_parts);
    return res;

error:
    Py_XDECREF(res_parts);
    Py_XDECREF(parts);
    return NULL;
}


/*
 * Python-callable entry point (METH_O): unquote the single argument `s'.
 *
 * Delegates to _fast_unquote() with an unknown length (-1) and without
 * a module state (NULL), so both are determined there if needed.
 */
static
PyObject *
fast_unquote(PyObject *self, PyObject *s)
{
    return _fast_unquote(self, s, -1, NULL);
}


/*
 * Python-callable entry point (METH_O): convert a path string into a
 * tuple of its unquoted parts.
 *
 * `varname' is split at every "." and each resulting part is passed
 * through _fast_unquote().  An empty `varname' yields an empty tuple.
 * A TypeError is raised if `varname' is not a (unicode) string.
 *
 * Returns a new reference or NULL with an exception set.
 */
static
PyObject *
fast_pathstr2path(PyObject *self, PyObject *varname)
{
    struct speedups_state *state;
    PyObject *pieces = NULL;
    PyObject *path = NULL;
    PyObject *unquoted;
    Py_ssize_t total_len;
    Py_ssize_t n_pieces;
    Py_ssize_t idx;

    if (!PyUnicode_Check(varname)) {
        PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected");
        return NULL;
    }
    total_len = PyUnicode_GetLength(varname);
    if (total_len < 0) {
        return NULL;
    }
    if (total_len == 0) {
        return PyTuple_New(0);
    }

    state = PyModule_GetState(self);
    if (state == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "no module state available");
        return NULL;
    }

    pieces = PyUnicode_Split(varname, state->DOT, -1);
    if (pieces == NULL) {
        goto fail;
    }
    n_pieces = PyList_Size(pieces);
    if (n_pieces < 0) {
        goto fail;
    }
    path = PyTuple_New(n_pieces);
    if (path == NULL) {
        goto fail;
    }

    idx = 0;
    while (idx < n_pieces) {
        /* the list item is a borrowed reference; the result is a new one */
        unquoted = _fast_unquote(self, PyList_GetItem(pieces, idx), -1, state);
        if (unquoted == NULL) {
            goto fail;
        }
        PyTuple_SetItem(path, idx, unquoted);     /* steals */
        idx++;
    }

    Py_DECREF(pieces);
    return path;

fail:
    Py_XDECREF(pieces);
    Py_XDECREF(path);
    return NULL;
}


/*
 * Python-callable entry point (METH_O): split `varname' at its first
 * ":" into a namespace and a remainder.
 *
 * Returns a new 2-tuple:
 *
 *   - (None, varname) if `varname' contains no ":"
 *   - (unquoted namespace, rest) otherwise; only the part before the
 *     first ":" is passed through _fast_unquote(), the rest is
 *     returned unchanged.
 *
 * A TypeError is raised if `varname' is not a (unicode) string.
 * On errors NULL is returned with an exception set.
 */
static
PyObject *
fast_split_ns(PyObject *self, PyObject *varname)
{
    PyObject *res = NULL;
    Py_ssize_t ns_idx;
    Py_ssize_t varname_len;
    PyObject *o1;
    PyObject *o2;

    /* Same explicit type check and message as the other entry points */
    if (!PyUnicode_Check(varname)) {
        PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected");
        return NULL;
    }
    varname_len = PyUnicode_GetLength(varname);
    if (varname_len < 0) {
        return NULL;
    }
    ns_idx = PyUnicode_FindChar(varname, ':', 0, varname_len, 1);
    if (ns_idx == -2) {
        return NULL;
    }

    res = PyTuple_New(2);
    if (res == NULL) {
        return NULL;
    }

    if (ns_idx == -1) {
        /* No namespace given */
        Py_INCREF(Py_None);
        PyTuple_SetItem(res, 0, Py_None);  /* steals */
        Py_INCREF(varname);
        PyTuple_SetItem(res, 1, varname);  /* steals */
        return res;
    }

    o1 = PyUnicode_Substring(varname, 0, ns_idx);
    if (o1 == NULL) {
        goto error;
    }
    /* The length of the namespace part is known: it is ns_idx */
    o2 = _fast_unquote(self, o1, ns_idx, NULL);
    Py_DECREF(o1);   /* not needed any more, whether unquoting failed or not */
    if (o2 == NULL) {
        goto error;
    }
    PyTuple_SetItem(res, 0, o2);    /* steals */
    o1 = PyUnicode_Substring(varname, ns_idx+1, varname_len);
    if (o1 == NULL) {
        goto error;
    }
    PyTuple_SetItem(res, 1, o1);    /* steals */
    return res;

error:
    Py_DECREF(res);
    return NULL;
}


/*
 * Method table of the module.
 *
 * All functions are METH_O, i.e. they take exactly one argument.
 * Note that fast_split_ns is published with a leading underscore as
 * `_fast_split_ns'.
 */
static struct PyMethodDef speedups_methods[] = {
    {"fast_unquote", fast_unquote, METH_O, PyDoc_STR("C-implementation of configmix.unquote")},
    {"fast_pathstr2path", fast_pathstr2path, METH_O, PyDoc_STR("C-implementation of configmix.pathstr2path")},
    {"_fast_split_ns", fast_split_ns, METH_O, PyDoc_STR("C-implementation of configmix.config._split_ns")},
    {NULL, NULL, 0, NULL}    /* sentinel */
};


/*
 * Py_mod_exec slot: populate a freshly created module.
 *
 * Adds the string constants `__release__', `__date__' and `__author__'
 * to the module and creates the interned strings that are kept in the
 * module state (".", "%", ":" and "").
 *
 * Returns 0 on success and -1 with an exception set on failure.
 * State members that have been created before a failure are released
 * by speedups_clear().
 */
static
int
speedups_exec(PyObject *module)
{
    struct speedups_state *sstate = PyModule_GetState(module);

    if (sstate == NULL) {
        PyErr_SetString(PyExc_ImportError, "no module state available yet");
        return -1;
    }

    /* A failure here leaves an exception set and must abort the init */
    if (PyModule_AddStringConstant(module, "__release__", release) < 0) {
        return -1;
    }
    if (PyModule_AddStringConstant(module, "__date__", date) < 0) {
        return -1;
    }
    if (PyModule_AddStringConstant(module, "__author__", "Franz Glasner") < 0) {
        return -1;
    }

    sstate->DOT = PyUnicode_FromStringAndSize(".", 1);
    if (sstate->DOT == NULL) {
        return -1;
    }
    PyUnicode_InternInPlace(&(sstate->DOT));

    sstate->QUOTE = PyUnicode_FromStringAndSize("%", 1);
    if (sstate->QUOTE == NULL) {
        return -1;
    }
    PyUnicode_InternInPlace(&(sstate->QUOTE));

    sstate->NS_SEPARATOR = PyUnicode_FromStringAndSize(":", 1);
    if (sstate->NS_SEPARATOR == NULL) {
        return -1;
    }
    PyUnicode_InternInPlace(&(sstate->NS_SEPARATOR));

    sstate->EMPTY_STR = PyUnicode_FromStringAndSize("", 0);
    if (sstate->EMPTY_STR == NULL) {
        return -1;
    }
    PyUnicode_InternInPlace(&(sstate->EMPTY_STR));

    return 0;
}


/*
 * m_traverse slot: report every object held in the module state to the
 * given visitor.
 *
 * Py_VISIT returns from this function if the visitor yields a non-zero
 * result; otherwise 0 is returned.  Nothing is visited if there is no
 * module state.
 *
 * NOTE: The function name is misspelled but it is referenced with
 *       exactly this spelling in speedups_def.
 */
static
int
speeeupds_traverse(PyObject *module, visitproc visit, void *arg)
{
    struct speedups_state *state = PyModule_GetState(module);

    if (state == NULL) {
        return 0;
    }
    Py_VISIT(state->DOT);
    Py_VISIT(state->QUOTE);
    Py_VISIT(state->NS_SEPARATOR);
    Py_VISIT(state->EMPTY_STR);
    return 0;
}


/*
 * m_clear slot: drop all references that are held in the module state.
 *
 * Py_CLEAR also resets the members to NULL.  Nothing is done if there
 * is no module state.  Always returns 0.
 */
static
int
speedups_clear(PyObject *module)
{
    struct speedups_state *state = PyModule_GetState(module);

    if (state == NULL) {
        return 0;
    }
    Py_CLEAR(state->DOT);
    Py_CLEAR(state->QUOTE);
    Py_CLEAR(state->NS_SEPARATOR);
    Py_CLEAR(state->EMPTY_STR);
    return 0;
}


/*
 * Slots for the multi-phase module initialization: speedups_exec() is
 * registered as the Py_mod_exec handler.
 */
static struct PyModuleDef_Slot speedups_slots[] = {
    {Py_mod_exec, speedups_exec},
    {0, NULL}    /* sentinel */
};


/*
 * The module definition: ties together the method table, the slots and
 * the traverse/clear handlers for the module state, which has the size
 * of a `struct speedups_state'.
 */
static struct PyModuleDef speedups_def = {
    PyModuleDef_HEAD_INIT,                      /* m_base */
    "_speedups",                                /* m_name  (relative) */
    PyDoc_STR("Speedups for configmix"),        /* m_doc */
    sizeof(struct speedups_state),              /* m_size */
    speedups_methods,                           /* m_methods */
    speedups_slots,                             /* m_slots */
    speeeupds_traverse,                         /* m_traverse */
    speedups_clear,                             /* m_clear */
    NULL                                        /* m_free */
};


/*
 * Module initialization function of the `_speedups' extension module.
 *
 * Only hands the module definition over to PyModuleDef_Init(); the
 * module contents are set up later by speedups_exec().
 */
PyMODINIT_FUNC
PyInit__speedups(void)
{
    /*
     * Use multi-phase extension module initialization (PEP 489).
     * This is Python 3.5+.
     */
    return PyModuleDef_Init(&speedups_def);
}