view configmix/_speedups.c @ 547:1cbe8b0f2b78

Put the value of Py_LIMITED_API into the module dict when the C-extension is compiled against the stable API
author Franz Glasner <fzglas.hg@dom66.de>
date Sat, 01 Jan 2022 21:18:55 +0100
parents 6501fe0e116c
children 84657447ab39
line wrap: on
line source

/* -*- coding: utf-8 -*- */
/*
 * Speedups for configmix.
 *
 * :Copyright: (c) 2021, Franz Glasner. All rights reserved.
 * :License:   BSD-3-Clause. See LICENSE.txt for details.
 */

#define PY_SSIZE_T_CLEAN
#include "Python.h"


/*
 * Identification string. The "@(#)" prefix and the $Header$ keyword look
 * like what(1)/VCS keyword markers -- presumably expanded at release time
 * (TODO confirm against the release tooling).
 */
const char _id[] = "@(#)configmix._speedups $Header$";
/* Published as the module attribute `__release__' by speedups_exec(). */
static const char release[] = "|VCSRevision|";
/* Published as the module attribute `__date__' by speedups_exec(). */
static const char date[] = "|VCSJustDate|";


/*
 * Module state: holds some pre-created objects
 */
/*
 * Per-module state. All members are string objects that are created
 * (and interned) in speedups_exec() and released in speedups_clear().
 */
struct speedups_state {
    PyObject *DOT;            /* "."  -- separator used when splitting a path string */
    PyObject *QUOTE;          /* "%"  -- introduces a quoted character */
    PyObject *NS_SEPARATOR;   /* ":"  -- created in speedups_exec() */
    PyObject *EMPTY_STR;      /* ""   -- join string when re-assembling unquoted parts */
};


/*
 * Convert the hexadecimal digits found at the indexes 1 .. end-1 of the
 * string `s' into a code point value. Index 0 is skipped on purpose: it
 * holds the character that selects the escape type.
 *
 * On success 0 is returned and the value is stored into `*result'.
 * If a character is not a hexadecimal digit a ValueError is set and -1
 * is returned; `*result' is left untouched in that case.
 */
static
int
_hex2ucs4(PyObject *s, Py_ssize_t end, Py_UCS4 *result)
{
    Py_UCS4 value = 0;
    Py_UCS4 ch;
    Py_UCS4 digit;
    Py_ssize_t pos;

    for (pos = 1; pos < end; pos++) {
        ch = PyUnicode_ReadChar(s, pos);
        if ((ch >= 0x30) && (ch <= 0x39)) {         /* 0 - 9 */
            digit = ch - 0x30;
        }
        else if ((ch >= 0x61) && (ch <= 0x66)) {    /* a - f */
            digit = ch - 0x61 + 10;
        }
        else if ((ch >= 0x41) && (ch <= 0x46)) {    /* A - F */
            digit = ch - 0x41 + 10;
        }
        else {
            PyErr_SetString(PyExc_ValueError, "invalid base-16 literal");
            return -1;
        }
        /* digit < 16: shifting and or-ing equals "value * 16 + digit" */
        value = (value << 4) | digit;
    }
    *result = value;
    return 0;  /* success */
}


#if defined(Py_LIMITED_API)

/*
 * Copyright 2001-2004 Unicode, Inc.
 *
 * Disclaimer
 *
 * This source code is provided as is by Unicode, Inc. No claims are
 * made as to fitness for any particular purpose. No warranties of any
 * kind are expressed or implied. The recipient agrees to determine
 * applicability of information provided. If this file has been
 * purchased on magnetic or optical media from Unicode, Inc., the
 * sole remedy for any claim will be exchange of defective media
 * within 90 days of receipt.
 *
 * Limitations on Rights to Redistribute This Code
 *
 * Unicode, Inc. hereby grants the right to freely use the information
 * supplied in this file in the creation of products supporting the
 * Unicode Standard, and to make copies of this file in any form
 * for internal or external distribution as long as this notice
 * remains attached.
 */

/* Largest code point that _convert_ucs4_to_utf8() accepts */
#define UNI_MAX_LEGAL_UTF32 (Py_UCS4)0x0010FFFF
/* Bounds of the UTF-16 high and low surrogate ranges */
#define UNI_SUR_HIGH_START  (Py_UCS4)0xD800
#define UNI_SUR_HIGH_END    (Py_UCS4)0xDBFF
#define UNI_SUR_LOW_START   (Py_UCS4)0xDC00
#define UNI_SUR_LOW_END     (Py_UCS4)0xDFFF


/*
 * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
 * into the first byte, depending on how many bytes follow.  There are
 * as many entries in this table as there are UTF-8 sequence types
 * (i.e., one byte sequence, two byte sequence, etc.). Remember that
 * sequences for *legal* UTF-8 will be 4 or fewer bytes total.
 *
 * The table is indexed by the total length of the sequence in bytes;
 * _convert_ucs4_to_utf8() only uses the indexes 1 to 4.
 */
static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };


/*
 * Encode the single code point `ch' as UTF-8 into the buffer
 * [targetStart, targetEnd).
 *
 * If `strict' is non-zero the UTF-16 surrogate code points
 * (U+D800 .. U+DFFF) are rejected.
 *
 * Returns the number of bytes written (1 .. 4). On failure -1 is
 * returned with a Python exception set and nothing has been written.
 *
 * Errors are reported as UnicodeError (a subclass of ValueError) and
 * not as UnicodeEncodeError: the constructor of the latter requires
 * five arguments, so it cannot be instantiated from a plain message by
 * PyErr_SetString() -- a TypeError would surface instead.
 */
static
Py_ssize_t
_convert_ucs4_to_utf8(
    Py_UCS4 ch,
    unsigned char *targetStart, unsigned char *targetEnd,
    int strict)
{
    const Py_UCS4 byteMask = 0xBF;
    const Py_UCS4 byteMark = 0x80;

    Py_ssize_t bytesToWrite = 0;
    unsigned char *target = targetStart;

    if (strict) {
        /* UTF-16 surrogate values are illegal */
        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
            PyErr_SetString(PyExc_UnicodeError,
                            "surrogate values not allowed");
            return -1;
        }
    }
    /*
     * Figure out how many bytes the result will require. Illegally
     * large values (beyond plane 16) are rejected.
     */
    if (ch < (Py_UCS4)0x80) {
        bytesToWrite = 1;
    } else if (ch < (Py_UCS4)0x800) {
        bytesToWrite = 2;
    } else if (ch < (Py_UCS4)0x10000) {
        bytesToWrite = 3;
    } else if (ch <= UNI_MAX_LEGAL_UTF32) {
        bytesToWrite = 4;
    } else {
        PyErr_SetString(PyExc_UnicodeError,
                        "max Unicode codepoint value exceeded");
        return -1;
    }

    /*
     * Compare sizes before advancing the pointer: forming a pointer
     * beyond one-past-the-end of the buffer would be undefined behavior.
     */
    if ((targetEnd - targetStart) < bytesToWrite) {
        PyErr_SetString(PyExc_UnicodeError,
                        "target exhausted");
        return -1;
    }
    target += bytesToWrite;

    /* Bytes are written back to front, 6 payload bits at a time */
    switch (bytesToWrite) { /* note: everything falls through. */
    case 4: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6;
        /* fallthrough */
    case 3: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6;
        /* fallthrough */
    case 2: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6;
        /* fallthrough */
    case 1: *--target = (unsigned char) (ch | firstByteMark[bytesToWrite]);
    }
    return bytesToWrite;
}


/*
 * Stable-API variant: build a string of length 1 from the hexadecimal
 * digits at the indexes 1 .. end-1 of `s'.
 *
 * Because PyUnicode_New()/PyUnicode_WriteChar() cannot be used here the
 * code point is encoded as UTF-8 into a small local buffer first, and
 * the string is created from these bytes.
 *
 * Returns a new reference, or NULL with an exception set.
 */
static
PyObject *
_hex2string(PyObject *s, Py_ssize_t end)
{
    unsigned char utf8[6];
    Py_ssize_t utf8_len;
    Py_UCS4 codepoint;

    if (_hex2ucs4(s, end, &codepoint) != 0) {
        return NULL;
    }
    /* strict mode: surrogates are rejected */
    utf8_len = _convert_ucs4_to_utf8(codepoint, utf8, utf8 + sizeof(utf8), 1);
    if (utf8_len < 0) {
        return NULL;
    }
    /* A NULL result already carries the exception: just hand it through */
    return PyUnicode_FromStringAndSize((const char *)utf8, utf8_len);
}

#else

/*
 * Full-API variant: build a string of length 1 from the hexadecimal
 * digits at the indexes 1 .. end-1 of `s'.
 *
 * Returns a new reference, or NULL with an exception set. A value
 * beyond U+10FFFF yields a ValueError.
 */
static
PyObject *
_hex2string(PyObject *s, Py_ssize_t end)
{
    Py_UCS4 c;
    PyObject *u = NULL;

    if (_hex2ucs4(s, end, &c) != 0)
        return NULL;
    /*
     * Eight hex digits can encode values beyond the Unicode range.
     * PyUnicode_New() reports such a `maxchar' as an internal error, so
     * reject it here with an exception that describes the bad input.
     */
    if (c > (Py_UCS4)0x10FFFF) {
        PyErr_SetString(PyExc_ValueError,
                        "max Unicode codepoint value exceeded");
        return NULL;
    }
    u = PyUnicode_New(1, c);    /* note: not in the stable API */
    if (u == NULL)
        return NULL;
    if (PyUnicode_WriteChar(u, 0, c) != 0) {
        Py_DECREF(u);
        return NULL;
    }
    return u;
}

#endif /* Py_LIMITED_API */


/*
 * Replace the quoted characters in the string `s' by the characters
 * they stand for.
 *
 * A quoted character starts with `%' and has one of these forms (H is a
 * hexadecimal digit): `%xHH', `%uHHHH' or `%UHHHHHHHH'.
 *
 * self    the module object; only used to fetch the module state when
 *         `sstate' is NULL
 * s       the string to unquote
 * s_len   the length of `s', or a negative value to have it determined
 *         here
 * sstate  the module state, or NULL
 *
 * Returns a new reference. If `s' is empty or contains no `%' at all,
 * `s' itself is returned (with a new reference). On errors NULL is
 * returned with an exception set; malformed quotes yield a ValueError.
 */
static
PyObject *
_fast_unquote(PyObject *self, PyObject *s, Py_ssize_t s_len, struct speedups_state *sstate)
{
    Py_ssize_t find;
    Py_ssize_t parts_len;
    PyObject *res;
    PyObject *res_parts = NULL;
    PyObject *parts = NULL;
    PyObject *o;
    PyObject *pb;
    Py_ssize_t pb_len;
    Py_ssize_t esc_len;
    Py_ssize_t i;
    Py_UCS4 c;

    if (s_len < 0) {
        s_len = PyUnicode_GetLength(s);
        if (s_len < 0) {
            return NULL;
        }
    }
    if (s_len == 0) {
        Py_INCREF(s);
        return s;
    }
    find = PyUnicode_FindChar(s, '%', 0, s_len, 1);
    if (find == -2) {
        return NULL;
    }
    if (find == -1) {
        /* Nothing is quoted: no need to build a new string */
        Py_INCREF(s);
        return s;
    }

    if (sstate == NULL) {
        sstate = PyModule_GetState(self);
        if (sstate == NULL) {
            PyErr_SetString(PyExc_RuntimeError, "no module state available");
            return NULL;
        }
    }
    parts = PyUnicode_Split(s, sstate->QUOTE, -1);
    if (parts == NULL) {
        goto error;
    }
    parts_len = PyList_Size(parts);
    if (parts_len < 0) {
        goto error;
    }
    /*
     * The first part is taken as is. Every further part contributes two
     * items: the decoded character and the remaining plain text.
     */
    res_parts = PyTuple_New((parts_len-1)*2 + 1);
    if (res_parts == NULL) {
        goto error;
    }

    o = PyList_GetItem(parts, 0);   /* borrowed */
    if (o == NULL) {
        goto error;
    }
    /*
     * The first item may be also the empty string if `s' starts with
     * a quoted character.
     */
    Py_INCREF(o);   /* because PyTuple_SetItem steals -- and o is borrowed */
    if (PyTuple_SetItem(res_parts, 0, o) != 0) {
        goto error;
    }

    for (i=1; i<parts_len; i++) {
        pb = PyList_GetItem(parts, i);   /* borrowed */
        if (pb == NULL) {
            goto error;
        }
        pb_len = PyUnicode_GetLength(pb);
        if (pb_len < 0) {
            /* A real error: do not mask it with a ValueError */
            goto error;
        }
        if (pb_len < 1) {
            PyErr_SetString(PyExc_ValueError, "unknown quote syntax string");
            goto error;
        }
        /*
         * The first character selects the type of the quote. `esc_len'
         * is the length of the type character plus its hex digits.
         */
        c = PyUnicode_ReadChar(pb, 0);
        switch (c) {
        case 0x55:   /* U */
            esc_len = 9;
            break;
        case 0x75:   /* u */
            esc_len = 5;
            break;
        case 0x78:   /* x */
            esc_len = 3;
            break;
        default:
            PyErr_SetString(PyExc_ValueError, "unknown quote syntax string");
            goto error;
        }
        if (pb_len < esc_len) {
            PyErr_SetString(PyExc_ValueError, "quote syntax: length too small");
            goto error;
        }
        o = _hex2string(pb, esc_len);
        if (o == NULL) {
            goto error;
        }
        if (PyTuple_SetItem(res_parts, (i-1)*2 + 1, o) != 0) {   /* steals */
            goto error;
        }
        /* The rest of the part is plain text */
        o = PyUnicode_Substring(pb, esc_len, pb_len);
        if (o == NULL) {
            goto error;
        }
        if (PyTuple_SetItem(res_parts, i*2, o) != 0) {    /* steals */
            goto error;
        }
    }

    res = PyUnicode_Join(sstate->EMPTY_STR, res_parts);
    if (res == NULL) {
        goto error;
    }
    Py_DECREF(parts);
    Py_DECREF(res_parts);
    return res;

error:
    Py_XDECREF(res_parts);
    Py_XDECREF(parts);
    return NULL;
}


/*
 * Module function `fast_unquote(s)': unquote the string `s'.
 *
 * `self' is the module object. Returns a new reference, or NULL with an
 * exception set. A TypeError is raised if `s' is not a string.
 */
static
PyObject *
fast_unquote(PyObject *self, PyObject *s)
{
    /* Same explicit type check (and message) as fast_pathstr2path() */
    if (!PyUnicode_Check(s)) {
        PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected");
        return NULL;
    }
    return _fast_unquote(self, s, -1, NULL);
}


/*
 * Module function `fast_pathstr2path(varname)': split `varname' at the
 * dots and unquote every resulting part.
 *
 * `self' is the module object. Returns a new tuple with the unquoted
 * parts; an empty `varname' yields the empty tuple. On errors NULL is
 * returned with an exception set (TypeError if `varname' is not a
 * string).
 */
static
PyObject *
fast_pathstr2path(PyObject *self, PyObject *varname)
{
    struct speedups_state *sstate;
    PyObject *segments;
    PyObject *path = NULL;
    PyObject *segment;
    PyObject *unquoted;
    Py_ssize_t name_len;
    Py_ssize_t count;
    Py_ssize_t idx;

    if (!PyUnicode_Check(varname)) {
        PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected");
        return NULL;
    }
    name_len = PyUnicode_GetLength(varname);
    if (name_len < 0) {
        return NULL;
    }
    if (name_len == 0) {
        return PyTuple_New(0);
    }

    sstate = PyModule_GetState(self);
    if (sstate == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "no module state available");
        return NULL;
    }

    segments = PyUnicode_Split(varname, sstate->DOT, -1);
    if (segments == NULL) {
        /* nothing has been allocated yet */
        return NULL;
    }
    count = PyList_Size(segments);
    if (count < 0) {
        goto fail;
    }
    path = PyTuple_New(count);
    if (path == NULL) {
        goto fail;
    }
    for (idx = 0; idx < count; idx++) {
        segment = PyList_GetItem(segments, idx);   /* borrowed */
        /* hand over the state to avoid fetching it for every segment */
        unquoted = _fast_unquote(self, segment, -1, sstate);
        if (unquoted == NULL) {
            goto fail;
        }
        PyTuple_SetItem(path, idx, unquoted);      /* steals */
    }

    Py_DECREF(segments);
    return path;

fail:
    Py_DECREF(segments);
    Py_XDECREF(path);
    return NULL;
}


/*
 * Module function `_fast_split_ns(varname)': split `varname' at the
 * first `:' into a namespace and the remaining name.
 *
 * `self' is the module object. Returns a new 2-tuple:
 *
 *   (None, varname)             if `varname' contains no `:'
 *   (unquoted namespace, rest)  otherwise
 *
 * Only the namespace is unquoted; the rest is taken as is. On errors
 * NULL is returned with an exception set.
 */
static
PyObject *
fast_split_ns(PyObject *self, PyObject *varname)
{
    PyObject *pair;
    PyObject *raw_ns;
    PyObject *ns;
    PyObject *rest;
    Py_ssize_t total_len;
    Py_ssize_t sep_pos;

    total_len = PyUnicode_GetLength(varname);
    if (total_len < 0) {
        return NULL;
    }
    sep_pos = PyUnicode_FindChar(varname, ':', 0, total_len, 1);
    if (sep_pos == -2) {
        return NULL;
    }

    /* Both outcomes need the result tuple */
    pair = PyTuple_New(2);
    if (pair == NULL) {
        return NULL;
    }

    if (sep_pos == -1) {
        /* no namespace given */
        Py_INCREF(Py_None);
        PyTuple_SetItem(pair, 0, Py_None);  /* steals */
        Py_INCREF(varname);
        PyTuple_SetItem(pair, 1, varname);  /* steals */
        return pair;
    }

    raw_ns = PyUnicode_Substring(varname, 0, sep_pos);
    if (raw_ns == NULL) {
        goto fail;
    }
    ns = _fast_unquote(self, raw_ns, sep_pos, NULL);
    /* the raw namespace is not needed any more -- whatever the outcome */
    Py_DECREF(raw_ns);
    if (ns == NULL) {
        goto fail;
    }
    PyTuple_SetItem(pair, 0, ns);       /* steals */

    rest = PyUnicode_Substring(varname, sep_pos + 1, total_len);
    if (rest == NULL) {
        goto fail;
    }
    PyTuple_SetItem(pair, 1, rest);     /* steals */
    return pair;

fail:
    Py_DECREF(pair);
    return NULL;
}


/*
 * The functions of the module. Every function takes exactly one
 * argument (METH_O).
 */
static struct PyMethodDef speedups_methods[] = {
    {
        "fast_unquote",
        fast_unquote,
        METH_O,
        PyDoc_STR("C-implementation of configmix.unquote")
    },
    {
        "fast_pathstr2path",
        fast_pathstr2path,
        METH_O,
        PyDoc_STR("C-implementation of configmix.pathstr2path")
    },
    {
        "_fast_split_ns",
        fast_split_ns,
        METH_O,
        PyDoc_STR("C-implementation of configmix.config._split_ns")
    },
    {NULL, NULL, 0, NULL}   /* sentinel */
};

/*
 * STRINGIFY turns its argument into a string literal as written.
 * XSTRINGIFY adds a level of indirection so that a macro argument is
 * expanded first and its *value* is turned into the string literal.
 */
#define STRINGIFY(s) #s
#define XSTRINGIFY(s) STRINGIFY(s)


/*
 * Py_mod_exec slot: populate the freshly created module.
 *
 * Adds the string constants `__release__', `__date__' and `__author__'
 * (and `Py_LIMITED_API' when compiled against the stable API) to the
 * module and creates the interned string objects of the module state.
 *
 * Returns 0 on success and -1 with an exception set on failure.
 */
static
int
speedups_exec(PyObject *module)
{
    struct speedups_state *sstate = PyModule_GetState(module);

    if (sstate == NULL) {
        PyErr_SetString(PyExc_ImportError, "no module state available yet");
        return -1;
    }

    /*
     * Every call can fail: returning success while an exception is
     * pending must be avoided.
     */
    if (PyModule_AddStringConstant(module, "__release__", release) != 0) {
        return -1;
    }
    if (PyModule_AddStringConstant(module, "__date__", date) != 0) {
        return -1;
    }
    if (PyModule_AddStringConstant(module, "__author__", "Franz Glasner") != 0) {
        return -1;
    }
#if defined(Py_LIMITED_API)
    if (PyModule_AddStringConstant(module, "Py_LIMITED_API", XSTRINGIFY(Py_LIMITED_API)) != 0) {
        return -1;
    }
#endif

    /*
     * Objects that are already stored in the state when a later step
     * fails are released by speedups_clear().
     */
    sstate->DOT = PyUnicode_FromStringAndSize(".", 1);
    if (sstate->DOT == NULL) {
        return -1;
    }
    PyUnicode_InternInPlace(&(sstate->DOT));

    sstate->QUOTE = PyUnicode_FromStringAndSize("%", 1);
    if (sstate->QUOTE == NULL) {
        return -1;
    }
    PyUnicode_InternInPlace(&(sstate->QUOTE));

    sstate->NS_SEPARATOR = PyUnicode_FromStringAndSize(":", 1);
    if (sstate->NS_SEPARATOR == NULL) {
        return -1;
    }
    PyUnicode_InternInPlace(&(sstate->NS_SEPARATOR));

    sstate->EMPTY_STR = PyUnicode_FromStringAndSize("", 0);
    if (sstate->EMPTY_STR == NULL) {
        return -1;
    }
    PyUnicode_InternInPlace(&(sstate->EMPTY_STR));

    return 0;
}


/*
 * m_traverse: report every object held in the module state to the
 * garbage collector. A module without state has nothing to report.
 */
static
int
speeeupds_traverse(PyObject *module, visitproc visit, void *arg)
{
    struct speedups_state *sstate = PyModule_GetState(module);

    if (sstate == NULL) {
        return 0;
    }
    /* Py_VISIT returns from the function if a visit call fails */
    Py_VISIT(sstate->DOT);
    Py_VISIT(sstate->QUOTE);
    Py_VISIT(sstate->NS_SEPARATOR);
    Py_VISIT(sstate->EMPTY_STR);
    return 0;
}


/*
 * m_clear: drop the references to all objects held in the module state.
 * Always returns 0; a module without state is left alone.
 */
static
int
speedups_clear(PyObject *module)
{
    struct speedups_state *sstate = PyModule_GetState(module);

    if (sstate == NULL) {
        return 0;
    }
    /* Py_CLEAR also resets the members to NULL */
    Py_CLEAR(sstate->DOT);
    Py_CLEAR(sstate->QUOTE);
    Py_CLEAR(sstate->NS_SEPARATOR);
    Py_CLEAR(sstate->EMPTY_STR);
    return 0;
}


/*
 * Slots for the multi-phase initialization: speedups_exec() is
 * registered as the Py_mod_exec slot.
 */
static struct PyModuleDef_Slot speedups_slots[] = {
    {Py_mod_exec, speedups_exec},
    {0, NULL}    /* sentinel */
};


static struct PyModuleDef speedups_def = {
    PyModuleDef_HEAD_INIT,                      /* m_base */
    "_speedups",                                /* m_name  (relative) */
    PyDoc_STR("Speedups for configmix"),        /* m_doc */
    sizeof(struct speedups_state),              /* m_size */
    speedups_methods,                           /* m_methods */
    speedups_slots,                             /* m_slots */
    speeeupds_traverse,                         /* m_traverse */
    speedups_clear,                             /* m_clear */
    NULL                                        /* m_free */
};


/*
 * Module initialization function: it just hands the module definition
 * to PyModuleDef_Init() and returns the result. The module itself is
 * populated by speedups_exec(), which is registered in speedups_slots.
 */
PyMODINIT_FUNC
PyInit__speedups(void)
{
    /*
     * Use multi-phase extension module initialization (PEP 489).
     * This is Python 3.5+.
     */
    return PyModuleDef_Init(&speedups_def);
}