Mercurial > hgrepos > Python > libs > ConfigMix
view configmix/_speedups.c @ 553:9d2bd411f5c5
Do not rstrip() the remaining variable name when parsing out filters from variable names
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sun, 02 Jan 2022 20:43:24 +0100 |
| parents | 39e5d07d8dbc |
| children | 36d7aa000435 |
line wrap: on
line source
/* -*- coding: utf-8 -*- */ /* * Speedups for configmix. * * :Copyright: (c) 2021, Franz Glasner. All rights reserved. * :License: BSD-3-Clause. See LICENSE.txt for details. */ #define PY_SSIZE_T_CLEAN #include "Python.h" const char _id[] = "@(#)configmix._speedups $Header$"; static const char release[] = "|VCSRevision|"; static const char date[] = "|VCSJustDate|"; /* * Module state holds pre-created some objects */ struct speedups_state { PyObject *DOT; PyObject *QUOTE; PyObject *NS_SEPARATOR; PyObject *FILTER_SEPARATOR; PyObject *EMPTY_STR; PyObject *QUOTE_MAP; }; static int _hex2ucs4(PyObject *s, Py_ssize_t end, Py_UCS4 *result) { Py_ssize_t i; Py_UCS4 c; Py_UCS4 r = 0; for (i=1; i < end; i++) { r *= 16; c = PyUnicode_ReadChar(s, i); if ((c >= 48) && (c <= 57)) { /* 0 - 9 */ r += (c - 48); } else { if ((c >= 97) && (c <= 102)) { /* a - f */ r += (c - 87); } else { if ((c >= 65) && (c <= 70)) { /* A - F */ r += (c - 55); } else { PyErr_SetString(PyExc_ValueError, "invalid base-16 literal"); return -1; } } } } *result = r; return 0; /* success */ } #if defined(Py_LIMITED_API) static void _raise_utf8_encode_error(PyObject *s, Py_ssize_t start, Py_ssize_t end, const char *reason) { /* * See also: https://docs.python.org/3/c-api/exceptions.html#unicode-exception-objects */ PyObject *errobj = PyObject_CallFunction( PyExc_UnicodeEncodeError, "sOnns", "utf-8", s, start, end, reason); if (errobj == NULL) { /* cannot do anything here */ return; } /* Make PyExc_UnicodeEncodeError owned because PyErr_Restore steals */ //Py_INCREF(PyExc_UnicodeEncodeError); //PyErr_Restore(PyExc_UnicodeEncodeError, errobj, NULL); PyErr_SetObject(PyExc_UnicodeEncodeError, errobj); Py_DECREF(errobj); } /* * Copyright 2001-2004 Unicode, Inc. * * Disclaimer * * This source code is provided as is by Unicode, Inc. No claims are * made as to fitness for any particular purpose. No warranties of any * kind are expressed or implied. The recipient agrees to determine * applicability of information provided. If this file has been * purchased on magnetic or optical media from Unicode, Inc., the * sole remedy for any claim will be exchange of defective media * within 90 days of receipt. * * Limitations on Rights to Redistribute This Code * * Unicode, Inc. hereby grants the right to freely use the information * supplied in this file in the creation of products supporting the * Unicode Standard, and to make copies of this file in any form * for internal or external distribution as long as this notice * remains attached. */ #define UNI_MAX_LEGAL_UTF32 (Py_UCS4)0x0010FFFF #define UNI_SUR_HIGH_START (Py_UCS4)0xD800 #define UNI_SUR_HIGH_END (Py_UCS4)0xDBFF #define UNI_SUR_LOW_START (Py_UCS4)0xDC00 #define UNI_SUR_LOW_END (Py_UCS4)0xDFFF /* * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed * into the first byte, depending on how many bytes follow. There are * as many entries in this table as there are UTF-8 sequence types. * (I.e., one byte sequence, two byte... etc.). Remember that sequencs * for *legal* UTF-8 will be 4 or fewer bytes total. */ static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; static Py_ssize_t _convert_ucs4_to_utf8( Py_UCS4 ch, PyObject *ch_obj, /* for error messages: the string where ch comes from */ Py_ssize_t ch_obj_end, /* effective length of ch_obj (error reporting) */ unsigned char *targetStart, unsigned char *targetEnd, int strict) { const Py_UCS4 byteMask = 0xBF; const Py_UCS4 byteMark = 0x80; Py_ssize_t bytesToWrite = 0; unsigned char *target = targetStart; if (strict) { /* UTF-16 surrogate values are illegal */ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { _raise_utf8_encode_error( ch_obj, 1, ch_obj_end, "surrogate values are illegal in UCS4"); return -1; } } /* * Figure out how many bytes the result will require. Turn any * illegally large UTF32 things (> Plane 17) into replacement chars. */ if (ch < (Py_UCS4)0x80) { bytesToWrite = 1; } else if (ch < (Py_UCS4)0x800) { bytesToWrite = 2; } else if (ch < (Py_UCS4)0x10000) { bytesToWrite = 3; } else if (ch <= UNI_MAX_LEGAL_UTF32) { bytesToWrite = 4; } else { _raise_utf8_encode_error( ch_obj, 1, ch_obj_end, "max Unicode codepoint value exceeded"); return -1; } target += bytesToWrite; if (target > targetEnd) { _raise_utf8_encode_error( ch_obj, 1, ch_obj_end, "temporary target buffer exhausted"); return -1; } switch (bytesToWrite) { /* note: everything falls through. */ case 4: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; case 3: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; case 2: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; case 1: *--target = (unsigned char) (ch | firstByteMark[bytesToWrite]); } return bytesToWrite; } /* * End of Copyright 2001-2004 Unicode, Inc. */ static PyObject * _hex2string(PyObject *s, Py_ssize_t end) { Py_UCS4 c; unsigned char buf[6]; Py_ssize_t buf_bytes; PyObject *u; if (_hex2ucs4(s, end, &c) != 0) return NULL; /* Replace the combination PyUniode_New/PyUnicode_WriteChar */ buf_bytes = _convert_ucs4_to_utf8(c, s, end+1, buf, &(buf[6]), 1); if (buf_bytes < 0) { return NULL; } u = PyUnicode_FromStringAndSize((const char *)buf, buf_bytes); if (u == NULL) { return NULL; } return u; } #else static PyObject * _hex2string(PyObject *s, Py_ssize_t end) { Py_UCS4 c; PyObject *u = NULL; if (_hex2ucs4(s, end, &c) != 0) return NULL; u = PyUnicode_New(1, c); /* ARGH: not in the stable API */ if (u == NULL) return NULL; if (PyUnicode_WriteChar(u, 0, c) != 0) { Py_DECREF(u); return NULL; } return u; } #endif /* Py_LIMITED_API */ static PyObject * _fast_unquote(PyObject *s, Py_ssize_t s_len, PyObject *self, struct speedups_state *sstate) { Py_ssize_t find; Py_ssize_t parts_len; PyObject *res; PyObject *res_parts = NULL; PyObject *parts = NULL; PyObject *o; PyObject *pb; Py_ssize_t pb_len; Py_ssize_t i; Py_UCS4 c; if (s_len < 0) { s_len = PyUnicode_GetLength(s); if (s_len < 0) { return NULL; } } if (s_len == 0) { Py_INCREF(s); return s; } find = PyUnicode_FindChar(s, '%', 0, s_len, 1); if (find == -2) { return NULL; } if (find == -1) { Py_INCREF(s); return s; } if (sstate == NULL) { sstate = PyModule_GetState(self); if (sstate == NULL) { PyErr_SetString(PyExc_RuntimeError, "no module state available"); return NULL; } } parts = PyUnicode_Split(s, sstate->QUOTE, -1); if (parts == NULL) { goto error; } parts_len = PyList_Size(parts); if (parts_len < 0) { goto error; } res_parts = PyTuple_New((parts_len-1)*2 + 1); if (res_parts == NULL) { goto error; } o = PyList_GetItem(parts, 0); /* borrowed */ if (o == NULL) { goto error; } /* * The first item may be also the empty string if `s' starts with * a quoted character. */ Py_INCREF(o); /* because PyTuple_SetItem steals -- and o is borrowed */ PyTuple_SetItem(res_parts, 0, o); for (i=1; i<parts_len; i++) { pb = PyList_GetItem(parts, i); /* borrowed */ pb_len = PyUnicode_GetLength(pb); if (pb_len < 1) { PyErr_SetString(PyExc_ValueError, "unknown quote syntax string"); goto error; } c = PyUnicode_ReadChar(pb, 0); switch (c) { case 0x55: /* U */ if (pb_len < 9) { PyErr_SetString(PyExc_ValueError, "quote syntax: length too small"); goto error; } o = _hex2string(pb, 9); if (o == NULL) { goto error; } PyTuple_SetItem(res_parts, (i-1)*2 + 1, o); /* steals */ o = PyUnicode_Substring(pb, 9, pb_len); if (o == NULL) { goto error; } PyTuple_SetItem(res_parts, i*2, o); /* steals */ break; case 0x75: /* u */ if (pb_len < 5) { PyErr_SetString(PyExc_ValueError, "quote syntax: length too small"); goto error; } o = _hex2string(pb, 5); if (o == NULL) { goto error; } PyTuple_SetItem(res_parts, (i-1)*2 + 1, o); /* steals */ o = PyUnicode_Substring(pb, 5, pb_len); if (o == NULL) { goto error; } PyTuple_SetItem(res_parts, i*2, o); /* steals */ break; case 0x78: /* x */ if (pb_len < 3) { PyErr_SetString(PyExc_ValueError, "quote syntax: length too small"); goto error; } o = _hex2string(pb, 3); if (o == NULL) { goto error; } PyTuple_SetItem(res_parts, (i-1)*2 + 1, o); /* steals */ o = PyUnicode_Substring(pb, 3, pb_len); if (o == NULL) { goto error; } PyTuple_SetItem(res_parts, i*2, o); /* steals */ break; default: PyErr_SetString(PyExc_ValueError, "unknown quote syntax string"); goto error; } } res = PyUnicode_Join(sstate->EMPTY_STR, res_parts); if (res == NULL) { goto error; } Py_DECREF(parts); Py_DECREF(res_parts); return res; error: Py_XDECREF(res_parts); Py_XDECREF(parts); return NULL; } static PyObject * fast_unquote(PyObject *self, PyObject *s) { return _fast_unquote(s, -1, self, NULL); } static PyObject * fast_quote(PyObject *self, PyObject *s) { Py_ssize_t s_len; Py_ssize_t i; Py_UCS4 c; int need_quoting; struct speedups_state *sstate; s_len = PyUnicode_GetLength(s); if (s_len < 0) { return NULL; } if (s_len == 0) { Py_INCREF(s); return s; } need_quoting = 0; for (i=0; i<s_len; i++) { c = PyUnicode_ReadChar(s, i); /* type already checked */ switch (c) { case 0x25: case 0x2e: case 0x3a: case 0x23: case 0x7c: case 0x22: case 0x27: case 0x7b: case 0x7d: case 0x5b: case 0x5d: need_quoting = 1; i = s_len; /* break the for-loop */ break; default: /* VOID */ ; } } if (!need_quoting) { Py_INCREF(s); return s; } sstate = PyModule_GetState(self); if (sstate == NULL) { PyErr_SetString(PyExc_RuntimeError, "no module state available"); return NULL; } return PyUnicode_Translate(s, sstate->QUOTE_MAP, "strict"); } static PyObject * fast_pathstr2path(PyObject *self, PyObject *varname) { Py_ssize_t varname_len; PyObject *parts = NULL; Py_ssize_t parts_len; PyObject *res = NULL; Py_ssize_t i; PyObject *o; PyObject *u; struct speedups_state *sstate; if (!PyUnicode_Check(varname)) { PyErr_SetString(PyExc_TypeError, "a (unicode) string type is expected"); return NULL; } varname_len = PyUnicode_GetLength(varname); if (varname_len < 0) { return NULL; } if (varname_len == 0) { return PyTuple_New(0); } sstate = PyModule_GetState(self); if (sstate == NULL) { PyErr_SetString(PyExc_RuntimeError, "no module state available"); return NULL; } parts = PyUnicode_Split(varname, sstate->DOT, -1); if (parts == NULL) { goto error; } parts_len = PyList_Size(parts); if (parts_len < 0) { goto error; } res = PyTuple_New(parts_len); if (res == NULL) { goto error; } for (i=0; i < parts_len; i++) { o = PyList_GetItem(parts, i); /* borrowed */ u = _fast_unquote(o, -1, NULL, sstate); if (u == NULL) { goto error; } PyTuple_SetItem(res, i, u); /* steals */ } Py_DECREF(parts); return res; error: Py_XDECREF(parts); Py_XDECREF(res); return NULL; } static PyObject * _fast_split_filters(PyObject *varname, PyObject *self, struct speedups_state *sstate) { Py_ssize_t varname_len; Py_ssize_t sep; PyObject *res = NULL; PyObject *filters = NULL; PyObject *name = NULL; PyObject *tmp; varname_len = PyUnicode_GetLength(varname); if (varname_len < 0) { return NULL; } if (varname_len == 0) { sep = -1; } else { sep = PyUnicode_FindChar(varname, '|', 0, varname_len, 1); if (sep == -2) { return NULL; } } if (sep == -1) { res = PyTuple_New(2); if (res == NULL) { goto error; } Py_INCREF(varname); /* because PyTuple_SetItem steals */ PyTuple_SetItem(res, 0, varname); /* steals */ filters = PyList_New(0); if (filters == NULL) { goto error; } PyTuple_SetItem(res, 1, filters); /* steals */ return res; } name = PyUnicode_Substring(varname, 0, sep); if (name == NULL) { goto error; } filters = PyUnicode_Substring(varname, sep+1, varname_len); if (filters == NULL) { goto error; } tmp = PyObject_CallMethod(filters, "strip", NULL); if (tmp == NULL) { goto error; } Py_DECREF(filters); filters = tmp; if (PyObject_Not(filters)) { Py_DECREF(filters); filters = NULL; res = PyTuple_New(2); if (res == NULL) { goto error; } PyTuple_SetItem(res, 0, name); /* steals */ name = NULL; /* no ownership any more */ filters = PyList_New(0); if (filters == NULL) { goto error; } PyTuple_SetItem(res, 1, filters); /* steals */ return res; } if (sstate == NULL) { sstate = PyModule_GetState(self); if (sstate == NULL) { PyErr_SetString(PyExc_RuntimeError, "no module state available"); goto error; } } tmp = PyUnicode_Split(filters, sstate->FILTER_SEPARATOR, -1); if (tmp == NULL) { goto error; } Py_DECREF(filters); filters = tmp; res = PyTuple_New(2); if (res == NULL) { goto error; } PyTuple_SetItem(res, 0, name); /* steals -- ownership changed */ PyTuple_SetItem(res, 1, filters); /* steals -- ownership changed */ return res; error: Py_XDECREF(res); Py_XDECREF(filters); Py_XDECREF(name); return NULL; } static PyObject * fast_split_filters(PyObject *self, PyObject *varname) { return _fast_split_filters(varname, self, NULL); } static PyObject * fast_split_ns(PyObject *self, PyObject *varname) { PyObject *res = NULL; Py_ssize_t ns_idx; Py_ssize_t varname_len; PyObject *o1; PyObject *o2; varname_len = PyUnicode_GetLength(varname); if (varname_len < 0) { return NULL; } ns_idx = PyUnicode_FindChar(varname, ':', 0, varname_len, 1); if (ns_idx == -2) { return NULL; } if (ns_idx == -1) { res = PyTuple_New(2); if (res == NULL) { return NULL; } Py_INCREF(Py_None); PyTuple_SetItem(res, 0, Py_None); /* steals */ Py_INCREF(varname); PyTuple_SetItem(res, 1, varname); /* steals */ return res; } res = PyTuple_New(2); if (res == NULL) { return NULL; } o1 = PyUnicode_Substring(varname, 0, ns_idx); if (o1 == NULL) { Py_DECREF(res); return NULL; } o2 = _fast_unquote(o1, ns_idx, self, NULL); if (o2 == NULL) { Py_DECREF(o1); Py_DECREF(res); return NULL; } Py_DECREF(o1); PyTuple_SetItem(res, 0, o2); /* steals */ o1 = PyUnicode_Substring(varname, ns_idx+1, varname_len); if (o1 == NULL) { Py_DECREF(res); return NULL; } PyTuple_SetItem(res, 1, o1); /* steals */ return res; } static struct PyMethodDef speedups_methods[] = { {"fast_unquote", fast_unquote, METH_O, PyDoc_STR("C-implementation of configmix.unquote")}, {"fast_quote", fast_quote, METH_O, PyDoc_STR("C-implementation of configmix.quote")}, {"fast_pathstr2path", fast_pathstr2path, METH_O, PyDoc_STR("C-implementation of configmix.pathstr2path")}, {"_fast_split_filters", fast_split_filters, METH_O, PyDoc_STR("C-implementation of configmix.config._split_filters")}, {"_fast_split_ns", fast_split_ns, METH_O, PyDoc_STR("C-implementation of configmix.config._split_ns")}, {NULL, NULL, 0, NULL} }; #define STRINGIFY(s) #s #define XSTRINGIFY(s) STRINGIFY(s) static int speedups_exec(PyObject *module) { struct speedups_state *sstate = PyModule_GetState(module); if (sstate == NULL) { PyErr_SetString(PyExc_ImportError, "no module state available yet"); return -1; } PyModule_AddStringConstant(module, "__release__", release); PyModule_AddStringConstant(module, "__date__", date); PyModule_AddStringConstant(module, "__author__", "Franz Glasner"); #if defined(Py_LIMITED_API) PyModule_AddStringConstant(module, "Py_LIMITED_API", XSTRINGIFY(Py_LIMITED_API)); #endif sstate->DOT = PyUnicode_FromStringAndSize(".", 1); if (sstate->DOT == NULL) { return -1; } PyUnicode_InternInPlace(&(sstate->DOT)); sstate->QUOTE = PyUnicode_FromStringAndSize("%", 1); if (sstate->QUOTE == NULL) { return -1; } PyUnicode_InternInPlace(&(sstate->QUOTE)); sstate->NS_SEPARATOR = PyUnicode_FromStringAndSize(":", 1); if (sstate->NS_SEPARATOR == NULL) { return -1; } PyUnicode_InternInPlace(&(sstate->NS_SEPARATOR)); sstate->FILTER_SEPARATOR = PyUnicode_FromStringAndSize("|", 1); if (sstate->FILTER_SEPARATOR == NULL) { return -1; } PyUnicode_InternInPlace(&(sstate->FILTER_SEPARATOR)); sstate->EMPTY_STR = PyUnicode_FromStringAndSize("", 0); if (sstate->EMPTY_STR == NULL) { return -1; } PyUnicode_InternInPlace(&(sstate->EMPTY_STR)); sstate->QUOTE_MAP = Py_BuildValue( "{IsIsIsIsIsIsIsIsIsIsIs}", 0x25, "%x25", /* QUOTE: % */ 0x2e, "%x2e", /* DOT: . */ 0x3a, "%x3a", /* NS_SEPARATOR: : */ 0x23, "%x23", /* COMMENT/anchor: # */ 0x7c, "%x7c", /* FILTER_SEPARATOR: | */ 0x22, "%x22", 0x27, "%x27", 0x7b, "%x7b", 0x7d, "%x7d", 0x5b, "%x5b", 0x5d, "%x5d"); if (sstate->QUOTE_MAP == NULL) { return -1; } return 0; } static int speeeupds_traverse(PyObject *module, visitproc visit, void *arg) { struct speedups_state *sstate = PyModule_GetState(module); if (sstate != NULL) { Py_VISIT(sstate->DOT); Py_VISIT(sstate->QUOTE); Py_VISIT(sstate->NS_SEPARATOR); Py_VISIT(sstate->FILTER_SEPARATOR); Py_VISIT(sstate->EMPTY_STR); Py_VISIT(sstate->QUOTE_MAP); } return 0; } static int speedups_clear(PyObject *module) { struct speedups_state *sstate = PyModule_GetState(module); if (sstate != NULL) { Py_CLEAR(sstate->DOT); Py_CLEAR(sstate->QUOTE); Py_CLEAR(sstate->NS_SEPARATOR); Py_CLEAR(sstate->FILTER_SEPARATOR); Py_CLEAR(sstate->EMPTY_STR); Py_CLEAR(sstate->QUOTE_MAP); } return 0; } static struct PyModuleDef_Slot speedups_slots[] = { {Py_mod_exec, speedups_exec}, {0, NULL} }; static struct PyModuleDef speedups_def = { PyModuleDef_HEAD_INIT, /* m_base */ "_speedups", /* m_name (relative) */ PyDoc_STR("Speedups for configmix"), /* m_doc */ sizeof(struct speedups_state), /* m_size */ speedups_methods, /* m_methods */ speedups_slots, /* m_slots */ speeeupds_traverse, /* m_traverse */ speedups_clear, /* m_clear */ NULL /* m_free */ }; PyMODINIT_FUNC PyInit__speedups(void) { /* * Use multi-phase extension module initialization (PEP 489). * This is Python 3.5+. */ return PyModuleDef_Init(&speedups_def); }
