Mercurial > hgrepos > Python > libs > ConfigMix
comparison configmix/_speedups.c @ 549:84657447ab39
FIX: Properly raise a UnicodeEncodeError from C
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sun, 02 Jan 2022 01:00:10 +0100 |
| parents | 1cbe8b0f2b78 |
| children | 79db28e879f8 |
comparison
equal
deleted
inserted
replaced
| 548:325008573bc6 | 549:84657447ab39 |
|---|---|
| 59 return 0; /* success */ | 59 return 0; /* success */ |
| 60 } | 60 } |
| 61 | 61 |
| 62 | 62 |
| 63 #if defined(Py_LIMITED_API) | 63 #if defined(Py_LIMITED_API) |
| 64 | |
| 65 static | |
| 66 void | |
| 67 _raise_utf8_encode_error(PyObject *s, | |
| 68 Py_ssize_t start, Py_ssize_t end, | |
| 69 const char *reason) | |
| 70 { | |
| 71 /* | |
| 72 * See also: https://docs.python.org/3/c-api/exceptions.html#unicode-exception-objects | |
| 73 */ | |
| 74 PyObject *errobj = PyObject_CallFunction( | |
| 75 PyExc_UnicodeEncodeError, | |
| 76 "sOnns", | |
| 77 "utf-8", | |
| 78 s, | |
| 79 start, | |
| 80 end, | |
| 81 reason); | |
| 82 | |
| 83 if (errobj == NULL) { | |
| 84 /* cannot do anything here */ | |
| 85 return; | |
| 86 } | |
| 87 /* Make PyExc_UnicodeEncodeError owned because PyErr_Restore steals */ | |
| 88 //Py_INCREF(PyExc_UnicodeEncodeError); | |
| 89 //PyErr_Restore(PyExc_UnicodeEncodeError, errobj, NULL); | |
| 90 | |
| 91 PyErr_SetObject(PyExc_UnicodeEncodeError, errobj); | |
| 92 Py_DECREF(errobj); | |
| 93 } | |
| 94 | |
| 64 | 95 |
| 65 /* | 96 /* |
| 66 * Copyright 2001-2004 Unicode, Inc. | 97 * Copyright 2001-2004 Unicode, Inc. |
| 67 * | 98 * |
| 68 * Disclaimer | 99 * Disclaimer |
| 103 | 134 |
| 104 static | 135 static |
| 105 Py_ssize_t | 136 Py_ssize_t |
| 106 _convert_ucs4_to_utf8( | 137 _convert_ucs4_to_utf8( |
| 107 Py_UCS4 ch, | 138 Py_UCS4 ch, |
| 139 PyObject *ch_obj, /* for error messages: the string where ch comes from */ | |
| 140 Py_ssize_t ch_obj_end, /* effective length of ch_obj (error reporting) */ | |
| 108 unsigned char *targetStart, unsigned char *targetEnd, | 141 unsigned char *targetStart, unsigned char *targetEnd, |
| 109 int strict) | 142 int strict) |
| 110 { | 143 { |
| 111 const Py_UCS4 byteMask = 0xBF; | 144 const Py_UCS4 byteMask = 0xBF; |
| 112 const Py_UCS4 byteMark = 0x80; | 145 const Py_UCS4 byteMark = 0x80; |
| 115 unsigned char *target = targetStart; | 148 unsigned char *target = targetStart; |
| 116 | 149 |
| 117 if (strict) { | 150 if (strict) { |
| 118 /* UTF-16 surrogate values are illegal */ | 151 /* UTF-16 surrogate values are illegal */ |
| 119 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { | 152 if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) { |
| 120 PyErr_SetString(PyExc_UnicodeEncodeError, | 153 _raise_utf8_encode_error( |
| 121 "surrogate values not allowed"); | 154 ch_obj, |
| 155 1, ch_obj_end, | |
| 156 "surrogate values are illegal in UCS4"); | |
| 122 return -1; | 157 return -1; |
| 123 } | 158 } |
| 124 } | 159 } |
| 125 /* | 160 /* |
| 126 * Figure out how many bytes the result will require. Turn any | 161 * Figure out how many bytes the result will require. Turn any |
| 133 } else if (ch < (Py_UCS4)0x10000) { | 168 } else if (ch < (Py_UCS4)0x10000) { |
| 134 bytesToWrite = 3; | 169 bytesToWrite = 3; |
| 135 } else if (ch <= UNI_MAX_LEGAL_UTF32) { | 170 } else if (ch <= UNI_MAX_LEGAL_UTF32) { |
| 136 bytesToWrite = 4; | 171 bytesToWrite = 4; |
| 137 } else { | 172 } else { |
| 138 PyErr_SetString(PyExc_UnicodeEncodeError, | 173 _raise_utf8_encode_error( |
| 139 "max Unicode codepoint value exceeded"); | 174 ch_obj, |
| 175 1, ch_obj_end, | |
| 176 "max Unicode codepoint value exceeded"); | |
| 140 return -1; | 177 return -1; |
| 141 } | 178 } |
| 142 | 179 |
| 143 target += bytesToWrite; | 180 target += bytesToWrite; |
| 144 if (target > targetEnd) { | 181 if (target > targetEnd) { |
| 145 PyErr_SetString(PyExc_UnicodeEncodeError, | 182 _raise_utf8_encode_error( |
| 146 "target exhausted"); | 183 ch_obj, |
| 184 1, ch_obj_end, | |
| 185 "temporary target buffer exhausted"); | |
| 147 return -1; | 186 return -1; |
| 148 } | 187 } |
| 149 switch (bytesToWrite) { /* note: everything falls through. */ | 188 switch (bytesToWrite) { /* note: everything falls through. */ |
| 150 case 4: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; | 189 case 4: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; |
| 151 case 3: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; | 190 case 3: *--target = (unsigned char)((ch | byteMark) & byteMask); ch >>= 6; |
| 154 } | 193 } |
| 155 return bytesToWrite; | 194 return bytesToWrite; |
| 156 } | 195 } |
| 157 | 196 |
| 158 | 197 |
| 198 /* | |
| 199 * End of Copyright 2001-2004 Unicode, Inc. | |
| 200 */ | |
| 201 | |
| 202 | |
| 159 static | 203 static |
| 160 PyObject * | 204 PyObject * |
| 161 _hex2string(PyObject *s, Py_ssize_t end) | 205 _hex2string(PyObject *s, Py_ssize_t end) |
| 162 { | 206 { |
| 163 Py_UCS4 c; | 207 Py_UCS4 c; |
| 167 | 211 |
| 168 if (_hex2ucs4(s, end, &c) != 0) | 212 if (_hex2ucs4(s, end, &c) != 0) |
| 169 return NULL; | 213 return NULL; |
| 170 | 214 |
| 171 /* Replace the combination PyUnicode_New/PyUnicode_WriteChar */ | 215 /* Replace the combination PyUnicode_New/PyUnicode_WriteChar */ |
| 172 buf_bytes = _convert_ucs4_to_utf8(c, buf, &(buf[6]), 1); | 216 buf_bytes = _convert_ucs4_to_utf8(c, s, end+1, buf, &(buf[6]), 1); |
| 173 if (buf_bytes < 0) { | 217 if (buf_bytes < 0) { |
| 174 return NULL; | 218 return NULL; |
| 175 } | 219 } |
| 176 u = PyUnicode_FromStringAndSize((const char *)buf, buf_bytes); | 220 u = PyUnicode_FromStringAndSize((const char *)buf, buf_bytes); |
| 177 if (u == NULL) { | 221 if (u == NULL) { |
