Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/harfbuzz/src/hb-utf.hh @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* | |
| 2 * Copyright © 2011,2012,2014 Google, Inc. | |
| 3 * | |
| 4 * This is part of HarfBuzz, a text shaping library. | |
| 5 * | |
| 6 * Permission is hereby granted, without written agreement and without | |
| 7 * license or royalty fees, to use, copy, modify, and distribute this | |
| 8 * software and its documentation for any purpose, provided that the | |
| 9 * above copyright notice and the following two paragraphs appear in | |
| 10 * all copies of this software. | |
| 11 * | |
| 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR | |
| 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | |
| 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN | |
| 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | |
| 16 * DAMAGE. | |
| 17 * | |
| 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, | |
| 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
| 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS | |
| 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO | |
| 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | |
| 23 * | |
| 24 * Google Author(s): Behdad Esfahbod | |
| 25 */ | |
| 26 | |
| 27 #ifndef HB_UTF_HH | |
| 28 #define HB_UTF_HH | |
| 29 | |
| 30 #include "hb.hh" | |
| 31 | |
| 32 #include "hb-open-type.hh" | |
| 33 | |
| 34 | |
| 35 struct hb_utf8_t | |
| 36 { | |
| 37 typedef uint8_t codepoint_t; | |
| 38 | |
| 39 static const codepoint_t * | |
| 40 next (const codepoint_t *text, | |
| 41 const codepoint_t *end, | |
| 42 hb_codepoint_t *unicode, | |
| 43 hb_codepoint_t replacement) | |
| 44 { | |
| 45 /* Written to only accept well-formed sequences. | |
| 46 * Based on ideas from ICU's U8_NEXT. | |
| 47 * Generates one "replacement" for each ill-formed byte. */ | |
| 48 | |
| 49 hb_codepoint_t c = *text++; | |
| 50 | |
| 51 if (c > 0x7Fu) | |
| 52 { | |
| 53 if (hb_in_range<hb_codepoint_t> (c, 0xC2u, 0xDFu)) /* Two-byte */ | |
| 54 { | |
| 55 unsigned int t1; | |
| 56 if (likely (text < end && | |
| 57 (t1 = text[0] - 0x80u) <= 0x3Fu)) | |
| 58 { | |
| 59 c = ((c&0x1Fu)<<6) | t1; | |
| 60 text++; | |
| 61 } | |
| 62 else | |
| 63 goto error; | |
| 64 } | |
| 65 else if (hb_in_range<hb_codepoint_t> (c, 0xE0u, 0xEFu)) /* Three-byte */ | |
| 66 { | |
| 67 unsigned int t1, t2; | |
| 68 if (likely (1 < end - text && | |
| 69 (t1 = text[0] - 0x80u) <= 0x3Fu && | |
| 70 (t2 = text[1] - 0x80u) <= 0x3Fu)) | |
| 71 { | |
| 72 c = ((c&0xFu)<<12) | (t1<<6) | t2; | |
| 73 if (unlikely (c < 0x0800u || hb_in_range<hb_codepoint_t> (c, 0xD800u, 0xDFFFu))) | |
| 74 goto error; | |
| 75 text += 2; | |
| 76 } | |
| 77 else | |
| 78 goto error; | |
| 79 } | |
| 80 else if (hb_in_range<hb_codepoint_t> (c, 0xF0u, 0xF4u)) /* Four-byte */ | |
| 81 { | |
| 82 unsigned int t1, t2, t3; | |
| 83 if (likely (2 < end - text && | |
| 84 (t1 = text[0] - 0x80u) <= 0x3Fu && | |
| 85 (t2 = text[1] - 0x80u) <= 0x3Fu && | |
| 86 (t3 = text[2] - 0x80u) <= 0x3Fu)) | |
| 87 { | |
| 88 c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3; | |
| 89 if (unlikely (!hb_in_range<hb_codepoint_t> (c, 0x10000u, 0x10FFFFu))) | |
| 90 goto error; | |
| 91 text += 3; | |
| 92 } | |
| 93 else | |
| 94 goto error; | |
| 95 } | |
| 96 else | |
| 97 goto error; | |
| 98 } | |
| 99 | |
| 100 *unicode = c; | |
| 101 return text; | |
| 102 | |
| 103 error: | |
| 104 *unicode = replacement; | |
| 105 return text; | |
| 106 } | |
| 107 | |
| 108 static const codepoint_t * | |
| 109 prev (const codepoint_t *text, | |
| 110 const codepoint_t *start, | |
| 111 hb_codepoint_t *unicode, | |
| 112 hb_codepoint_t replacement) | |
| 113 { | |
| 114 const codepoint_t *end = text--; | |
| 115 while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) | |
| 116 text--; | |
| 117 | |
| 118 if (likely (next (text, end, unicode, replacement) == end)) | |
| 119 return text; | |
| 120 | |
| 121 *unicode = replacement; | |
| 122 return end - 1; | |
| 123 } | |
| 124 | |
| 125 static unsigned int | |
| 126 strlen (const codepoint_t *text) | |
| 127 { return ::strlen ((const char *) text); } | |
| 128 | |
| 129 static unsigned int | |
| 130 encode_len (hb_codepoint_t unicode) | |
| 131 { | |
| 132 if (unicode < 0x0080u) return 1; | |
| 133 if (unicode < 0x0800u) return 2; | |
| 134 if (unicode < 0x10000u) return 3; | |
| 135 if (unicode < 0x110000u) return 4; | |
| 136 return 3; | |
| 137 } | |
| 138 | |
| 139 static codepoint_t * | |
| 140 encode (codepoint_t *text, | |
| 141 const codepoint_t *end, | |
| 142 hb_codepoint_t unicode) | |
| 143 { | |
| 144 if (unlikely (unicode >= 0xD800u && (unicode <= 0xDFFFu || unicode > 0x10FFFFu))) | |
| 145 unicode = 0xFFFDu; | |
| 146 if (unicode < 0x0080u) | |
| 147 *text++ = unicode; | |
| 148 else if (unicode < 0x0800u) | |
| 149 { | |
| 150 if (end - text >= 2) | |
| 151 { | |
| 152 *text++ = 0xC0u + (0x1Fu & (unicode >> 6)); | |
| 153 *text++ = 0x80u + (0x3Fu & (unicode )); | |
| 154 } | |
| 155 } | |
| 156 else if (unicode < 0x10000u) | |
| 157 { | |
| 158 if (end - text >= 3) | |
| 159 { | |
| 160 *text++ = 0xE0u + (0x0Fu & (unicode >> 12)); | |
| 161 *text++ = 0x80u + (0x3Fu & (unicode >> 6)); | |
| 162 *text++ = 0x80u + (0x3Fu & (unicode )); | |
| 163 } | |
| 164 } | |
| 165 else | |
| 166 { | |
| 167 if (end - text >= 4) | |
| 168 { | |
| 169 *text++ = 0xF0u + (0x07u & (unicode >> 18)); | |
| 170 *text++ = 0x80u + (0x3Fu & (unicode >> 12)); | |
| 171 *text++ = 0x80u + (0x3Fu & (unicode >> 6)); | |
| 172 *text++ = 0x80u + (0x3Fu & (unicode )); | |
| 173 } | |
| 174 } | |
| 175 return text; | |
| 176 } | |
| 177 }; | |
| 178 | |
| 179 | |
| 180 template <typename TCodepoint> | |
| 181 struct hb_utf16_xe_t | |
| 182 { | |
| 183 static_assert (sizeof (TCodepoint) == 2, ""); | |
| 184 typedef TCodepoint codepoint_t; | |
| 185 | |
| 186 static const codepoint_t * | |
| 187 next (const codepoint_t *text, | |
| 188 const codepoint_t *end, | |
| 189 hb_codepoint_t *unicode, | |
| 190 hb_codepoint_t replacement) | |
| 191 { | |
| 192 hb_codepoint_t c = *text++; | |
| 193 | |
| 194 if (likely (!hb_in_range<hb_codepoint_t> (c, 0xD800u, 0xDFFFu))) | |
| 195 { | |
| 196 *unicode = c; | |
| 197 return text; | |
| 198 } | |
| 199 | |
| 200 if (likely (c <= 0xDBFFu && text < end)) | |
| 201 { | |
| 202 /* High-surrogate in c */ | |
| 203 hb_codepoint_t l = *text; | |
| 204 if (likely (hb_in_range<hb_codepoint_t> (l, 0xDC00u, 0xDFFFu))) | |
| 205 { | |
| 206 /* Low-surrogate in l */ | |
| 207 *unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u); | |
| 208 text++; | |
| 209 return text; | |
| 210 } | |
| 211 } | |
| 212 | |
| 213 /* Lonely / out-of-order surrogate. */ | |
| 214 *unicode = replacement; | |
| 215 return text; | |
| 216 } | |
| 217 | |
| 218 static const codepoint_t * | |
| 219 prev (const codepoint_t *text, | |
| 220 const codepoint_t *start, | |
| 221 hb_codepoint_t *unicode, | |
| 222 hb_codepoint_t replacement) | |
| 223 { | |
| 224 hb_codepoint_t c = *--text; | |
| 225 | |
| 226 if (likely (!hb_in_range<hb_codepoint_t> (c, 0xD800u, 0xDFFFu))) | |
| 227 { | |
| 228 *unicode = c; | |
| 229 return text; | |
| 230 } | |
| 231 | |
| 232 if (likely (c >= 0xDC00u && start < text)) | |
| 233 { | |
| 234 /* Low-surrogate in c */ | |
| 235 hb_codepoint_t h = text[-1]; | |
| 236 if (likely (hb_in_range<hb_codepoint_t> (h, 0xD800u, 0xDBFFu))) | |
| 237 { | |
| 238 /* High-surrogate in h */ | |
| 239 *unicode = (h << 10) + c - ((0xD800u << 10) - 0x10000u + 0xDC00u); | |
| 240 text--; | |
| 241 return text; | |
| 242 } | |
| 243 } | |
| 244 | |
| 245 /* Lonely / out-of-order surrogate. */ | |
| 246 *unicode = replacement; | |
| 247 return text; | |
| 248 } | |
| 249 | |
| 250 | |
| 251 static unsigned int | |
| 252 strlen (const codepoint_t *text) | |
| 253 { | |
| 254 unsigned int l = 0; | |
| 255 while (*text++) l++; | |
| 256 return l; | |
| 257 } | |
| 258 | |
| 259 static unsigned int | |
| 260 encode_len (hb_codepoint_t unicode) | |
| 261 { | |
| 262 return unicode < 0x10000 ? 1 : 2; | |
| 263 } | |
| 264 | |
| 265 static codepoint_t * | |
| 266 encode (codepoint_t *text, | |
| 267 const codepoint_t *end, | |
| 268 hb_codepoint_t unicode) | |
| 269 { | |
| 270 if (unlikely (unicode >= 0xD800u && (unicode <= 0xDFFFu || unicode > 0x10FFFFu))) | |
| 271 unicode = 0xFFFDu; | |
| 272 if (unicode < 0x10000u) | |
| 273 *text++ = unicode; | |
| 274 else if (end - text >= 2) | |
| 275 { | |
| 276 unicode -= 0x10000u; | |
| 277 *text++ = 0xD800u + (unicode >> 10); | |
| 278 *text++ = 0xDC00u + (unicode & 0x03FFu); | |
| 279 } | |
| 280 return text; | |
| 281 } | |
| 282 }; | |
| 283 | |
| 284 typedef hb_utf16_xe_t<uint16_t> hb_utf16_t; | |
| 285 typedef hb_utf16_xe_t<OT::HBUINT16> hb_utf16_be_t; | |
| 286 | |
| 287 | |
| 288 template <typename TCodepoint, bool validate=true> | |
| 289 struct hb_utf32_xe_t | |
| 290 { | |
| 291 static_assert (sizeof (TCodepoint) == 4, ""); | |
| 292 typedef TCodepoint codepoint_t; | |
| 293 | |
| 294 static const TCodepoint * | |
| 295 next (const TCodepoint *text, | |
| 296 const TCodepoint *end HB_UNUSED, | |
| 297 hb_codepoint_t *unicode, | |
| 298 hb_codepoint_t replacement) | |
| 299 { | |
| 300 hb_codepoint_t c = *unicode = *text++; | |
| 301 if (validate && unlikely (c >= 0xD800u && (c <= 0xDFFFu || c > 0x10FFFFu))) | |
| 302 *unicode = replacement; | |
| 303 return text; | |
| 304 } | |
| 305 | |
| 306 static const TCodepoint * | |
| 307 prev (const TCodepoint *text, | |
| 308 const TCodepoint *start HB_UNUSED, | |
| 309 hb_codepoint_t *unicode, | |
| 310 hb_codepoint_t replacement) | |
| 311 { | |
| 312 hb_codepoint_t c = *unicode = *--text; | |
| 313 if (validate && unlikely (c >= 0xD800u && (c <= 0xDFFFu || c > 0x10FFFFu))) | |
| 314 *unicode = replacement; | |
| 315 return text; | |
| 316 } | |
| 317 | |
| 318 static unsigned int | |
| 319 strlen (const TCodepoint *text) | |
| 320 { | |
| 321 unsigned int l = 0; | |
| 322 while (*text++) l++; | |
| 323 return l; | |
| 324 } | |
| 325 | |
| 326 static unsigned int | |
| 327 encode_len (hb_codepoint_t unicode HB_UNUSED) | |
| 328 { | |
| 329 return 1; | |
| 330 } | |
| 331 | |
| 332 static codepoint_t * | |
| 333 encode (codepoint_t *text, | |
| 334 const codepoint_t *end HB_UNUSED, | |
| 335 hb_codepoint_t unicode) | |
| 336 { | |
| 337 if (validate && unlikely (unicode >= 0xD800u && (unicode <= 0xDFFFu || unicode > 0x10FFFFu))) | |
| 338 unicode = 0xFFFDu; | |
| 339 *text++ = unicode; | |
| 340 return text; | |
| 341 } | |
| 342 }; | |
| 343 | |
| 344 typedef hb_utf32_xe_t<uint32_t> hb_utf32_t; | |
| 345 typedef hb_utf32_xe_t<uint32_t, false> hb_utf32_novalidate_t; | |
| 346 | |
| 347 | |
| 348 struct hb_latin1_t | |
| 349 { | |
| 350 typedef uint8_t codepoint_t; | |
| 351 | |
| 352 static const codepoint_t * | |
| 353 next (const codepoint_t *text, | |
| 354 const codepoint_t *end HB_UNUSED, | |
| 355 hb_codepoint_t *unicode, | |
| 356 hb_codepoint_t replacement HB_UNUSED) | |
| 357 { | |
| 358 *unicode = *text++; | |
| 359 return text; | |
| 360 } | |
| 361 | |
| 362 static const codepoint_t * | |
| 363 prev (const codepoint_t *text, | |
| 364 const codepoint_t *start HB_UNUSED, | |
| 365 hb_codepoint_t *unicode, | |
| 366 hb_codepoint_t replacement HB_UNUSED) | |
| 367 { | |
| 368 *unicode = *--text; | |
| 369 return text; | |
| 370 } | |
| 371 | |
| 372 static unsigned int | |
| 373 strlen (const codepoint_t *text) | |
| 374 { | |
| 375 unsigned int l = 0; | |
| 376 while (*text++) l++; | |
| 377 return l; | |
| 378 } | |
| 379 | |
| 380 static unsigned int | |
| 381 encode_len (hb_codepoint_t unicode HB_UNUSED) | |
| 382 { | |
| 383 return 1; | |
| 384 } | |
| 385 | |
| 386 static codepoint_t * | |
| 387 encode (codepoint_t *text, | |
| 388 const codepoint_t *end HB_UNUSED, | |
| 389 hb_codepoint_t unicode) | |
| 390 { | |
| 391 if (unlikely (unicode >= 0x0100u)) | |
| 392 unicode = '?'; | |
| 393 *text++ = unicode; | |
| 394 return text; | |
| 395 } | |
| 396 }; | |
| 397 | |
| 398 | |
| 399 struct hb_ascii_t | |
| 400 { | |
| 401 typedef uint8_t codepoint_t; | |
| 402 | |
| 403 static const codepoint_t * | |
| 404 next (const codepoint_t *text, | |
| 405 const codepoint_t *end HB_UNUSED, | |
| 406 hb_codepoint_t *unicode, | |
| 407 hb_codepoint_t replacement HB_UNUSED) | |
| 408 { | |
| 409 *unicode = *text++; | |
| 410 if (*unicode >= 0x0080u) | |
| 411 *unicode = replacement; | |
| 412 return text; | |
| 413 } | |
| 414 | |
| 415 static const codepoint_t * | |
| 416 prev (const codepoint_t *text, | |
| 417 const codepoint_t *start HB_UNUSED, | |
| 418 hb_codepoint_t *unicode, | |
| 419 hb_codepoint_t replacement) | |
| 420 { | |
| 421 *unicode = *--text; | |
| 422 if (*unicode >= 0x0080u) | |
| 423 *unicode = replacement; | |
| 424 return text; | |
| 425 } | |
| 426 | |
| 427 static unsigned int | |
| 428 strlen (const codepoint_t *text) | |
| 429 { | |
| 430 unsigned int l = 0; | |
| 431 while (*text++) l++; | |
| 432 return l; | |
| 433 } | |
| 434 | |
| 435 static unsigned int | |
| 436 encode_len (hb_codepoint_t unicode HB_UNUSED) | |
| 437 { | |
| 438 return 1; | |
| 439 } | |
| 440 | |
| 441 static codepoint_t * | |
| 442 encode (codepoint_t *text, | |
| 443 const codepoint_t *end HB_UNUSED, | |
| 444 hb_codepoint_t unicode) | |
| 445 { | |
| 446 if (unlikely (unicode >= 0x0080u)) | |
| 447 unicode = '?'; | |
| 448 *text++ = unicode; | |
| 449 return text; | |
| 450 } | |
| 451 }; | |
| 452 | |
| 453 #endif /* HB_UTF_HH */ |
