Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/harfbuzz/src/hb-buffer-verify.cc @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* | |
| 2 * Copyright © 2022 Behdad Esfahbod | |
| 3 * | |
| 4 * This is part of HarfBuzz, a text shaping library. | |
| 5 * | |
| 6 * Permission is hereby granted, without written agreement and without | |
| 7 * license or royalty fees, to use, copy, modify, and distribute this | |
| 8 * software and its documentation for any purpose, provided that the | |
| 9 * above copyright notice and the following two paragraphs appear in | |
| 10 * all copies of this software. | |
| 11 * | |
| 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR | |
| 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | |
| 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN | |
| 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | |
| 16 * DAMAGE. | |
| 17 * | |
| 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, | |
| 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
| 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS | |
| 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO | |
| 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | |
| 23 * | |
| 24 * Google Author(s): Behdad Esfahbod | |
| 25 */ | |
| 26 | |
| 27 #include "hb.hh" | |
| 28 | |
| 29 #ifndef HB_NO_BUFFER_VERIFY | |
| 30 | |
| 31 #include "hb-buffer.hh" | |
| 32 | |
| 33 | |
| 34 #define BUFFER_VERIFY_ERROR "buffer verify error: " | |
| 35 static inline void | |
| 36 buffer_verify_error (hb_buffer_t *buffer, | |
| 37 hb_font_t *font, | |
| 38 const char *fmt, | |
| 39 ...) HB_PRINTF_FUNC(3, 4); | |
| 40 | |
| 41 static inline void | |
| 42 buffer_verify_error (hb_buffer_t *buffer, | |
| 43 hb_font_t *font, | |
| 44 const char *fmt, | |
| 45 ...) | |
| 46 { | |
| 47 va_list ap; | |
| 48 va_start (ap, fmt); | |
| 49 if (buffer->messaging ()) | |
| 50 { | |
| 51 buffer->message_impl (font, fmt, ap); | |
| 52 } | |
| 53 else | |
| 54 { | |
| 55 fprintf (stderr, "harfbuzz "); | |
| 56 vfprintf (stderr, fmt, ap); | |
| 57 fprintf (stderr, "\n"); | |
| 58 } | |
| 59 va_end (ap); | |
| 60 } | |
| 61 | |
| 62 static bool | |
| 63 buffer_verify_monotone (hb_buffer_t *buffer, | |
| 64 hb_font_t *font) | |
| 65 { | |
| 66 /* Check that clusters are monotone. */ | |
| 67 if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES || | |
| 68 buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) | |
| 69 { | |
| 70 bool is_forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); | |
| 71 | |
| 72 unsigned int num_glyphs; | |
| 73 hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); | |
| 74 | |
| 75 for (unsigned int i = 1; i < num_glyphs; i++) | |
| 76 if (info[i-1].cluster != info[i].cluster && | |
| 77 (info[i-1].cluster < info[i].cluster) != is_forward) | |
| 78 { | |
| 79 buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "clusters are not monotone."); | |
| 80 return false; | |
| 81 } | |
| 82 } | |
| 83 | |
| 84 return true; | |
| 85 } | |
| 86 | |
| 87 static bool | |
| 88 buffer_verify_unsafe_to_break (hb_buffer_t *buffer, | |
| 89 hb_buffer_t *text_buffer, | |
| 90 hb_font_t *font, | |
| 91 const hb_feature_t *features, | |
| 92 unsigned int num_features, | |
| 93 const char * const *shapers) | |
| 94 { | |
| 95 if (buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES && | |
| 96 buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) | |
| 97 { | |
| 98 /* Cannot perform this check without monotone clusters. */ | |
| 99 return true; | |
| 100 } | |
| 101 | |
| 102 /* Check that breaking up shaping at safe-to-break is indeed safe. */ | |
| 103 | |
| 104 hb_buffer_t *fragment = hb_buffer_create_similar (buffer); | |
| 105 hb_buffer_set_flags (fragment, (hb_buffer_flags_t (hb_buffer_get_flags (fragment) & ~HB_BUFFER_FLAG_VERIFY))); | |
| 106 hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer); | |
| 107 hb_buffer_set_flags (reconstruction, (hb_buffer_flags_t (hb_buffer_get_flags (reconstruction) & ~HB_BUFFER_FLAG_VERIFY))); | |
| 108 | |
| 109 unsigned int num_glyphs; | |
| 110 hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); | |
| 111 | |
| 112 unsigned int num_chars; | |
| 113 hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); | |
| 114 | |
| 115 /* Chop text and shape fragments. */ | |
| 116 bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); | |
| 117 unsigned int start = 0; | |
| 118 unsigned int text_start = forward ? 0 : num_chars; | |
| 119 unsigned int text_end = text_start; | |
| 120 for (unsigned int end = 1; end < num_glyphs + 1; end++) | |
| 121 { | |
| 122 if (end < num_glyphs && | |
| 123 (info[end].cluster == info[end-1].cluster || | |
| 124 info[end-(forward?0:1)].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)) | |
| 125 continue; | |
| 126 | |
| 127 /* Shape segment corresponding to glyphs start..end. */ | |
| 128 if (end == num_glyphs) | |
| 129 { | |
| 130 if (forward) | |
| 131 text_end = num_chars; | |
| 132 else | |
| 133 text_start = 0; | |
| 134 } | |
| 135 else | |
| 136 { | |
| 137 if (forward) | |
| 138 { | |
| 139 unsigned int cluster = info[end].cluster; | |
| 140 while (text_end < num_chars && text[text_end].cluster < cluster) | |
| 141 text_end++; | |
| 142 } | |
| 143 else | |
| 144 { | |
| 145 unsigned int cluster = info[end - 1].cluster; | |
| 146 while (text_start && text[text_start - 1].cluster >= cluster) | |
| 147 text_start--; | |
| 148 } | |
| 149 } | |
| 150 assert (text_start < text_end); | |
| 151 | |
| 152 if (0) | |
| 153 printf("start %d end %d text start %d end %d\n", start, end, text_start, text_end); | |
| 154 | |
| 155 hb_buffer_clear_contents (fragment); | |
| 156 | |
| 157 hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); | |
| 158 if (0 < text_start) | |
| 159 flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); | |
| 160 if (text_end < num_chars) | |
| 161 flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); | |
| 162 hb_buffer_set_flags (fragment, flags); | |
| 163 | |
| 164 hb_buffer_append (fragment, text_buffer, text_start, text_end); | |
| 165 if (!hb_shape_full (font, fragment, features, num_features, shapers)) | |
| 166 { | |
| 167 buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "shaping failed while shaping fragment."); | |
| 168 hb_buffer_destroy (reconstruction); | |
| 169 hb_buffer_destroy (fragment); | |
| 170 return false; | |
| 171 } | |
| 172 else if (!fragment->successful || fragment->shaping_failed) | |
| 173 { | |
| 174 hb_buffer_destroy (reconstruction); | |
| 175 hb_buffer_destroy (fragment); | |
| 176 return true; | |
| 177 } | |
| 178 hb_buffer_append (reconstruction, fragment, 0, -1); | |
| 179 | |
| 180 start = end; | |
| 181 if (forward) | |
| 182 text_start = text_end; | |
| 183 else | |
| 184 text_end = text_start; | |
| 185 } | |
| 186 | |
| 187 bool ret = true; | |
| 188 hb_buffer_diff_flags_t diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); | |
| 189 if (diff & ~HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH) | |
| 190 { | |
| 191 buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-break test failed."); | |
| 192 ret = false; | |
| 193 | |
| 194 /* Return the reconstructed result instead so it can be inspected. */ | |
| 195 hb_buffer_set_length (buffer, 0); | |
| 196 hb_buffer_append (buffer, reconstruction, 0, -1); | |
| 197 } | |
| 198 | |
| 199 hb_buffer_destroy (reconstruction); | |
| 200 hb_buffer_destroy (fragment); | |
| 201 | |
| 202 return ret; | |
| 203 } | |
| 204 | |
| 205 static bool | |
| 206 buffer_verify_unsafe_to_concat (hb_buffer_t *buffer, | |
| 207 hb_buffer_t *text_buffer, | |
| 208 hb_font_t *font, | |
| 209 const hb_feature_t *features, | |
| 210 unsigned int num_features, | |
| 211 const char * const *shapers) | |
| 212 { | |
| 213 if (buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES && | |
| 214 buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) | |
| 215 { | |
| 216 /* Cannot perform this check without monotone clusters. */ | |
| 217 return true; | |
| 218 } | |
| 219 | |
| 220 /* Check that shuffling up text before shaping at safe-to-concat points | |
| 221 * is indeed safe. */ | |
| 222 | |
| 223 /* This is what we do: | |
| 224 * | |
| 225 * 1. We shape text once. Then segment the text at all the safe-to-concat | |
| 226 * points; | |
| 227 * | |
| 228 * 2. Then we create two buffers, one containing all the even segments and | |
| 229 * one all the odd segments. | |
| 230 * | |
| 231 * 3. Because all these segments were safe-to-concat at both ends, we | |
| 232 * expect that concatenating them and shaping should NOT change the | |
| 233 * shaping results of each segment. As such, we expect that after | |
| 234 * shaping the two buffers, we still get cluster boundaries at the | |
| 235 * segment boundaries, and that those all are safe-to-concat points. | |
| 236 * Moreover, that there are NOT any safe-to-concat points within the | |
| 237 * segments. | |
| 238 * | |
| 239 * 4. Finally, we reconstruct the shaping results of the original text by | |
| 240 * simply interleaving the shaping results of the segments from the two | |
| 241 * buffers, and assert that the total shaping results is the same as | |
| 242 * the one from original buffer in step 1. | |
| 243 */ | |
| 244 | |
| 245 hb_buffer_t *fragments[2] {hb_buffer_create_similar (buffer), | |
| 246 hb_buffer_create_similar (buffer)}; | |
| 247 hb_buffer_set_flags (fragments[0], (hb_buffer_flags_t (hb_buffer_get_flags (fragments[0]) & ~HB_BUFFER_FLAG_VERIFY))); | |
| 248 hb_buffer_set_flags (fragments[1], (hb_buffer_flags_t (hb_buffer_get_flags (fragments[1]) & ~HB_BUFFER_FLAG_VERIFY))); | |
| 249 hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer); | |
| 250 hb_buffer_set_flags (reconstruction, (hb_buffer_flags_t (hb_buffer_get_flags (reconstruction) & ~HB_BUFFER_FLAG_VERIFY))); | |
| 251 hb_segment_properties_t props; | |
| 252 hb_buffer_get_segment_properties (buffer, &props); | |
| 253 hb_buffer_set_segment_properties (fragments[0], &props); | |
| 254 hb_buffer_set_segment_properties (fragments[1], &props); | |
| 255 hb_buffer_set_segment_properties (reconstruction, &props); | |
| 256 | |
| 257 unsigned num_glyphs; | |
| 258 hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); | |
| 259 | |
| 260 unsigned num_chars; | |
| 261 hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); | |
| 262 | |
| 263 bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); | |
| 264 | |
| 265 if (!forward) | |
| 266 hb_buffer_reverse (buffer); | |
| 267 | |
| 268 /* | |
| 269 * Split text into segments and collect into to fragment streams. | |
| 270 */ | |
| 271 { | |
| 272 unsigned fragment_idx = 0; | |
| 273 unsigned start = 0; | |
| 274 unsigned text_start = 0; | |
| 275 unsigned text_end = 0; | |
| 276 for (unsigned end = 1; end < num_glyphs + 1; end++) | |
| 277 { | |
| 278 if (end < num_glyphs && | |
| 279 (info[end].cluster == info[end-1].cluster || | |
| 280 info[end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) | |
| 281 continue; | |
| 282 | |
| 283 /* Accumulate segment corresponding to glyphs start..end. */ | |
| 284 if (end == num_glyphs) | |
| 285 text_end = num_chars; | |
| 286 else | |
| 287 { | |
| 288 unsigned cluster = info[end].cluster; | |
| 289 while (text_end < num_chars && text[text_end].cluster < cluster) | |
| 290 text_end++; | |
| 291 } | |
| 292 assert (text_start < text_end); | |
| 293 | |
| 294 if (0) | |
| 295 printf("start %d end %d text start %d end %d\n", start, end, text_start, text_end); | |
| 296 | |
| 297 #if 0 | |
| 298 hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); | |
| 299 if (0 < text_start) | |
| 300 flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); | |
| 301 if (text_end < num_chars) | |
| 302 flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); | |
| 303 hb_buffer_set_flags (fragment, flags); | |
| 304 #endif | |
| 305 | |
| 306 hb_buffer_append (fragments[fragment_idx], text_buffer, text_start, text_end); | |
| 307 | |
| 308 start = end; | |
| 309 text_start = text_end; | |
| 310 fragment_idx = 1 - fragment_idx; | |
| 311 } | |
| 312 } | |
| 313 | |
| 314 bool ret = true; | |
| 315 hb_buffer_diff_flags_t diff; | |
| 316 /* | |
| 317 * Shape the two fragment streams. | |
| 318 */ | |
| 319 if (!hb_shape_full (font, fragments[0], features, num_features, shapers)) | |
| 320 { | |
| 321 buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "shaping failed while shaping fragment."); | |
| 322 ret = false; | |
| 323 goto out; | |
| 324 } | |
| 325 else if (!fragments[0]->successful || fragments[0]->shaping_failed) | |
| 326 { | |
| 327 ret = true; | |
| 328 goto out; | |
| 329 } | |
| 330 if (!hb_shape_full (font, fragments[1], features, num_features, shapers)) | |
| 331 { | |
| 332 buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "shaping failed while shaping fragment."); | |
| 333 ret = false; | |
| 334 goto out; | |
| 335 } | |
| 336 else if (!fragments[1]->successful || fragments[1]->shaping_failed) | |
| 337 { | |
| 338 ret = true; | |
| 339 goto out; | |
| 340 } | |
| 341 | |
| 342 if (!forward) | |
| 343 { | |
| 344 hb_buffer_reverse (fragments[0]); | |
| 345 hb_buffer_reverse (fragments[1]); | |
| 346 } | |
| 347 | |
| 348 /* | |
| 349 * Reconstruct results. | |
| 350 */ | |
| 351 { | |
| 352 unsigned fragment_idx = 0; | |
| 353 unsigned fragment_start[2] {0, 0}; | |
| 354 unsigned fragment_num_glyphs[2]; | |
| 355 hb_glyph_info_t *fragment_info[2]; | |
| 356 for (unsigned i = 0; i < 2; i++) | |
| 357 fragment_info[i] = hb_buffer_get_glyph_infos (fragments[i], &fragment_num_glyphs[i]); | |
| 358 while (fragment_start[0] < fragment_num_glyphs[0] || | |
| 359 fragment_start[1] < fragment_num_glyphs[1]) | |
| 360 { | |
| 361 unsigned fragment_end = fragment_start[fragment_idx] + 1; | |
| 362 while (fragment_end < fragment_num_glyphs[fragment_idx] && | |
| 363 (fragment_info[fragment_idx][fragment_end].cluster == fragment_info[fragment_idx][fragment_end - 1].cluster || | |
| 364 fragment_info[fragment_idx][fragment_end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) | |
| 365 fragment_end++; | |
| 366 | |
| 367 hb_buffer_append (reconstruction, fragments[fragment_idx], fragment_start[fragment_idx], fragment_end); | |
| 368 | |
| 369 fragment_start[fragment_idx] = fragment_end; | |
| 370 fragment_idx = 1 - fragment_idx; | |
| 371 } | |
| 372 } | |
| 373 | |
| 374 if (!forward) | |
| 375 { | |
| 376 hb_buffer_reverse (buffer); | |
| 377 hb_buffer_reverse (reconstruction); | |
| 378 } | |
| 379 | |
| 380 /* | |
| 381 * Diff results. | |
| 382 */ | |
| 383 diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); | |
| 384 if (diff & ~HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH) | |
| 385 { | |
| 386 buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-concat test failed."); | |
| 387 ret = false; | |
| 388 | |
| 389 /* Return the reconstructed result instead so it can be inspected. */ | |
| 390 hb_buffer_set_length (buffer, 0); | |
| 391 hb_buffer_append (buffer, reconstruction, 0, -1); | |
| 392 } | |
| 393 | |
| 394 | |
| 395 out: | |
| 396 hb_buffer_destroy (reconstruction); | |
| 397 hb_buffer_destroy (fragments[0]); | |
| 398 hb_buffer_destroy (fragments[1]); | |
| 399 | |
| 400 return ret; | |
| 401 } | |
| 402 | |
| 403 bool | |
| 404 hb_buffer_t::verify (hb_buffer_t *text_buffer, | |
| 405 hb_font_t *font, | |
| 406 const hb_feature_t *features, | |
| 407 unsigned int num_features, | |
| 408 const char * const *shapers) | |
| 409 { | |
| 410 bool ret = true; | |
| 411 if (!buffer_verify_monotone (this, font)) | |
| 412 ret = false; | |
| 413 if (!buffer_verify_unsafe_to_break (this, text_buffer, font, features, num_features, shapers)) | |
| 414 ret = false; | |
| 415 if ((flags & HB_BUFFER_FLAG_PRODUCE_UNSAFE_TO_CONCAT) != 0 && | |
| 416 !buffer_verify_unsafe_to_concat (this, text_buffer, font, features, num_features, shapers)) | |
| 417 ret = false; | |
| 418 if (!ret) | |
| 419 { | |
| 420 #ifndef HB_NO_BUFFER_SERIALIZE | |
| 421 unsigned len = text_buffer->len; | |
| 422 hb_vector_t<char> bytes; | |
| 423 if (likely (bytes.resize (len * 10 + 16))) | |
| 424 { | |
| 425 hb_buffer_serialize_unicode (text_buffer, | |
| 426 0, len, | |
| 427 bytes.arrayZ, bytes.length, | |
| 428 &len, | |
| 429 HB_BUFFER_SERIALIZE_FORMAT_TEXT, | |
| 430 HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS); | |
| 431 buffer_verify_error (this, font, BUFFER_VERIFY_ERROR "text was: %s.", bytes.arrayZ); | |
| 432 } | |
| 433 #endif | |
| 434 } | |
| 435 return ret; | |
| 436 } | |
| 437 | |
| 438 | |
| 439 #endif |
