Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/harfbuzz/src/hb-ot-shaper-khmer.cc @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* | |
| 2 * Copyright © 2011,2012 Google, Inc. | |
| 3 * | |
| 4 * This is part of HarfBuzz, a text shaping library. | |
| 5 * | |
| 6 * Permission is hereby granted, without written agreement and without | |
| 7 * license or royalty fees, to use, copy, modify, and distribute this | |
| 8 * software and its documentation for any purpose, provided that the | |
| 9 * above copyright notice and the following two paragraphs appear in | |
| 10 * all copies of this software. | |
| 11 * | |
| 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR | |
| 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | |
| 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN | |
| 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | |
| 16 * DAMAGE. | |
| 17 * | |
| 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, | |
| 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
| 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS | |
| 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO | |
| 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | |
| 23 * | |
| 24 * Google Author(s): Behdad Esfahbod | |
| 25 */ | |
| 26 | |
| 27 #include "hb.hh" | |
| 28 | |
| 29 #ifndef HB_NO_OT_SHAPE | |
| 30 | |
| 31 #include "hb-ot-shaper-khmer-machine.hh" | |
| 32 #include "hb-ot-shaper-indic.hh" | |
| 33 #include "hb-ot-layout.hh" | |
| 34 | |
| 35 | |
| 36 /* | |
| 37 * Khmer shaper. | |
| 38 */ | |
| 39 | |
| 40 | |
| 41 static const hb_ot_map_feature_t | |
| 42 khmer_features[] = | |
| 43 { | |
| 44 /* | |
| 45 * Basic features. | |
| 46 * These features are applied all at once, before reordering, constrained | |
| 47 * to the syllable. | |
| 48 */ | |
| 49 {HB_TAG('p','r','e','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, | |
| 50 {HB_TAG('b','l','w','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, | |
| 51 {HB_TAG('a','b','v','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, | |
| 52 {HB_TAG('p','s','t','f'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, | |
| 53 {HB_TAG('c','f','a','r'), F_MANUAL_JOINERS | F_PER_SYLLABLE}, | |
| 54 /* | |
| 55 * Other features. | |
| 56 * These features are applied all at once after clearing syllables. | |
| 57 */ | |
| 58 {HB_TAG('p','r','e','s'), F_GLOBAL_MANUAL_JOINERS}, | |
| 59 {HB_TAG('a','b','v','s'), F_GLOBAL_MANUAL_JOINERS}, | |
| 60 {HB_TAG('b','l','w','s'), F_GLOBAL_MANUAL_JOINERS}, | |
| 61 {HB_TAG('p','s','t','s'), F_GLOBAL_MANUAL_JOINERS}, | |
| 62 }; | |
| 63 | |
/*
 * Indices into khmer_features[].
 * The enumerators must appear in exactly the same order as the entries
 * of that array.
 */
enum
{
  /* Basic features. */
  KHMER_PREF = 0,
  KHMER_BLWF,
  KHMER_ABVF,
  KHMER_PSTF,
  KHMER_CFAR,

  /* Other features. */
  _KHMER_PRES,
  _KHMER_ABVS,
  _KHMER_BLWS,
  _KHMER_PSTS,

  /* Total number of entries in khmer_features[]. */
  KHMER_NUM_FEATURES,

  /* Number of basic features, i.e. the index of the first non-basic one.
   * Don't forget to update this when the lists above change! */
  KHMER_BASIC_FEATURES = _KHMER_PRES,
};
| 82 | |
| 83 static inline void | |
| 84 set_khmer_properties (hb_glyph_info_t &info) | |
| 85 { | |
| 86 hb_codepoint_t u = info.codepoint; | |
| 87 unsigned int type = hb_indic_get_categories (u); | |
| 88 | |
| 89 info.khmer_category() = (khmer_category_t) (type & 0xFFu); | |
| 90 } | |
| 91 | |
| 92 static bool | |
| 93 setup_syllables_khmer (const hb_ot_shape_plan_t *plan, | |
| 94 hb_font_t *font, | |
| 95 hb_buffer_t *buffer); | |
| 96 static bool | |
| 97 reorder_khmer (const hb_ot_shape_plan_t *plan, | |
| 98 hb_font_t *font, | |
| 99 hb_buffer_t *buffer); | |
| 100 | |
| 101 static void | |
| 102 collect_features_khmer (hb_ot_shape_planner_t *plan) | |
| 103 { | |
| 104 hb_ot_map_builder_t *map = &plan->map; | |
| 105 | |
| 106 /* Do this before any lookups have been applied. */ | |
| 107 map->add_gsub_pause (setup_syllables_khmer); | |
| 108 map->add_gsub_pause (reorder_khmer); | |
| 109 | |
| 110 /* Testing suggests that Uniscribe does NOT pause between basic | |
| 111 * features. Test with KhmerUI.ttf and the following three | |
| 112 * sequences: | |
| 113 * | |
| 114 * U+1789,U+17BC | |
| 115 * U+1789,U+17D2,U+1789 | |
| 116 * U+1789,U+17D2,U+1789,U+17BC | |
| 117 * | |
| 118 * https://github.com/harfbuzz/harfbuzz/issues/974 | |
| 119 */ | |
| 120 map->enable_feature (HB_TAG('l','o','c','l'), F_PER_SYLLABLE); | |
| 121 map->enable_feature (HB_TAG('c','c','m','p'), F_PER_SYLLABLE); | |
| 122 | |
| 123 unsigned int i = 0; | |
| 124 for (; i < KHMER_BASIC_FEATURES; i++) | |
| 125 map->add_feature (khmer_features[i]); | |
| 126 | |
| 127 /* https://github.com/harfbuzz/harfbuzz/issues/3531 */ | |
| 128 map->add_gsub_pause (hb_syllabic_clear_var); // Don't need syllables anymore, use stop to free buffer var | |
| 129 | |
| 130 for (; i < KHMER_NUM_FEATURES; i++) | |
| 131 map->add_feature (khmer_features[i]); | |
| 132 } | |
| 133 | |
| 134 static void | |
| 135 override_features_khmer (hb_ot_shape_planner_t *plan) | |
| 136 { | |
| 137 hb_ot_map_builder_t *map = &plan->map; | |
| 138 | |
| 139 /* Khmer spec has 'clig' as part of required shaping features: | |
| 140 * "Apply feature 'clig' to form ligatures that are desired for | |
| 141 * typographical correctness.", hence in overrides... */ | |
| 142 map->enable_feature (HB_TAG('c','l','i','g')); | |
| 143 | |
| 144 /* Uniscribe does not apply 'kern' in Khmer. */ | |
| 145 if (hb_options ().uniscribe_bug_compatible) | |
| 146 { | |
| 147 map->disable_feature (HB_TAG('k','e','r','n')); | |
| 148 } | |
| 149 | |
| 150 map->disable_feature (HB_TAG('l','i','g','a')); | |
| 151 } | |
| 152 | |
| 153 | |
| 154 struct khmer_shape_plan_t | |
| 155 { | |
| 156 hb_mask_t mask_array[KHMER_NUM_FEATURES]; | |
| 157 }; | |
| 158 | |
| 159 static void * | |
| 160 data_create_khmer (const hb_ot_shape_plan_t *plan) | |
| 161 { | |
| 162 khmer_shape_plan_t *khmer_plan = (khmer_shape_plan_t *) hb_calloc (1, sizeof (khmer_shape_plan_t)); | |
| 163 if (unlikely (!khmer_plan)) | |
| 164 return nullptr; | |
| 165 | |
| 166 for (unsigned int i = 0; i < ARRAY_LENGTH (khmer_plan->mask_array); i++) | |
| 167 khmer_plan->mask_array[i] = (khmer_features[i].flags & F_GLOBAL) ? | |
| 168 0 : plan->map.get_1_mask (khmer_features[i].tag); | |
| 169 | |
| 170 return khmer_plan; | |
| 171 } | |
| 172 | |
| 173 static void | |
| 174 data_destroy_khmer (void *data) | |
| 175 { | |
| 176 hb_free (data); | |
| 177 } | |
| 178 | |
| 179 static void | |
| 180 setup_masks_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED, | |
| 181 hb_buffer_t *buffer, | |
| 182 hb_font_t *font HB_UNUSED) | |
| 183 { | |
| 184 HB_BUFFER_ALLOCATE_VAR (buffer, khmer_category); | |
| 185 | |
| 186 /* We cannot setup masks here. We save information about characters | |
| 187 * and setup masks later on in a pause-callback. */ | |
| 188 | |
| 189 unsigned int count = buffer->len; | |
| 190 hb_glyph_info_t *info = buffer->info; | |
| 191 for (unsigned int i = 0; i < count; i++) | |
| 192 set_khmer_properties (info[i]); | |
| 193 } | |
| 194 | |
| 195 static bool | |
| 196 setup_syllables_khmer (const hb_ot_shape_plan_t *plan HB_UNUSED, | |
| 197 hb_font_t *font HB_UNUSED, | |
| 198 hb_buffer_t *buffer) | |
| 199 { | |
| 200 HB_BUFFER_ALLOCATE_VAR (buffer, syllable); | |
| 201 find_syllables_khmer (buffer); | |
| 202 foreach_syllable (buffer, start, end) | |
| 203 buffer->unsafe_to_break (start, end); | |
| 204 return false; | |
| 205 } | |
| 206 | |
| 207 | |
| 208 /* Rules from: | |
| 209 * https://docs.microsoft.com/en-us/typography/script-development/khmer */ | |
| 210 | |
| 211 static void | |
| 212 reorder_consonant_syllable (const hb_ot_shape_plan_t *plan, | |
| 213 hb_face_t *face HB_UNUSED, | |
| 214 hb_buffer_t *buffer, | |
| 215 unsigned int start, unsigned int end) | |
| 216 { | |
| 217 const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data; | |
| 218 hb_glyph_info_t *info = buffer->info; | |
| 219 | |
| 220 /* Setup masks. */ | |
| 221 { | |
| 222 /* Post-base */ | |
| 223 hb_mask_t mask = khmer_plan->mask_array[KHMER_BLWF] | | |
| 224 khmer_plan->mask_array[KHMER_ABVF] | | |
| 225 khmer_plan->mask_array[KHMER_PSTF]; | |
| 226 for (unsigned int i = start + 1; i < end; i++) | |
| 227 info[i].mask |= mask; | |
| 228 } | |
| 229 | |
| 230 unsigned int num_coengs = 0; | |
| 231 for (unsigned int i = start + 1; i < end; i++) | |
| 232 { | |
| 233 /* """ | |
| 234 * When a COENG + (Cons | IndV) combination are found (and subscript count | |
| 235 * is less than two) the character combination is handled according to the | |
| 236 * subscript type of the character following the COENG. | |
| 237 * | |
| 238 * ... | |
| 239 * | |
| 240 * Subscript Type 2 - The COENG + RO characters are reordered to immediately | |
| 241 * before the base glyph. Then the COENG + RO characters are assigned to have | |
| 242 * the 'pref' OpenType feature applied to them. | |
| 243 * """ | |
| 244 */ | |
| 245 if (info[i].khmer_category() == K_Cat(H) && num_coengs <= 2 && i + 1 < end) | |
| 246 { | |
| 247 num_coengs++; | |
| 248 | |
| 249 if (info[i + 1].khmer_category() == K_Cat(Ra)) | |
| 250 { | |
| 251 for (unsigned int j = 0; j < 2; j++) | |
| 252 info[i + j].mask |= khmer_plan->mask_array[KHMER_PREF]; | |
| 253 | |
| 254 /* Move the Coeng,Ro sequence to the start. */ | |
| 255 buffer->merge_clusters (start, i + 2); | |
| 256 hb_glyph_info_t t0 = info[i]; | |
| 257 hb_glyph_info_t t1 = info[i + 1]; | |
| 258 memmove (&info[start + 2], &info[start], (i - start) * sizeof (info[0])); | |
| 259 info[start] = t0; | |
| 260 info[start + 1] = t1; | |
| 261 | |
| 262 /* Mark the subsequent stuff with 'cfar'. Used in Khmer. | |
| 263 * Read the feature spec. | |
| 264 * This allows distinguishing the following cases with MS Khmer fonts: | |
| 265 * U+1784,U+17D2,U+179A,U+17D2,U+1782 | |
| 266 * U+1784,U+17D2,U+1782,U+17D2,U+179A | |
| 267 */ | |
| 268 if (khmer_plan->mask_array[KHMER_CFAR]) | |
| 269 for (unsigned int j = i + 2; j < end; j++) | |
| 270 info[j].mask |= khmer_plan->mask_array[KHMER_CFAR]; | |
| 271 | |
| 272 num_coengs = 2; /* Done. */ | |
| 273 } | |
| 274 } | |
| 275 | |
| 276 /* Reorder left matra piece. */ | |
| 277 else if (info[i].khmer_category() == K_Cat(VPre)) | |
| 278 { | |
| 279 /* Move to the start. */ | |
| 280 buffer->merge_clusters (start, i + 1); | |
| 281 hb_glyph_info_t t = info[i]; | |
| 282 memmove (&info[start + 1], &info[start], (i - start) * sizeof (info[0])); | |
| 283 info[start] = t; | |
| 284 } | |
| 285 } | |
| 286 } | |
| 287 | |
| 288 static void | |
| 289 reorder_syllable_khmer (const hb_ot_shape_plan_t *plan, | |
| 290 hb_face_t *face, | |
| 291 hb_buffer_t *buffer, | |
| 292 unsigned int start, unsigned int end) | |
| 293 { | |
| 294 khmer_syllable_type_t syllable_type = (khmer_syllable_type_t) (buffer->info[start].syllable() & 0x0F); | |
| 295 switch (syllable_type) | |
| 296 { | |
| 297 case khmer_broken_cluster: /* We already inserted dotted-circles, so just call the consonant_syllable. */ | |
| 298 case khmer_consonant_syllable: | |
| 299 reorder_consonant_syllable (plan, face, buffer, start, end); | |
| 300 break; | |
| 301 | |
| 302 case khmer_non_khmer_cluster: | |
| 303 break; | |
| 304 } | |
| 305 } | |
| 306 | |
| 307 static bool | |
| 308 reorder_khmer (const hb_ot_shape_plan_t *plan, | |
| 309 hb_font_t *font, | |
| 310 hb_buffer_t *buffer) | |
| 311 { | |
| 312 bool ret = false; | |
| 313 if (buffer->message (font, "start reordering khmer")) | |
| 314 { | |
| 315 if (hb_syllabic_insert_dotted_circles (font, buffer, | |
| 316 khmer_broken_cluster, | |
| 317 K_Cat(DOTTEDCIRCLE), | |
| 318 (unsigned) -1)) | |
| 319 ret = true; | |
| 320 | |
| 321 foreach_syllable (buffer, start, end) | |
| 322 reorder_syllable_khmer (plan, font->face, buffer, start, end); | |
| 323 (void) buffer->message (font, "end reordering khmer"); | |
| 324 } | |
| 325 HB_BUFFER_DEALLOCATE_VAR (buffer, khmer_category); | |
| 326 | |
| 327 return ret; | |
| 328 } | |
| 329 | |
| 330 | |
| 331 static bool | |
| 332 decompose_khmer (const hb_ot_shape_normalize_context_t *c, | |
| 333 hb_codepoint_t ab, | |
| 334 hb_codepoint_t *a, | |
| 335 hb_codepoint_t *b) | |
| 336 { | |
| 337 switch (ab) | |
| 338 { | |
| 339 /* | |
| 340 * Decompose split matras that don't have Unicode decompositions. | |
| 341 */ | |
| 342 | |
| 343 /* Khmer */ | |
| 344 case 0x17BEu : *a = 0x17C1u; *b= 0x17BEu; return true; | |
| 345 case 0x17BFu : *a = 0x17C1u; *b= 0x17BFu; return true; | |
| 346 case 0x17C0u : *a = 0x17C1u; *b= 0x17C0u; return true; | |
| 347 case 0x17C4u : *a = 0x17C1u; *b= 0x17C4u; return true; | |
| 348 case 0x17C5u : *a = 0x17C1u; *b= 0x17C5u; return true; | |
| 349 } | |
| 350 | |
| 351 return (bool) c->unicode->decompose (ab, a, b); | |
| 352 } | |
| 353 | |
| 354 static bool | |
| 355 compose_khmer (const hb_ot_shape_normalize_context_t *c, | |
| 356 hb_codepoint_t a, | |
| 357 hb_codepoint_t b, | |
| 358 hb_codepoint_t *ab) | |
| 359 { | |
| 360 /* Avoid recomposing split matras. */ | |
| 361 if (HB_UNICODE_GENERAL_CATEGORY_IS_MARK (c->unicode->general_category (a))) | |
| 362 return false; | |
| 363 | |
| 364 return (bool) c->unicode->compose (a, b, ab); | |
| 365 } | |
| 366 | |
| 367 | |
| 368 const hb_ot_shaper_t _hb_ot_shaper_khmer = | |
| 369 { | |
| 370 collect_features_khmer, | |
| 371 override_features_khmer, | |
| 372 data_create_khmer, | |
| 373 data_destroy_khmer, | |
| 374 nullptr, /* preprocess_text */ | |
| 375 nullptr, /* postprocess_glyphs */ | |
| 376 decompose_khmer, | |
| 377 compose_khmer, | |
| 378 setup_masks_khmer, | |
| 379 nullptr, /* reorder_marks */ | |
| 380 HB_TAG_NONE, /* gpos_tag */ | |
| 381 HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, | |
| 382 HB_OT_SHAPE_ZERO_WIDTH_MARKS_NONE, | |
| 383 false, /* fallback_position */ | |
| 384 }; | |
| 385 | |
| 386 | |
| 387 #endif |
