Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/harfbuzz/src/hb-repacker.hh @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* | |
| 2 * Copyright © 2020 Google, Inc. | |
| 3 * | |
| 4 * This is part of HarfBuzz, a text shaping library. | |
| 5 * | |
| 6 * Permission is hereby granted, without written agreement and without | |
| 7 * license or royalty fees, to use, copy, modify, and distribute this | |
| 8 * software and its documentation for any purpose, provided that the | |
| 9 * above copyright notice and the following two paragraphs appear in | |
| 10 * all copies of this software. | |
| 11 * | |
| 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR | |
| 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | |
| 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN | |
| 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | |
| 16 * DAMAGE. | |
| 17 * | |
| 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, | |
| 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
| 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS | |
| 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO | |
| 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | |
| 23 * | |
| 24 * Google Author(s): Garret Rieger | |
| 25 */ | |
| 26 | |
| 27 #ifndef HB_REPACKER_HH | |
| 28 #define HB_REPACKER_HH | |
| 29 | |
| 30 #include "hb-open-type.hh" | |
| 31 #include "hb-map.hh" | |
| 32 #include "hb-vector.hh" | |
| 33 #include "graph/graph.hh" | |
| 34 #include "graph/gsubgpos-graph.hh" | |
| 35 #include "graph/serialize.hh" | |
| 36 | |
| 37 using graph::graph_t; | |
| 38 | |
| 39 /* | |
| 40 * For a detailed writeup on the overflow resolution algorithm see: | |
| 41 * docs/repacker.md | |
| 42 */ | |
| 43 | |
| 44 struct lookup_size_t | |
| 45 { | |
| 46 unsigned lookup_index; | |
| 47 size_t size; | |
| 48 unsigned num_subtables; | |
| 49 | |
| 50 static int cmp (const void* a, const void* b) | |
| 51 { | |
| 52 return cmp ((const lookup_size_t*) a, | |
| 53 (const lookup_size_t*) b); | |
| 54 } | |
| 55 | |
| 56 static int cmp (const lookup_size_t* a, const lookup_size_t* b) | |
| 57 { | |
| 58 double subtables_per_byte_a = (double) a->num_subtables / (double) a->size; | |
| 59 double subtables_per_byte_b = (double) b->num_subtables / (double) b->size; | |
| 60 if (subtables_per_byte_a == subtables_per_byte_b) { | |
| 61 return b->lookup_index - a->lookup_index; | |
| 62 } | |
| 63 | |
| 64 double cmp = subtables_per_byte_b - subtables_per_byte_a; | |
| 65 if (cmp < 0) return -1; | |
| 66 if (cmp > 0) return 1; | |
| 67 return 0; | |
| 68 } | |
| 69 }; | |
| 70 | |
| 71 static inline | |
| 72 bool _presplit_subtables_if_needed (graph::gsubgpos_graph_context_t& ext_context) | |
| 73 { | |
| 74 // For each lookup this will check the size of subtables and split them as needed | |
| 75 // so that no subtable is at risk of overflowing. (where we support splitting for | |
| 76 // that subtable type). | |
| 77 // | |
| 78 // TODO(grieger): de-dup newly added nodes as necessary. Probably just want a full de-dup | |
| 79 // pass after this processing is done. Not super necessary as splits are | |
| 80 // only done where overflow is likely, so de-dup probably will get undone | |
| 81 // later anyways. | |
| 82 for (unsigned lookup_index : ext_context.lookups.keys ()) | |
| 83 { | |
| 84 graph::Lookup* lookup = ext_context.lookups.get(lookup_index); | |
| 85 if (!lookup->split_subtables_if_needed (ext_context, lookup_index)) | |
| 86 return false; | |
| 87 } | |
| 88 | |
| 89 return true; | |
| 90 } | |
| 91 | |
| 92 /* | |
| 93 * Analyze the lookups in a GSUB/GPOS table and decide if any should be promoted | |
| 94 * to extension lookups. | |
| 95 */ | |
| 96 static inline | |
| 97 bool _promote_extensions_if_needed (graph::gsubgpos_graph_context_t& ext_context) | |
| 98 { | |
| 99 // Simple Algorithm (v1, current): | |
| 100 // 1. Calculate how many bytes each non-extension lookup consumes. | |
| 101 // 2. Select up to 64k of those to remain as non-extension (greedy, highest subtables per byte first) | |
| 102 // 3. Promote the rest. | |
| 103 // | |
| 104 // Advanced Algorithm (v2, not implemented): | |
| 105 // 1. Perform connected component analysis using lookups as roots. | |
| 106 // 2. Compute size of each connected component. | |
| 107 // 3. Select up to 64k worth of connected components to remain as non-extensions. | |
| 108 // (greedy, highest subtables per byte first) | |
| 109 // 4. Promote the rest. | |
| 110 | |
| 111 // TODO(garretrieger): support extension demotion, then consider all lookups. Requires advanced algo. | |
| 112 // TODO(garretrieger): also support extension promotion during iterative resolution phase, then | |
| 113 // we can use a less conservative threshold here. | |
| 114 // TODO(grieger): skip this for the 24 bit case. | |
| 115 if (!ext_context.lookups) return true; | |
| 116 | |
| 117 hb_vector_t<lookup_size_t> lookup_sizes; | |
| 118 lookup_sizes.alloc (ext_context.lookups.get_population ()); | |
| 119 | |
| 120 for (unsigned lookup_index : ext_context.lookups.keys ()) | |
| 121 { | |
| 122 const graph::Lookup* lookup = ext_context.lookups.get(lookup_index); | |
| 123 hb_set_t visited; | |
| 124 lookup_sizes.push (lookup_size_t { | |
| 125 lookup_index, | |
| 126 ext_context.graph.find_subgraph_size (lookup_index, visited), | |
| 127 lookup->number_of_subtables (), | |
| 128 }); | |
| 129 } | |
| 130 | |
| 131 lookup_sizes.qsort (); | |
| 132 | |
| 133 size_t lookup_list_size = ext_context.graph.vertices_[ext_context.lookup_list_index].table_size (); | |
| 134 size_t l2_l3_size = lookup_list_size; // Lookup List + Lookups | |
| 135 size_t l3_l4_size = 0; // Lookups + SubTables | |
| 136 size_t l4_plus_size = 0; // SubTables + their descendants | |
| 137 | |
| 138 // Start by assuming all lookups are using extension subtables, this size will be removed later | |
| 139 // if it's decided to not make a lookup extension. | |
| 140 for (auto p : lookup_sizes) | |
| 141 { | |
| 142 unsigned subtables_size = p.num_subtables * 8; | |
| 143 l3_l4_size += subtables_size; | |
| 144 l4_plus_size += subtables_size; | |
| 145 } | |
| 146 | |
| 147 bool layers_full = false; | |
| 148 for (auto p : lookup_sizes) | |
| 149 { | |
| 150 const graph::Lookup* lookup = ext_context.lookups.get(p.lookup_index); | |
| 151 if (lookup->is_extension (ext_context.table_tag)) | |
| 152 // already an extension so size is counted by the loop above. | |
| 153 continue; | |
| 154 | |
| 155 if (!layers_full) | |
| 156 { | |
| 157 size_t lookup_size = ext_context.graph.vertices_[p.lookup_index].table_size (); | |
| 158 hb_set_t visited; | |
| 159 size_t subtables_size = ext_context.graph.find_subgraph_size (p.lookup_index, visited, 1) - lookup_size; | |
| 160 size_t remaining_size = p.size - subtables_size - lookup_size; | |
| 161 | |
| 162 l2_l3_size += lookup_size; | |
| 163 l3_l4_size += lookup_size + subtables_size; | |
| 164 l3_l4_size -= p.num_subtables * 8; | |
| 165 l4_plus_size += subtables_size + remaining_size; | |
| 166 | |
| 167 if (l2_l3_size < (1 << 16) | |
| 168 && l3_l4_size < (1 << 16) | |
| 169 && l4_plus_size < (1 << 16)) continue; // this lookup fits within all layers groups | |
| 170 | |
| 171 layers_full = true; | |
| 172 } | |
| 173 | |
| 174 if (!ext_context.lookups.get(p.lookup_index)->make_extension (ext_context, p.lookup_index)) | |
| 175 return false; | |
| 176 } | |
| 177 | |
| 178 return true; | |
| 179 } | |
| 180 | |
| 181 static inline | |
| 182 bool _try_isolating_subgraphs (const hb_vector_t<graph::overflow_record_t>& overflows, | |
| 183 graph_t& sorted_graph) | |
| 184 { | |
| 185 unsigned space = 0; | |
| 186 hb_set_t roots_to_isolate; | |
| 187 | |
| 188 for (int i = overflows.length - 1; i >= 0; i--) | |
| 189 { | |
| 190 const graph::overflow_record_t& r = overflows[i]; | |
| 191 | |
| 192 unsigned root; | |
| 193 unsigned overflow_space = sorted_graph.space_for (r.parent, &root); | |
| 194 if (!overflow_space) continue; | |
| 195 if (sorted_graph.num_roots_for_space (overflow_space) <= 1) continue; | |
| 196 | |
| 197 if (!space) { | |
| 198 space = overflow_space; | |
| 199 } | |
| 200 | |
| 201 if (space == overflow_space) | |
| 202 roots_to_isolate.add(root); | |
| 203 } | |
| 204 | |
| 205 if (!roots_to_isolate) return false; | |
| 206 | |
| 207 unsigned maximum_to_move = hb_max ((sorted_graph.num_roots_for_space (space) / 2u), 1u); | |
| 208 if (roots_to_isolate.get_population () > maximum_to_move) { | |
| 209 // Only move at most half of the roots in a space at a time. | |
| 210 unsigned extra = roots_to_isolate.get_population () - maximum_to_move; | |
| 211 while (extra--) { | |
| 212 uint32_t root = HB_SET_VALUE_INVALID; | |
| 213 roots_to_isolate.previous (&root); | |
| 214 roots_to_isolate.del (root); | |
| 215 } | |
| 216 } | |
| 217 | |
| 218 DEBUG_MSG (SUBSET_REPACK, nullptr, | |
| 219 "Overflow in space %d (%d roots). Moving %d roots to space %d.", | |
| 220 space, | |
| 221 sorted_graph.num_roots_for_space (space), | |
| 222 roots_to_isolate.get_population (), | |
| 223 sorted_graph.next_space ()); | |
| 224 | |
| 225 sorted_graph.isolate_subgraph (roots_to_isolate); | |
| 226 sorted_graph.move_to_new_space (roots_to_isolate); | |
| 227 | |
| 228 return true; | |
| 229 } | |
| 230 | |
| 231 static inline | |
| 232 bool _process_overflows (const hb_vector_t<graph::overflow_record_t>& overflows, | |
| 233 hb_set_t& priority_bumped_parents, | |
| 234 graph_t& sorted_graph) | |
| 235 { | |
| 236 bool resolution_attempted = false; | |
| 237 | |
| 238 // Try resolving the furthest overflows first. | |
| 239 for (int i = overflows.length - 1; i >= 0; i--) | |
| 240 { | |
| 241 const graph::overflow_record_t& r = overflows[i]; | |
| 242 const auto& child = sorted_graph.vertices_[r.child]; | |
| 243 if (child.is_shared ()) | |
| 244 { | |
| 245 // The child object is shared, we may be able to eliminate the overflow | |
| 246 // by duplicating it. | |
| 247 if (sorted_graph.duplicate (r.parent, r.child) == (unsigned) -1) continue; | |
| 248 return true; | |
| 249 } | |
| 250 | |
| 251 if (child.is_leaf () && !priority_bumped_parents.has (r.parent)) | |
| 252 { | |
| 253 // This object is too far from it's parent, attempt to move it closer. | |
| 254 // | |
| 255 // TODO(garretrieger): initially limiting this to leaf's since they can be | |
| 256 // moved closer with fewer consequences. However, this can | |
| 257 // likely can be used for non-leafs as well. | |
| 258 // TODO(garretrieger): also try lowering priority of the parent. Make it | |
| 259 // get placed further up in the ordering, closer to it's children. | |
| 260 // this is probably preferable if the total size of the parent object | |
| 261 // is < then the total size of the children (and the parent can be moved). | |
| 262 // Since in that case moving the parent will cause a smaller increase in | |
| 263 // the length of other offsets. | |
| 264 if (sorted_graph.raise_childrens_priority (r.parent)) { | |
| 265 priority_bumped_parents.add (r.parent); | |
| 266 resolution_attempted = true; | |
| 267 } | |
| 268 continue; | |
| 269 } | |
| 270 | |
| 271 // TODO(garretrieger): add additional offset resolution strategies | |
| 272 // - Promotion to extension lookups. | |
| 273 // - Table splitting. | |
| 274 } | |
| 275 | |
| 276 return resolution_attempted; | |
| 277 } | |
| 278 | |
| 279 inline bool | |
| 280 hb_resolve_graph_overflows (hb_tag_t table_tag, | |
| 281 unsigned max_rounds , | |
| 282 bool recalculate_extensions, | |
| 283 graph_t& sorted_graph /* IN/OUT */) | |
| 284 { | |
| 285 sorted_graph.sort_shortest_distance (); | |
| 286 if (sorted_graph.in_error ()) | |
| 287 { | |
| 288 DEBUG_MSG (SUBSET_REPACK, nullptr, "Sorted graph in error state after initial sort."); | |
| 289 return false; | |
| 290 } | |
| 291 | |
| 292 bool will_overflow = graph::will_overflow (sorted_graph); | |
| 293 if (!will_overflow) | |
| 294 return true; | |
| 295 | |
| 296 graph::gsubgpos_graph_context_t ext_context (table_tag, sorted_graph); | |
| 297 if ((table_tag == HB_OT_TAG_GPOS | |
| 298 || table_tag == HB_OT_TAG_GSUB) | |
| 299 && will_overflow) | |
| 300 { | |
| 301 if (recalculate_extensions) | |
| 302 { | |
| 303 DEBUG_MSG (SUBSET_REPACK, nullptr, "Splitting subtables if needed."); | |
| 304 if (!_presplit_subtables_if_needed (ext_context)) { | |
| 305 DEBUG_MSG (SUBSET_REPACK, nullptr, "Subtable splitting failed."); | |
| 306 return false; | |
| 307 } | |
| 308 | |
| 309 DEBUG_MSG (SUBSET_REPACK, nullptr, "Promoting lookups to extensions if needed."); | |
| 310 if (!_promote_extensions_if_needed (ext_context)) { | |
| 311 DEBUG_MSG (SUBSET_REPACK, nullptr, "Extensions promotion failed."); | |
| 312 return false; | |
| 313 } | |
| 314 } | |
| 315 | |
| 316 DEBUG_MSG (SUBSET_REPACK, nullptr, "Assigning spaces to 32 bit subgraphs."); | |
| 317 if (sorted_graph.assign_spaces ()) | |
| 318 sorted_graph.sort_shortest_distance (); | |
| 319 else | |
| 320 sorted_graph.sort_shortest_distance_if_needed (); | |
| 321 } | |
| 322 | |
| 323 unsigned round = 0; | |
| 324 hb_vector_t<graph::overflow_record_t> overflows; | |
| 325 // TODO(garretrieger): select a good limit for max rounds. | |
| 326 while (!sorted_graph.in_error () | |
| 327 && graph::will_overflow (sorted_graph, &overflows) | |
| 328 && round < max_rounds) { | |
| 329 DEBUG_MSG (SUBSET_REPACK, nullptr, "=== Overflow resolution round %d ===", round); | |
| 330 print_overflows (sorted_graph, overflows); | |
| 331 | |
| 332 hb_set_t priority_bumped_parents; | |
| 333 | |
| 334 if (!_try_isolating_subgraphs (overflows, sorted_graph)) | |
| 335 { | |
| 336 // Don't count space isolation towards round limit. Only increment | |
| 337 // round counter if space isolation made no changes. | |
| 338 round++; | |
| 339 if (!_process_overflows (overflows, priority_bumped_parents, sorted_graph)) | |
| 340 { | |
| 341 DEBUG_MSG (SUBSET_REPACK, nullptr, "No resolution available :("); | |
| 342 break; | |
| 343 } | |
| 344 } | |
| 345 | |
| 346 sorted_graph.sort_shortest_distance (); | |
| 347 } | |
| 348 | |
| 349 if (sorted_graph.in_error ()) | |
| 350 { | |
| 351 DEBUG_MSG (SUBSET_REPACK, nullptr, "Sorted graph in error state."); | |
| 352 return false; | |
| 353 } | |
| 354 | |
| 355 if (graph::will_overflow (sorted_graph)) | |
| 356 { | |
| 357 DEBUG_MSG (SUBSET_REPACK, nullptr, "Offset overflow resolution failed."); | |
| 358 return false; | |
| 359 } | |
| 360 | |
| 361 return true; | |
| 362 } | |
| 363 | |
| 364 /* | |
| 365 * Attempts to modify the topological sorting of the provided object graph to | |
| 366 * eliminate offset overflows in the links between objects of the graph. If a | |
| 367 * non-overflowing ordering is found the updated graph is serialized it into the | |
| 368 * provided serialization context. | |
| 369 * | |
| 370 * If necessary the structure of the graph may be modified in ways that do not | |
| 371 * affect the functionality of the graph. For example shared objects may be | |
| 372 * duplicated. | |
| 373 * | |
| 374 * For a detailed writeup describing how the algorithm operates see: | |
| 375 * docs/repacker.md | |
| 376 */ | |
| 377 template<typename T> | |
| 378 inline hb_blob_t* | |
| 379 hb_resolve_overflows (const T& packed, | |
| 380 hb_tag_t table_tag, | |
| 381 unsigned max_rounds = 20, | |
| 382 bool recalculate_extensions = false) { | |
| 383 graph_t sorted_graph (packed); | |
| 384 if (sorted_graph.in_error ()) | |
| 385 { | |
| 386 // Invalid graph definition. | |
| 387 return nullptr; | |
| 388 } | |
| 389 | |
| 390 if (!sorted_graph.is_fully_connected ()) | |
| 391 { | |
| 392 sorted_graph.print_orphaned_nodes (); | |
| 393 return nullptr; | |
| 394 } | |
| 395 | |
| 396 if (sorted_graph.in_error ()) | |
| 397 { | |
| 398 // Allocations failed somewhere | |
| 399 DEBUG_MSG (SUBSET_REPACK, nullptr, | |
| 400 "Graph is in error, likely due to a memory allocation error."); | |
| 401 return nullptr; | |
| 402 } | |
| 403 | |
| 404 if (!hb_resolve_graph_overflows (table_tag, max_rounds, recalculate_extensions, sorted_graph)) | |
| 405 return nullptr; | |
| 406 | |
| 407 return graph::serialize (sorted_graph); | |
| 408 } | |
| 409 | |
| 410 #endif /* HB_REPACKER_HH */ |
