Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/harfbuzz/src/hb-sanitize.hh @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* | |
| 2 * Copyright © 2007,2008,2009,2010 Red Hat, Inc. | |
| 3 * Copyright © 2012,2018 Google, Inc. | |
| 4 * | |
| 5 * This is part of HarfBuzz, a text shaping library. | |
| 6 * | |
| 7 * Permission is hereby granted, without written agreement and without | |
| 8 * license or royalty fees, to use, copy, modify, and distribute this | |
| 9 * software and its documentation for any purpose, provided that the | |
| 10 * above copyright notice and the following two paragraphs appear in | |
| 11 * all copies of this software. | |
| 12 * | |
| 13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR | |
| 14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | |
| 15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN | |
| 16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | |
| 17 * DAMAGE. | |
| 18 * | |
| 19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, | |
| 20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | |
| 21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS | |
| 22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO | |
| 23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | |
| 24 * | |
| 25 * Red Hat Author(s): Behdad Esfahbod | |
| 26 * Google Author(s): Behdad Esfahbod | |
| 27 */ | |
| 28 | |
| 29 #ifndef HB_SANITIZE_HH | |
| 30 #define HB_SANITIZE_HH | |
| 31 | |
| 32 #include "hb.hh" | |
| 33 #include "hb-blob.hh" | |
| 34 #include "hb-dispatch.hh" | |
| 35 | |
| 36 | |
| 37 /* | |
| 38 * Sanitize | |
| 39 * | |
| 40 * | |
| 41 * === Introduction === | |
| 42 * | |
| 43 * The sanitize machinery is at the core of our zero-cost font loading. We | |
| 44 * mmap() font file into memory and create a blob out of it. Font subtables | |
| 45 * are returned as a readonly sub-blob of the main font blob. These table | |
| 46 * blobs are then sanitized before use, to ensure invalid memory access does | |
| 47 * not happen. A typical top-level use of the sanitize API, e.g. to load the | |
| 48 * 'head' table, looks like: | |
| 49 * | |
| 50 * hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<OT::head> (face); | |
| 51 * | |
| 52 * The blob then can be converted to a head table struct with: | |
| 53 * | |
| 54 * const head *head_table = head_blob->as<head> (); | |
| 55 * | |
| 56 * What reference_table() does is call hb_face_reference_table() to load | |
| 57 * the table blob, sanitize it and return either the sanitized blob, or empty | |
| 58 * blob if sanitization failed. The blob->as() function returns the null | |
| 59 * object of its template type argument if the blob is empty. Otherwise, it | |
| 60 * just casts the blob contents to the desired type. | |
| 61 * | |
| 62 * Sanitizing a blob of data with a type T works as follows (with minor | |
| 63 * simplification): | |
| 64 * | |
| 65 * - Cast blob content to T*, call sanitize() method of it, | |
| 66 * - If sanitize succeeded, return blob. | |
| 67 * - Otherwise, if blob is not writable, try making it writable, | |
| 68 * or copy it if it cannot be made writable in-place, | |
| 69 * - Call sanitize() again. Return blob if sanitize succeeded. | |
| 70 * - Return empty blob otherwise. | |
| 71 * | |
| 72 * | |
| 73 * === The sanitize() contract === | |
| 74 * | |
| 75 * The sanitize() method of each object type shall return true if it's safe to | |
| 76 * call other methods of the object, and false otherwise. | |
| 77 * | |
| 78 * Note that what sanitize() checks for might align with what the specification | |
| 79 * describes as valid table data, but does not have to be. In particular, we | |
| 80 * do NOT want to be pedantic and concern ourselves with validity checks that | |
| 81 * are irrelevant to our use of the table. On the contrary, we want to be | |
| 82 * lenient with error handling and accept invalid data to the extent that it | |
| 83 * does not impose extra burden on us. | |
| 84 * | |
| 85 * Based on the sanitize contract, one can see that what we check for depends | |
| 86 * on how we use the data in other table methods. Ie. if other table methods | |
| 87 * assume that offsets do NOT point out of the table data block, then that's | |
| 88 * something sanitize() must check for (GSUB/GPOS/GDEF/etc work this way). On | |
| 89 * the other hand, if other methods do such checks themselves, then sanitize() | |
| 90 * does not have to bother with them (glyf/loca work this way). The choice | |
| 91 * depends on the table structure and sanitize() performance. For example, to | |
| 92 * check glyf/loca offsets in sanitize() would cost O(num-glyphs). We try hard | |
| 93 * to avoid such costs during font loading. By postponing such checks to the | |
| 94 * actual glyph loading, we reduce the sanitize cost to O(1) and total runtime | |
| 95 * cost to O(used-glyphs). As such, this is preferred. | |
| 96 * | |
| 97 * The same argument can be made re GSUB/GPOS/GDEF, but there, the table | |
| 98 * structure is so complicated that by checking all offsets at sanitize() time, | |
| 99 * we make the code much simpler in other methods, as offsets and referenced | |
| 100 * objects do not need to be validated at each use site. | |
| 101 */ | |
| 102 | |
| 103 /* This limits sanitizing time on really broken fonts. */ | |
| 104 #ifndef HB_SANITIZE_MAX_EDITS | |
| 105 #define HB_SANITIZE_MAX_EDITS 32 | |
| 106 #endif | |
| 107 #ifndef HB_SANITIZE_MAX_OPS_FACTOR | |
| 108 #define HB_SANITIZE_MAX_OPS_FACTOR 64 | |
| 109 #endif | |
| 110 #ifndef HB_SANITIZE_MAX_OPS_MIN | |
| 111 #define HB_SANITIZE_MAX_OPS_MIN 16384 | |
| 112 #endif | |
| 113 #ifndef HB_SANITIZE_MAX_OPS_MAX | |
| 114 #define HB_SANITIZE_MAX_OPS_MAX 0x3FFFFFFF | |
| 115 #endif | |
| 116 #ifndef HB_SANITIZE_MAX_SUBTABLES | |
| 117 #define HB_SANITIZE_MAX_SUBTABLES 0x4000 | |
| 118 #endif | |
| 119 | |
| 120 struct hb_sanitize_context_t : | |
| 121 hb_dispatch_context_t<hb_sanitize_context_t, bool, HB_DEBUG_SANITIZE> | |
| 122 { | |
| 123 hb_sanitize_context_t () : | |
| 124 start (nullptr), end (nullptr), | |
| 125 max_ops (0), max_subtables (0), | |
| 126 recursion_depth (0), | |
| 127 writable (false), edit_count (0), | |
| 128 blob (nullptr), | |
| 129 num_glyphs (65536), | |
| 130 num_glyphs_set (false) {} | |
| 131 | |
| 132 const char *get_name () { return "SANITIZE"; } | |
| 133 template <typename T, typename F> | |
| 134 bool may_dispatch (const T *obj HB_UNUSED, const F *format) | |
| 135 { return format->sanitize (this); } | |
| 136 static return_t default_return_value () { return true; } | |
| 137 static return_t no_dispatch_return_value () { return false; } | |
| 138 bool stop_sublookup_iteration (const return_t r) const { return !r; } | |
| 139 | |
| 140 bool visit_subtables (unsigned count) | |
| 141 { | |
| 142 max_subtables += count; | |
| 143 return max_subtables < HB_SANITIZE_MAX_SUBTABLES; | |
| 144 } | |
| 145 | |
| 146 private: | |
| 147 template <typename T, typename ...Ts> auto | |
| 148 _dispatch (const T &obj, hb_priority<1>, Ts&&... ds) HB_AUTO_RETURN | |
| 149 ( obj.sanitize (this, std::forward<Ts> (ds)...) ) | |
| 150 template <typename T, typename ...Ts> auto | |
| 151 _dispatch (const T &obj, hb_priority<0>, Ts&&... ds) HB_AUTO_RETURN | |
| 152 ( obj.dispatch (this, std::forward<Ts> (ds)...) ) | |
| 153 public: | |
| 154 template <typename T, typename ...Ts> auto | |
| 155 dispatch (const T &obj, Ts&&... ds) HB_AUTO_RETURN | |
| 156 ( _dispatch (obj, hb_prioritize, std::forward<Ts> (ds)...) ) | |
| 157 | |
| 158 | |
| 159 void init (hb_blob_t *b) | |
| 160 { | |
| 161 this->blob = hb_blob_reference (b); | |
| 162 this->writable = false; | |
| 163 } | |
| 164 | |
| 165 void set_num_glyphs (unsigned int num_glyphs_) | |
| 166 { | |
| 167 num_glyphs = num_glyphs_; | |
| 168 num_glyphs_set = true; | |
| 169 } | |
| 170 unsigned int get_num_glyphs () { return num_glyphs; } | |
| 171 | |
| 172 void set_max_ops (int max_ops_) { max_ops = max_ops_; } | |
| 173 | |
| 174 template <typename T> | |
| 175 void set_object (const T *obj) | |
| 176 { | |
| 177 reset_object (); | |
| 178 | |
| 179 if (!obj) return; | |
| 180 | |
| 181 const char *obj_start = (const char *) obj; | |
| 182 if (unlikely (obj_start < this->start || this->end <= obj_start)) | |
| 183 this->start = this->end = nullptr; | |
| 184 else | |
| 185 { | |
| 186 this->start = obj_start; | |
| 187 this->end = obj_start + hb_min (size_t (this->end - obj_start), obj->get_size ()); | |
| 188 } | |
| 189 } | |
| 190 | |
| 191 void reset_object () | |
| 192 { | |
| 193 this->start = this->blob->data; | |
| 194 this->end = this->start + this->blob->length; | |
| 195 assert (this->start <= this->end); /* Must not overflow. */ | |
| 196 } | |
| 197 | |
| 198 void start_processing () | |
| 199 { | |
| 200 reset_object (); | |
| 201 unsigned m; | |
| 202 if (unlikely (hb_unsigned_mul_overflows (this->end - this->start, HB_SANITIZE_MAX_OPS_FACTOR, &m))) | |
| 203 this->max_ops = HB_SANITIZE_MAX_OPS_MAX; | |
| 204 else | |
| 205 this->max_ops = hb_clamp (m, | |
| 206 (unsigned) HB_SANITIZE_MAX_OPS_MIN, | |
| 207 (unsigned) HB_SANITIZE_MAX_OPS_MAX); | |
| 208 this->edit_count = 0; | |
| 209 this->debug_depth = 0; | |
| 210 this->recursion_depth = 0; | |
| 211 | |
| 212 DEBUG_MSG_LEVEL (SANITIZE, start, 0, +1, | |
| 213 "start [%p..%p] (%lu bytes)", | |
| 214 this->start, this->end, | |
| 215 (unsigned long) (this->end - this->start)); | |
| 216 } | |
| 217 | |
| 218 void end_processing () | |
| 219 { | |
| 220 DEBUG_MSG_LEVEL (SANITIZE, this->start, 0, -1, | |
| 221 "end [%p..%p] %u edit requests", | |
| 222 this->start, this->end, this->edit_count); | |
| 223 | |
| 224 hb_blob_destroy (this->blob); | |
| 225 this->blob = nullptr; | |
| 226 this->start = this->end = nullptr; | |
| 227 } | |
| 228 | |
| 229 unsigned get_edit_count () { return edit_count; } | |
| 230 | |
| 231 bool check_range (const void *base, | |
| 232 unsigned int len) const | |
| 233 { | |
| 234 const char *p = (const char *) base; | |
| 235 bool ok = !len || | |
| 236 (this->start <= p && | |
| 237 p <= this->end && | |
| 238 (unsigned int) (this->end - p) >= len && | |
| 239 (this->max_ops -= len) > 0); | |
| 240 | |
| 241 DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0, | |
| 242 "check_range [%p..%p]" | |
| 243 " (%d bytes) in [%p..%p] -> %s", | |
| 244 p, p + len, len, | |
| 245 this->start, this->end, | |
| 246 ok ? "OK" : "OUT-OF-RANGE"); | |
| 247 | |
| 248 return likely (ok); | |
| 249 } | |
| 250 | |
| 251 template <typename T> | |
| 252 bool check_range (const T *base, | |
| 253 unsigned int a, | |
| 254 unsigned int b) const | |
| 255 { | |
| 256 unsigned m; | |
| 257 return !hb_unsigned_mul_overflows (a, b, &m) && | |
| 258 this->check_range (base, m); | |
| 259 } | |
| 260 | |
| 261 template <typename T> | |
| 262 bool check_range (const T *base, | |
| 263 unsigned int a, | |
| 264 unsigned int b, | |
| 265 unsigned int c) const | |
| 266 { | |
| 267 unsigned m; | |
| 268 return !hb_unsigned_mul_overflows (a, b, &m) && | |
| 269 this->check_range (base, m, c); | |
| 270 } | |
| 271 | |
| 272 template <typename T> | |
| 273 bool check_array (const T *base, unsigned int len) const | |
| 274 { | |
| 275 return this->check_range (base, len, hb_static_size (T)); | |
| 276 } | |
| 277 | |
| 278 template <typename T> | |
| 279 bool check_array (const T *base, | |
| 280 unsigned int a, | |
| 281 unsigned int b) const | |
| 282 { | |
| 283 return this->check_range (base, a, b, hb_static_size (T)); | |
| 284 } | |
| 285 | |
| 286 bool check_start_recursion (int max_depth) | |
| 287 { | |
| 288 if (unlikely (recursion_depth >= max_depth)) return false; | |
| 289 return ++recursion_depth; | |
| 290 } | |
| 291 | |
| 292 bool end_recursion (bool result) | |
| 293 { | |
| 294 recursion_depth--; | |
| 295 return result; | |
| 296 } | |
| 297 | |
| 298 template <typename Type> | |
| 299 bool check_struct (const Type *obj) const | |
| 300 { return likely (this->check_range (obj, obj->min_size)); } | |
| 301 | |
| 302 bool may_edit (const void *base, unsigned int len) | |
| 303 { | |
| 304 if (this->edit_count >= HB_SANITIZE_MAX_EDITS) | |
| 305 return false; | |
| 306 | |
| 307 const char *p = (const char *) base; | |
| 308 this->edit_count++; | |
| 309 | |
| 310 DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0, | |
| 311 "may_edit(%u) [%p..%p] (%d bytes) in [%p..%p] -> %s", | |
| 312 this->edit_count, | |
| 313 p, p + len, len, | |
| 314 this->start, this->end, | |
| 315 this->writable ? "GRANTED" : "DENIED"); | |
| 316 | |
| 317 return this->writable; | |
| 318 } | |
| 319 | |
| 320 template <typename Type, typename ValueType> | |
| 321 bool try_set (const Type *obj, const ValueType &v) | |
| 322 { | |
| 323 if (this->may_edit (obj, hb_static_size (Type))) | |
| 324 { | |
| 325 * const_cast<Type *> (obj) = v; | |
| 326 return true; | |
| 327 } | |
| 328 return false; | |
| 329 } | |
| 330 | |
| 331 template <typename Type> | |
| 332 hb_blob_t *sanitize_blob (hb_blob_t *blob) | |
| 333 { | |
| 334 bool sane; | |
| 335 | |
| 336 init (blob); | |
| 337 | |
| 338 retry: | |
| 339 DEBUG_MSG_FUNC (SANITIZE, start, "start"); | |
| 340 | |
| 341 start_processing (); | |
| 342 | |
| 343 if (unlikely (!start)) | |
| 344 { | |
| 345 end_processing (); | |
| 346 return blob; | |
| 347 } | |
| 348 | |
| 349 Type *t = reinterpret_cast<Type *> (const_cast<char *> (start)); | |
| 350 | |
| 351 sane = t->sanitize (this); | |
| 352 if (sane) | |
| 353 { | |
| 354 if (edit_count) | |
| 355 { | |
| 356 DEBUG_MSG_FUNC (SANITIZE, start, "passed first round with %d edits; going for second round", edit_count); | |
| 357 | |
| 358 /* sanitize again to ensure no toe-stepping */ | |
| 359 edit_count = 0; | |
| 360 sane = t->sanitize (this); | |
| 361 if (edit_count) { | |
| 362 DEBUG_MSG_FUNC (SANITIZE, start, "requested %d edits in second round; FAILLING", edit_count); | |
| 363 sane = false; | |
| 364 } | |
| 365 } | |
| 366 } | |
| 367 else | |
| 368 { | |
| 369 if (edit_count && !writable) { | |
| 370 start = hb_blob_get_data_writable (blob, nullptr); | |
| 371 end = start + blob->length; | |
| 372 | |
| 373 if (start) | |
| 374 { | |
| 375 writable = true; | |
| 376 /* ok, we made it writable by relocating. try again */ | |
| 377 DEBUG_MSG_FUNC (SANITIZE, start, "retry"); | |
| 378 goto retry; | |
| 379 } | |
| 380 } | |
| 381 } | |
| 382 | |
| 383 end_processing (); | |
| 384 | |
| 385 DEBUG_MSG_FUNC (SANITIZE, start, sane ? "PASSED" : "FAILED"); | |
| 386 if (sane) | |
| 387 { | |
| 388 hb_blob_make_immutable (blob); | |
| 389 return blob; | |
| 390 } | |
| 391 else | |
| 392 { | |
| 393 hb_blob_destroy (blob); | |
| 394 return hb_blob_get_empty (); | |
| 395 } | |
| 396 } | |
| 397 | |
| 398 template <typename Type> | |
| 399 hb_blob_t *reference_table (const hb_face_t *face, hb_tag_t tableTag = Type::tableTag) | |
| 400 { | |
| 401 if (!num_glyphs_set) | |
| 402 set_num_glyphs (hb_face_get_glyph_count (face)); | |
| 403 return sanitize_blob<Type> (hb_face_reference_table (face, tableTag)); | |
| 404 } | |
| 405 | |
| 406 const char *start, *end; | |
| 407 mutable int max_ops, max_subtables; | |
| 408 private: | |
| 409 int recursion_depth; | |
| 410 bool writable; | |
| 411 unsigned int edit_count; | |
| 412 hb_blob_t *blob; | |
| 413 unsigned int num_glyphs; | |
| 414 bool num_glyphs_set; | |
| 415 }; | |
| 416 | |
| 417 struct hb_sanitize_with_object_t | |
| 418 { | |
| 419 template <typename T> | |
| 420 hb_sanitize_with_object_t (hb_sanitize_context_t *c, const T& obj) : c (c) | |
| 421 { c->set_object (obj); } | |
| 422 ~hb_sanitize_with_object_t () | |
| 423 { c->reset_object (); } | |
| 424 | |
| 425 private: | |
| 426 hb_sanitize_context_t *c; | |
| 427 }; | |
| 428 | |
| 429 | |
| 430 #endif /* HB_SANITIZE_HH */ |
