Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/hashmap.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /* | |
| 28 * \file hashmap.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Hashmap creation, destruction | |
| 32 * L_HASHMAP *l_hmapCreate() | |
| 33 * void l_hmapDestroy() | |
| 34 * | |
| 35 * Hashmap: Accessors and modifiers | |
| 36 * L_HASHITEM *l_hmapLookup() | |
| 37 * l_int32 l_hmapRehash() | |
| 38 * | |
| 39 * (1) See also hashmap.h for a brief description of the design. | |
| 40 * (2) In a typical use, a set of objects (in an array or associated | |
| 41 * with image pixels) is represented by a hashmap. A uint64 key is | |
| 42 * produced for each object. This integer is then hashed into an | |
| 43 * index in a hashtable, using the mod function with the table size | |
| 44 * which is a prime number. Each entry in the hash table is a list | |
| 45 * of hash items. In lookup, the appropriate list is traversed, | |
| 46 * looking for the object key found earlier. | |
| 47 * (3) Hash functions that map points, strings and float64 to uint64 | |
| 48 * are given in utils1.c. | |
| 49 * (4) Use of the hashmap on points, strings and float64 data are | |
| 50 * given in ptafunc2.c, sarray2.c and dnafunc1.c. | |
| 51 * (5) Useful rule of thumb for hashing collisions: | |
| 52 * For a random hashing function (say, from strings to l_uint64), | |
| 53 * the probability of a collision increases as N^2 for N much | |
| 54 * less than 2^32. The quadratic behavior switches over to | |
| 55 * approaching 1.0 around 2^32, which is the square root of 2^64. | |
| 56 * So, for example, if you have 10^7 strings, the probability | |
| 57 * of a single collision using an l_uint64 key is on the order of | |
| 58 * (10^7/4x10^9)^2 ~ 10^-5. | |
| 59 * For a million strings, collisions are very rare (~10^-7 probability). | |
| 60 * </pre> | |
| 61 */ | |
| 62 | |
| 63 #ifdef HAVE_CONFIG_H | |
| 64 #include <config_auto.h> | |
| 65 #endif /* HAVE_CONFIG_H */ | |
| 66 | |
| 67 #include "allheaders.h" | |
| 68 | |
| 69 /* Limit on the hashtable size */ | |
| 70 static const l_uint32 MaxTabsize = 50000000; | |
| 71 /* Default values for creating the hashmap. */ | |
| 72 static const l_int32 DefaultInitNItems = 2000; | |
| 73 static const l_int32 DefaultMaxOcc = 2; | |
| 74 | |
| 75 /*--------------------------------------------------------------------------* | |
| 76 * Hashmap creation and destruction * | |
| 77 *--------------------------------------------------------------------------*/ | |
| 78 /*! | |
| 79 * \brief l_hmapCreate() | |
| 80 * | |
| 81 * \param[in] ninit initial estimate of the number of items to be stored; | |
| 82 * use 0 for default value. | |
| 83 * \param[in] maxocc max average occupancy of each list of hashitme; | |
| 84 * it should be in range [1 ... 5]; use 0 for default | |
| 85 * \return ptr to new hashmap, or NULL on error | |
| 86 * | |
| 87 * <pre> | |
| 88 * Notes: | |
| 89 * (1) If the maximum number n of items to be hashed is known in | |
| 90 * advance, suggested values are: | |
| 91 * %nint = 0.51 * n | |
| 92 * %maxocc = 2 | |
| 93 * With these values, the table will not need to be rehashed, | |
| 94 * even if all items have unique keys. | |
| 95 * (2) The actual initial size of the hashtab is the first prime | |
| 96 * number larger than %ninit/%maxocc. | |
| 97 * (3) Each entry into the hashtab points to alist of hash items | |
| 98 * (key, val, count). | |
| 99 * </pre> | |
| 100 */ | |
| 101 L_HASHMAP * | |
| 102 l_hmapCreate(l_int32 ninit, | |
| 103 l_int32 maxocc) | |
| 104 { | |
| 105 l_uint32 size, tabsize; | |
| 106 L_HASHMAP *hmap; | |
| 107 | |
| 108 ninit = L_MAX(ninit, DefaultInitNItems); | |
| 109 if (maxocc <= 0) maxocc = DefaultMaxOcc; | |
| 110 if (maxocc > 5) { | |
| 111 L_WARNING("maxocc = %d; non-optimal value. Set to default = %d\n", | |
| 112 __func__, maxocc, DefaultMaxOcc); | |
| 113 maxocc = DefaultMaxOcc; | |
| 114 } | |
| 115 size = ninit / maxocc; | |
| 116 if (size > MaxTabsize) { | |
| 117 L_ERROR("ninit/maxocc = %d > MaxTabsize = %d\n", __func__, | |
| 118 size, MaxTabsize); | |
| 119 return NULL; | |
| 120 } | |
| 121 | |
| 122 hmap = (L_HASHMAP *)LEPT_CALLOC(1, sizeof(L_HASHMAP)); | |
| 123 findNextLargerPrime(size, &tabsize); /* at least 101 */ | |
| 124 if ((hmap->hashtab = | |
| 125 (L_HASHITEM **)LEPT_CALLOC(tabsize, sizeof(L_HASHITEM *))) == NULL) { | |
| 126 LEPT_FREE(hmap); | |
| 127 return (L_HASHMAP *)ERROR_PTR("hashtab not made", __func__, NULL); | |
| 128 } | |
| 129 | |
| 130 hmap->nitems = 0; | |
| 131 hmap->ntogo = ninit; | |
| 132 hmap->maxocc = maxocc; | |
| 133 hmap->tabsize = tabsize; | |
| 134 return hmap; | |
| 135 } | |
| 136 | |
| 137 | |
| 138 /*! | |
| 139 * \brief l_hmapDestroy() | |
| 140 * | |
| 141 * \param[in,out] phmap to be nulled, if it exists | |
| 142 * \return void | |
| 143 */ | |
| 144 void | |
| 145 l_hmapDestroy(L_HASHMAP **phmap) | |
| 146 { | |
| 147 l_int32 i; | |
| 148 L_HASHITEM *hitem, *next; | |
| 149 L_HASHMAP *hmap; | |
| 150 | |
| 151 if (phmap == NULL) { | |
| 152 L_WARNING("ptr address is NULL!\n", __func__); | |
| 153 return; | |
| 154 } | |
| 155 | |
| 156 if ((hmap = *phmap) == NULL) | |
| 157 return; | |
| 158 | |
| 159 for (i = 0; i < hmap->tabsize; i++) { | |
| 160 for (hitem = hmap->hashtab[i]; hitem != NULL; hitem = next) { | |
| 161 next = hitem->next; | |
| 162 LEPT_FREE(hitem); | |
| 163 } | |
| 164 } | |
| 165 LEPT_FREE(hmap->hashtab); | |
| 166 LEPT_FREE(hmap); | |
| 167 *phmap = NULL; | |
| 168 } | |
| 169 | |
| 170 | |
| 171 /*--------------------------------------------------------------------------* | |
| 172 * Hashmap: Accessors and modifiers * | |
| 173 *--------------------------------------------------------------------------*/ | |
| 174 /*! | |
| 175 * \brief l_hmapLookup() | |
| 176 * | |
| 177 * \param[in] hmap | |
| 178 * \param[in] key to be hashed into an index in the hashtab | |
| 179 * \param[in] val to be stored in the hitem if creating it | |
| 180 * \param[in] op L_HMAP_CHECK, L_HMAP_CREATE | |
| 181 * \return ptr hitem; or null either on error or if not found | |
| 182 * with op == L_HMAP_CHECK. | |
| 183 * | |
| 184 * <pre> | |
| 185 * Notes: | |
| 186 * (1) This lookup function will also create a new hitem if requested. | |
| 187 * (2) The %op parameter does the following: | |
| 188 * op == L_HMAP_CHECK: return the hitem, or null if not found | |
| 189 * op == L_HMAP_CREATE: if found, increment the count; otherwise, make | |
| 190 * and store a new hitem; always return the hitem. | |
| 191 * (3) The key is a uint64. It is made by hashing some data in the object. | |
| 192 * (4) The value is an index into an array of the objects from which | |
| 193 * the hashtable has been constructed. | |
| 194 * (5) If an hitem is found, a pointer to it is returned. It is owned | |
| 195 * by the hashtable; do not destroy it. | |
| 196 * </pre> | |
| 197 */ | |
| 198 L_HASHITEM * | |
| 199 l_hmapLookup(L_HASHMAP *hmap, | |
| 200 l_uint64 key, | |
| 201 l_uint64 val, | |
| 202 l_int32 op) | |
| 203 { | |
| 204 l_uint32 index; | |
| 205 L_HASHITEM *hlist, *hitem; | |
| 206 | |
| 207 if (!hmap) | |
| 208 return (L_HASHITEM *)ERROR_PTR("hmap not defined", __func__, NULL); | |
| 209 if (op != L_HMAP_CHECK && op != L_HMAP_CREATE) | |
| 210 return (L_HASHITEM *)ERROR_PTR("invalid op", __func__, NULL); | |
| 211 | |
| 212 /* If found, return a ptr to the hitem (not a copy) */ | |
| 213 index = key % hmap->tabsize; /* into hashtab */ | |
| 214 hlist = hmap->hashtab[index]; /* head of the list */ | |
| 215 for (hitem = hlist; hitem != NULL; hitem = hitem->next) { | |
| 216 if (key == hitem->key) { | |
| 217 if (op == L_HMAP_CREATE) hitem->count++; | |
| 218 return hitem; | |
| 219 } | |
| 220 } | |
| 221 if (op == L_HMAP_CHECK) return NULL; | |
| 222 | |
| 223 /* Not found and %op == L_HMAP_CREATE. | |
| 224 * Make a new hitem and add to the head of the list */ | |
| 225 hitem = (L_HASHITEM *)LEPT_CALLOC(1, sizeof(L_HASHITEM)); | |
| 226 hitem->key = key; | |
| 227 hitem->val = val; | |
| 228 hitem->count = 1; | |
| 229 hitem->next = hlist; | |
| 230 hmap->hashtab[index] = hitem; | |
| 231 hmap->nitems++; | |
| 232 hmap->ntogo--; | |
| 233 | |
| 234 /* If hmap is full based on average occupancy, rehash */ | |
| 235 if (hmap->ntogo == 0) | |
| 236 l_hmapRehash(hmap); | |
| 237 | |
| 238 return hitem; | |
| 239 } | |
| 240 | |
| 241 | |
| 242 /*! | |
| 243 * \brief l_hmapRehash() | |
| 244 * | |
| 245 * \param[in] hmap | |
| 246 * \return 0 if OK; 1 on error | |
| 247 * | |
| 248 * <pre> | |
| 249 * Notes: | |
| 250 * (1) This is called when the average occupancy reaches maxocc. | |
| 251 * It doubles the size of the hashtab, and reuses the hash items. | |
| 252 * </pre> | |
| 253 */ | |
| 254 l_ok | |
| 255 l_hmapRehash(L_HASHMAP *hmap) | |
| 256 { | |
| 257 l_int32 i, index; | |
| 258 l_uint32 tabsize; | |
| 259 L_HASHITEM *hstore, *hitem, *next; | |
| 260 | |
| 261 if (!hmap) | |
| 262 return ERROR_INT("hmap not defined", __func__, 1); | |
| 263 | |
| 264 /* Put hash items in temporary storage in a single list, | |
| 265 * successively adding each to the list head. */ | |
| 266 hstore = NULL; /* ptr to resulting list */ | |
| 267 for (i = 0; i < hmap->tabsize; i++) { | |
| 268 for (hitem = hmap->hashtab[i]; hitem != NULL; hitem = next) { | |
| 269 next = hitem->next; | |
| 270 hitem->next = hstore; | |
| 271 hstore = hitem; | |
| 272 } | |
| 273 } | |
| 274 | |
| 275 /* Destroy the old hashtab and make a new one that is twice as big */ | |
| 276 LEPT_FREE(hmap->hashtab); | |
| 277 findNextLargerPrime(2 * hmap->tabsize, &tabsize); | |
| 278 hmap->tabsize = tabsize; | |
| 279 hmap->hashtab = (L_HASHITEM **)LEPT_CALLOC(tabsize, sizeof(L_HASHITEM *)); | |
| 280 if (hmap->hashtab == NULL) { | |
| 281 hmap->tabsize = 0; | |
| 282 return ERROR_INT("hashtab ptr array not made", __func__, 1); | |
| 283 } | |
| 284 hmap->ntogo = hmap->maxocc * tabsize - hmap->nitems; | |
| 285 | |
| 286 /* Populate with the stored hash items */ | |
| 287 for (hitem = hstore; hitem != NULL; hitem = next) { | |
| 288 next = hitem->next; | |
| 289 index = hitem->key % tabsize; /* into new hashtab */ | |
| 290 hitem->next = hmap->hashtab[index]; /* link to head of existing list */ | |
| 291 hmap->hashtab[index] = hitem; /* put at head */ | |
| 292 } | |
| 293 | |
| 294 return 0; | |
| 295 } |
