Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/leptonica/src/hashmap.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/leptonica/src/hashmap.c Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,295 @@ +/*====================================================================* + - Copyright (C) 2001 Leptonica. All rights reserved. + - + - Redistribution and use in source and binary forms, with or without + - modification, are permitted provided that the following conditions + - are met: + - 1. Redistributions of source code must retain the above copyright + - notice, this list of conditions and the following disclaimer. + - 2. Redistributions in binary form must reproduce the above + - copyright notice, this list of conditions and the following + - disclaimer in the documentation and/or other materials + - provided with the distribution. + - + - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY + - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *====================================================================*/ + +/* + * \file hashmap.c + * <pre> + * + * Hashmap creation, destruction + * L_HASHMAP *l_hmapCreate() + * void l_hmapDestroy() + * + * Hashmap: Accessors and modifiers + * L_HASHITEM *l_hmapLookup() + * l_int32 l_hmapRehash() + * + * (1) See also hashmap.h for a brief description of the design. + * (2) In a typical use, a set of objects (in an array or associated + * with image pixels) is represented by a hashmap. A uint64 key is + * produced for each object. This integer is then hashed into an + * index in a hashtable, using the mod function with the table size + * which is a prime number. Each entry in the hash table is a list + * of hash items. In lookup, the appropriate list is traversed, + * looking for the object key found earlier. + * (3) Hash functions that map points, strings and float64 to uint64 + * are given in utils1.c. + * (4) Use of the hashmap on points, strings and float64 data are + * given in ptafunc2.c, sarray2.c and dnafunc1.c. + * (5) Useful rule of thumb for hashing collisions: + * For a random hashing function (say, from strings to l_uint64), + * the probability of a collision increases as N^2 for N much + * less than 2^32. The quadratic behavior switches over to + * approaching 1.0 around 2^32, which is the square root of 2^64. + * So, for example, if you have 10^7 strings, the probability + * of a single collision using an l_uint64 key is on the order of + * (10^7/4x10^9)^2 ~ 10^-5. + * For a million strings, collisions are very rare (~10^-7 probability). + * </pre> + */ + +#ifdef HAVE_CONFIG_H +#include <config_auto.h> +#endif /* HAVE_CONFIG_H */ + +#include "allheaders.h" + + /* Limit on the hashtable size */ +static const l_uint32 MaxTabsize = 50000000; + /* Default values for creating the hashmap. */ +static const l_int32 DefaultInitNItems = 2000; +static const l_int32 DefaultMaxOcc = 2; + +/*--------------------------------------------------------------------------* + * Hashmap creation and destruction * + *--------------------------------------------------------------------------*/ +/*! + * \brief l_hmapCreate() + * + * \param[in] ninit initial estimate of the number of items to be stored; + * use 0 for default value. + * \param[in] maxocc max average occupancy of each list of hashitme; + * it should be in range [1 ... 5]; use 0 for default + * \return ptr to new hashmap, or NULL on error + * + * <pre> + * Notes: + * (1) If the maximum number n of items to be hashed is known in + * advance, suggested values are: + * %nint = 0.51 * n + * %maxocc = 2 + * With these values, the table will not need to be rehashed, + * even if all items have unique keys. + * (2) The actual initial size of the hashtab is the first prime + * number larger than %ninit/%maxocc. + * (3) Each entry into the hashtab points to alist of hash items + * (key, val, count). + * </pre> + */ +L_HASHMAP * +l_hmapCreate(l_int32 ninit, + l_int32 maxocc) +{ +l_uint32 size, tabsize; +L_HASHMAP *hmap; + + ninit = L_MAX(ninit, DefaultInitNItems); + if (maxocc <= 0) maxocc = DefaultMaxOcc; + if (maxocc > 5) { + L_WARNING("maxocc = %d; non-optimal value. Set to default = %d\n", + __func__, maxocc, DefaultMaxOcc); + maxocc = DefaultMaxOcc; + } + size = ninit / maxocc; + if (size > MaxTabsize) { + L_ERROR("ninit/maxocc = %d > MaxTabsize = %d\n", __func__, + size, MaxTabsize); + return NULL; + } + + hmap = (L_HASHMAP *)LEPT_CALLOC(1, sizeof(L_HASHMAP)); + findNextLargerPrime(size, &tabsize); /* at least 101 */ + if ((hmap->hashtab = + (L_HASHITEM **)LEPT_CALLOC(tabsize, sizeof(L_HASHITEM *))) == NULL) { + LEPT_FREE(hmap); + return (L_HASHMAP *)ERROR_PTR("hashtab not made", __func__, NULL); + } + + hmap->nitems = 0; + hmap->ntogo = ninit; + hmap->maxocc = maxocc; + hmap->tabsize = tabsize; + return hmap; +} + + +/*! + * \brief l_hmapDestroy() + * + * \param[in,out] phmap to be nulled, if it exists + * \return void + */ +void +l_hmapDestroy(L_HASHMAP **phmap) +{ +l_int32 i; +L_HASHITEM *hitem, *next; +L_HASHMAP *hmap; + + if (phmap == NULL) { + L_WARNING("ptr address is NULL!\n", __func__); + return; + } + + if ((hmap = *phmap) == NULL) + return; + + for (i = 0; i < hmap->tabsize; i++) { + for (hitem = hmap->hashtab[i]; hitem != NULL; hitem = next) { + next = hitem->next; + LEPT_FREE(hitem); + } + } + LEPT_FREE(hmap->hashtab); + LEPT_FREE(hmap); + *phmap = NULL; +} + + +/*--------------------------------------------------------------------------* + * Hashmap: Accessors and modifiers * + *--------------------------------------------------------------------------*/ +/*! + * \brief l_hmapLookup() + * + * \param[in] hmap + * \param[in] key to be hashed into an index in the hashtab + * \param[in] val to be stored in the hitem if creating it + * \param[in] op L_HMAP_CHECK, L_HMAP_CREATE + * \return ptr hitem; or null either on error or if not found + * with op == L_HMAP_CHECK. + * + * <pre> + * Notes: + * (1) This lookup function will also create a new hitem if requested. + * (2) The %op parameter does the following: + * op == L_HMAP_CHECK: return the hitem, or null if not found + * op == L_HMAP_CREATE: if found, increment the count; otherwise, make + * and store a new hitem; always return the hitem. + * (3) The key is a uint64. It is made by hashing some data in the object. + * (4) The value is an index into an array of the objects from which + * the hashtable has been constructed. + * (5) If an hitem is found, a pointer to it is returned. It is owned + * by the hashtable; do not destroy it. + * </pre> + */ +L_HASHITEM * +l_hmapLookup(L_HASHMAP *hmap, + l_uint64 key, + l_uint64 val, + l_int32 op) +{ +l_uint32 index; +L_HASHITEM *hlist, *hitem; + + if (!hmap) + return (L_HASHITEM *)ERROR_PTR("hmap not defined", __func__, NULL); + if (op != L_HMAP_CHECK && op != L_HMAP_CREATE) + return (L_HASHITEM *)ERROR_PTR("invalid op", __func__, NULL); + + /* If found, return a ptr to the hitem (not a copy) */ + index = key % hmap->tabsize; /* into hashtab */ + hlist = hmap->hashtab[index]; /* head of the list */ + for (hitem = hlist; hitem != NULL; hitem = hitem->next) { + if (key == hitem->key) { + if (op == L_HMAP_CREATE) hitem->count++; + return hitem; + } + } + if (op == L_HMAP_CHECK) return NULL; + + /* Not found and %op == L_HMAP_CREATE. + * Make a new hitem and add to the head of the list */ + hitem = (L_HASHITEM *)LEPT_CALLOC(1, sizeof(L_HASHITEM)); + hitem->key = key; + hitem->val = val; + hitem->count = 1; + hitem->next = hlist; + hmap->hashtab[index] = hitem; + hmap->nitems++; + hmap->ntogo--; + + /* If hmap is full based on average occupancy, rehash */ + if (hmap->ntogo == 0) + l_hmapRehash(hmap); + + return hitem; +} + + +/*! + * \brief l_hmapRehash() + * + * \param[in] hmap + * \return 0 if OK; 1 on error + * + * <pre> + * Notes: + * (1) This is called when the average occupancy reaches maxocc. + * It doubles the size of the hashtab, and reuses the hash items. + * </pre> + */ +l_ok +l_hmapRehash(L_HASHMAP *hmap) +{ +l_int32 i, index; +l_uint32 tabsize; +L_HASHITEM *hstore, *hitem, *next; + + if (!hmap) + return ERROR_INT("hmap not defined", __func__, 1); + + /* Put hash items in temporary storage in a single list, + * successively adding each to the list head. */ + hstore = NULL; /* ptr to resulting list */ + for (i = 0; i < hmap->tabsize; i++) { + for (hitem = hmap->hashtab[i]; hitem != NULL; hitem = next) { + next = hitem->next; + hitem->next = hstore; + hstore = hitem; + } + } + + /* Destroy the old hashtab and make a new one that is twice as big */ + LEPT_FREE(hmap->hashtab); + findNextLargerPrime(2 * hmap->tabsize, &tabsize); + hmap->tabsize = tabsize; + hmap->hashtab = (L_HASHITEM **)LEPT_CALLOC(tabsize, sizeof(L_HASHITEM *)); + if (hmap->hashtab == NULL) { + hmap->tabsize = 0; + return ERROR_INT("hashtab ptr array not made", __func__, 1); + } + hmap->ntogo = hmap->maxocc * tabsize - hmap->nitems; + + /* Populate with the stored hash items */ + for (hitem = hstore; hitem != NULL; hitem = next) { + next = hitem->next; + index = hitem->key % tabsize; /* into new hashtab */ + hitem->next = hmap->hashtab[index]; /* link to head of existing list */ + hmap->hashtab[index] = hitem; /* put at head */ + } + + return 0; +}
