comparison mupdf-source/thirdparty/leptonica/src/hashmap.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*
28 * \file hashmap.c
29 * <pre>
30 *
31 * Hashmap creation, destruction
32 * L_HASHMAP *l_hmapCreate()
33 * void l_hmapDestroy()
34 *
35 * Hashmap: Accessors and modifiers
36 * L_HASHITEM *l_hmapLookup()
37 * l_int32 l_hmapRehash()
38 *
39 * (1) See also hashmap.h for a brief description of the design.
40 * (2) In a typical use, a set of objects (in an array or associated
41 * with image pixels) is represented by a hashmap. A uint64 key is
42 * produced for each object. This integer is then hashed into an
43 * index in a hashtable, using the mod function with the table size
44 * which is a prime number. Each entry in the hash table is a list
45 * of hash items. In lookup, the appropriate list is traversed,
46 * looking for the object key found earlier.
47 * (3) Hash functions that map points, strings and float64 to uint64
48 * are given in utils1.c.
49 * (4) Use of the hashmap on points, strings and float64 data are
50 * given in ptafunc2.c, sarray2.c and dnafunc1.c.
51 * (5) Useful rule of thumb for hashing collisions:
52 * For a random hashing function (say, from strings to l_uint64),
53 * the probability of a collision increases as N^2 for N much
54 * less than 2^32. The quadratic behavior switches over to
55 * approaching 1.0 around 2^32, which is the square root of 2^64.
56 * So, for example, if you have 10^7 strings, the probability
57 * of a single collision using an l_uint64 key is on the order of
58 * (10^7/4x10^9)^2 ~ 10^-5.
59 * For a million strings, collisions are very rare (~10^-7 probability).
60 * </pre>
61 */
62
63 #ifdef HAVE_CONFIG_H
64 #include <config_auto.h>
65 #endif /* HAVE_CONFIG_H */
66
67 #include "allheaders.h"
68
69 /* Limit on the hashtable size */
70 static const l_uint32 MaxTabsize = 50000000;
71 /* Default values for creating the hashmap. */
72 static const l_int32 DefaultInitNItems = 2000;
73 static const l_int32 DefaultMaxOcc = 2;
74
75 /*--------------------------------------------------------------------------*
76 * Hashmap creation and destruction *
77 *--------------------------------------------------------------------------*/
78 /*!
79 * \brief l_hmapCreate()
80 *
81 * \param[in] ninit initial estimate of the number of items to be stored;
82 * use 0 for default value.
83 * \param[in] maxocc max average occupancy of each list of hashitme;
84 * it should be in range [1 ... 5]; use 0 for default
85 * \return ptr to new hashmap, or NULL on error
86 *
87 * <pre>
88 * Notes:
89 * (1) If the maximum number n of items to be hashed is known in
90 * advance, suggested values are:
91 * %nint = 0.51 * n
92 * %maxocc = 2
93 * With these values, the table will not need to be rehashed,
94 * even if all items have unique keys.
95 * (2) The actual initial size of the hashtab is the first prime
96 * number larger than %ninit/%maxocc.
97 * (3) Each entry into the hashtab points to alist of hash items
98 * (key, val, count).
99 * </pre>
100 */
101 L_HASHMAP *
102 l_hmapCreate(l_int32 ninit,
103 l_int32 maxocc)
104 {
105 l_uint32 size, tabsize;
106 L_HASHMAP *hmap;
107
108 ninit = L_MAX(ninit, DefaultInitNItems);
109 if (maxocc <= 0) maxocc = DefaultMaxOcc;
110 if (maxocc > 5) {
111 L_WARNING("maxocc = %d; non-optimal value. Set to default = %d\n",
112 __func__, maxocc, DefaultMaxOcc);
113 maxocc = DefaultMaxOcc;
114 }
115 size = ninit / maxocc;
116 if (size > MaxTabsize) {
117 L_ERROR("ninit/maxocc = %d > MaxTabsize = %d\n", __func__,
118 size, MaxTabsize);
119 return NULL;
120 }
121
122 hmap = (L_HASHMAP *)LEPT_CALLOC(1, sizeof(L_HASHMAP));
123 findNextLargerPrime(size, &tabsize); /* at least 101 */
124 if ((hmap->hashtab =
125 (L_HASHITEM **)LEPT_CALLOC(tabsize, sizeof(L_HASHITEM *))) == NULL) {
126 LEPT_FREE(hmap);
127 return (L_HASHMAP *)ERROR_PTR("hashtab not made", __func__, NULL);
128 }
129
130 hmap->nitems = 0;
131 hmap->ntogo = ninit;
132 hmap->maxocc = maxocc;
133 hmap->tabsize = tabsize;
134 return hmap;
135 }
136
137
138 /*!
139 * \brief l_hmapDestroy()
140 *
141 * \param[in,out] phmap to be nulled, if it exists
142 * \return void
143 */
144 void
145 l_hmapDestroy(L_HASHMAP **phmap)
146 {
147 l_int32 i;
148 L_HASHITEM *hitem, *next;
149 L_HASHMAP *hmap;
150
151 if (phmap == NULL) {
152 L_WARNING("ptr address is NULL!\n", __func__);
153 return;
154 }
155
156 if ((hmap = *phmap) == NULL)
157 return;
158
159 for (i = 0; i < hmap->tabsize; i++) {
160 for (hitem = hmap->hashtab[i]; hitem != NULL; hitem = next) {
161 next = hitem->next;
162 LEPT_FREE(hitem);
163 }
164 }
165 LEPT_FREE(hmap->hashtab);
166 LEPT_FREE(hmap);
167 *phmap = NULL;
168 }
169
170
171 /*--------------------------------------------------------------------------*
172 * Hashmap: Accessors and modifiers *
173 *--------------------------------------------------------------------------*/
174 /*!
175 * \brief l_hmapLookup()
176 *
177 * \param[in] hmap
178 * \param[in] key to be hashed into an index in the hashtab
179 * \param[in] val to be stored in the hitem if creating it
180 * \param[in] op L_HMAP_CHECK, L_HMAP_CREATE
181 * \return ptr hitem; or null either on error or if not found
182 * with op == L_HMAP_CHECK.
183 *
184 * <pre>
185 * Notes:
186 * (1) This lookup function will also create a new hitem if requested.
187 * (2) The %op parameter does the following:
188 * op == L_HMAP_CHECK: return the hitem, or null if not found
189 * op == L_HMAP_CREATE: if found, increment the count; otherwise, make
190 * and store a new hitem; always return the hitem.
191 * (3) The key is a uint64. It is made by hashing some data in the object.
192 * (4) The value is an index into an array of the objects from which
193 * the hashtable has been constructed.
194 * (5) If an hitem is found, a pointer to it is returned. It is owned
195 * by the hashtable; do not destroy it.
196 * </pre>
197 */
198 L_HASHITEM *
199 l_hmapLookup(L_HASHMAP *hmap,
200 l_uint64 key,
201 l_uint64 val,
202 l_int32 op)
203 {
204 l_uint32 index;
205 L_HASHITEM *hlist, *hitem;
206
207 if (!hmap)
208 return (L_HASHITEM *)ERROR_PTR("hmap not defined", __func__, NULL);
209 if (op != L_HMAP_CHECK && op != L_HMAP_CREATE)
210 return (L_HASHITEM *)ERROR_PTR("invalid op", __func__, NULL);
211
212 /* If found, return a ptr to the hitem (not a copy) */
213 index = key % hmap->tabsize; /* into hashtab */
214 hlist = hmap->hashtab[index]; /* head of the list */
215 for (hitem = hlist; hitem != NULL; hitem = hitem->next) {
216 if (key == hitem->key) {
217 if (op == L_HMAP_CREATE) hitem->count++;
218 return hitem;
219 }
220 }
221 if (op == L_HMAP_CHECK) return NULL;
222
223 /* Not found and %op == L_HMAP_CREATE.
224 * Make a new hitem and add to the head of the list */
225 hitem = (L_HASHITEM *)LEPT_CALLOC(1, sizeof(L_HASHITEM));
226 hitem->key = key;
227 hitem->val = val;
228 hitem->count = 1;
229 hitem->next = hlist;
230 hmap->hashtab[index] = hitem;
231 hmap->nitems++;
232 hmap->ntogo--;
233
234 /* If hmap is full based on average occupancy, rehash */
235 if (hmap->ntogo == 0)
236 l_hmapRehash(hmap);
237
238 return hitem;
239 }
240
241
242 /*!
243 * \brief l_hmapRehash()
244 *
245 * \param[in] hmap
246 * \return 0 if OK; 1 on error
247 *
248 * <pre>
249 * Notes:
250 * (1) This is called when the average occupancy reaches maxocc.
251 * It doubles the size of the hashtab, and reuses the hash items.
252 * </pre>
253 */
254 l_ok
255 l_hmapRehash(L_HASHMAP *hmap)
256 {
257 l_int32 i, index;
258 l_uint32 tabsize;
259 L_HASHITEM *hstore, *hitem, *next;
260
261 if (!hmap)
262 return ERROR_INT("hmap not defined", __func__, 1);
263
264 /* Put hash items in temporary storage in a single list,
265 * successively adding each to the list head. */
266 hstore = NULL; /* ptr to resulting list */
267 for (i = 0; i < hmap->tabsize; i++) {
268 for (hitem = hmap->hashtab[i]; hitem != NULL; hitem = next) {
269 next = hitem->next;
270 hitem->next = hstore;
271 hstore = hitem;
272 }
273 }
274
275 /* Destroy the old hashtab and make a new one that is twice as big */
276 LEPT_FREE(hmap->hashtab);
277 findNextLargerPrime(2 * hmap->tabsize, &tabsize);
278 hmap->tabsize = tabsize;
279 hmap->hashtab = (L_HASHITEM **)LEPT_CALLOC(tabsize, sizeof(L_HASHITEM *));
280 if (hmap->hashtab == NULL) {
281 hmap->tabsize = 0;
282 return ERROR_INT("hashtab ptr array not made", __func__, 1);
283 }
284 hmap->ntogo = hmap->maxocc * tabsize - hmap->nitems;
285
286 /* Populate with the stored hash items */
287 for (hitem = hstore; hitem != NULL; hitem = next) {
288 next = hitem->next;
289 index = hitem->key % tabsize; /* into new hashtab */
290 hitem->next = hmap->hashtab[index]; /* link to head of existing list */
291 hmap->hashtab[index] = hitem; /* put at head */
292 }
293
294 return 0;
295 }