diff mupdf-source/thirdparty/leptonica/src/hashmap.h @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/leptonica/src/hashmap.h	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,134 @@
+/*====================================================================*
+ -  Copyright (C) 2001 Leptonica.  All rights reserved.
+ -
+ -  Redistribution and use in source and binary forms, with or without
+ -  modification, are permitted provided that the following conditions
+ -  are met:
+ -  1. Redistributions of source code must retain the above copyright
+ -     notice, this list of conditions and the following disclaimer.
+ -  2. Redistributions in binary form must reproduce the above
+ -     copyright notice, this list of conditions and the following
+ -     disclaimer in the documentation and/or other materials
+ -     provided with the distribution.
+ -
+ -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
+ -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *====================================================================*/
+
+#ifndef  LEPTONICA_HASHMAP_H
+#define  LEPTONICA_HASHMAP_H
+
+/*
+ * \file hashmap.h
+ *
+ * <pre>
+ *  Contains the following structs:
+ *      struct L_Hashmap
+ *      struct L_Hashitem
+ *
+ *  Goal:
+ *      You have a set of objects (integers, strings, pts, whatever),
+ *      and you want to store them in a data structure (L_Hashmap) that allows
+ *      O(1) to find, insert and count the occurrences of such an object.
+ *      The tool is a hash map.  This is not ordered, unlike the O(log n)
+ *      ordered map (L_Amap), which is implemented by an rbtree.
+ *
+ *  In slightly more detail:
+ *      Store the set of objects in an array, which in general can be
+ *      held in a pointer array (L_Ptra).  You need a hash function that
+ *      will generate a unique uint64 key from each object.  For our simple
+ *      built-in arrays, such as float, double and Pta (points), these hash
+ *      functions are in utils1.c.  Then for each object in the array,
+ *      you store the key and the index to the array of objects (the val)
+ *      in a list of hashitems in the hash table, where the specific
+ *      list is determined by the key (specifically, the mod of the key
+ *      with the size of the hashtable).
+ *
+ *  In yet more detail:
+ *  (1) The design loosely follows the design of a hashmap in "The Practice
+ *      of Programming by Brian Kernighan and Rob Pike, Addison Wesley, 1999.
+ *  (2) The L_Hashmap contains a hashtable with a prime number of pointers
+ *      to lists of hashitems.  The lookup function takes a key and a value,
+ *      which are both 64-bit unsigned integers.  The key has been generated
+ *      by hashing the input object in a way that avoids collisions between
+ *      different objects. The value is an integer that identifies the
+ *      object; typically it is the index into an array of objects.
+ *      The hashtable size is a prime number, and an index into the table
+ *      is made from the key by taking its mod with the hashtable size.
+ *      The index points to a list of hashitems, which have all been hashed
+ *      by the mod function into the same index in the table.
+ *      Because the key is expected to be randomly distributed in uint64,
+ *      the table indices should be uniformly distributed, resulting in
+ *      approximately the same number of items being placed in each of
+ *      these lists.  The list of hashitems is traversed, comparing the
+ *      input uint64 key in the lookup() function with the key stored in
+ *      each hashitem.  If a hashitem is found with a matching key,
+ *      return a pointer to that hashitem.  If not found and the op is
+ *      L_HASH_CREATE, make a new hash item, add it to the list, and
+ *      return a pointer to it.
+ *  (3) The count field in the hashitem gives the number of times the
+ *      key has been seen when storing key/value pairs.
+ *  (4) The val field is the index into an array of the objects.  When
+ *      the hashmap is initially made, it is the index of the first item
+ *      seen with its key.
+ *  (5) For the hashmap to work efficiently, the lists must not become too
+ *      long.  Because in general you do not know the number of objects
+ *      in advance, it is important to be able to dynamically resize
+ *      the hashtable as it grows.  The hashmap is initialized with
+ *      room for some number of hashitems and the maximum average list
+ *      size.  These two numbers determine the size of the hashtable,
+ *      which is constrained to be a prime number.  As the hashtable grows,
+ *      if the average occupancy exceeds the input %maxocc, the hashtable
+ *      size is approximately doubled and the existing items are re-hashed
+ *      into it, mod the new (prime number) table size.
+ * </pre>
+ */
+
+/*------------------------------------------------------------------------*
+ *                           Hash map structs                             *
+ *------------------------------------------------------------------------*/
+/*! General hash map */
+struct L_Hashmap
+{
+    l_int32              nitems;    /*!< number of stored items              */
+    l_int32              ntogo;     /*!< number of items to be stored        */
+                                    /*!< before resizing the hashmap         */
+    l_int32              maxocc;    /*!< max average occupancy allowed       */
+    struct L_Hashitem  **hashtab;   /*!< array of hash item ptrs             */
+    l_int32              tabsize;   /*!< size of array of hash item ptrs     */
+};
+typedef struct L_Hashmap  L_HASHMAP;
+
+/*! Hash item, containing storage for the key, value and count.  The key
+    is a l_uint64, which is hashed by the mod function to find the index
+    into the hashtab. */
+struct L_Hashitem
+{
+    l_uint64            key;    /*!< key is hashed into index into hashtab   */
+    l_uint64            val;    /*!< number stored associated with the key   */
+    l_int32             count;  /*!< number of elements seen with this key   */
+    struct L_Hashitem  *next;   /*!< ptr to the next in the list             */
+};
+typedef struct L_Hashitem  L_HASHITEM;
+
+
+/*------------------------------------------------------------------------*
+ *                            Hashmap flags                               *
+ *------------------------------------------------------------------------*/
+/*! Hashmap Lookup */
+enum {
+    L_UNDEFINED = 0,         /*!< invalid operation                         */
+    L_HMAP_CHECK = 1,        /*!< check if this key/val has been stored     */
+    L_HMAP_CREATE = 2        /*!< create and store a hashitem if not found  */
+};
+
+#endif  /* LEPTONICA_HASHMAP_H */