view mupdf-source/thirdparty/leptonica/src/hashmap.h @ 32:72c1b70d4f5c

Also apply -Werror=implicit-function-declaration
author Franz Glasner <fzglas.hg@dom66.de>
date Sun, 21 Sep 2025 15:10:12 +0200
parents b50eed0cc0ef
children
line wrap: on
line source

/*====================================================================*
 -  Copyright (C) 2001 Leptonica.  All rights reserved.
 -
 -  Redistribution and use in source and binary forms, with or without
 -  modification, are permitted provided that the following conditions
 -  are met:
 -  1. Redistributions of source code must retain the above copyright
 -     notice, this list of conditions and the following disclaimer.
 -  2. Redistributions in binary form must reproduce the above
 -     copyright notice, this list of conditions and the following
 -     disclaimer in the documentation and/or other materials
 -     provided with the distribution.
 -
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 -  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL ANY
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *====================================================================*/

#ifndef  LEPTONICA_HASHMAP_H
#define  LEPTONICA_HASHMAP_H

/*
 * \file hashmap.h
 *
 * <pre>
 *  Contains the following structs:
 *      struct L_Hashmap
 *      struct L_Hashitem
 *
 *  Goal:
 *      You have a set of objects (integers, strings, pts, whatever),
 *      and you want to store them in a data structure (L_Hashmap) that allows
 *      O(1) to find, insert and count the occurrences of such an object.
 *      The tool is a hash map.  This is not ordered, unlike the O(log n)
 *      ordered map (L_Amap), which is implemented by an rbtree.
 *
 *  In slightly more detail:
 *      Store the set of objects in an array, which in general can be
 *      held in a pointer array (L_Ptra).  You need a hash function that
 *      will generate a unique uint64 key from each object.  For our simple
 *      built-in arrays, such as float, double and Pta (points), these hash
 *      functions are in utils1.c.  Then for each object in the array,
 *      you store the key and the index to the array of objects (the val)
 *      in a list of hashitems in the hash table, where the specific
 *      list is determined by the key (specifically, the mod of the key
 *      with the size of the hashtable).
 *
 *  In yet more detail:
 *  (1) The design loosely follows the design of a hashmap in "The Practice
 *      of Programming by Brian Kernighan and Rob Pike, Addison Wesley, 1999.
 *  (2) The L_Hashmap contains a hashtable with a prime number of pointers
 *      to lists of hashitems.  The lookup function takes a key and a value,
 *      which are both 64-bit unsigned integers.  The key has been generated
 *      by hashing the input object in a way that avoids collisions between
 *      different objects. The value is an integer that identifies the
 *      object; typically it is the index into an array of objects.
 *      The hashtable size is a prime number, and an index into the table
 *      is made from the key by taking its mod with the hashtable size.
 *      The index points to a list of hashitems, which have all been hashed
 *      by the mod function into the same index in the table.
 *      Because the key is expected to be randomly distributed in uint64,
 *      the table indices should be uniformly distributed, resulting in
 *      approximately the same number of items being placed in each of
 *      these lists.  The list of hashitems is traversed, comparing the
 *      input uint64 key in the lookup() function with the key stored in
 *      each hashitem.  If a hashitem is found with a matching key,
 *      return a pointer to that hashitem.  If not found and the op is
 *      L_HASH_CREATE, make a new hash item, add it to the list, and
 *      return a pointer to it.
 *  (3) The count field in the hashitem gives the number of times the
 *      key has been seen when storing key/value pairs.
 *  (4) The val field is the index into an array of the objects.  When
 *      the hashmap is initially made, it is the index of the first item
 *      seen with its key.
 *  (5) For the hashmap to work efficiently, the lists must not become too
 *      long.  Because in general you do not know the number of objects
 *      in advance, it is important to be able to dynamically resize
 *      the hashtable as it grows.  The hashmap is initialized with
 *      room for some number of hashitems and the maximum average list
 *      size.  These two numbers determine the size of the hashtable,
 *      which is constrained to be a prime number.  As the hashtable grows,
 *      if the average occupancy exceeds the input %maxocc, the hashtable
 *      size is approximately doubled and the existing items are re-hashed
 *      into it, mod the new (prime number) table size.
 * </pre>
 */

/*------------------------------------------------------------------------*
 *                           Hash map structs                             *
 *------------------------------------------------------------------------*/
/*! General hash map */
struct L_Hashmap
{
    l_int32              nitems;    /*!< number of stored items              */
    l_int32              ntogo;     /*!< number of items to be stored        */
                                    /*!< before resizing the hashmap         */
    l_int32              maxocc;    /*!< max average occupancy allowed       */
    struct L_Hashitem  **hashtab;   /*!< array of hash item ptrs             */
    l_int32              tabsize;   /*!< size of array of hash item ptrs     */
};
typedef struct L_Hashmap  L_HASHMAP;

/*! Hash item, containing storage for the key, value and count.  The key
    is a l_uint64, which is hashed by the mod function to find the index
    into the hashtab. */
struct L_Hashitem
{
    l_uint64            key;    /*!< key is hashed into index into hashtab   */
    l_uint64            val;    /*!< number stored associated with the key   */
    l_int32             count;  /*!< number of elements seen with this key   */
    struct L_Hashitem  *next;   /*!< ptr to the next in the list             */
};
typedef struct L_Hashitem  L_HASHITEM;


/*------------------------------------------------------------------------*
 *                            Hashmap flags                               *
 *------------------------------------------------------------------------*/
/*! Hashmap Lookup */
enum {
    L_UNDEFINED = 0,         /*!< invalid operation                         */
    L_HMAP_CHECK = 1,        /*!< check if this key/val has been stored     */
    L_HMAP_CREATE = 2        /*!< create and store a hashitem if not found  */
};

#endif  /* LEPTONICA_HASHMAP_H */