Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/source/fitz/hash.c @ 3:2c135c81b16c
MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:44:09 +0200 |
| parents | b50eed0cc0ef |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/source/fitz/hash.c Mon Sep 15 11:44:09 2025 +0200 @@ -0,0 +1,329 @@ +// Copyright (C) 2004-2021 Artifex Software, Inc. +// +// This file is part of MuPDF. +// +// MuPDF is free software: you can redistribute it and/or modify it under the +// terms of the GNU Affero General Public License as published by the Free +// Software Foundation, either version 3 of the License, or (at your option) +// any later version. +// +// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more +// details. +// +// You should have received a copy of the GNU Affero General Public License +// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> +// +// Alternative licensing terms are available from the licensor. +// For commercial licensing, see <https://www.artifex.com/> or contact +// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, +// CA 94129, USA, for further information. + +#include "mupdf/fitz.h" + +#include <string.h> +#include <assert.h> + +/* + Simple hashtable with open addressing linear probe. + Unlike text book examples, removing entries works + correctly in this implementation, so it won't start + exhibiting bad behaviour if entries are inserted + and removed frequently. +*/ + +typedef struct +{ + unsigned char key[FZ_HASH_TABLE_KEY_LENGTH]; + void *val; +} fz_hash_entry; + +struct fz_hash_table +{ + int keylen; + int size; + int load; + int lock; /* -1 or the lock used to protect this hash table */ + fz_hash_table_drop_fn *drop_val; + fz_hash_entry *ents; +}; + +static unsigned hash(const unsigned char *s, int len) +{ + unsigned val = 0; + int i; + for (i = 0; i < len; i++) + { + val += s[i]; + val += (val << 10); + val ^= (val >> 6); + } + val += (val << 3); + val ^= (val >> 11); + val += (val << 15); + return val; +} + +fz_hash_table * +fz_new_hash_table(fz_context *ctx, int initialsize, int keylen, int lock, fz_hash_table_drop_fn *drop_val) +{ + fz_hash_table *table; + + if (keylen > FZ_HASH_TABLE_KEY_LENGTH) + fz_throw(ctx, FZ_ERROR_ARGUMENT, "hash table key length too large"); + + table = fz_malloc_struct(ctx, fz_hash_table); + table->keylen = keylen; + table->size = initialsize; + table->load = 0; + table->lock = lock; + table->drop_val = drop_val; + fz_try(ctx) + { + table->ents = Memento_label(fz_malloc_array(ctx, table->size, fz_hash_entry), "hash_entries"); + memset(table->ents, 0, sizeof(fz_hash_entry) * table->size); + } + fz_catch(ctx) + { + fz_free(ctx, table); + fz_rethrow(ctx); + } + + return table; +} + +void +fz_drop_hash_table(fz_context *ctx, fz_hash_table *table) +{ + if (!table) + return; + + if (table->drop_val) + { + int i, n = table->size; + for (i = 0; i < n; ++i) + { + void *v = table->ents[i].val; + if (v) + table->drop_val(ctx, v); + } + } + + fz_free(ctx, table->ents); + fz_free(ctx, table); +} + +static void * +do_hash_insert(fz_context *ctx, fz_hash_table *table, const void *key, void *val) +{ + fz_hash_entry *ents; + unsigned size; + unsigned pos; + + ents = table->ents; + size = table->size; + pos = hash(key, table->keylen) % size; + + if (table->lock >= 0) + fz_assert_lock_held(ctx, table->lock); + + while (1) + { + if (!ents[pos].val) + { + memcpy(ents[pos].key, key, table->keylen); + ents[pos].val = val; + table->load ++; + return NULL; + } + + if (memcmp(key, ents[pos].key, table->keylen) == 0) + { + /* This is legal, but should rarely happen. */ + return ents[pos].val; + } + + pos = (pos + 1) % size; + } +} + +/* Entered with the lock taken, held throughout and at exit, UNLESS the lock + * is the alloc lock in which case it may be momentarily dropped. */ +static void +fz_resize_hash(fz_context *ctx, fz_hash_table *table, int newsize) +{ + fz_hash_entry *oldents = table->ents; + fz_hash_entry *newents; + int oldsize = table->size; + int oldload = table->load; + int i; + + if (newsize < oldload * 8 / 10) + { + fz_warn(ctx, "assert: resize hash too small"); + return; + } + + if (table->lock == FZ_LOCK_ALLOC) + fz_unlock(ctx, table->lock); + newents = Memento_label(fz_malloc_no_throw(ctx, newsize * sizeof (fz_hash_entry)), "hash_entries"); + if (table->lock == FZ_LOCK_ALLOC) + fz_lock(ctx, table->lock); + if (table->lock >= 0) + { + if (table->size >= newsize) + { + /* Someone else fixed it before we could lock! */ + if (table->lock == FZ_LOCK_ALLOC) + fz_unlock(ctx, table->lock); + fz_free(ctx, newents); + if (table->lock == FZ_LOCK_ALLOC) + fz_lock(ctx, table->lock); + return; + } + } + if (newents == NULL) + fz_throw(ctx, FZ_ERROR_SYSTEM, "hash table resize failed; out of memory (%d entries)", newsize); + table->ents = newents; + memset(table->ents, 0, sizeof(fz_hash_entry) * newsize); + table->size = newsize; + table->load = 0; + + for (i = 0; i < oldsize; i++) + { + if (oldents[i].val) + { + do_hash_insert(ctx, table, oldents[i].key, oldents[i].val); + } + } + + if (table->lock == FZ_LOCK_ALLOC) + fz_unlock(ctx, table->lock); + fz_free(ctx, oldents); + if (table->lock == FZ_LOCK_ALLOC) + fz_lock(ctx, table->lock); +} + +void * +fz_hash_find(fz_context *ctx, fz_hash_table *table, const void *key) +{ + fz_hash_entry *ents = table->ents; + unsigned size = table->size; + unsigned pos = hash(key, table->keylen) % size; + + if (table->lock >= 0) + fz_assert_lock_held(ctx, table->lock); + + while (1) + { + if (!ents[pos].val) + return NULL; + + if (memcmp(key, ents[pos].key, table->keylen) == 0) + return ents[pos].val; + + pos = (pos + 1) % size; + } +} + +void * +fz_hash_insert(fz_context *ctx, fz_hash_table *table, const void *key, void *val) +{ + if (table->load > table->size * 8 / 10) + fz_resize_hash(ctx, table, table->size * 2); + return do_hash_insert(ctx, table, key, val); +} + +static void +do_removal(fz_context *ctx, fz_hash_table *table, unsigned hole) +{ + fz_hash_entry *ents = table->ents; + unsigned size = table->size; + unsigned look, code; + + if (table->lock >= 0) + fz_assert_lock_held(ctx, table->lock); + + ents[hole].val = NULL; + + look = hole + 1; + if (look == size) + look = 0; + + while (ents[look].val) + { + code = hash(ents[look].key, table->keylen) % size; + if ((code <= hole && hole < look) || + (look < code && code <= hole) || + (hole < look && look < code)) + { + ents[hole] = ents[look]; + ents[look].val = NULL; + hole = look; + } + + look++; + if (look == size) + look = 0; + } + + table->load --; +} + +void +fz_hash_remove(fz_context *ctx, fz_hash_table *table, const void *key) +{ + fz_hash_entry *ents = table->ents; + unsigned size = table->size; + unsigned pos = hash(key, table->keylen) % size; + + if (table->lock >= 0) + fz_assert_lock_held(ctx, table->lock); + + while (1) + { + if (!ents[pos].val) + { + fz_warn(ctx, "assert: remove non-existent hash entry"); + return; + } + + if (memcmp(key, ents[pos].key, table->keylen) == 0) + { + do_removal(ctx, table, pos); + return; + } + + pos++; + if (pos == size) + pos = 0; + } +} + +void +fz_hash_for_each(fz_context *ctx, fz_hash_table *table, void *state, fz_hash_table_for_each_fn *callback) +{ + int i; + for (i = 0; i < table->size; ++i) + if (table->ents[i].val) + callback(ctx, state, table->ents[i].key, table->keylen, table->ents[i].val); +} + +void +fz_hash_filter(fz_context *ctx, fz_hash_table *table, void *state, fz_hash_table_filter_fn *callback) +{ + int i; +restart: + for (i = 0; i < table->size; ++i) + { + if (table->ents[i].val) + { + if (callback(ctx, state, table->ents[i].key, table->keylen, table->ents[i].val)) + { + do_removal(ctx, table, i); + goto restart; /* we may have moved some slots around, so just restart the scan */ + } + } + } +}
