Mercurial > hgrepos > Python2 > PyMuPDF
view mupdf-source/thirdparty/leptonica/src/dnafunc1.c @ 32:72c1b70d4f5c
Also apply -Werror=implicit-function-declaration
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Sun, 21 Sep 2025 15:10:12 +0200 |
| parents | b50eed0cc0ef |
| children |
line wrap: on
line source
/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /*! * \file dnafunc1.c * <pre> * * Rearrangements * l_int32 *l_dnaJoin() * l_int32 *l_dnaaFlattenToDna() * L_DNA *l_dnaSelectRange() * * Conversion between numa and dna * NUMA *l_dnaConvertToNuma() * L_DNA *numaConvertToDna() * * Conversion from pix data to dna * L_DNA *pixConvertDataToDna() * * Set operations using aset (rbtree) * L_ASET *l_asetCreateFromDna() * L_DNA *l_dnaRemoveDupsByAset() * L_DNA *l_dnaUnionByAset() * L_DNA *l_dnaIntersectionByAset() * * Hashmap operations * L_HASHMAP *l_hmapCreateFromDna() * l_int32 l_dnaRemoveDupsByHmap() * l_int32 l_dnaUnionByHmap() * l_int32 l_dnaIntersectionByHmap() * l_int32 l_dnaMakeHistoByHmap() * * Miscellaneous operations * L_DNA *l_dnaDiffAdjValues() * * * We have two implementations of set operations on an array of doubles: * * (1) Using an underlying tree (rbtree) * The key for each float64 value is the value itself. * No collisions can occur. The tree is sorted by the keys. * Lookup is done in O(log n) by traversing from the root, * looking for the key. * * (2) Building a hashmap from the keys (hashmap) * The keys are made from each float64 by casting into a uint64. * The key is then hashed into a hashtable. Collisions of hashkeys are * very rare, and the hashtable is designed to allow more than one * hashitem in a table entry. The hashitems are put in a list at * each hashtable entry, which is traversed looking for the key. * </pre> */ #ifdef HAVE_CONFIG_H #include <config_auto.h> #endif /* HAVE_CONFIG_H */ #include "allheaders.h" #include "array_internal.h" /*----------------------------------------------------------------------* * Rearrangements * *----------------------------------------------------------------------*/ /*! * \brief l_dnaJoin() * * \param[in] dad dest dna; add to this one * \param[in] das [optional] source dna; add from this one * \param[in] istart starting index in das * \param[in] iend ending index in das; use -1 to cat all * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) istart < 0 is taken to mean 'read from the start' (istart = 0) * (2) iend < 0 means 'read to the end' * (3) if das == NULL, this is a no-op * </pre> */ l_ok l_dnaJoin(L_DNA *dad, L_DNA *das, l_int32 istart, l_int32 iend) { l_int32 n, i; l_float64 val; if (!dad) return ERROR_INT("dad not defined", __func__, 1); if (!das) return 0; if (istart < 0) istart = 0; n = l_dnaGetCount(das); if (iend < 0 || iend >= n) iend = n - 1; if (istart > iend) return ERROR_INT("istart > iend; nothing to add", __func__, 1); for (i = istart; i <= iend; i++) { l_dnaGetDValue(das, i, &val); if (l_dnaAddNumber(dad, val) == 1) { L_ERROR("failed to add double at i = %d\n", __func__, i); return 1; } } return 0; } /*! * \brief l_dnaaFlattenToDna() * * \param[in] daa * \return dad, or NULL on error * * <pre> * Notes: * (1) This 'flattens' the dnaa to a dna, by joining successively * each dna in the dnaa. * (2) It leaves the input dnaa unchanged. * </pre> */ L_DNA * l_dnaaFlattenToDna(L_DNAA *daa) { l_int32 i, nalloc; L_DNA *da, *dad; L_DNA **array; if (!daa) return (L_DNA *)ERROR_PTR("daa not defined", __func__, NULL); nalloc = daa->nalloc; array = daa->dna; dad = l_dnaCreate(0); for (i = 0; i < nalloc; i++) { da = array[i]; if (!da) continue; l_dnaJoin(dad, da, 0, -1); } return dad; } /*! * \brief l_dnaSelectRange() * * \param[in] das * \param[in] first use 0 to select from the beginning * \param[in] last use -1 to select to the end * \return dad, or NULL on error */ L_DNA * l_dnaSelectRange(L_DNA *das, l_int32 first, l_int32 last) { l_int32 n, i; l_float64 dval; L_DNA *dad; if (!das) return (L_DNA *)ERROR_PTR("das not defined", __func__, NULL); if ((n = l_dnaGetCount(das)) == 0) { L_WARNING("das is empty\n", __func__); return l_dnaCopy(das); } first = L_MAX(0, first); if (last < 0) last = n - 1; if (first >= n) return (L_DNA *)ERROR_PTR("invalid first", __func__, NULL); if (last >= n) { L_WARNING("last = %d is beyond max index = %d; adjusting\n", __func__, last, n - 1); last = n - 1; } if (first > last) return (L_DNA *)ERROR_PTR("first > last", __func__, NULL); dad = l_dnaCreate(last - first + 1); for (i = first; i <= last; i++) { l_dnaGetDValue(das, i, &dval); l_dnaAddNumber(dad, dval); } return dad; } /*----------------------------------------------------------------------* * Conversion between numa and dna * *----------------------------------------------------------------------*/ /*! * \brief l_dnaConvertToNuma() * * \param[in] da * \return na, or NULL on error */ NUMA * l_dnaConvertToNuma(L_DNA *da) { l_int32 i, n; l_float64 val; NUMA *na; if (!da) return (NUMA *)ERROR_PTR("da not defined", __func__, NULL); n = l_dnaGetCount(da); na = numaCreate(n); for (i = 0; i < n; i++) { l_dnaGetDValue(da, i, &val); numaAddNumber(na, val); } return na; } /*! * \brief numaConvertToDna * * \param[in] na * \return da, or NULL on error */ L_DNA * numaConvertToDna(NUMA *na) { l_int32 i, n; l_float32 val; L_DNA *da; if (!na) return (L_DNA *)ERROR_PTR("na not defined", __func__, NULL); n = numaGetCount(na); da = l_dnaCreate(n); for (i = 0; i < n; i++) { numaGetFValue(na, i, &val); l_dnaAddNumber(da, val); } return da; } /*----------------------------------------------------------------------* * Conversion from pix data to dna * *----------------------------------------------------------------------*/ /*! * \brief pixConvertDataToDna() * * \param[in] pix 32 bpp RGB(A) * \return da, or NULL on error * * <pre> * Notes: * (1) This writes the RGBA pixel values into the dna, in row-major order. * </pre> */ L_DNA * pixConvertDataToDna(PIX *pix) { l_int32 i, j, w, h, wpl; l_uint32 *data, *line; L_DNA *da; if (!pix) return (L_DNA *)ERROR_PTR("pix not defined", __func__, NULL); if (pixGetDepth(pix) != 32) return (L_DNA *)ERROR_PTR("pix not 32 bpp", __func__, NULL); pixGetDimensions(pix, &w, &h, NULL); data = pixGetData(pix); wpl = pixGetWpl(pix); da = l_dnaCreate(w * h); for (i = 0; i < h; i++) { line = data + i * wpl; for (j = 0; j < w; j++) l_dnaAddNumber(da, (l_float64)line[j]); } return da; } /*----------------------------------------------------------------------* * Set operations using aset (rbtree) * *----------------------------------------------------------------------*/ /*! * \brief l_asetCreateFromDna() * * \param[in] da source dna * \return set using the doubles in %da as keys */ L_ASET * l_asetCreateFromDna(L_DNA *da) { l_int32 i, n; l_float64 val; L_ASET *set; RB_TYPE key; if (!da) return (L_ASET *)ERROR_PTR("da not defined", __func__, NULL); set = l_asetCreate(L_FLOAT_TYPE); n = l_dnaGetCount(da); for (i = 0; i < n; i++) { l_dnaGetDValue(da, i, &val); key.ftype = val; l_asetInsert(set, key); } return set; } /*! * \brief l_dnaRemoveDupsByAset() * * \param[in] das * \param[out] pdad with duplicated removed * \return 0 if OK; 1 on error */ l_ok l_dnaRemoveDupsByAset(L_DNA *das, L_DNA **pdad) { l_int32 i, n; l_float64 val; L_DNA *dad; L_ASET *set; RB_TYPE key; if (!pdad) return ERROR_INT("&dad not defined", __func__, 1); *pdad = NULL; if (!das) return ERROR_INT("das not defined", __func__, 1); set = l_asetCreate(L_FLOAT_TYPE); dad = l_dnaCreate(0); *pdad = dad; n = l_dnaGetCount(das); for (i = 0; i < n; i++) { l_dnaGetDValue(das, i, &val); key.ftype = val; if (!l_asetFind(set, key)) { l_dnaAddNumber(dad, val); l_asetInsert(set, key); } } l_asetDestroy(&set); return 0; } /*! * \brief l_dnaUnionByAset() * * \param[in] da1 * \param[in] da2 * \param[out] pdad union of the two arrays * \return 0 if OK; 1 on error * * <pre> * Notes: * (1) See sarrayUnionByAset() for the approach. * (2) Here, the key in building the sorted tree is the number itself. * (3) Operations using an underlying tree are O(nlogn), which is * typically less efficient than hashing, which is O(n). * </pre> */ l_ok l_dnaUnionByAset(L_DNA *da1, L_DNA *da2, L_DNA **pdad) { L_DNA *da3; if (!pdad) return ERROR_INT("&dad not defined", __func__, 1); if (!da1) return ERROR_INT("da1 not defined", __func__, 1); if (!da2) return ERROR_INT("da2 not defined", __func__, 1); /* Join */ da3 = l_dnaCopy(da1); if (l_dnaJoin(da3, da2, 0, -1) == 1) { l_dnaDestroy(&da3); return ERROR_INT("join failed for da3", __func__, 1); } /* Eliminate duplicates */ l_dnaRemoveDupsByAset(da3, pdad); l_dnaDestroy(&da3); return 0; } /*! * \brief l_dnaIntersectionByAset() * * \param[in] da1 * \param[in] da2 * \param[out] pdad intersection of the two arrays * \return 0 if OK; 1 on error * * <pre> * Notes: * (1) See sarrayIntersection() for the approach. * (2) Here, the key in building the sorted tree is the number itself. * (3) Operations using an underlying tree are O(nlogn), which is * typically less efficient than hashing, which is O(n). * </pre> */ l_ok l_dnaIntersectionByAset(L_DNA *da1, L_DNA *da2, L_DNA **pdad) { l_int32 n1, n2, i, n; l_float64 val; L_ASET *set1, *set2; RB_TYPE key; L_DNA *da_small, *da_big, *dad; if (!pdad) return ERROR_INT("&dad not defined", __func__, 1); *pdad = NULL; if (!da1) return ERROR_INT("&da1 not defined", __func__, 1); if (!da2) return ERROR_INT("&da2 not defined", __func__, 1); /* Put the elements of the largest array into a set */ n1 = l_dnaGetCount(da1); n2 = l_dnaGetCount(da2); da_small = (n1 < n2) ? da1 : da2; /* do not destroy da_small */ da_big = (n1 < n2) ? da2 : da1; /* do not destroy da_big */ set1 = l_asetCreateFromDna(da_big); /* Build up the intersection of doubles */ dad = l_dnaCreate(0); *pdad = dad; n = l_dnaGetCount(da_small); set2 = l_asetCreate(L_FLOAT_TYPE); for (i = 0; i < n; i++) { l_dnaGetDValue(da_small, i, &val); key.ftype = val; if (l_asetFind(set1, key) && !l_asetFind(set2, key)) { l_dnaAddNumber(dad, val); l_asetInsert(set2, key); } } l_asetDestroy(&set1); l_asetDestroy(&set2); return 0; } /*--------------------------------------------------------------------------* * Hashmap operations * *--------------------------------------------------------------------------*/ /*! * \brief l_hmapCreateFromDna() * * \param[in] da input dna * \return hmap hashmap, or NULL on error * * <pre> * Notes: * (1) Derive the hash keys from the values in %da. * (2) The indices into %da are stored in the val field of the hashitems. * This is necessary so that %hmap and %da can be used together. * </pre> */ L_HASHMAP * l_hmapCreateFromDna(L_DNA *da) { l_int32 i, n; l_uint64 key; l_float64 dval; L_HASHMAP *hmap; if (!da) return (L_HASHMAP *)ERROR_PTR("da not defined", __func__, NULL); n = l_dnaGetCount(da); hmap = l_hmapCreate(0, 0); for (i = 0; i < n; i++) { l_dnaGetDValue(da, i, &dval); l_hashFloat64ToUint64(dval, &key); l_hmapLookup(hmap, key, i, L_HMAP_CREATE); } return hmap; } /*! * \brief l_dnaRemoveDupsByHmap() * * \param[in] das * \param[out] pdad hash set of unique values * \param[out] phmap [optional] hashmap used for lookup * \return 0 if OK; 1 on error * * <pre> * Notes: * (1) Generates the set of (unique) values from %das. * (2) The values in the hashitems are indices into %das. * </pre> */ l_ok l_dnaRemoveDupsByHmap(L_DNA *das, L_DNA **pdad, L_HASHMAP **phmap) { l_int32 i, tabsize; l_float64 dval; L_DNA *dad; L_HASHITEM *hitem; L_HASHMAP *hmap; if (phmap) *phmap = NULL; if (!pdad) return ERROR_INT("&dad not defined", __func__, 1); *pdad = NULL; if (!das) return ERROR_INT("das not defined", __func__, 1); /* Traverse the hashtable lists */ if ((hmap = l_hmapCreateFromDna(das)) == NULL) return ERROR_INT("hmap not made", __func__, 1); dad = l_dnaCreate(0); *pdad = dad; tabsize = hmap->tabsize; for (i = 0; i < tabsize; i++) { hitem = hmap->hashtab[i]; while (hitem) { l_dnaGetDValue(das, hitem->val, &dval); l_dnaAddNumber(dad, dval); hitem = hitem->next; } } if (phmap) *phmap = hmap; else l_hmapDestroy(&hmap); return 0; } /*! * \brief l_dnaUnionByHmap() * * \param[in] da1 * \param[in] da2 * \param[out] pdad union of the array values * \return 0 if OK; 1 on error * * <pre> * Notes: * (1) Make dna with numbers found in either of the input arrays. * </pre> */ l_ok l_dnaUnionByHmap(L_DNA *da1, L_DNA *da2, L_DNA **pdad) { L_DNA *da3; if (!pdad) return ERROR_INT("&dad not defined", __func__, 1); *pdad = NULL; if (!da1) return ERROR_INT("da1 not defined", __func__, 1); if (!da2) return ERROR_INT("da2 not defined", __func__, 1); da3 = l_dnaCopy(da1); if (l_dnaJoin(da3, da2, 0, -1) == 1) { l_dnaDestroy(&da3); return ERROR_INT("da3 join failed", __func__, 1); } l_dnaRemoveDupsByHmap(da3, pdad, NULL); l_dnaDestroy(&da3); return 0; } /*! * \brief l_dnaIntersectionByHmap() * * \param[in] da1 * \param[in] da2 * \param[out] pdad intersection of the array values * \return 0 if OK; 1 on error * * <pre> * Notes: * (1) Make dna with numbers common to both input arrays. * (2) Use the values in the dna as the hash keys. * </pre> */ l_ok l_dnaIntersectionByHmap(L_DNA *da1, L_DNA *da2, L_DNA **pdad) { l_int32 i, n1, n2, n; l_uint64 key; l_float64 dval; L_DNA *da_small, *da_big, *dad; L_HASHITEM *hitem; L_HASHMAP *hmap; if (!pdad) return ERROR_INT("&dad not defined", __func__, 1); *pdad = NULL; if (!da1) return ERROR_INT("da1 not defined", __func__, 1); if (!da2) return ERROR_INT("da2 not defined", __func__, 1); /* Make a hashmap for the elements of the biggest array */ n1 = l_dnaGetCount(da1); n2 = l_dnaGetCount(da2); da_small = (n1 < n2) ? da1 : da2; /* do not destroy da_small */ da_big = (n1 < n2) ? da2 : da1; /* do not destroy da_big */ if ((hmap = l_hmapCreateFromDna(da_big)) == NULL) return ERROR_INT("hmap not made", __func__, 1); /* Go through the smallest array, doing a lookup of its dval into * the big array hashmap. If an hitem is returned, check the count. * If the count is 0, ignore; otherwise, add the dval to the * output dad and set the count in the hitem to 0, indicating * that the dval has already been added. */ dad = l_dnaCreate(0); *pdad = dad; n = l_dnaGetCount(da_small); for (i = 0; i < n; i++) { l_dnaGetDValue(da_small, i, &dval); l_hashFloat64ToUint64(dval, &key); hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK); if (!hitem || hitem->count == 0) continue; l_dnaAddNumber(dad, dval); hitem->count = 0; } l_hmapDestroy(&hmap); return 0; } /*! * \brief l_dnaMakeHistoByHmap() * * \param[in] das * \param[out] pdav array (set) of unique values * \param[out] pdac array of counts, aligned with the array of values * \return 0 if OK; 1 on error * * <pre> * Notes: * (1) Generates a histogram represented by two aligned arrays: * value and count. * </pre> */ l_ok l_dnaMakeHistoByHmap(L_DNA *das, L_DNA **pdav, L_DNA **pdac) { l_int32 i, tabsize; l_float64 dval; L_DNA *dac, *dav; L_HASHITEM *hitem; L_HASHMAP *hmap; if (pdav) *pdav = NULL; if (pdac) *pdac = NULL; if (!das) return ERROR_INT("das not defined", __func__, 1); if (!pdav) return ERROR_INT("&dav not defined", __func__, 1); if (!pdac) return ERROR_INT("&dac not defined", __func__, 1); /* Traverse the hashtable lists */ if ((hmap = l_hmapCreateFromDna(das)) == NULL) return ERROR_INT("hmap not made", __func__, 1); dav = l_dnaCreate(0); *pdav = dav; dac = l_dnaCreate(0); *pdac = dac; tabsize = hmap->tabsize; for (i = 0; i < tabsize; i++) { hitem = hmap->hashtab[i]; while (hitem) { l_dnaGetDValue(das, hitem->val, &dval); l_dnaAddNumber(dav, dval); l_dnaAddNumber(dac, hitem->count); hitem = hitem->next; } } l_hmapDestroy(&hmap); return 0; } /*----------------------------------------------------------------------* * Miscellaneous operations * *----------------------------------------------------------------------*/ /*! * \brief l_dnaDiffAdjValues() * * \param[in] das input l_dna * \return dad of difference values val[i+1] - val[i], * or NULL on error */ L_DNA * l_dnaDiffAdjValues(L_DNA *das) { l_int32 i, n, prev, cur; L_DNA *dad; if (!das) return (L_DNA *)ERROR_PTR("das not defined", __func__, NULL); n = l_dnaGetCount(das); dad = l_dnaCreate(n - 1); prev = 0; for (i = 1; i < n; i++) { l_dnaGetIValue(das, i, &cur); l_dnaAddNumber(dad, cur - prev); prev = cur; } return dad; }
