Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/dnafunc1.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file dnafunc1.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Rearrangements | |
| 32 * l_int32 *l_dnaJoin() | |
| 33 * l_int32 *l_dnaaFlattenToDna() | |
| 34 * L_DNA *l_dnaSelectRange() | |
| 35 * | |
| 36 * Conversion between numa and dna | |
| 37 * NUMA *l_dnaConvertToNuma() | |
| 38 * L_DNA *numaConvertToDna() | |
| 39 * | |
| 40 * Conversion from pix data to dna | |
| 41 * L_DNA *pixConvertDataToDna() | |
| 42 * | |
| 43 * Set operations using aset (rbtree) | |
| 44 * L_ASET *l_asetCreateFromDna() | |
| 45 * L_DNA *l_dnaRemoveDupsByAset() | |
| 46 * L_DNA *l_dnaUnionByAset() | |
| 47 * L_DNA *l_dnaIntersectionByAset() | |
| 48 * | |
| 49 * Hashmap operations | |
| 50 * L_HASHMAP *l_hmapCreateFromDna() | |
| 51 * l_int32 l_dnaRemoveDupsByHmap() | |
| 52 * l_int32 l_dnaUnionByHmap() | |
| 53 * l_int32 l_dnaIntersectionByHmap() | |
| 54 * l_int32 l_dnaMakeHistoByHmap() | |
| 55 * | |
| 56 * Miscellaneous operations | |
| 57 * L_DNA *l_dnaDiffAdjValues() | |
| 58 * | |
| 59 * | |
| 60 * We have two implementations of set operations on an array of doubles: | |
| 61 * | |
| 62 * (1) Using an underlying tree (rbtree) | |
| 63 * The key for each float64 value is the value itself. | |
| 64 * No collisions can occur. The tree is sorted by the keys. | |
| 65 * Lookup is done in O(log n) by traversing from the root, | |
| 66 * looking for the key. | |
| 67 * | |
| 68 * (2) Building a hashmap from the keys (hashmap) | |
| 69 * The keys are made from each float64 by casting into a uint64. | |
| 70 * The key is then hashed into a hashtable. Collisions of hashkeys are | |
| 71 * very rare, and the hashtable is designed to allow more than one | |
| 72 * hashitem in a table entry. The hashitems are put in a list at | |
| 73 * each hashtable entry, which is traversed looking for the key. | |
| 74 * </pre> | |
| 75 */ | |
| 76 | |
| 77 #ifdef HAVE_CONFIG_H | |
| 78 #include <config_auto.h> | |
| 79 #endif /* HAVE_CONFIG_H */ | |
| 80 | |
| 81 #include "allheaders.h" | |
| 82 #include "array_internal.h" | |
| 83 | |
| 84 | |
| 85 /*----------------------------------------------------------------------* | |
| 86 * Rearrangements * | |
| 87 *----------------------------------------------------------------------*/ | |
| 88 /*! | |
| 89 * \brief l_dnaJoin() | |
| 90 * | |
| 91 * \param[in] dad dest dna; add to this one | |
| 92 * \param[in] das [optional] source dna; add from this one | |
| 93 * \param[in] istart starting index in das | |
| 94 * \param[in] iend ending index in das; use -1 to cat all | |
| 95 * \return 0 if OK, 1 on error | |
| 96 * | |
| 97 * <pre> | |
| 98 * Notes: | |
| 99 * (1) istart < 0 is taken to mean 'read from the start' (istart = 0) | |
| 100 * (2) iend < 0 means 'read to the end' | |
| 101 * (3) if das == NULL, this is a no-op | |
| 102 * </pre> | |
| 103 */ | |
| 104 l_ok | |
| 105 l_dnaJoin(L_DNA *dad, | |
| 106 L_DNA *das, | |
| 107 l_int32 istart, | |
| 108 l_int32 iend) | |
| 109 { | |
| 110 l_int32 n, i; | |
| 111 l_float64 val; | |
| 112 | |
| 113 if (!dad) | |
| 114 return ERROR_INT("dad not defined", __func__, 1); | |
| 115 if (!das) | |
| 116 return 0; | |
| 117 | |
| 118 if (istart < 0) | |
| 119 istart = 0; | |
| 120 n = l_dnaGetCount(das); | |
| 121 if (iend < 0 || iend >= n) | |
| 122 iend = n - 1; | |
| 123 if (istart > iend) | |
| 124 return ERROR_INT("istart > iend; nothing to add", __func__, 1); | |
| 125 | |
| 126 for (i = istart; i <= iend; i++) { | |
| 127 l_dnaGetDValue(das, i, &val); | |
| 128 if (l_dnaAddNumber(dad, val) == 1) { | |
| 129 L_ERROR("failed to add double at i = %d\n", __func__, i); | |
| 130 return 1; | |
| 131 } | |
| 132 | |
| 133 } | |
| 134 return 0; | |
| 135 } | |
| 136 | |
| 137 | |
| 138 /*! | |
| 139 * \brief l_dnaaFlattenToDna() | |
| 140 * | |
| 141 * \param[in] daa | |
| 142 * \return dad, or NULL on error | |
| 143 * | |
| 144 * <pre> | |
| 145 * Notes: | |
| 146 * (1) This 'flattens' the dnaa to a dna, by joining successively | |
| 147 * each dna in the dnaa. | |
| 148 * (2) It leaves the input dnaa unchanged. | |
| 149 * </pre> | |
| 150 */ | |
| 151 L_DNA * | |
| 152 l_dnaaFlattenToDna(L_DNAA *daa) | |
| 153 { | |
| 154 l_int32 i, nalloc; | |
| 155 L_DNA *da, *dad; | |
| 156 L_DNA **array; | |
| 157 | |
| 158 if (!daa) | |
| 159 return (L_DNA *)ERROR_PTR("daa not defined", __func__, NULL); | |
| 160 | |
| 161 nalloc = daa->nalloc; | |
| 162 array = daa->dna; | |
| 163 dad = l_dnaCreate(0); | |
| 164 for (i = 0; i < nalloc; i++) { | |
| 165 da = array[i]; | |
| 166 if (!da) continue; | |
| 167 l_dnaJoin(dad, da, 0, -1); | |
| 168 } | |
| 169 | |
| 170 return dad; | |
| 171 } | |
| 172 | |
| 173 | |
| 174 /*! | |
| 175 * \brief l_dnaSelectRange() | |
| 176 * | |
| 177 * \param[in] das | |
| 178 * \param[in] first use 0 to select from the beginning | |
| 179 * \param[in] last use -1 to select to the end | |
| 180 * \return dad, or NULL on error | |
| 181 */ | |
| 182 L_DNA * | |
| 183 l_dnaSelectRange(L_DNA *das, | |
| 184 l_int32 first, | |
| 185 l_int32 last) | |
| 186 { | |
| 187 l_int32 n, i; | |
| 188 l_float64 dval; | |
| 189 L_DNA *dad; | |
| 190 | |
| 191 if (!das) | |
| 192 return (L_DNA *)ERROR_PTR("das not defined", __func__, NULL); | |
| 193 if ((n = l_dnaGetCount(das)) == 0) { | |
| 194 L_WARNING("das is empty\n", __func__); | |
| 195 return l_dnaCopy(das); | |
| 196 } | |
| 197 first = L_MAX(0, first); | |
| 198 if (last < 0) last = n - 1; | |
| 199 if (first >= n) | |
| 200 return (L_DNA *)ERROR_PTR("invalid first", __func__, NULL); | |
| 201 if (last >= n) { | |
| 202 L_WARNING("last = %d is beyond max index = %d; adjusting\n", | |
| 203 __func__, last, n - 1); | |
| 204 last = n - 1; | |
| 205 } | |
| 206 if (first > last) | |
| 207 return (L_DNA *)ERROR_PTR("first > last", __func__, NULL); | |
| 208 | |
| 209 dad = l_dnaCreate(last - first + 1); | |
| 210 for (i = first; i <= last; i++) { | |
| 211 l_dnaGetDValue(das, i, &dval); | |
| 212 l_dnaAddNumber(dad, dval); | |
| 213 } | |
| 214 return dad; | |
| 215 } | |
| 216 | |
| 217 | |
| 218 /*----------------------------------------------------------------------* | |
| 219 * Conversion between numa and dna * | |
| 220 *----------------------------------------------------------------------*/ | |
| 221 /*! | |
| 222 * \brief l_dnaConvertToNuma() | |
| 223 * | |
| 224 * \param[in] da | |
| 225 * \return na, or NULL on error | |
| 226 */ | |
| 227 NUMA * | |
| 228 l_dnaConvertToNuma(L_DNA *da) | |
| 229 { | |
| 230 l_int32 i, n; | |
| 231 l_float64 val; | |
| 232 NUMA *na; | |
| 233 | |
| 234 if (!da) | |
| 235 return (NUMA *)ERROR_PTR("da not defined", __func__, NULL); | |
| 236 | |
| 237 n = l_dnaGetCount(da); | |
| 238 na = numaCreate(n); | |
| 239 for (i = 0; i < n; i++) { | |
| 240 l_dnaGetDValue(da, i, &val); | |
| 241 numaAddNumber(na, val); | |
| 242 } | |
| 243 return na; | |
| 244 } | |
| 245 | |
| 246 | |
| 247 /*! | |
| 248 * \brief numaConvertToDna | |
| 249 * | |
| 250 * \param[in] na | |
| 251 * \return da, or NULL on error | |
| 252 */ | |
| 253 L_DNA * | |
| 254 numaConvertToDna(NUMA *na) | |
| 255 { | |
| 256 l_int32 i, n; | |
| 257 l_float32 val; | |
| 258 L_DNA *da; | |
| 259 | |
| 260 if (!na) | |
| 261 return (L_DNA *)ERROR_PTR("na not defined", __func__, NULL); | |
| 262 | |
| 263 n = numaGetCount(na); | |
| 264 da = l_dnaCreate(n); | |
| 265 for (i = 0; i < n; i++) { | |
| 266 numaGetFValue(na, i, &val); | |
| 267 l_dnaAddNumber(da, val); | |
| 268 } | |
| 269 return da; | |
| 270 } | |
| 271 | |
| 272 | |
| 273 /*----------------------------------------------------------------------* | |
| 274 * Conversion from pix data to dna * | |
| 275 *----------------------------------------------------------------------*/ | |
| 276 /*! | |
| 277 * \brief pixConvertDataToDna() | |
| 278 * | |
| 279 * \param[in] pix 32 bpp RGB(A) | |
| 280 * \return da, or NULL on error | |
| 281 * | |
| 282 * <pre> | |
| 283 * Notes: | |
| 284 * (1) This writes the RGBA pixel values into the dna, in row-major order. | |
| 285 * </pre> | |
| 286 */ | |
| 287 L_DNA * | |
| 288 pixConvertDataToDna(PIX *pix) | |
| 289 { | |
| 290 l_int32 i, j, w, h, wpl; | |
| 291 l_uint32 *data, *line; | |
| 292 L_DNA *da; | |
| 293 | |
| 294 if (!pix) | |
| 295 return (L_DNA *)ERROR_PTR("pix not defined", __func__, NULL); | |
| 296 if (pixGetDepth(pix) != 32) | |
| 297 return (L_DNA *)ERROR_PTR("pix not 32 bpp", __func__, NULL); | |
| 298 | |
| 299 pixGetDimensions(pix, &w, &h, NULL); | |
| 300 data = pixGetData(pix); | |
| 301 wpl = pixGetWpl(pix); | |
| 302 da = l_dnaCreate(w * h); | |
| 303 for (i = 0; i < h; i++) { | |
| 304 line = data + i * wpl; | |
| 305 for (j = 0; j < w; j++) | |
| 306 l_dnaAddNumber(da, (l_float64)line[j]); | |
| 307 } | |
| 308 return da; | |
| 309 } | |
| 310 | |
| 311 | |
| 312 /*----------------------------------------------------------------------* | |
| 313 * Set operations using aset (rbtree) * | |
| 314 *----------------------------------------------------------------------*/ | |
| 315 /*! | |
| 316 * \brief l_asetCreateFromDna() | |
| 317 * | |
| 318 * \param[in] da source dna | |
| 319 * \return set using the doubles in %da as keys | |
| 320 */ | |
| 321 L_ASET * | |
| 322 l_asetCreateFromDna(L_DNA *da) | |
| 323 { | |
| 324 l_int32 i, n; | |
| 325 l_float64 val; | |
| 326 L_ASET *set; | |
| 327 RB_TYPE key; | |
| 328 | |
| 329 if (!da) | |
| 330 return (L_ASET *)ERROR_PTR("da not defined", __func__, NULL); | |
| 331 | |
| 332 set = l_asetCreate(L_FLOAT_TYPE); | |
| 333 n = l_dnaGetCount(da); | |
| 334 for (i = 0; i < n; i++) { | |
| 335 l_dnaGetDValue(da, i, &val); | |
| 336 key.ftype = val; | |
| 337 l_asetInsert(set, key); | |
| 338 } | |
| 339 | |
| 340 return set; | |
| 341 } | |
| 342 | |
| 343 | |
| 344 /*! | |
| 345 * \brief l_dnaRemoveDupsByAset() | |
| 346 * | |
| 347 * \param[in] das | |
| 348 * \param[out] pdad with duplicated removed | |
| 349 * \return 0 if OK; 1 on error | |
| 350 */ | |
| 351 l_ok | |
| 352 l_dnaRemoveDupsByAset(L_DNA *das, | |
| 353 L_DNA **pdad) | |
| 354 { | |
| 355 l_int32 i, n; | |
| 356 l_float64 val; | |
| 357 L_DNA *dad; | |
| 358 L_ASET *set; | |
| 359 RB_TYPE key; | |
| 360 | |
| 361 if (!pdad) | |
| 362 return ERROR_INT("&dad not defined", __func__, 1); | |
| 363 *pdad = NULL; | |
| 364 if (!das) | |
| 365 return ERROR_INT("das not defined", __func__, 1); | |
| 366 | |
| 367 set = l_asetCreate(L_FLOAT_TYPE); | |
| 368 dad = l_dnaCreate(0); | |
| 369 *pdad = dad; | |
| 370 n = l_dnaGetCount(das); | |
| 371 for (i = 0; i < n; i++) { | |
| 372 l_dnaGetDValue(das, i, &val); | |
| 373 key.ftype = val; | |
| 374 if (!l_asetFind(set, key)) { | |
| 375 l_dnaAddNumber(dad, val); | |
| 376 l_asetInsert(set, key); | |
| 377 } | |
| 378 } | |
| 379 | |
| 380 l_asetDestroy(&set); | |
| 381 return 0; | |
| 382 } | |
| 383 | |
| 384 | |
| 385 /*! | |
| 386 * \brief l_dnaUnionByAset() | |
| 387 * | |
| 388 * \param[in] da1 | |
| 389 * \param[in] da2 | |
| 390 * \param[out] pdad union of the two arrays | |
| 391 * \return 0 if OK; 1 on error | |
| 392 * | |
| 393 * <pre> | |
| 394 * Notes: | |
| 395 * (1) See sarrayUnionByAset() for the approach. | |
| 396 * (2) Here, the key in building the sorted tree is the number itself. | |
| 397 * (3) Operations using an underlying tree are O(nlogn), which is | |
| 398 * typically less efficient than hashing, which is O(n). | |
| 399 * </pre> | |
| 400 */ | |
| 401 l_ok | |
| 402 l_dnaUnionByAset(L_DNA *da1, | |
| 403 L_DNA *da2, | |
| 404 L_DNA **pdad) | |
| 405 { | |
| 406 L_DNA *da3; | |
| 407 | |
| 408 if (!pdad) | |
| 409 return ERROR_INT("&dad not defined", __func__, 1); | |
| 410 if (!da1) | |
| 411 return ERROR_INT("da1 not defined", __func__, 1); | |
| 412 if (!da2) | |
| 413 return ERROR_INT("da2 not defined", __func__, 1); | |
| 414 | |
| 415 /* Join */ | |
| 416 da3 = l_dnaCopy(da1); | |
| 417 if (l_dnaJoin(da3, da2, 0, -1) == 1) { | |
| 418 l_dnaDestroy(&da3); | |
| 419 return ERROR_INT("join failed for da3", __func__, 1); | |
| 420 } | |
| 421 | |
| 422 /* Eliminate duplicates */ | |
| 423 l_dnaRemoveDupsByAset(da3, pdad); | |
| 424 l_dnaDestroy(&da3); | |
| 425 return 0; | |
| 426 } | |
| 427 | |
| 428 | |
| 429 /*! | |
| 430 * \brief l_dnaIntersectionByAset() | |
| 431 * | |
| 432 * \param[in] da1 | |
| 433 * \param[in] da2 | |
| 434 * \param[out] pdad intersection of the two arrays | |
| 435 * \return 0 if OK; 1 on error | |
| 436 * | |
| 437 * <pre> | |
| 438 * Notes: | |
| 439 * (1) See sarrayIntersection() for the approach. | |
| 440 * (2) Here, the key in building the sorted tree is the number itself. | |
| 441 * (3) Operations using an underlying tree are O(nlogn), which is | |
| 442 * typically less efficient than hashing, which is O(n). | |
| 443 * </pre> | |
| 444 */ | |
| 445 l_ok | |
| 446 l_dnaIntersectionByAset(L_DNA *da1, | |
| 447 L_DNA *da2, | |
| 448 L_DNA **pdad) | |
| 449 { | |
| 450 l_int32 n1, n2, i, n; | |
| 451 l_float64 val; | |
| 452 L_ASET *set1, *set2; | |
| 453 RB_TYPE key; | |
| 454 L_DNA *da_small, *da_big, *dad; | |
| 455 | |
| 456 if (!pdad) | |
| 457 return ERROR_INT("&dad not defined", __func__, 1); | |
| 458 *pdad = NULL; | |
| 459 if (!da1) | |
| 460 return ERROR_INT("&da1 not defined", __func__, 1); | |
| 461 if (!da2) | |
| 462 return ERROR_INT("&da2 not defined", __func__, 1); | |
| 463 | |
| 464 /* Put the elements of the largest array into a set */ | |
| 465 n1 = l_dnaGetCount(da1); | |
| 466 n2 = l_dnaGetCount(da2); | |
| 467 da_small = (n1 < n2) ? da1 : da2; /* do not destroy da_small */ | |
| 468 da_big = (n1 < n2) ? da2 : da1; /* do not destroy da_big */ | |
| 469 set1 = l_asetCreateFromDna(da_big); | |
| 470 | |
| 471 /* Build up the intersection of doubles */ | |
| 472 dad = l_dnaCreate(0); | |
| 473 *pdad = dad; | |
| 474 n = l_dnaGetCount(da_small); | |
| 475 set2 = l_asetCreate(L_FLOAT_TYPE); | |
| 476 for (i = 0; i < n; i++) { | |
| 477 l_dnaGetDValue(da_small, i, &val); | |
| 478 key.ftype = val; | |
| 479 if (l_asetFind(set1, key) && !l_asetFind(set2, key)) { | |
| 480 l_dnaAddNumber(dad, val); | |
| 481 l_asetInsert(set2, key); | |
| 482 } | |
| 483 } | |
| 484 | |
| 485 l_asetDestroy(&set1); | |
| 486 l_asetDestroy(&set2); | |
| 487 return 0; | |
| 488 } | |
| 489 | |
| 490 | |
| 491 /*--------------------------------------------------------------------------* | |
| 492 * Hashmap operations * | |
| 493 *--------------------------------------------------------------------------*/ | |
| 494 /*! | |
| 495 * \brief l_hmapCreateFromDna() | |
| 496 * | |
| 497 * \param[in] da input dna | |
| 498 * \return hmap hashmap, or NULL on error | |
| 499 * | |
| 500 * <pre> | |
| 501 * Notes: | |
| 502 * (1) Derive the hash keys from the values in %da. | |
| 503 * (2) The indices into %da are stored in the val field of the hashitems. | |
| 504 * This is necessary so that %hmap and %da can be used together. | |
| 505 * </pre> | |
| 506 */ | |
| 507 L_HASHMAP * | |
| 508 l_hmapCreateFromDna(L_DNA *da) | |
| 509 { | |
| 510 l_int32 i, n; | |
| 511 l_uint64 key; | |
| 512 l_float64 dval; | |
| 513 L_HASHMAP *hmap; | |
| 514 | |
| 515 if (!da) | |
| 516 return (L_HASHMAP *)ERROR_PTR("da not defined", __func__, NULL); | |
| 517 | |
| 518 n = l_dnaGetCount(da); | |
| 519 hmap = l_hmapCreate(0, 0); | |
| 520 for (i = 0; i < n; i++) { | |
| 521 l_dnaGetDValue(da, i, &dval); | |
| 522 l_hashFloat64ToUint64(dval, &key); | |
| 523 l_hmapLookup(hmap, key, i, L_HMAP_CREATE); | |
| 524 } | |
| 525 return hmap; | |
| 526 } | |
| 527 | |
| 528 | |
| 529 /*! | |
| 530 * \brief l_dnaRemoveDupsByHmap() | |
| 531 * | |
| 532 * \param[in] das | |
| 533 * \param[out] pdad hash set of unique values | |
| 534 * \param[out] phmap [optional] hashmap used for lookup | |
| 535 * \return 0 if OK; 1 on error | |
| 536 * | |
| 537 * <pre> | |
| 538 * Notes: | |
| 539 * (1) Generates the set of (unique) values from %das. | |
| 540 * (2) The values in the hashitems are indices into %das. | |
| 541 * </pre> | |
| 542 */ | |
| 543 l_ok | |
| 544 l_dnaRemoveDupsByHmap(L_DNA *das, | |
| 545 L_DNA **pdad, | |
| 546 L_HASHMAP **phmap) | |
| 547 { | |
| 548 l_int32 i, tabsize; | |
| 549 l_float64 dval; | |
| 550 L_DNA *dad; | |
| 551 L_HASHITEM *hitem; | |
| 552 L_HASHMAP *hmap; | |
| 553 | |
| 554 if (phmap) *phmap = NULL; | |
| 555 if (!pdad) | |
| 556 return ERROR_INT("&dad not defined", __func__, 1); | |
| 557 *pdad = NULL; | |
| 558 if (!das) | |
| 559 return ERROR_INT("das not defined", __func__, 1); | |
| 560 | |
| 561 /* Traverse the hashtable lists */ | |
| 562 if ((hmap = l_hmapCreateFromDna(das)) == NULL) | |
| 563 return ERROR_INT("hmap not made", __func__, 1); | |
| 564 dad = l_dnaCreate(0); | |
| 565 *pdad = dad; | |
| 566 tabsize = hmap->tabsize; | |
| 567 for (i = 0; i < tabsize; i++) { | |
| 568 hitem = hmap->hashtab[i]; | |
| 569 while (hitem) { | |
| 570 l_dnaGetDValue(das, hitem->val, &dval); | |
| 571 l_dnaAddNumber(dad, dval); | |
| 572 hitem = hitem->next; | |
| 573 } | |
| 574 } | |
| 575 | |
| 576 if (phmap) | |
| 577 *phmap = hmap; | |
| 578 else | |
| 579 l_hmapDestroy(&hmap); | |
| 580 return 0; | |
| 581 } | |
| 582 | |
| 583 | |
| 584 /*! | |
| 585 * \brief l_dnaUnionByHmap() | |
| 586 * | |
| 587 * \param[in] da1 | |
| 588 * \param[in] da2 | |
| 589 * \param[out] pdad union of the array values | |
| 590 * \return 0 if OK; 1 on error | |
| 591 * | |
| 592 * <pre> | |
| 593 * Notes: | |
| 594 * (1) Make dna with numbers found in either of the input arrays. | |
| 595 * </pre> | |
| 596 */ | |
| 597 l_ok | |
| 598 l_dnaUnionByHmap(L_DNA *da1, | |
| 599 L_DNA *da2, | |
| 600 L_DNA **pdad) | |
| 601 { | |
| 602 L_DNA *da3; | |
| 603 | |
| 604 if (!pdad) | |
| 605 return ERROR_INT("&dad not defined", __func__, 1); | |
| 606 *pdad = NULL; | |
| 607 if (!da1) | |
| 608 return ERROR_INT("da1 not defined", __func__, 1); | |
| 609 if (!da2) | |
| 610 return ERROR_INT("da2 not defined", __func__, 1); | |
| 611 | |
| 612 da3 = l_dnaCopy(da1); | |
| 613 if (l_dnaJoin(da3, da2, 0, -1) == 1) { | |
| 614 l_dnaDestroy(&da3); | |
| 615 return ERROR_INT("da3 join failed", __func__, 1); | |
| 616 } | |
| 617 l_dnaRemoveDupsByHmap(da3, pdad, NULL); | |
| 618 l_dnaDestroy(&da3); | |
| 619 return 0; | |
| 620 } | |
| 621 | |
| 622 | |
| 623 /*! | |
| 624 * \brief l_dnaIntersectionByHmap() | |
| 625 * | |
| 626 * \param[in] da1 | |
| 627 * \param[in] da2 | |
| 628 * \param[out] pdad intersection of the array values | |
| 629 * \return 0 if OK; 1 on error | |
| 630 * | |
| 631 * <pre> | |
| 632 * Notes: | |
| 633 * (1) Make dna with numbers common to both input arrays. | |
| 634 * (2) Use the values in the dna as the hash keys. | |
| 635 * </pre> | |
| 636 */ | |
| 637 l_ok | |
| 638 l_dnaIntersectionByHmap(L_DNA *da1, | |
| 639 L_DNA *da2, | |
| 640 L_DNA **pdad) | |
| 641 { | |
| 642 l_int32 i, n1, n2, n; | |
| 643 l_uint64 key; | |
| 644 l_float64 dval; | |
| 645 L_DNA *da_small, *da_big, *dad; | |
| 646 L_HASHITEM *hitem; | |
| 647 L_HASHMAP *hmap; | |
| 648 | |
| 649 if (!pdad) | |
| 650 return ERROR_INT("&dad not defined", __func__, 1); | |
| 651 *pdad = NULL; | |
| 652 if (!da1) | |
| 653 return ERROR_INT("da1 not defined", __func__, 1); | |
| 654 if (!da2) | |
| 655 return ERROR_INT("da2 not defined", __func__, 1); | |
| 656 | |
| 657 /* Make a hashmap for the elements of the biggest array */ | |
| 658 n1 = l_dnaGetCount(da1); | |
| 659 n2 = l_dnaGetCount(da2); | |
| 660 da_small = (n1 < n2) ? da1 : da2; /* do not destroy da_small */ | |
| 661 da_big = (n1 < n2) ? da2 : da1; /* do not destroy da_big */ | |
| 662 if ((hmap = l_hmapCreateFromDna(da_big)) == NULL) | |
| 663 return ERROR_INT("hmap not made", __func__, 1); | |
| 664 | |
| 665 /* Go through the smallest array, doing a lookup of its dval into | |
| 666 * the big array hashmap. If an hitem is returned, check the count. | |
| 667 * If the count is 0, ignore; otherwise, add the dval to the | |
| 668 * output dad and set the count in the hitem to 0, indicating | |
| 669 * that the dval has already been added. */ | |
| 670 dad = l_dnaCreate(0); | |
| 671 *pdad = dad; | |
| 672 n = l_dnaGetCount(da_small); | |
| 673 for (i = 0; i < n; i++) { | |
| 674 l_dnaGetDValue(da_small, i, &dval); | |
| 675 l_hashFloat64ToUint64(dval, &key); | |
| 676 hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK); | |
| 677 if (!hitem || hitem->count == 0) | |
| 678 continue; | |
| 679 l_dnaAddNumber(dad, dval); | |
| 680 hitem->count = 0; | |
| 681 } | |
| 682 l_hmapDestroy(&hmap); | |
| 683 return 0; | |
| 684 } | |
| 685 | |
| 686 | |
| 687 /*! | |
| 688 * \brief l_dnaMakeHistoByHmap() | |
| 689 * | |
| 690 * \param[in] das | |
| 691 * \param[out] pdav array (set) of unique values | |
| 692 * \param[out] pdac array of counts, aligned with the array of values | |
| 693 * \return 0 if OK; 1 on error | |
| 694 * | |
| 695 * <pre> | |
| 696 * Notes: | |
| 697 * (1) Generates a histogram represented by two aligned arrays: | |
| 698 * value and count. | |
| 699 * </pre> | |
| 700 */ | |
| 701 l_ok | |
| 702 l_dnaMakeHistoByHmap(L_DNA *das, | |
| 703 L_DNA **pdav, | |
| 704 L_DNA **pdac) | |
| 705 { | |
| 706 l_int32 i, tabsize; | |
| 707 l_float64 dval; | |
| 708 L_DNA *dac, *dav; | |
| 709 L_HASHITEM *hitem; | |
| 710 L_HASHMAP *hmap; | |
| 711 | |
| 712 if (pdav) *pdav = NULL; | |
| 713 if (pdac) *pdac = NULL; | |
| 714 if (!das) | |
| 715 return ERROR_INT("das not defined", __func__, 1); | |
| 716 if (!pdav) | |
| 717 return ERROR_INT("&dav not defined", __func__, 1); | |
| 718 if (!pdac) | |
| 719 return ERROR_INT("&dac not defined", __func__, 1); | |
| 720 | |
| 721 /* Traverse the hashtable lists */ | |
| 722 if ((hmap = l_hmapCreateFromDna(das)) == NULL) | |
| 723 return ERROR_INT("hmap not made", __func__, 1); | |
| 724 dav = l_dnaCreate(0); | |
| 725 *pdav = dav; | |
| 726 dac = l_dnaCreate(0); | |
| 727 *pdac = dac; | |
| 728 tabsize = hmap->tabsize; | |
| 729 for (i = 0; i < tabsize; i++) { | |
| 730 hitem = hmap->hashtab[i]; | |
| 731 while (hitem) { | |
| 732 l_dnaGetDValue(das, hitem->val, &dval); | |
| 733 l_dnaAddNumber(dav, dval); | |
| 734 l_dnaAddNumber(dac, hitem->count); | |
| 735 hitem = hitem->next; | |
| 736 } | |
| 737 } | |
| 738 | |
| 739 l_hmapDestroy(&hmap); | |
| 740 return 0; | |
| 741 } | |
| 742 | |
| 743 | |
| 744 /*----------------------------------------------------------------------* | |
| 745 * Miscellaneous operations * | |
| 746 *----------------------------------------------------------------------*/ | |
| 747 /*! | |
| 748 * \brief l_dnaDiffAdjValues() | |
| 749 * | |
| 750 * \param[in] das input l_dna | |
| 751 * \return dad of difference values val[i+1] - val[i], | |
| 752 * or NULL on error | |
| 753 */ | |
| 754 L_DNA * | |
| 755 l_dnaDiffAdjValues(L_DNA *das) | |
| 756 { | |
| 757 l_int32 i, n, prev, cur; | |
| 758 L_DNA *dad; | |
| 759 | |
| 760 if (!das) | |
| 761 return (L_DNA *)ERROR_PTR("das not defined", __func__, NULL); | |
| 762 n = l_dnaGetCount(das); | |
| 763 dad = l_dnaCreate(n - 1); | |
| 764 prev = 0; | |
| 765 for (i = 1; i < n; i++) { | |
| 766 l_dnaGetIValue(das, i, &cur); | |
| 767 l_dnaAddNumber(dad, cur - prev); | |
| 768 prev = cur; | |
| 769 } | |
| 770 return dad; | |
| 771 } | |
| 772 |
