Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/ptafunc2.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file ptafunc2.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * -------------------------------------- | |
| 32 * This file has these Pta utilities: | |
| 33 * - sorting | |
| 34 * - ordered set operations | |
| 35 * - hash map operations | |
| 36 * -------------------------------------- | |
| 37 * | |
| 38 * Sorting | |
| 39 * PTA *ptaSort() | |
| 40 * l_int32 ptaGetSortIndex() | |
| 41 * PTA *ptaSortByIndex() | |
| 42 * PTAA *ptaaSortByIndex() | |
| 43 * l_int32 ptaGetRankValue() | |
| 44 * PTA *ptaSort2d() | |
| 45 * l_int32 ptaEqual() | |
| 46 * | |
| 47 * Set operations using aset (rbtree) | |
| 48 * L_ASET *l_asetCreateFromPta() | |
| 49 * PTA *ptaRemoveDupsByAset() | |
| 50 * PTA *ptaUnionByAset() | |
| 51 * PTA *ptaIntersectionByAset() | |
| 52 * | |
| 53 * Hashmap operations | |
| 54 * L_HASHMAP *l_hmapCreateFromPta() | |
| 55 * l_int32 ptaRemoveDupsByHmap() | |
| 56 * l_int32 ptaUnionByHmap() | |
| 57 * l_int32 ptaIntersectionByHmap() | |
| 58 * | |
| 59 * We have two implementations of set operations on an array of points: | |
| 60 * | |
| 61 * (1) Using an underlying tree (rbtree) | |
| 62 * This uses a good 64 bit hashing function for the key, | |
| 63 * that is not expected to have hash collisions (and we do | |
| 64 * not test for them). The tree is built up of the keys, | |
| 65 * values, and is traversed looking for the key in O(log n). | |
| 66 * | |
| 67 * (2) Building a hashmap from the keys (hashmap) | |
| 68 * This uses a fast 64 bit hashing function for the key, which | |
| 69 * is then hashed into a hashtable. Collisions of hashkeys are | |
| 70 * very rare, but the hashtable is designed to allow more than one | |
| 71 * hashitem in a table entry. The hashitems are put in a list at | |
| 72 * each hashtable entry, which is traversed looking for the key. | |
| 73 * | |
| 74 * </pre> | |
| 75 */ | |
| 76 | |
| 77 #ifdef HAVE_CONFIG_H | |
| 78 #include <config_auto.h> | |
| 79 #endif /* HAVE_CONFIG_H */ | |
| 80 | |
| 81 #include "allheaders.h" | |
| 82 | |
| 83 /*---------------------------------------------------------------------* | |
| 84 * Sorting * | |
| 85 *---------------------------------------------------------------------*/ | |
| 86 /*! | |
| 87 * \brief ptaSort() | |
| 88 * | |
| 89 * \param[in] ptas | |
| 90 * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y | |
| 91 * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING | |
| 92 * \param[out] pnaindex [optional] index of sorted order into | |
| 93 * original array | |
| 94 * \return ptad sorted version of ptas, or NULL on error | |
| 95 */ | |
| 96 PTA * | |
| 97 ptaSort(PTA *ptas, | |
| 98 l_int32 sorttype, | |
| 99 l_int32 sortorder, | |
| 100 NUMA **pnaindex) | |
| 101 { | |
| 102 PTA *ptad; | |
| 103 NUMA *naindex; | |
| 104 | |
| 105 if (pnaindex) *pnaindex = NULL; | |
| 106 if (!ptas) | |
| 107 return (PTA *)ERROR_PTR("ptas not defined", __func__, NULL); | |
| 108 if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y) | |
| 109 return (PTA *)ERROR_PTR("invalid sort type", __func__, NULL); | |
| 110 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) | |
| 111 return (PTA *)ERROR_PTR("invalid sort order", __func__, NULL); | |
| 112 | |
| 113 if (ptaGetSortIndex(ptas, sorttype, sortorder, &naindex) != 0) | |
| 114 return (PTA *)ERROR_PTR("naindex not made", __func__, NULL); | |
| 115 | |
| 116 ptad = ptaSortByIndex(ptas, naindex); | |
| 117 if (pnaindex) | |
| 118 *pnaindex = naindex; | |
| 119 else | |
| 120 numaDestroy(&naindex); | |
| 121 if (!ptad) | |
| 122 return (PTA *)ERROR_PTR("ptad not made", __func__, NULL); | |
| 123 return ptad; | |
| 124 } | |
| 125 | |
| 126 | |
| 127 /*! | |
| 128 * \brief ptaGetSortIndex() | |
| 129 * | |
| 130 * \param[in] ptas | |
| 131 * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y | |
| 132 * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING | |
| 133 * \param[out] pnaindex index of sorted order into original array | |
| 134 * \return 0 if OK, 1 on error | |
| 135 */ | |
| 136 l_ok | |
| 137 ptaGetSortIndex(PTA *ptas, | |
| 138 l_int32 sorttype, | |
| 139 l_int32 sortorder, | |
| 140 NUMA **pnaindex) | |
| 141 { | |
| 142 l_int32 i, n; | |
| 143 l_float32 x, y; | |
| 144 NUMA *na, *nai; | |
| 145 | |
| 146 if (!pnaindex) | |
| 147 return ERROR_INT("&naindex not defined", __func__, 1); | |
| 148 *pnaindex = NULL; | |
| 149 if (!ptas) | |
| 150 return ERROR_INT("ptas not defined", __func__, 1); | |
| 151 if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y) | |
| 152 return ERROR_INT("invalid sort type", __func__, 1); | |
| 153 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) | |
| 154 return ERROR_INT("invalid sort order", __func__, 1); | |
| 155 | |
| 156 /* Build up numa of specific data */ | |
| 157 n = ptaGetCount(ptas); | |
| 158 if ((na = numaCreate(n)) == NULL) | |
| 159 return ERROR_INT("na not made", __func__, 1); | |
| 160 for (i = 0; i < n; i++) { | |
| 161 ptaGetPt(ptas, i, &x, &y); | |
| 162 if (sorttype == L_SORT_BY_X) | |
| 163 numaAddNumber(na, x); | |
| 164 else | |
| 165 numaAddNumber(na, y); | |
| 166 } | |
| 167 | |
| 168 /* Get the sort index for data array */ | |
| 169 nai = numaGetSortIndex(na, sortorder); | |
| 170 numaDestroy(&na); | |
| 171 if (!nai) | |
| 172 return ERROR_INT("naindex not made", __func__, 1); | |
| 173 *pnaindex = nai; | |
| 174 return 0; | |
| 175 } | |
| 176 | |
| 177 | |
| 178 /*! | |
| 179 * \brief ptaSortByIndex() | |
| 180 * | |
| 181 * \param[in] ptas | |
| 182 * \param[in] naindex na that maps from the new pta to the input pta | |
| 183 * \return ptad sorted, or NULL on error | |
| 184 */ | |
| 185 PTA * | |
| 186 ptaSortByIndex(PTA *ptas, | |
| 187 NUMA *naindex) | |
| 188 { | |
| 189 l_int32 i, index, n; | |
| 190 l_float32 x, y; | |
| 191 PTA *ptad; | |
| 192 | |
| 193 if (!ptas) | |
| 194 return (PTA *)ERROR_PTR("ptas not defined", __func__, NULL); | |
| 195 if (!naindex) | |
| 196 return (PTA *)ERROR_PTR("naindex not defined", __func__, NULL); | |
| 197 | |
| 198 /* Build up sorted pta using sort index */ | |
| 199 n = numaGetCount(naindex); | |
| 200 if ((ptad = ptaCreate(n)) == NULL) | |
| 201 return (PTA *)ERROR_PTR("ptad not made", __func__, NULL); | |
| 202 for (i = 0; i < n; i++) { | |
| 203 numaGetIValue(naindex, i, &index); | |
| 204 ptaGetPt(ptas, index, &x, &y); | |
| 205 ptaAddPt(ptad, x, y); | |
| 206 } | |
| 207 | |
| 208 return ptad; | |
| 209 } | |
| 210 | |
| 211 | |
| 212 /*! | |
| 213 * \brief ptaaSortByIndex() | |
| 214 * | |
| 215 * \param[in] ptaas | |
| 216 * \param[in] naindex na that maps from the new ptaa to the input ptaa | |
| 217 * \return ptaad sorted, or NULL on error | |
| 218 */ | |
| 219 PTAA * | |
| 220 ptaaSortByIndex(PTAA *ptaas, | |
| 221 NUMA *naindex) | |
| 222 { | |
| 223 l_int32 i, n, index; | |
| 224 PTA *pta; | |
| 225 PTAA *ptaad; | |
| 226 | |
| 227 if (!ptaas) | |
| 228 return (PTAA *)ERROR_PTR("ptaas not defined", __func__, NULL); | |
| 229 if (!naindex) | |
| 230 return (PTAA *)ERROR_PTR("naindex not defined", __func__, NULL); | |
| 231 | |
| 232 n = ptaaGetCount(ptaas); | |
| 233 if (numaGetCount(naindex) != n) | |
| 234 return (PTAA *)ERROR_PTR("numa and ptaa sizes differ", __func__, NULL); | |
| 235 ptaad = ptaaCreate(n); | |
| 236 for (i = 0; i < n; i++) { | |
| 237 numaGetIValue(naindex, i, &index); | |
| 238 pta = ptaaGetPta(ptaas, index, L_COPY); | |
| 239 ptaaAddPta(ptaad, pta, L_INSERT); | |
| 240 } | |
| 241 | |
| 242 return ptaad; | |
| 243 } | |
| 244 | |
| 245 | |
| 246 /*! | |
| 247 * \brief ptaGetRankValue() | |
| 248 * | |
| 249 * \param[in] pta | |
| 250 * \param[in] fract use 0.0 for smallest, 1.0 for largest | |
| 251 * \param[in] ptasort [optional] version of %pta sorted by %sorttype | |
| 252 * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y | |
| 253 * \param[out] pval rankval: the x or y value at %fract | |
| 254 * \return 0 if OK, 1 on error | |
| 255 */ | |
| 256 l_ok | |
| 257 ptaGetRankValue(PTA *pta, | |
| 258 l_float32 fract, | |
| 259 PTA *ptasort, | |
| 260 l_int32 sorttype, | |
| 261 l_float32 *pval) | |
| 262 { | |
| 263 l_int32 index, n; | |
| 264 PTA *ptas; | |
| 265 | |
| 266 if (!pval) | |
| 267 return ERROR_INT("&val not defined", __func__, 1); | |
| 268 *pval = 0.0; | |
| 269 if (!pta) | |
| 270 return ERROR_INT("pta not defined", __func__, 1); | |
| 271 if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y) | |
| 272 return ERROR_INT("invalid sort type", __func__, 1); | |
| 273 if (fract < 0.0 || fract > 1.0) | |
| 274 return ERROR_INT("fract not in [0.0 ... 1.0]", __func__, 1); | |
| 275 if ((n = ptaGetCount(pta)) == 0) | |
| 276 return ERROR_INT("pta empty", __func__, 1); | |
| 277 | |
| 278 if (ptasort) | |
| 279 ptas = ptasort; | |
| 280 else | |
| 281 ptas = ptaSort(pta, sorttype, L_SORT_INCREASING, NULL); | |
| 282 | |
| 283 index = (l_int32)(fract * (l_float32)(n - 1) + 0.5); | |
| 284 if (sorttype == L_SORT_BY_X) | |
| 285 ptaGetPt(ptas, index, pval, NULL); | |
| 286 else /* sort by y */ | |
| 287 ptaGetPt(ptas, index, NULL, pval); | |
| 288 | |
| 289 if (!ptasort) ptaDestroy(&ptas); | |
| 290 return 0; | |
| 291 } | |
| 292 | |
| 293 | |
| 294 /*! | |
| 295 * \brief ptaSort2d() | |
| 296 * | |
| 297 * \param[in] ptas | |
| 298 * \return ptad, or NULL on error | |
| 299 * | |
| 300 * <pre> | |
| 301 * Notes: | |
| 302 * (1) Sort increasing by row-major, scanning down from the UL corner, | |
| 303 * where for each value of y, order the pts from left to right. | |
| 304 * </pre> | |
| 305 */ | |
| 306 PTA * | |
| 307 ptaSort2d(PTA *pta) | |
| 308 { | |
| 309 l_int32 index, i, j, n, nx, ny, start, end; | |
| 310 l_float32 x, y, yp, val; | |
| 311 NUMA *na1, *na2, *nas, *nax; | |
| 312 PTA *pta1, *ptad; | |
| 313 | |
| 314 if (!pta) | |
| 315 return (PTA *)ERROR_PTR("pta not defined", __func__, NULL); | |
| 316 | |
| 317 /* Sort by row-major (y first, then x). After sort by y, | |
| 318 * the x values at the same y are not sorted. */ | |
| 319 pta1 = ptaSort(pta, L_SORT_BY_Y, L_SORT_INCREASING, NULL); | |
| 320 | |
| 321 /* Find start and ending indices with the same y value */ | |
| 322 n = ptaGetCount(pta1); | |
| 323 na1 = numaCreate(0); /* holds start index of sequence with same y */ | |
| 324 na2 = numaCreate(0); /* holds end index of sequence with same y */ | |
| 325 numaAddNumber(na1, 0); | |
| 326 ptaGetPt(pta1, 0, &x, &yp); | |
| 327 for (i = 1; i < n; i++) { | |
| 328 ptaGetPt(pta1, i, &x, &y); | |
| 329 if (y != yp) { | |
| 330 numaAddNumber(na1, i); | |
| 331 numaAddNumber(na2, i - 1); | |
| 332 } | |
| 333 yp = y; | |
| 334 } | |
| 335 numaAddNumber(na2, n - 1); | |
| 336 | |
| 337 /* Sort by increasing x each set with the same y value */ | |
| 338 ptad = ptaCreate(n); | |
| 339 ny = numaGetCount(na1); /* number of distinct y values */ | |
| 340 for (i = 0, index = 0; i < ny; i++) { | |
| 341 numaGetIValue(na1, i, &start); | |
| 342 numaGetIValue(na2, i, &end); | |
| 343 nx = end - start + 1; /* number of points with current y value */ | |
| 344 if (nx == 1) { | |
| 345 ptaGetPt(pta1, index++, &x, &y); | |
| 346 ptaAddPt(ptad, x, y); | |
| 347 } else { | |
| 348 /* More than 1 point; extract and sort the x values */ | |
| 349 nax = numaCreate(nx); | |
| 350 for (j = 0; j < nx; j++) { | |
| 351 ptaGetPt(pta1, index + j, &x, &y); | |
| 352 numaAddNumber(nax, x); | |
| 353 } | |
| 354 nas = numaSort(NULL, nax, L_SORT_INCREASING); | |
| 355 /* Add the points with x sorted */ | |
| 356 for (j = 0; j < nx; j++) { | |
| 357 numaGetFValue(nas, j, &val); | |
| 358 ptaAddPt(ptad, val, y); | |
| 359 } | |
| 360 index += nx; | |
| 361 numaDestroy(&nax); | |
| 362 numaDestroy(&nas); | |
| 363 } | |
| 364 } | |
| 365 numaDestroy(&na1); | |
| 366 numaDestroy(&na2); | |
| 367 ptaDestroy(&pta1); | |
| 368 return ptad; | |
| 369 } | |
| 370 | |
| 371 | |
| 372 /*! | |
| 373 * \brief ptaEqual() | |
| 374 * | |
| 375 * \param[in] pta1 | |
| 376 * \param[in] pta2 | |
| 377 * \param[out] psame 1 if same; 0 if different | |
| 378 * \return 0 if OK; 1 on error | |
| 379 * | |
| 380 * <pre> | |
| 381 * Notes: | |
| 382 * (1) Equality is defined as having the same set of points, | |
| 383 * independent of the order in which they are presented. | |
| 384 * </pre> | |
| 385 */ | |
| 386 l_ok | |
| 387 ptaEqual(PTA *pta1, | |
| 388 PTA *pta2, | |
| 389 l_int32 *psame) | |
| 390 { | |
| 391 l_int32 i, n1, n2; | |
| 392 l_float32 x1, y1, x2, y2; | |
| 393 PTA *ptas1, *ptas2; | |
| 394 | |
| 395 if (!psame) | |
| 396 return ERROR_INT("&same not defined", __func__, 1); | |
| 397 *psame = 0.0f; | |
| 398 if (!pta1 || !pta2) | |
| 399 return ERROR_INT("pta1 and pta2 not both defined", __func__, 1); | |
| 400 | |
| 401 n1 = ptaGetCount(pta1); | |
| 402 n2 = ptaGetCount(pta2); | |
| 403 if (n1 != n2) return 0; | |
| 404 | |
| 405 /* 2d sort each and compare */ | |
| 406 ptas1 = ptaSort2d(pta1); | |
| 407 ptas2 = ptaSort2d(pta2); | |
| 408 for (i = 0; i < n1; i++) { | |
| 409 ptaGetPt(ptas1, i, &x1, &y1); | |
| 410 ptaGetPt(ptas2, i, &x2, &y2); | |
| 411 if (x1 != x2 || y1 != y2) { | |
| 412 ptaDestroy(&ptas1); | |
| 413 ptaDestroy(&ptas2); | |
| 414 return 0; | |
| 415 } | |
| 416 } | |
| 417 | |
| 418 *psame = 1; | |
| 419 ptaDestroy(&ptas1); | |
| 420 ptaDestroy(&ptas2); | |
| 421 return 0; | |
| 422 } | |
| 423 | |
| 424 | |
| 425 /*---------------------------------------------------------------------* | |
| 426 * Set operations using aset (rbtree) * | |
| 427 *---------------------------------------------------------------------*/ | |
| 428 /*! | |
| 429 * \brief l_asetCreateFromPta() | |
| 430 * | |
| 431 * \param[in] pta | |
| 432 * \return set using a 64-bit hash of (x,y) as the key | |
| 433 */ | |
| 434 L_ASET * | |
| 435 l_asetCreateFromPta(PTA *pta) | |
| 436 { | |
| 437 l_int32 i, n, x, y; | |
| 438 l_uint64 hash; | |
| 439 L_ASET *set; | |
| 440 RB_TYPE key; | |
| 441 | |
| 442 if (!pta) | |
| 443 return (L_ASET *)ERROR_PTR("pta not defined", __func__, NULL); | |
| 444 | |
| 445 set = l_asetCreate(L_UINT_TYPE); | |
| 446 n = ptaGetCount(pta); | |
| 447 for (i = 0; i < n; i++) { | |
| 448 ptaGetIPt(pta, i, &x, &y); | |
| 449 l_hashPtToUint64(x, y, &hash); | |
| 450 key.utype = hash; | |
| 451 l_asetInsert(set, key); | |
| 452 } | |
| 453 | |
| 454 return set; | |
| 455 } | |
| 456 | |
| 457 | |
| 458 /*! | |
| 459 * \brief ptaRemoveDupsByAset() | |
| 460 * | |
| 461 * \param[in] ptas assumed to be integer values | |
| 462 * \param[out] pptad assumed to be integer values | |
| 463 * \return 0 if OK; 1 on error | |
| 464 * | |
| 465 * <pre> | |
| 466 * Notes: | |
| 467 * (1) This is slower than ptaRemoveDupsByHmap(), mostly because | |
| 468 * of the nlogn sort to build up the rbtree. Do not use for | |
| 469 * large numbers of points (say, > 100K). | |
| 470 * </pre> | |
| 471 */ | |
| 472 l_ok | |
| 473 ptaRemoveDupsByAset(PTA *ptas, | |
| 474 PTA **pptad) | |
| 475 { | |
| 476 l_int32 i, n, x, y; | |
| 477 PTA *ptad; | |
| 478 l_uint64 hash; | |
| 479 L_ASET *set; | |
| 480 RB_TYPE key; | |
| 481 | |
| 482 if (!pptad) | |
| 483 return ERROR_INT("&ptad not defined", __func__, 1); | |
| 484 *pptad = NULL; | |
| 485 if (!ptas) | |
| 486 return ERROR_INT("ptas not defined", __func__, 1); | |
| 487 | |
| 488 set = l_asetCreate(L_UINT_TYPE); | |
| 489 n = ptaGetCount(ptas); | |
| 490 ptad = ptaCreate(n); | |
| 491 *pptad = ptad; | |
| 492 for (i = 0; i < n; i++) { | |
| 493 ptaGetIPt(ptas, i, &x, &y); | |
| 494 l_hashPtToUint64(x, y, &hash); | |
| 495 key.utype = hash; | |
| 496 if (!l_asetFind(set, key)) { | |
| 497 ptaAddPt(ptad, x, y); | |
| 498 l_asetInsert(set, key); | |
| 499 } | |
| 500 } | |
| 501 | |
| 502 l_asetDestroy(&set); | |
| 503 return 0; | |
| 504 } | |
| 505 | |
| 506 | |
| 507 /*! | |
| 508 * \brief ptaUnionByAset() | |
| 509 * | |
| 510 * \param[in] pta1 | |
| 511 * \param[in] pta2 | |
| 512 * \param[out] pptad union of the two point arrays | |
| 513 * \return 0 if OK; 1 on error | |
| 514 * | |
| 515 * <pre> | |
| 516 * Notes: | |
| 517 * (1) See sarrayRemoveDupsByAset() for the approach. | |
| 518 * (2) The key is a 64-bit hash from the (x,y) pair. | |
| 519 * (3) This is slower than ptaUnionByHmap(), mostly because of the | |
| 520 * nlogn sort to build up the rbtree. Do not use for large | |
| 521 * numbers of points (say, > 100K). | |
| 522 * (4) The *Aset() functions use the sorted l_Aset, which is just | |
| 523 * an rbtree in disguise. | |
| 524 * </pre> | |
| 525 */ | |
| 526 l_ok | |
| 527 ptaUnionByAset(PTA *pta1, | |
| 528 PTA *pta2, | |
| 529 PTA **pptad) | |
| 530 { | |
| 531 PTA *pta3; | |
| 532 | |
| 533 if (!pptad) | |
| 534 return ERROR_INT("&ptad not defined", __func__, 1); | |
| 535 *pptad = NULL; | |
| 536 if (!pta1) | |
| 537 return ERROR_INT("pta1 not defined", __func__, 1); | |
| 538 if (!pta2) | |
| 539 return ERROR_INT("pta2 not defined", __func__, 1); | |
| 540 | |
| 541 /* Join */ | |
| 542 pta3 = ptaCopy(pta1); | |
| 543 ptaJoin(pta3, pta2, 0, -1); | |
| 544 | |
| 545 /* Eliminate duplicates */ | |
| 546 ptaRemoveDupsByAset(pta3, pptad); | |
| 547 ptaDestroy(&pta3); | |
| 548 return 0; | |
| 549 } | |
| 550 | |
| 551 | |
| 552 /*! | |
| 553 * \brief ptaIntersectionByAset() | |
| 554 * | |
| 555 * \param[in] pta1 | |
| 556 * \param[in] pta2 | |
| 557 * \param[out] pptad intersection of the two point arrays | |
| 558 * \return 0 if OK; 1 on error | |
| 559 * | |
| 560 * <pre> | |
| 561 * Notes: | |
| 562 * (1) See sarrayIntersectionByAset() for the approach. | |
| 563 * (2) The key is a 64-bit hash from the (x,y) pair. | |
| 564 * (3) This is slower than ptaIntersectionByHmap(), mostly because | |
| 565 * of the nlogn sort to build up the rbtree. Do not use for | |
| 566 * large numbers of points (say, > 100K). | |
| 567 * </pre> | |
| 568 */ | |
| 569 l_ok | |
| 570 ptaIntersectionByAset(PTA *pta1, | |
| 571 PTA *pta2, | |
| 572 PTA **pptad) | |
| 573 { | |
| 574 l_int32 n1, n2, i, n, x, y; | |
| 575 l_uint64 hash; | |
| 576 L_ASET *set1, *set2; | |
| 577 RB_TYPE key; | |
| 578 PTA *pta_small, *pta_big, *ptad; | |
| 579 | |
| 580 if (!pptad) | |
| 581 return ERROR_INT("&ptad not defined", __func__, 1); | |
| 582 *pptad = NULL; | |
| 583 if (!pta1) | |
| 584 return ERROR_INT("pta1 not defined", __func__, 1); | |
| 585 if (!pta2) | |
| 586 return ERROR_INT("pta2 not defined", __func__, 1); | |
| 587 | |
| 588 /* Put the elements of the biggest array into a set */ | |
| 589 n1 = ptaGetCount(pta1); | |
| 590 n2 = ptaGetCount(pta2); | |
| 591 pta_small = (n1 < n2) ? pta1 : pta2; /* do not destroy pta_small */ | |
| 592 pta_big = (n1 < n2) ? pta2 : pta1; /* do not destroy pta_big */ | |
| 593 set1 = l_asetCreateFromPta(pta_big); | |
| 594 | |
| 595 /* Build up the intersection of points */ | |
| 596 ptad = ptaCreate(0); | |
| 597 *pptad = ptad; | |
| 598 n = ptaGetCount(pta_small); | |
| 599 set2 = l_asetCreate(L_UINT_TYPE); | |
| 600 for (i = 0; i < n; i++) { | |
| 601 ptaGetIPt(pta_small, i, &x, &y); | |
| 602 l_hashPtToUint64(x, y, &hash); | |
| 603 key.utype = hash; | |
| 604 if (l_asetFind(set1, key) && !l_asetFind(set2, key)) { | |
| 605 ptaAddPt(ptad, x, y); | |
| 606 l_asetInsert(set2, key); | |
| 607 } | |
| 608 } | |
| 609 | |
| 610 l_asetDestroy(&set1); | |
| 611 l_asetDestroy(&set2); | |
| 612 return 0; | |
| 613 } | |
| 614 | |
| 615 | |
| 616 /*--------------------------------------------------------------------------* | |
| 617 * Hashmap operations * | |
| 618 *--------------------------------------------------------------------------*/ | |
| 619 /*! | |
| 620 * \brief l_hmapCreateFromPta() | |
| 621 * | |
| 622 * \param[in] pta input pta | |
| 623 * \return hmap hashmap, or NULL on error | |
| 624 * | |
| 625 * <pre> | |
| 626 * Notes: | |
| 627 * (1) The indices into %pta are stored in the val field of the hashitems. | |
| 628 * This is necessary so that %hmap and %pta can be used together. | |
| 629 * </pre> | |
| 630 */ | |
| 631 L_HASHMAP * | |
| 632 l_hmapCreateFromPta(PTA *pta) | |
| 633 { | |
| 634 l_int32 i, n, x, y; | |
| 635 l_uint64 key; | |
| 636 L_HASHMAP *hmap; | |
| 637 | |
| 638 if (!pta) | |
| 639 return (L_HASHMAP *)ERROR_PTR("pta not defined", __func__, NULL); | |
| 640 | |
| 641 n = ptaGetCount(pta); | |
| 642 if ((hmap = l_hmapCreate(0.51 * n, 2)) == NULL) | |
| 643 return (L_HASHMAP *)ERROR_PTR("hmap not made", __func__, NULL); | |
| 644 for (i = 0; i < n; i++) { | |
| 645 ptaGetIPt(pta, i, &x, &y); | |
| 646 l_hashPtToUint64(x, y, &key); | |
| 647 l_hmapLookup(hmap, key, i, L_HMAP_CREATE); | |
| 648 } | |
| 649 return hmap; | |
| 650 } | |
| 651 | |
| 652 | |
| 653 /*! | |
| 654 * \brief ptaRemoveDupsByHmap() | |
| 655 * | |
| 656 * \param[in] ptas | |
| 657 * \param[out] pptad set of unique values | |
| 658 * \param[out] phmap [optional] hashmap used for lookup | |
| 659 * \return 0 if OK; 1 on error | |
| 660 * | |
| 661 * <pre> | |
| 662 * Notes: | |
| 663 * (1) Generates a set of (unique) points from %ptas. | |
| 664 * </pre> | |
| 665 */ | |
| 666 l_ok | |
| 667 ptaRemoveDupsByHmap(PTA *ptas, | |
| 668 PTA **pptad, | |
| 669 L_HASHMAP **phmap) | |
| 670 { | |
| 671 l_int32 i, x, y, tabsize; | |
| 672 PTA *ptad; | |
| 673 L_HASHITEM *hitem; | |
| 674 L_HASHMAP *hmap; | |
| 675 | |
| 676 if (phmap) *phmap = NULL; | |
| 677 if (!pptad) | |
| 678 return ERROR_INT("&ptad not defined", __func__, 1); | |
| 679 *pptad = NULL; | |
| 680 if (!ptas) | |
| 681 return ERROR_INT("ptas not defined", __func__, 1); | |
| 682 | |
| 683 /* Traverse the hashtable lists */ | |
| 684 if ((hmap = l_hmapCreateFromPta(ptas)) == NULL) | |
| 685 return ERROR_INT("hmap not made", __func__, 1); | |
| 686 ptad = ptaCreate(0); | |
| 687 *pptad = ptad; | |
| 688 tabsize = hmap->tabsize; | |
| 689 for (i = 0; i < tabsize; i++) { | |
| 690 hitem = hmap->hashtab[i]; | |
| 691 while (hitem) { | |
| 692 ptaGetIPt(ptas, hitem->val, &x, &y); | |
| 693 ptaAddPt(ptad, x, y); | |
| 694 hitem = hitem->next; | |
| 695 } | |
| 696 } | |
| 697 | |
| 698 if (phmap) | |
| 699 *phmap = hmap; | |
| 700 else | |
| 701 l_hmapDestroy(&hmap); | |
| 702 return 0; | |
| 703 } | |
| 704 | |
| 705 | |
| 706 /*! | |
| 707 * \brief ptaUnionByHmap() | |
| 708 * | |
| 709 * \param[in] pta1 | |
| 710 * \param[in] pta2 | |
| 711 * \param[out] pptad union of the two point arrays | |
| 712 * \return 0 if OK; 1 on error | |
| 713 * | |
| 714 * <pre> | |
| 715 * Notes: | |
| 716 * (1) Make pta with points found in either of the input arrays. | |
| 717 * </pre> | |
| 718 */ | |
| 719 l_ok | |
| 720 ptaUnionByHmap(PTA *pta1, | |
| 721 PTA *pta2, | |
| 722 PTA **pptad) | |
| 723 { | |
| 724 PTA *pta3; | |
| 725 | |
| 726 if (!pptad) | |
| 727 return ERROR_INT("&ptad not defined", __func__, 1); | |
| 728 *pptad = NULL; | |
| 729 if (!pta1) | |
| 730 return ERROR_INT("pta1 not defined", __func__, 1); | |
| 731 if (!pta2) | |
| 732 return ERROR_INT("pta2 not defined", __func__, 1); | |
| 733 | |
| 734 pta3 = ptaCopy(pta1); | |
| 735 if (ptaJoin(pta3, pta2, 0, -1) == 1) { | |
| 736 ptaDestroy(&pta3); | |
| 737 return ERROR_INT("pta join failed", __func__, 1); | |
| 738 } | |
| 739 ptaRemoveDupsByHmap(pta3, pptad, NULL); | |
| 740 ptaDestroy(&pta3); | |
| 741 return 0; | |
| 742 } | |
| 743 | |
| 744 | |
| 745 /*! | |
| 746 * \brief ptaIntersectionByHmap() | |
| 747 * | |
| 748 * \param[in] pta1 | |
| 749 * \param[in] pta2 | |
| 750 * \param[out] pptad intersection of the two point arrays | |
| 751 * \return 0 if OK; 1 on error | |
| 752 * | |
| 753 * <pre> | |
| 754 * Notes: | |
| 755 * (1) Make pta with pts common to both input arrays. | |
| 756 * </pre> | |
| 757 */ | |
| 758 l_ok | |
| 759 ptaIntersectionByHmap(PTA *pta1, | |
| 760 PTA *pta2, | |
| 761 PTA **pptad) | |
| 762 { | |
| 763 l_int32 i, n1, n2, n, x, y; | |
| 764 l_uint64 key; | |
| 765 PTA *pta_small, *pta_big, *ptad; | |
| 766 L_HASHITEM *hitem; | |
| 767 L_HASHMAP *hmap; | |
| 768 | |
| 769 if (!pptad) | |
| 770 return ERROR_INT("&ptad not defined", __func__, 1); | |
| 771 *pptad = NULL; | |
| 772 if (!pta1) | |
| 773 return ERROR_INT("pta1 not defined", __func__, 1); | |
| 774 if (!pta2) | |
| 775 return ERROR_INT("pta2 not defined", __func__, 1); | |
| 776 | |
| 777 /* Make a hashmap for the elements of the biggest array */ | |
| 778 n1 = ptaGetCount(pta1); | |
| 779 n2 = ptaGetCount(pta2); | |
| 780 pta_small = (n1 < n2) ? pta1 : pta2; /* do not destroy pta_small */ | |
| 781 pta_big = (n1 < n2) ? pta2 : pta1; /* do not destroy pta_big */ | |
| 782 if ((hmap = l_hmapCreateFromPta(pta_big)) == NULL) | |
| 783 return ERROR_INT("hmap not made", __func__, 1); | |
| 784 | |
| 785 /* Go through the smallest array, doing a lookup of its (x,y) into | |
| 786 * the big array hashmap. If an hitem is returned, check the count. | |
| 787 * If the count is 0, ignore; otherwise, add the point to the | |
| 788 * output ptad and set the count in the hitem to 0, indicating | |
| 789 * that the point has already been added. */ | |
| 790 ptad = ptaCreate(0); | |
| 791 *pptad = ptad; | |
| 792 n = ptaGetCount(pta_small); | |
| 793 for (i = 0; i < n; i++) { | |
| 794 ptaGetIPt(pta_small, i, &x, &y); | |
| 795 l_hashPtToUint64(x, y, &key); | |
| 796 hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK); | |
| 797 if (!hitem || hitem->count == 0) | |
| 798 continue; | |
| 799 ptaAddPt(ptad, x, y); | |
| 800 hitem->count = 0; | |
| 801 } | |
| 802 l_hmapDestroy(&hmap); | |
| 803 return 0; | |
| 804 } | |
| 805 | |
| 806 |
