comparison mupdf-source/thirdparty/leptonica/src/ptafunc2.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file ptafunc2.c
29 * <pre>
30 *
31 * --------------------------------------
32 * This file has these Pta utilities:
33 * - sorting
34 * - ordered set operations
35 * - hash map operations
36 * --------------------------------------
37 *
38 * Sorting
39 * PTA *ptaSort()
40 * l_int32 ptaGetSortIndex()
41 * PTA *ptaSortByIndex()
42 * PTAA *ptaaSortByIndex()
43 * l_int32 ptaGetRankValue()
44 * PTA *ptaSort2d()
45 * l_int32 ptaEqual()
46 *
47 * Set operations using aset (rbtree)
48 * L_ASET *l_asetCreateFromPta()
49 * PTA *ptaRemoveDupsByAset()
50 * PTA *ptaUnionByAset()
51 * PTA *ptaIntersectionByAset()
52 *
53 * Hashmap operations
54 * L_HASHMAP *l_hmapCreateFromPta()
55 * l_int32 ptaRemoveDupsByHmap()
56 * l_int32 ptaUnionByHmap()
57 * l_int32 ptaIntersectionByHmap()
58 *
59 * We have two implementations of set operations on an array of points:
60 *
61 * (1) Using an underlying tree (rbtree)
62 * This uses a good 64 bit hashing function for the key,
63 * that is not expected to have hash collisions (and we do
64 * not test for them). The tree is built up of the keys,
65 * values, and is traversed looking for the key in O(log n).
66 *
67 * (2) Building a hashmap from the keys (hashmap)
68 * This uses a fast 64 bit hashing function for the key, which
69 * is then hashed into a hashtable. Collisions of hashkeys are
70 * very rare, but the hashtable is designed to allow more than one
71 * hashitem in a table entry. The hashitems are put in a list at
72 * each hashtable entry, which is traversed looking for the key.
73 *
74 * </pre>
75 */
76
77 #ifdef HAVE_CONFIG_H
78 #include <config_auto.h>
79 #endif /* HAVE_CONFIG_H */
80
81 #include "allheaders.h"
82
83 /*---------------------------------------------------------------------*
84 * Sorting *
85 *---------------------------------------------------------------------*/
86 /*!
87 * \brief ptaSort()
88 *
89 * \param[in] ptas
90 * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y
91 * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING
92 * \param[out] pnaindex [optional] index of sorted order into
93 * original array
94 * \return ptad sorted version of ptas, or NULL on error
95 */
96 PTA *
97 ptaSort(PTA *ptas,
98 l_int32 sorttype,
99 l_int32 sortorder,
100 NUMA **pnaindex)
101 {
102 PTA *ptad;
103 NUMA *naindex;
104
105 if (pnaindex) *pnaindex = NULL;
106 if (!ptas)
107 return (PTA *)ERROR_PTR("ptas not defined", __func__, NULL);
108 if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y)
109 return (PTA *)ERROR_PTR("invalid sort type", __func__, NULL);
110 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
111 return (PTA *)ERROR_PTR("invalid sort order", __func__, NULL);
112
113 if (ptaGetSortIndex(ptas, sorttype, sortorder, &naindex) != 0)
114 return (PTA *)ERROR_PTR("naindex not made", __func__, NULL);
115
116 ptad = ptaSortByIndex(ptas, naindex);
117 if (pnaindex)
118 *pnaindex = naindex;
119 else
120 numaDestroy(&naindex);
121 if (!ptad)
122 return (PTA *)ERROR_PTR("ptad not made", __func__, NULL);
123 return ptad;
124 }
125
126
127 /*!
128 * \brief ptaGetSortIndex()
129 *
130 * \param[in] ptas
131 * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y
132 * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING
133 * \param[out] pnaindex index of sorted order into original array
134 * \return 0 if OK, 1 on error
135 */
136 l_ok
137 ptaGetSortIndex(PTA *ptas,
138 l_int32 sorttype,
139 l_int32 sortorder,
140 NUMA **pnaindex)
141 {
142 l_int32 i, n;
143 l_float32 x, y;
144 NUMA *na, *nai;
145
146 if (!pnaindex)
147 return ERROR_INT("&naindex not defined", __func__, 1);
148 *pnaindex = NULL;
149 if (!ptas)
150 return ERROR_INT("ptas not defined", __func__, 1);
151 if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y)
152 return ERROR_INT("invalid sort type", __func__, 1);
153 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
154 return ERROR_INT("invalid sort order", __func__, 1);
155
156 /* Build up numa of specific data */
157 n = ptaGetCount(ptas);
158 if ((na = numaCreate(n)) == NULL)
159 return ERROR_INT("na not made", __func__, 1);
160 for (i = 0; i < n; i++) {
161 ptaGetPt(ptas, i, &x, &y);
162 if (sorttype == L_SORT_BY_X)
163 numaAddNumber(na, x);
164 else
165 numaAddNumber(na, y);
166 }
167
168 /* Get the sort index for data array */
169 nai = numaGetSortIndex(na, sortorder);
170 numaDestroy(&na);
171 if (!nai)
172 return ERROR_INT("naindex not made", __func__, 1);
173 *pnaindex = nai;
174 return 0;
175 }
176
177
178 /*!
179 * \brief ptaSortByIndex()
180 *
181 * \param[in] ptas
182 * \param[in] naindex na that maps from the new pta to the input pta
183 * \return ptad sorted, or NULL on error
184 */
185 PTA *
186 ptaSortByIndex(PTA *ptas,
187 NUMA *naindex)
188 {
189 l_int32 i, index, n;
190 l_float32 x, y;
191 PTA *ptad;
192
193 if (!ptas)
194 return (PTA *)ERROR_PTR("ptas not defined", __func__, NULL);
195 if (!naindex)
196 return (PTA *)ERROR_PTR("naindex not defined", __func__, NULL);
197
198 /* Build up sorted pta using sort index */
199 n = numaGetCount(naindex);
200 if ((ptad = ptaCreate(n)) == NULL)
201 return (PTA *)ERROR_PTR("ptad not made", __func__, NULL);
202 for (i = 0; i < n; i++) {
203 numaGetIValue(naindex, i, &index);
204 ptaGetPt(ptas, index, &x, &y);
205 ptaAddPt(ptad, x, y);
206 }
207
208 return ptad;
209 }
210
211
212 /*!
213 * \brief ptaaSortByIndex()
214 *
215 * \param[in] ptaas
216 * \param[in] naindex na that maps from the new ptaa to the input ptaa
217 * \return ptaad sorted, or NULL on error
218 */
219 PTAA *
220 ptaaSortByIndex(PTAA *ptaas,
221 NUMA *naindex)
222 {
223 l_int32 i, n, index;
224 PTA *pta;
225 PTAA *ptaad;
226
227 if (!ptaas)
228 return (PTAA *)ERROR_PTR("ptaas not defined", __func__, NULL);
229 if (!naindex)
230 return (PTAA *)ERROR_PTR("naindex not defined", __func__, NULL);
231
232 n = ptaaGetCount(ptaas);
233 if (numaGetCount(naindex) != n)
234 return (PTAA *)ERROR_PTR("numa and ptaa sizes differ", __func__, NULL);
235 ptaad = ptaaCreate(n);
236 for (i = 0; i < n; i++) {
237 numaGetIValue(naindex, i, &index);
238 pta = ptaaGetPta(ptaas, index, L_COPY);
239 ptaaAddPta(ptaad, pta, L_INSERT);
240 }
241
242 return ptaad;
243 }
244
245
246 /*!
247 * \brief ptaGetRankValue()
248 *
249 * \param[in] pta
250 * \param[in] fract use 0.0 for smallest, 1.0 for largest
251 * \param[in] ptasort [optional] version of %pta sorted by %sorttype
252 * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y
253 * \param[out] pval rankval: the x or y value at %fract
254 * \return 0 if OK, 1 on error
255 */
256 l_ok
257 ptaGetRankValue(PTA *pta,
258 l_float32 fract,
259 PTA *ptasort,
260 l_int32 sorttype,
261 l_float32 *pval)
262 {
263 l_int32 index, n;
264 PTA *ptas;
265
266 if (!pval)
267 return ERROR_INT("&val not defined", __func__, 1);
268 *pval = 0.0;
269 if (!pta)
270 return ERROR_INT("pta not defined", __func__, 1);
271 if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y)
272 return ERROR_INT("invalid sort type", __func__, 1);
273 if (fract < 0.0 || fract > 1.0)
274 return ERROR_INT("fract not in [0.0 ... 1.0]", __func__, 1);
275 if ((n = ptaGetCount(pta)) == 0)
276 return ERROR_INT("pta empty", __func__, 1);
277
278 if (ptasort)
279 ptas = ptasort;
280 else
281 ptas = ptaSort(pta, sorttype, L_SORT_INCREASING, NULL);
282
283 index = (l_int32)(fract * (l_float32)(n - 1) + 0.5);
284 if (sorttype == L_SORT_BY_X)
285 ptaGetPt(ptas, index, pval, NULL);
286 else /* sort by y */
287 ptaGetPt(ptas, index, NULL, pval);
288
289 if (!ptasort) ptaDestroy(&ptas);
290 return 0;
291 }
292
293
294 /*!
295 * \brief ptaSort2d()
296 *
297 * \param[in] ptas
298 * \return ptad, or NULL on error
299 *
300 * <pre>
301 * Notes:
302 * (1) Sort increasing by row-major, scanning down from the UL corner,
303 * where for each value of y, order the pts from left to right.
304 * </pre>
305 */
306 PTA *
307 ptaSort2d(PTA *pta)
308 {
309 l_int32 index, i, j, n, nx, ny, start, end;
310 l_float32 x, y, yp, val;
311 NUMA *na1, *na2, *nas, *nax;
312 PTA *pta1, *ptad;
313
314 if (!pta)
315 return (PTA *)ERROR_PTR("pta not defined", __func__, NULL);
316
317 /* Sort by row-major (y first, then x). After sort by y,
318 * the x values at the same y are not sorted. */
319 pta1 = ptaSort(pta, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
320
321 /* Find start and ending indices with the same y value */
322 n = ptaGetCount(pta1);
323 na1 = numaCreate(0); /* holds start index of sequence with same y */
324 na2 = numaCreate(0); /* holds end index of sequence with same y */
325 numaAddNumber(na1, 0);
326 ptaGetPt(pta1, 0, &x, &yp);
327 for (i = 1; i < n; i++) {
328 ptaGetPt(pta1, i, &x, &y);
329 if (y != yp) {
330 numaAddNumber(na1, i);
331 numaAddNumber(na2, i - 1);
332 }
333 yp = y;
334 }
335 numaAddNumber(na2, n - 1);
336
337 /* Sort by increasing x each set with the same y value */
338 ptad = ptaCreate(n);
339 ny = numaGetCount(na1); /* number of distinct y values */
340 for (i = 0, index = 0; i < ny; i++) {
341 numaGetIValue(na1, i, &start);
342 numaGetIValue(na2, i, &end);
343 nx = end - start + 1; /* number of points with current y value */
344 if (nx == 1) {
345 ptaGetPt(pta1, index++, &x, &y);
346 ptaAddPt(ptad, x, y);
347 } else {
348 /* More than 1 point; extract and sort the x values */
349 nax = numaCreate(nx);
350 for (j = 0; j < nx; j++) {
351 ptaGetPt(pta1, index + j, &x, &y);
352 numaAddNumber(nax, x);
353 }
354 nas = numaSort(NULL, nax, L_SORT_INCREASING);
355 /* Add the points with x sorted */
356 for (j = 0; j < nx; j++) {
357 numaGetFValue(nas, j, &val);
358 ptaAddPt(ptad, val, y);
359 }
360 index += nx;
361 numaDestroy(&nax);
362 numaDestroy(&nas);
363 }
364 }
365 numaDestroy(&na1);
366 numaDestroy(&na2);
367 ptaDestroy(&pta1);
368 return ptad;
369 }
370
371
372 /*!
373 * \brief ptaEqual()
374 *
375 * \param[in] pta1
376 * \param[in] pta2
377 * \param[out] psame 1 if same; 0 if different
378 * \return 0 if OK; 1 on error
379 *
380 * <pre>
381 * Notes:
382 * (1) Equality is defined as having the same set of points,
383 * independent of the order in which they are presented.
384 * </pre>
385 */
386 l_ok
387 ptaEqual(PTA *pta1,
388 PTA *pta2,
389 l_int32 *psame)
390 {
391 l_int32 i, n1, n2;
392 l_float32 x1, y1, x2, y2;
393 PTA *ptas1, *ptas2;
394
395 if (!psame)
396 return ERROR_INT("&same not defined", __func__, 1);
397 *psame = 0.0f;
398 if (!pta1 || !pta2)
399 return ERROR_INT("pta1 and pta2 not both defined", __func__, 1);
400
401 n1 = ptaGetCount(pta1);
402 n2 = ptaGetCount(pta2);
403 if (n1 != n2) return 0;
404
405 /* 2d sort each and compare */
406 ptas1 = ptaSort2d(pta1);
407 ptas2 = ptaSort2d(pta2);
408 for (i = 0; i < n1; i++) {
409 ptaGetPt(ptas1, i, &x1, &y1);
410 ptaGetPt(ptas2, i, &x2, &y2);
411 if (x1 != x2 || y1 != y2) {
412 ptaDestroy(&ptas1);
413 ptaDestroy(&ptas2);
414 return 0;
415 }
416 }
417
418 *psame = 1;
419 ptaDestroy(&ptas1);
420 ptaDestroy(&ptas2);
421 return 0;
422 }
423
424
425 /*---------------------------------------------------------------------*
426 * Set operations using aset (rbtree) *
427 *---------------------------------------------------------------------*/
428 /*!
429 * \brief l_asetCreateFromPta()
430 *
431 * \param[in] pta
432 * \return set using a 64-bit hash of (x,y) as the key
433 */
434 L_ASET *
435 l_asetCreateFromPta(PTA *pta)
436 {
437 l_int32 i, n, x, y;
438 l_uint64 hash;
439 L_ASET *set;
440 RB_TYPE key;
441
442 if (!pta)
443 return (L_ASET *)ERROR_PTR("pta not defined", __func__, NULL);
444
445 set = l_asetCreate(L_UINT_TYPE);
446 n = ptaGetCount(pta);
447 for (i = 0; i < n; i++) {
448 ptaGetIPt(pta, i, &x, &y);
449 l_hashPtToUint64(x, y, &hash);
450 key.utype = hash;
451 l_asetInsert(set, key);
452 }
453
454 return set;
455 }
456
457
458 /*!
459 * \brief ptaRemoveDupsByAset()
460 *
461 * \param[in] ptas assumed to be integer values
462 * \param[out] pptad assumed to be integer values
463 * \return 0 if OK; 1 on error
464 *
465 * <pre>
466 * Notes:
467 * (1) This is slower than ptaRemoveDupsByHmap(), mostly because
468 * of the nlogn sort to build up the rbtree. Do not use for
469 * large numbers of points (say, > 100K).
470 * </pre>
471 */
472 l_ok
473 ptaRemoveDupsByAset(PTA *ptas,
474 PTA **pptad)
475 {
476 l_int32 i, n, x, y;
477 PTA *ptad;
478 l_uint64 hash;
479 L_ASET *set;
480 RB_TYPE key;
481
482 if (!pptad)
483 return ERROR_INT("&ptad not defined", __func__, 1);
484 *pptad = NULL;
485 if (!ptas)
486 return ERROR_INT("ptas not defined", __func__, 1);
487
488 set = l_asetCreate(L_UINT_TYPE);
489 n = ptaGetCount(ptas);
490 ptad = ptaCreate(n);
491 *pptad = ptad;
492 for (i = 0; i < n; i++) {
493 ptaGetIPt(ptas, i, &x, &y);
494 l_hashPtToUint64(x, y, &hash);
495 key.utype = hash;
496 if (!l_asetFind(set, key)) {
497 ptaAddPt(ptad, x, y);
498 l_asetInsert(set, key);
499 }
500 }
501
502 l_asetDestroy(&set);
503 return 0;
504 }
505
506
507 /*!
508 * \brief ptaUnionByAset()
509 *
510 * \param[in] pta1
511 * \param[in] pta2
512 * \param[out] pptad union of the two point arrays
513 * \return 0 if OK; 1 on error
514 *
515 * <pre>
516 * Notes:
517 * (1) See sarrayRemoveDupsByAset() for the approach.
518 * (2) The key is a 64-bit hash from the (x,y) pair.
519 * (3) This is slower than ptaUnionByHmap(), mostly because of the
520 * nlogn sort to build up the rbtree. Do not use for large
521 * numbers of points (say, > 100K).
522 * (4) The *Aset() functions use the sorted l_Aset, which is just
523 * an rbtree in disguise.
524 * </pre>
525 */
526 l_ok
527 ptaUnionByAset(PTA *pta1,
528 PTA *pta2,
529 PTA **pptad)
530 {
531 PTA *pta3;
532
533 if (!pptad)
534 return ERROR_INT("&ptad not defined", __func__, 1);
535 *pptad = NULL;
536 if (!pta1)
537 return ERROR_INT("pta1 not defined", __func__, 1);
538 if (!pta2)
539 return ERROR_INT("pta2 not defined", __func__, 1);
540
541 /* Join */
542 pta3 = ptaCopy(pta1);
543 ptaJoin(pta3, pta2, 0, -1);
544
545 /* Eliminate duplicates */
546 ptaRemoveDupsByAset(pta3, pptad);
547 ptaDestroy(&pta3);
548 return 0;
549 }
550
551
552 /*!
553 * \brief ptaIntersectionByAset()
554 *
555 * \param[in] pta1
556 * \param[in] pta2
557 * \param[out] pptad intersection of the two point arrays
558 * \return 0 if OK; 1 on error
559 *
560 * <pre>
561 * Notes:
562 * (1) See sarrayIntersectionByAset() for the approach.
563 * (2) The key is a 64-bit hash from the (x,y) pair.
564 * (3) This is slower than ptaIntersectionByHmap(), mostly because
565 * of the nlogn sort to build up the rbtree. Do not use for
566 * large numbers of points (say, > 100K).
567 * </pre>
568 */
569 l_ok
570 ptaIntersectionByAset(PTA *pta1,
571 PTA *pta2,
572 PTA **pptad)
573 {
574 l_int32 n1, n2, i, n, x, y;
575 l_uint64 hash;
576 L_ASET *set1, *set2;
577 RB_TYPE key;
578 PTA *pta_small, *pta_big, *ptad;
579
580 if (!pptad)
581 return ERROR_INT("&ptad not defined", __func__, 1);
582 *pptad = NULL;
583 if (!pta1)
584 return ERROR_INT("pta1 not defined", __func__, 1);
585 if (!pta2)
586 return ERROR_INT("pta2 not defined", __func__, 1);
587
588 /* Put the elements of the biggest array into a set */
589 n1 = ptaGetCount(pta1);
590 n2 = ptaGetCount(pta2);
591 pta_small = (n1 < n2) ? pta1 : pta2; /* do not destroy pta_small */
592 pta_big = (n1 < n2) ? pta2 : pta1; /* do not destroy pta_big */
593 set1 = l_asetCreateFromPta(pta_big);
594
595 /* Build up the intersection of points */
596 ptad = ptaCreate(0);
597 *pptad = ptad;
598 n = ptaGetCount(pta_small);
599 set2 = l_asetCreate(L_UINT_TYPE);
600 for (i = 0; i < n; i++) {
601 ptaGetIPt(pta_small, i, &x, &y);
602 l_hashPtToUint64(x, y, &hash);
603 key.utype = hash;
604 if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
605 ptaAddPt(ptad, x, y);
606 l_asetInsert(set2, key);
607 }
608 }
609
610 l_asetDestroy(&set1);
611 l_asetDestroy(&set2);
612 return 0;
613 }
614
615
616 /*--------------------------------------------------------------------------*
617 * Hashmap operations *
618 *--------------------------------------------------------------------------*/
619 /*!
620 * \brief l_hmapCreateFromPta()
621 *
622 * \param[in] pta input pta
623 * \return hmap hashmap, or NULL on error
624 *
625 * <pre>
626 * Notes:
627 * (1) The indices into %pta are stored in the val field of the hashitems.
628 * This is necessary so that %hmap and %pta can be used together.
629 * </pre>
630 */
631 L_HASHMAP *
632 l_hmapCreateFromPta(PTA *pta)
633 {
634 l_int32 i, n, x, y;
635 l_uint64 key;
636 L_HASHMAP *hmap;
637
638 if (!pta)
639 return (L_HASHMAP *)ERROR_PTR("pta not defined", __func__, NULL);
640
641 n = ptaGetCount(pta);
642 if ((hmap = l_hmapCreate(0.51 * n, 2)) == NULL)
643 return (L_HASHMAP *)ERROR_PTR("hmap not made", __func__, NULL);
644 for (i = 0; i < n; i++) {
645 ptaGetIPt(pta, i, &x, &y);
646 l_hashPtToUint64(x, y, &key);
647 l_hmapLookup(hmap, key, i, L_HMAP_CREATE);
648 }
649 return hmap;
650 }
651
652
653 /*!
654 * \brief ptaRemoveDupsByHmap()
655 *
656 * \param[in] ptas
657 * \param[out] pptad set of unique values
658 * \param[out] phmap [optional] hashmap used for lookup
659 * \return 0 if OK; 1 on error
660 *
661 * <pre>
662 * Notes:
663 * (1) Generates a set of (unique) points from %ptas.
664 * </pre>
665 */
666 l_ok
667 ptaRemoveDupsByHmap(PTA *ptas,
668 PTA **pptad,
669 L_HASHMAP **phmap)
670 {
671 l_int32 i, x, y, tabsize;
672 PTA *ptad;
673 L_HASHITEM *hitem;
674 L_HASHMAP *hmap;
675
676 if (phmap) *phmap = NULL;
677 if (!pptad)
678 return ERROR_INT("&ptad not defined", __func__, 1);
679 *pptad = NULL;
680 if (!ptas)
681 return ERROR_INT("ptas not defined", __func__, 1);
682
683 /* Traverse the hashtable lists */
684 if ((hmap = l_hmapCreateFromPta(ptas)) == NULL)
685 return ERROR_INT("hmap not made", __func__, 1);
686 ptad = ptaCreate(0);
687 *pptad = ptad;
688 tabsize = hmap->tabsize;
689 for (i = 0; i < tabsize; i++) {
690 hitem = hmap->hashtab[i];
691 while (hitem) {
692 ptaGetIPt(ptas, hitem->val, &x, &y);
693 ptaAddPt(ptad, x, y);
694 hitem = hitem->next;
695 }
696 }
697
698 if (phmap)
699 *phmap = hmap;
700 else
701 l_hmapDestroy(&hmap);
702 return 0;
703 }
704
705
706 /*!
707 * \brief ptaUnionByHmap()
708 *
709 * \param[in] pta1
710 * \param[in] pta2
711 * \param[out] pptad union of the two point arrays
712 * \return 0 if OK; 1 on error
713 *
714 * <pre>
715 * Notes:
716 * (1) Make pta with points found in either of the input arrays.
717 * </pre>
718 */
719 l_ok
720 ptaUnionByHmap(PTA *pta1,
721 PTA *pta2,
722 PTA **pptad)
723 {
724 PTA *pta3;
725
726 if (!pptad)
727 return ERROR_INT("&ptad not defined", __func__, 1);
728 *pptad = NULL;
729 if (!pta1)
730 return ERROR_INT("pta1 not defined", __func__, 1);
731 if (!pta2)
732 return ERROR_INT("pta2 not defined", __func__, 1);
733
734 pta3 = ptaCopy(pta1);
735 if (ptaJoin(pta3, pta2, 0, -1) == 1) {
736 ptaDestroy(&pta3);
737 return ERROR_INT("pta join failed", __func__, 1);
738 }
739 ptaRemoveDupsByHmap(pta3, pptad, NULL);
740 ptaDestroy(&pta3);
741 return 0;
742 }
743
744
745 /*!
746 * \brief ptaIntersectionByHmap()
747 *
748 * \param[in] pta1
749 * \param[in] pta2
750 * \param[out] pptad intersection of the two point arrays
751 * \return 0 if OK; 1 on error
752 *
753 * <pre>
754 * Notes:
755 * (1) Make pta with pts common to both input arrays.
756 * </pre>
757 */
758 l_ok
759 ptaIntersectionByHmap(PTA *pta1,
760 PTA *pta2,
761 PTA **pptad)
762 {
763 l_int32 i, n1, n2, n, x, y;
764 l_uint64 key;
765 PTA *pta_small, *pta_big, *ptad;
766 L_HASHITEM *hitem;
767 L_HASHMAP *hmap;
768
769 if (!pptad)
770 return ERROR_INT("&ptad not defined", __func__, 1);
771 *pptad = NULL;
772 if (!pta1)
773 return ERROR_INT("pta1 not defined", __func__, 1);
774 if (!pta2)
775 return ERROR_INT("pta2 not defined", __func__, 1);
776
777 /* Make a hashmap for the elements of the biggest array */
778 n1 = ptaGetCount(pta1);
779 n2 = ptaGetCount(pta2);
780 pta_small = (n1 < n2) ? pta1 : pta2; /* do not destroy pta_small */
781 pta_big = (n1 < n2) ? pta2 : pta1; /* do not destroy pta_big */
782 if ((hmap = l_hmapCreateFromPta(pta_big)) == NULL)
783 return ERROR_INT("hmap not made", __func__, 1);
784
785 /* Go through the smallest array, doing a lookup of its (x,y) into
786 * the big array hashmap. If an hitem is returned, check the count.
787 * If the count is 0, ignore; otherwise, add the point to the
788 * output ptad and set the count in the hitem to 0, indicating
789 * that the point has already been added. */
790 ptad = ptaCreate(0);
791 *pptad = ptad;
792 n = ptaGetCount(pta_small);
793 for (i = 0; i < n; i++) {
794 ptaGetIPt(pta_small, i, &x, &y);
795 l_hashPtToUint64(x, y, &key);
796 hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK);
797 if (!hitem || hitem->count == 0)
798 continue;
799 ptaAddPt(ptad, x, y);
800 hitem->count = 0;
801 }
802 l_hmapDestroy(&hmap);
803 return 0;
804 }
805
806