Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/sarray2.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - | |
| 4 - Redistribution and use in source and binary forms, with or without | |
| 5 - modification, are permitted provided that the following conditions | |
| 6 - are met: | |
| 7 - 1. Redistributions of source code must retain the above copyright | |
| 8 - notice, this list of conditions and the following disclaimer. | |
| 9 - 2. Redistributions in binary form must reproduce the above | |
| 10 - copyright notice, this list of conditions and the following | |
| 11 - disclaimer in the documentation and/or other materials | |
| 12 - provided with the distribution. | |
| 13 - | |
| 14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY | |
| 18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
| 23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| 24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 *====================================================================*/ | |
| 26 | |
| 27 /*! | |
| 28 * \file sarray2.c | |
| 29 * <pre> | |
| 30 * | |
| 31 * Sort | |
| 32 * SARRAY *sarraySort() | |
| 33 * SARRAY *sarraySortByIndex() | |
| 34 * l_int32 stringCompareLexical() | |
| 35 * | |
| 36 * Set operations using aset (rbtree) | |
| 37 * L_ASET *l_asetCreateFromSarray() | |
| 38 * l_int32 sarrayRemoveDupsByAset() | |
| 39 * l_int32 sarrayUnionByAset() | |
| 40 * l_int32 sarrayIntersectionByAset() | |
| 41 * | |
| 42 * Hashmap operations | |
| 43 * L_HASHMAP *l_hmapCreateFromSarray() | |
| 44 * l_int32 sarrayRemoveDupsByHmap() | |
| 45 * l_int32 sarrayUnionByHmap() | |
| 46 * l_int32 sarrayIntersectionByHmap() | |
| 47 * | |
| 48 * Miscellaneous operations | |
| 49 * SARRAY *sarrayGenerateIntegers() | |
| 50 * l_int32 sarrayLookupCSKV() | |
| 51 * | |
| 52 * | |
| 53 * We have two implementations of set operations on an array of strings: | |
| 54 * | |
| 55 * (1) Using an underlying tree (rbtree). | |
| 56 * This uses a good 64 bit hashing function for the key, | |
| 57 * that is not expected to have hash collisions (and we do | |
| 58 * not test for them). The tree is built up of the hash | |
| 59 * values, and if the hash is found in the tree, it is | |
| 60 * assumed that the string has already been found. | |
| 61 * | |
| 62 * (2) Building a hashmap from the keys (hashmap). | |
| 63 * This uses a fast 64 bit hashing function for the key, which | |
| 64 * is then hashed into a hashtable. Collisions of hashkeys are | |
| 65 * very rare, but the hashtable is designed to allow more than one | |
| 66 * hashitem in a table entry. The hashitems are put in a list at | |
| 67 * each hashtable entry, which is traversed looking for the key. | |
| 68 * </pre> | |
| 69 */ | |
| 70 | |
| 71 #ifdef HAVE_CONFIG_H | |
| 72 #include <config_auto.h> | |
| 73 #endif /* HAVE_CONFIG_H */ | |
| 74 | |
| 75 #include <string.h> | |
| 76 #include "allheaders.h" | |
| 77 #include "array_internal.h" | |
| 78 | |
| 79 /*----------------------------------------------------------------------* | |
| 80 * Sort * | |
| 81 *----------------------------------------------------------------------*/ | |
| 82 /*! | |
| 83 * \brief sarraySort() | |
| 84 * | |
| 85 * \param[in] saout output sarray; can be NULL or equal to sain | |
| 86 * \param[in] sain input sarray | |
| 87 * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING | |
| 88 * \return saout output sarray, sorted by ascii value, or NULL on error | |
| 89 * | |
| 90 * <pre> | |
| 91 * Notes: | |
| 92 * (1) Set saout = sain for in-place; otherwise, set naout = NULL. | |
| 93 * (2) Shell sort, modified from K&R, 2nd edition, p.62. | |
| 94 * Slow but simple O(n logn) sort. | |
| 95 * </pre> | |
| 96 */ | |
| 97 SARRAY * | |
| 98 sarraySort(SARRAY *saout, | |
| 99 SARRAY *sain, | |
| 100 l_int32 sortorder) | |
| 101 { | |
| 102 char **array; | |
| 103 char *tmp; | |
| 104 l_int32 n, i, j, gap; | |
| 105 | |
| 106 if (!sain) | |
| 107 return (SARRAY *)ERROR_PTR("sain not defined", __func__, NULL); | |
| 108 | |
| 109 /* Make saout if necessary; otherwise do in-place */ | |
| 110 if (!saout) | |
| 111 saout = sarrayCopy(sain); | |
| 112 else if (sain != saout) | |
| 113 return (SARRAY *)ERROR_PTR("invalid: not in-place", __func__, NULL); | |
| 114 array = saout->array; /* operate directly on the array */ | |
| 115 n = sarrayGetCount(saout); | |
| 116 | |
| 117 /* Shell sort */ | |
| 118 for (gap = n/2; gap > 0; gap = gap / 2) { | |
| 119 for (i = gap; i < n; i++) { | |
| 120 for (j = i - gap; j >= 0; j -= gap) { | |
| 121 if ((sortorder == L_SORT_INCREASING && | |
| 122 stringCompareLexical(array[j], array[j + gap])) || | |
| 123 (sortorder == L_SORT_DECREASING && | |
| 124 stringCompareLexical(array[j + gap], array[j]))) | |
| 125 { | |
| 126 tmp = array[j]; | |
| 127 array[j] = array[j + gap]; | |
| 128 array[j + gap] = tmp; | |
| 129 } | |
| 130 } | |
| 131 } | |
| 132 } | |
| 133 | |
| 134 return saout; | |
| 135 } | |
| 136 | |
| 137 | |
| 138 /*! | |
| 139 * \brief sarraySortByIndex() | |
| 140 * | |
| 141 * \param[in] sain | |
| 142 * \param[in] naindex na that maps from the new sarray to the input sarray | |
| 143 * \return saout sorted, or NULL on error | |
| 144 */ | |
| 145 SARRAY * | |
| 146 sarraySortByIndex(SARRAY *sain, | |
| 147 NUMA *naindex) | |
| 148 { | |
| 149 char *str; | |
| 150 l_int32 i, n, index; | |
| 151 SARRAY *saout; | |
| 152 | |
| 153 if (!sain) | |
| 154 return (SARRAY *)ERROR_PTR("sain not defined", __func__, NULL); | |
| 155 if (!naindex) | |
| 156 return (SARRAY *)ERROR_PTR("naindex not defined", __func__, NULL); | |
| 157 | |
| 158 n = sarrayGetCount(sain); | |
| 159 saout = sarrayCreate(n); | |
| 160 for (i = 0; i < n; i++) { | |
| 161 numaGetIValue(naindex, i, &index); | |
| 162 str = sarrayGetString(sain, index, L_COPY); | |
| 163 sarrayAddString(saout, str, L_INSERT); | |
| 164 } | |
| 165 | |
| 166 return saout; | |
| 167 } | |
| 168 | |
| 169 | |
| 170 /*! | |
| 171 * \brief stringCompareLexical() | |
| 172 * | |
| 173 * \param[in] str1 | |
| 174 * \param[in] str2 | |
| 175 * \return 1 if str1 > str2 lexically; 0 otherwise | |
| 176 * | |
| 177 * <pre> | |
| 178 * Notes: | |
| 179 * (1) If the lexical values are identical, return a 0, to | |
| 180 * indicate that no swapping is required to sort the strings. | |
| 181 * </pre> | |
| 182 */ | |
| 183 l_int32 | |
| 184 stringCompareLexical(const char *str1, | |
| 185 const char *str2) | |
| 186 { | |
| 187 l_int32 i, len1, len2, len; | |
| 188 | |
| 189 if (!str1) | |
| 190 return ERROR_INT("str1 not defined", __func__, 1); | |
| 191 if (!str2) | |
| 192 return ERROR_INT("str2 not defined", __func__, 1); | |
| 193 | |
| 194 len1 = strlen(str1); | |
| 195 len2 = strlen(str2); | |
| 196 len = L_MIN(len1, len2); | |
| 197 | |
| 198 for (i = 0; i < len; i++) { | |
| 199 if (str1[i] == str2[i]) | |
| 200 continue; | |
| 201 if (str1[i] > str2[i]) | |
| 202 return 1; | |
| 203 else | |
| 204 return 0; | |
| 205 } | |
| 206 | |
| 207 if (len1 > len2) | |
| 208 return 1; | |
| 209 else | |
| 210 return 0; | |
| 211 } | |
| 212 | |
| 213 | |
| 214 /*----------------------------------------------------------------------* | |
| 215 * Set operations using aset (rbtree) * | |
| 216 *----------------------------------------------------------------------*/ | |
| 217 /*! | |
| 218 * \brief l_asetCreateFromSarray() | |
| 219 * | |
| 220 * \param[in] sa | |
| 221 * \return set using a string hash into a uint64 as the key | |
| 222 */ | |
| 223 L_ASET * | |
| 224 l_asetCreateFromSarray(SARRAY *sa) | |
| 225 { | |
| 226 char *str; | |
| 227 l_int32 i, n; | |
| 228 l_uint64 hash; | |
| 229 L_ASET *set; | |
| 230 RB_TYPE key; | |
| 231 | |
| 232 if (!sa) | |
| 233 return (L_ASET *)ERROR_PTR("sa not defined", __func__, NULL); | |
| 234 | |
| 235 set = l_asetCreate(L_UINT_TYPE); | |
| 236 n = sarrayGetCount(sa); | |
| 237 for (i = 0; i < n; i++) { | |
| 238 str = sarrayGetString(sa, i, L_NOCOPY); | |
| 239 l_hashStringToUint64Fast(str, &hash); | |
| 240 key.utype = hash; | |
| 241 l_asetInsert(set, key); | |
| 242 } | |
| 243 | |
| 244 return set; | |
| 245 } | |
| 246 | |
| 247 | |
| 248 /*! | |
| 249 * \brief sarrayRemoveDupsByAset() | |
| 250 * | |
| 251 * \param[in] sas | |
| 252 * \param[out] psad with duplicates removed | |
| 253 * \return 0 if OK; 1 on error | |
| 254 * | |
| 255 * <pre> | |
| 256 * Notes: | |
| 257 * (1) This is O(nlogn), considerably slower than | |
| 258 * sarrayRemoveDupsByHmap() for large string arrays. | |
| 259 * (2) The key for each string is a 64-bit hash. | |
| 260 * (3) Build a set, using hashed strings as keys. As the set is | |
| 261 * built, first do a find; if not found, add the key to the | |
| 262 * set and add the string to the output sarray. | |
| 263 * </pre> | |
| 264 */ | |
| 265 l_ok | |
| 266 sarrayRemoveDupsByAset(SARRAY *sas, | |
| 267 SARRAY **psad) | |
| 268 { | |
| 269 char *str; | |
| 270 l_int32 i, n; | |
| 271 l_uint64 hash; | |
| 272 L_ASET *set; | |
| 273 RB_TYPE key; | |
| 274 SARRAY *sad; | |
| 275 | |
| 276 if (!psad) | |
| 277 return ERROR_INT("&sad not defined", __func__, 1); | |
| 278 *psad = NULL; | |
| 279 if (!sas) | |
| 280 return ERROR_INT("sas not defined", __func__, 1); | |
| 281 | |
| 282 set = l_asetCreate(L_UINT_TYPE); | |
| 283 sad = sarrayCreate(0); | |
| 284 *psad = sad; | |
| 285 n = sarrayGetCount(sas); | |
| 286 for (i = 0; i < n; i++) { | |
| 287 str = sarrayGetString(sas, i, L_NOCOPY); | |
| 288 l_hashStringToUint64Fast(str, &hash); | |
| 289 key.utype = hash; | |
| 290 if (!l_asetFind(set, key)) { | |
| 291 sarrayAddString(sad, str, L_COPY); | |
| 292 l_asetInsert(set, key); | |
| 293 } | |
| 294 } | |
| 295 | |
| 296 l_asetDestroy(&set); | |
| 297 return 0; | |
| 298 } | |
| 299 | |
| 300 | |
| 301 /*! | |
| 302 * \brief sarrayUnionByAset() | |
| 303 * | |
| 304 * \param[in] sa1 | |
| 305 * \param[in] sa2 | |
| 306 * \param[out] psad union of the two string arrays | |
| 307 * \return 0 if OK; 1 on error | |
| 308 * | |
| 309 * <pre> | |
| 310 * Notes: | |
| 311 * (1) Duplicates are removed from the concatenation of the two arrays. | |
| 312 * (2) The key for each string is a 64-bit hash. | |
| 313 * (2) Algorithm: Concatenate the two sarrays. Then build a set, | |
| 314 * using hashed strings as keys. As the set is built, first do | |
| 315 * a find; if not found, add the key to the set and add the string | |
| 316 * to the output sarray. This is O(nlogn). | |
| 317 * </pre> | |
| 318 */ | |
| 319 l_ok | |
| 320 sarrayUnionByAset(SARRAY *sa1, | |
| 321 SARRAY *sa2, | |
| 322 SARRAY **psad) | |
| 323 { | |
| 324 SARRAY *sa3; | |
| 325 | |
| 326 if (!psad) | |
| 327 return ERROR_INT("&sad not defined", __func__, 1); | |
| 328 *psad = NULL; | |
| 329 if (!sa1) | |
| 330 return ERROR_INT("sa1 not defined", __func__, 1); | |
| 331 if (!sa2) | |
| 332 return ERROR_INT("sa2 not defined", __func__, 1); | |
| 333 | |
| 334 /* Join */ | |
| 335 sa3 = sarrayCopy(sa1); | |
| 336 if (sarrayJoin(sa3, sa2) == 1) { | |
| 337 sarrayDestroy(&sa3); | |
| 338 return ERROR_INT("join failed for sa3", __func__, 1); | |
| 339 } | |
| 340 | |
| 341 /* Eliminate duplicates */ | |
| 342 sarrayRemoveDupsByAset(sa3, psad); | |
| 343 sarrayDestroy(&sa3); | |
| 344 return 0; | |
| 345 } | |
| 346 | |
| 347 | |
| 348 /*! | |
| 349 * \brief sarrayIntersectionByAset() | |
| 350 * | |
| 351 * \param[in] sa1 | |
| 352 * \param[in] sa2 | |
| 353 * \param[out] psad intersection of the two string arrays | |
| 354 * \return 0 if OK; 1 on error | |
| 355 * | |
| 356 * <pre> | |
| 357 * Notes: | |
| 358 * (1) Algorithm: put the larger sarray into a set, using the string | |
| 359 * hashes as the key values. Then run through the smaller sarray, | |
| 360 * building an output sarray and a second set from the strings | |
| 361 * in the larger array: if a string is in the first set but | |
| 362 * not in the second, add the string to the output sarray and hash | |
| 363 * it into the second set. The second set is required to make | |
| 364 * sure only one instance of each string is put into the output sarray. | |
| 365 * This is O(mlogn), {m,n} = sizes of {smaller,larger} input arrays. | |
| 366 * </pre> | |
| 367 */ | |
| 368 l_ok | |
| 369 sarrayIntersectionByAset(SARRAY *sa1, | |
| 370 SARRAY *sa2, | |
| 371 SARRAY **psad) | |
| 372 { | |
| 373 char *str; | |
| 374 l_int32 n1, n2, i, n; | |
| 375 l_uint64 hash; | |
| 376 L_ASET *set1, *set2; | |
| 377 RB_TYPE key; | |
| 378 SARRAY *sa_small, *sa_big, *sad; | |
| 379 | |
| 380 if (!psad) | |
| 381 return ERROR_INT("&sad not defined", __func__, 1); | |
| 382 *psad = NULL; | |
| 383 if (!sa1) | |
| 384 return ERROR_INT("sa1 not defined", __func__, 1); | |
| 385 if (!sa2) | |
| 386 return ERROR_INT("sa2 not defined", __func__, 1); | |
| 387 | |
| 388 /* Put the elements of the biggest array into a set */ | |
| 389 n1 = sarrayGetCount(sa1); | |
| 390 n2 = sarrayGetCount(sa2); | |
| 391 sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */ | |
| 392 sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */ | |
| 393 set1 = l_asetCreateFromSarray(sa_big); | |
| 394 | |
| 395 /* Build up the intersection of strings */ | |
| 396 sad = sarrayCreate(0); | |
| 397 *psad = sad; | |
| 398 n = sarrayGetCount(sa_small); | |
| 399 set2 = l_asetCreate(L_UINT_TYPE); | |
| 400 for (i = 0; i < n; i++) { | |
| 401 str = sarrayGetString(sa_small, i, L_NOCOPY); | |
| 402 l_hashStringToUint64Fast(str, &hash); | |
| 403 key.utype = hash; | |
| 404 if (l_asetFind(set1, key) && !l_asetFind(set2, key)) { | |
| 405 sarrayAddString(sad, str, L_COPY); | |
| 406 l_asetInsert(set2, key); | |
| 407 } | |
| 408 } | |
| 409 | |
| 410 l_asetDestroy(&set1); | |
| 411 l_asetDestroy(&set2); | |
| 412 return 0; | |
| 413 } | |
| 414 | |
| 415 | |
| 416 /*----------------------------------------------------------------------* | |
| 417 * Hashmap operations * | |
| 418 *----------------------------------------------------------------------*/ | |
| 419 /*! | |
| 420 * \brief l_hmapCreateFromSarray() | |
| 421 * | |
| 422 * \param[in] sa input sarray | |
| 423 * \return hmap hashmap, or NULL on error | |
| 424 */ | |
| 425 L_HASHMAP * | |
| 426 l_hmapCreateFromSarray(SARRAY *sa) | |
| 427 { | |
| 428 l_int32 i, n; | |
| 429 l_uint64 key; | |
| 430 char *str; | |
| 431 L_HASHMAP *hmap; | |
| 432 | |
| 433 if (!sa) | |
| 434 return (L_HASHMAP *)ERROR_PTR("sa not defined", __func__, NULL); | |
| 435 | |
| 436 n = sarrayGetCount(sa); | |
| 437 if ((hmap = l_hmapCreate(0.51 * n, 2)) == NULL) | |
| 438 return (L_HASHMAP *)ERROR_PTR("hmap not made", __func__, NULL); | |
| 439 for (i = 0; i < n; i++) { | |
| 440 str = sarrayGetString(sa, i, L_NOCOPY); | |
| 441 l_hashStringToUint64Fast(str, &key); | |
| 442 l_hmapLookup(hmap, key, i, L_HMAP_CREATE); | |
| 443 } | |
| 444 return hmap; | |
| 445 } | |
| 446 | |
| 447 | |
| 448 /*! | |
| 449 * \brief sarrayRemoveDupsByHmap() | |
| 450 * | |
| 451 * \param[in] sas | |
| 452 * \param[out] psad hash set of unique values | |
| 453 * \param[out] phmap [optional] hashmap used for lookup | |
| 454 * \return 0 if OK; 1 on error | |
| 455 */ | |
| 456 l_ok | |
| 457 sarrayRemoveDupsByHmap(SARRAY *sas, | |
| 458 SARRAY **psad, | |
| 459 L_HASHMAP **phmap) | |
| 460 { | |
| 461 l_int32 i, tabsize; | |
| 462 char *str; | |
| 463 SARRAY *sad; | |
| 464 L_HASHITEM *hitem; | |
| 465 L_HASHMAP *hmap; | |
| 466 | |
| 467 if (phmap) *phmap = NULL; | |
| 468 if (!psad) | |
| 469 return ERROR_INT("&sad not defined", __func__, 1); | |
| 470 *psad = NULL; | |
| 471 if (!sas) | |
| 472 return ERROR_INT("sas not defined", __func__, 1); | |
| 473 | |
| 474 /* Traverse the hashtable lists */ | |
| 475 if ((hmap = l_hmapCreateFromSarray(sas)) == NULL) | |
| 476 return ERROR_INT("hmap not made", __func__, 1); | |
| 477 sad = sarrayCreate(0); | |
| 478 *psad = sad; | |
| 479 tabsize = hmap->tabsize; | |
| 480 for (i = 0; i < tabsize; i++) { | |
| 481 hitem = hmap->hashtab[i]; | |
| 482 while (hitem) { | |
| 483 str = sarrayGetString(sas, hitem->val, L_COPY); | |
| 484 sarrayAddString(sad, str, L_INSERT); | |
| 485 hitem = hitem->next; | |
| 486 } | |
| 487 } | |
| 488 | |
| 489 if (phmap) | |
| 490 *phmap = hmap; | |
| 491 else | |
| 492 l_hmapDestroy(&hmap); | |
| 493 return 0; | |
| 494 } | |
| 495 | |
| 496 | |
| 497 /*! | |
| 498 * \brief sarrayUnionByHmap() | |
| 499 * | |
| 500 * \param[in] sa1 | |
| 501 * \param[in] sa2 | |
| 502 * \param[out] *psad union of the array values | |
| 503 * \return 0 if OK; 1 on error | |
| 504 */ | |
| 505 l_ok | |
| 506 sarrayUnionByHmap(SARRAY *sa1, | |
| 507 SARRAY *sa2, | |
| 508 SARRAY **psad) | |
| 509 { | |
| 510 SARRAY *sa3; | |
| 511 | |
| 512 if (!psad) | |
| 513 return ERROR_INT("&sad not defined", __func__, 1); | |
| 514 *psad = NULL; | |
| 515 if (!sa1) | |
| 516 return ERROR_INT("sa1 not defined", __func__, 1); | |
| 517 if (!sa2) | |
| 518 return ERROR_INT("sa2 not defined", __func__, 1); | |
| 519 | |
| 520 sa3 = sarrayCopy(sa1); | |
| 521 if (sarrayJoin(sa3, sa2) == 1) { | |
| 522 sarrayDestroy(&sa3); | |
| 523 return ERROR_INT("sa3 join failed", __func__, 1); | |
| 524 } | |
| 525 sarrayRemoveDupsByHmap(sa3, psad, NULL); | |
| 526 sarrayDestroy(&sa3); | |
| 527 return 0; | |
| 528 } | |
| 529 | |
| 530 | |
| 531 /*! | |
| 532 * \brief sarrayIntersectionByHmap() | |
| 533 * | |
| 534 * \param[in] sa1 | |
| 535 * \param[in] sa2 | |
| 536 * \param[out] *psad intersection of the array values | |
| 537 * \return 0 if OK; 1 on error | |
| 538 */ | |
| 539 l_ok | |
| 540 sarrayIntersectionByHmap(SARRAY *sa1, | |
| 541 SARRAY *sa2, | |
| 542 SARRAY **psad) | |
| 543 { | |
| 544 l_int32 i, n1, n2, n; | |
| 545 l_uint64 key; | |
| 546 char *str; | |
| 547 SARRAY *sa_small, *sa_big, *sa3, *sad; | |
| 548 L_HASHITEM *hitem; | |
| 549 L_HASHMAP *hmap; | |
| 550 | |
| 551 if (!psad) | |
| 552 return ERROR_INT("&sad not defined", __func__, 1); | |
| 553 *psad = NULL; | |
| 554 if (!sa1) | |
| 555 return ERROR_INT("sa1 not defined", __func__, 1); | |
| 556 if (!sa2) | |
| 557 return ERROR_INT("sa2 not defined", __func__, 1); | |
| 558 | |
| 559 /* Make a hashmap for the elements of the biggest array */ | |
| 560 n1 = sarrayGetCount(sa1); | |
| 561 n2 = sarrayGetCount(sa2); | |
| 562 sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */ | |
| 563 sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */ | |
| 564 if ((hmap = l_hmapCreateFromSarray(sa_big)) == NULL) | |
| 565 return ERROR_INT("hmap not made", __func__, 1); | |
| 566 | |
| 567 /* Remove duplicates from the smallest array. Alternatively, | |
| 568 * we can skip this step and avoid counting duplicates in | |
| 569 * sa_small by modifying the count fields in the sa_big hashitems; | |
| 570 * e.g., see l_hmapIntersectionDna(). */ | |
| 571 sarrayRemoveDupsByHmap(sa_small, &sa3, NULL); | |
| 572 | |
| 573 /* Go through sa3, the set of strings derived from the smallest array, | |
| 574 * hashing into the big array table. Any string found belongs to both, | |
| 575 * so add it to the output array. */ | |
| 576 sad = sarrayCreate(0); | |
| 577 *psad = sad; | |
| 578 n = sarrayGetCount(sa3); | |
| 579 for (i = 0; i < n; i++) { | |
| 580 str = sarrayGetString(sa3, i, L_NOCOPY); | |
| 581 l_hashStringToUint64Fast(str, &key); | |
| 582 hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK); | |
| 583 if (hitem) | |
| 584 sarrayAddString(sad, str, L_COPY); | |
| 585 } | |
| 586 l_hmapDestroy(&hmap); | |
| 587 sarrayDestroy(&sa3); | |
| 588 return 0; | |
| 589 } | |
| 590 | |
| 591 | |
| 592 /*----------------------------------------------------------------------* | |
| 593 * Miscellaneous operations * | |
| 594 *----------------------------------------------------------------------*/ | |
| 595 /*! | |
| 596 * \brief sarrayGenerateIntegers() | |
| 597 * | |
| 598 * \param[in] n | |
| 599 * \return sa of printed numbers, 1 - n, or NULL on error | |
| 600 */ | |
| 601 SARRAY * | |
| 602 sarrayGenerateIntegers(l_int32 n) | |
| 603 { | |
| 604 char buf[32]; | |
| 605 l_int32 i; | |
| 606 SARRAY *sa; | |
| 607 | |
| 608 if ((sa = sarrayCreate(n)) == NULL) | |
| 609 return (SARRAY *)ERROR_PTR("sa not made", __func__, NULL); | |
| 610 for (i = 0; i < n; i++) { | |
| 611 snprintf(buf, sizeof(buf), "%d", i); | |
| 612 sarrayAddString(sa, buf, L_COPY); | |
| 613 } | |
| 614 return sa; | |
| 615 } | |
| 616 | |
| 617 | |
| 618 /*! | |
| 619 * \brief sarrayLookupCSKV() | |
| 620 * | |
| 621 * \param[in] sa of strings, each being a comma-separated pair | |
| 622 * of strings, the first being a key and the | |
| 623 * second a value | |
| 624 * \param[in] keystring an input string to match with each key in %sa | |
| 625 * \param[out] pvalstring the returned value string corresponding to the | |
| 626 * input key string, if found; otherwise NULL | |
| 627 * \return 0 if OK, 1 on error | |
| 628 * | |
| 629 * <pre> | |
| 630 * Notes: | |
| 631 * (1) The input %sa can have other strings that are not in | |
| 632 * comma-separated key-value format. These will be ignored. | |
| 633 * (2) This returns a copy of the first value string in %sa whose | |
| 634 * key string matches the input %keystring. | |
| 635 * (3) White space is not ignored; all white space before the ',' | |
| 636 * is used for the keystring in matching. This allows the | |
| 637 * key and val strings to have white space (e.g., multiple words). | |
| 638 * </pre> | |
| 639 */ | |
| 640 l_ok | |
| 641 sarrayLookupCSKV(SARRAY *sa, | |
| 642 const char *keystring, | |
| 643 char **pvalstring) | |
| 644 { | |
| 645 char *key, *val, *str; | |
| 646 l_int32 i, n; | |
| 647 SARRAY *sa1; | |
| 648 | |
| 649 if (!pvalstring) | |
| 650 return ERROR_INT("&valstring not defined", __func__, 1); | |
| 651 *pvalstring = NULL; | |
| 652 if (!sa) | |
| 653 return ERROR_INT("sa not defined", __func__, 1); | |
| 654 if (!keystring) | |
| 655 return ERROR_INT("keystring not defined", __func__, 1); | |
| 656 | |
| 657 n = sarrayGetCount(sa); | |
| 658 for (i = 0; i < n; i++) { | |
| 659 str = sarrayGetString(sa, i, L_NOCOPY); | |
| 660 sa1 = sarrayCreate(2); | |
| 661 sarraySplitString(sa1, str, ","); | |
| 662 if (sarrayGetCount(sa1) != 2) { | |
| 663 sarrayDestroy(&sa1); | |
| 664 continue; | |
| 665 } | |
| 666 key = sarrayGetString(sa1, 0, L_NOCOPY); | |
| 667 val = sarrayGetString(sa1, 1, L_NOCOPY); | |
| 668 if (!strcmp(key, keystring)) { | |
| 669 *pvalstring = stringNew(val); | |
| 670 sarrayDestroy(&sa1); | |
| 671 return 0; | |
| 672 } | |
| 673 sarrayDestroy(&sa1); | |
| 674 } | |
| 675 | |
| 676 return 0; | |
| 677 } |
