comparison mupdf-source/thirdparty/leptonica/src/numafunc1.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file numafunc1.c
29 * <pre>
30 *
31 * --------------------------------------
32 * This file has these Numa utilities:
33 * - arithmetic operations
34 * - simple data analysis
35 * - generation of special sequences
36 * - permutations
37 * - interpolation
38 * - sorting
39 * - data analysis requiring sorting
40 * - joins and rearrangements
41 * --------------------------------------
42 *
43 * Arithmetic and logic
44 * NUMA *numaArithOp()
45 * NUMA *numaLogicalOp()
46 * NUMA *numaInvert()
47 * l_int32 numaSimilar()
48 * l_int32 numaAddToNumber()
49 *
50 * Simple extractions
51 * l_int32 numaGetMin()
52 * l_int32 numaGetMax()
53 * l_int32 numaGetSum()
54 * NUMA *numaGetPartialSums()
55 * l_int32 numaGetSumOnInterval()
56 * l_int32 numaHasOnlyIntegers()
57 * l_int32 numaGetMean()
58 * l_int32 numaGetMeanAbsval()
59 * NUMA *numaSubsample()
60 * NUMA *numaMakeDelta()
61 * NUMA *numaMakeSequence()
62 * NUMA *numaMakeConstant()
63 * NUMA *numaMakeAbsval()
64 * NUMA *numaAddBorder()
65 * NUMA *numaAddSpecifiedBorder()
66 * NUMA *numaRemoveBorder()
67 * l_int32 numaCountNonzeroRuns()
68 * l_int32 numaGetNonzeroRange()
69 * l_int32 numaGetCountRelativeToZero()
70 * NUMA *numaClipToInterval()
71 * NUMA *numaMakeThresholdIndicator()
72 * NUMA *numaUniformSampling()
73 * NUMA *numaReverse()
74 *
75 * Signal feature extraction
76 * NUMA *numaLowPassIntervals()
77 * NUMA *numaThresholdEdges()
78 * NUMA *numaGetSpanValues()
79 * NUMA *numaGetEdgeValues()
80 *
81 * Interpolation
82 * l_int32 numaInterpolateEqxVal()
83 * l_int32 numaInterpolateEqxInterval()
84 * l_int32 numaInterpolateArbxVal()
85 * l_int32 numaInterpolateArbxInterval()
86 *
87 * Functions requiring interpolation
88 * l_int32 numaFitMax()
89 * l_int32 numaDifferentiateInterval()
90 * l_int32 numaIntegrateInterval()
91 *
92 * Sorting
93 * NUMA *numaSortGeneral()
94 * NUMA *numaSortAutoSelect()
95 * NUMA *numaSortIndexAutoSelect()
96 * l_int32 numaChooseSortType()
97 * NUMA *numaSort()
98 * NUMA *numaBinSort()
99 * NUMA *numaGetSortIndex()
100 * NUMA *numaGetBinSortIndex()
101 * NUMA *numaSortByIndex()
102 * l_int32 numaIsSorted()
103 * l_int32 numaSortPair()
104 * NUMA *numaInvertMap()
105 * l_int32 numaAddSorted()
106 * l_int32 numaFindSortedLoc()
107 *
108 * Random permutation
109 * NUMA *numaPseudorandomSequence()
110 * NUMA *numaRandomPermutation()
111 *
112 * Functions requiring sorting
113 * l_int32 numaGetRankValue()
114 * l_int32 numaGetMedian()
115 * l_int32 numaGetBinnedMedian()
116 * l_int32 numaGetMeanDevFromMedian()
117 * l_int32 numaGetMedianDevFromMedian()
118 * l_int32 numaGetMode()
119 *
120 * Rearrangements
121 * l_int32 numaJoin()
122 * l_int32 numaaJoin()
123 * NUMA *numaaFlattenToNuma()
124 *
125 * Things to remember when using the Numa:
126 *
127 * (1) The numa is a struct, not an array. Always use accessors
128 * (see numabasic.c), never the fields directly.
129 *
130 * (2) The number array holds l_float32 values. It can also
131 * be used to store l_int32 values. See numabasic.c for
132 * details on using the accessors.
133 *
134 * (3) If you use numaCreate(), no numbers are stored and the size is 0.
135 * You have to add numbers to increase the size.
136 * If you want to start with a numa of a fixed size, with each
137 * entry initialized to the same value, use numaMakeConstant().
138 *
139 * (4) Occasionally, in the comments we denote the i-th element of a
140 * numa by na[i]. This is conceptual only -- the numa is not an array!
141 * </pre>
142 */
143
144 #ifdef HAVE_CONFIG_H
145 #include <config_auto.h>
146 #endif /* HAVE_CONFIG_H */
147
148 #include <math.h>
149 #include "allheaders.h"
150 #include "array_internal.h"
151
152 /*----------------------------------------------------------------------*
153 * Arithmetic and logical ops on Numas *
154 *----------------------------------------------------------------------*/
155 /*!
156 * \brief numaArithOp()
157 *
158 * \param[in] nad [optional] can be null or equal to na1 (in-place
159 * \param[in] na1
160 * \param[in] na2
161 * \param[in] op L_ARITH_ADD, L_ARITH_SUBTRACT,
162 * L_ARITH_MULTIPLY, L_ARITH_DIVIDE
163 * \return nad always: operation applied to na1 and na2
164 *
165 * <pre>
166 * Notes:
167 * (1) The sizes of na1 and na2 must be equal.
168 * (2) nad can only null or equal to na1.
169 * (3) To add a constant to a numa, or to multiply a numa by
170 * a constant, use numaTransform().
171 * </pre>
172 */
173 NUMA *
174 numaArithOp(NUMA *nad,
175 NUMA *na1,
176 NUMA *na2,
177 l_int32 op)
178 {
179 l_int32 i, n;
180 l_float32 val1, val2;
181
182 if (!na1 || !na2)
183 return (NUMA *)ERROR_PTR("na1, na2 not both defined", __func__, nad);
184 n = numaGetCount(na1);
185 if (n != numaGetCount(na2))
186 return (NUMA *)ERROR_PTR("na1, na2 sizes differ", __func__, nad);
187 if (nad && nad != na1)
188 return (NUMA *)ERROR_PTR("nad defined but not in-place", __func__, nad);
189 if (op != L_ARITH_ADD && op != L_ARITH_SUBTRACT &&
190 op != L_ARITH_MULTIPLY && op != L_ARITH_DIVIDE)
191 return (NUMA *)ERROR_PTR("invalid op", __func__, nad);
192 if (op == L_ARITH_DIVIDE) {
193 for (i = 0; i < n; i++) {
194 numaGetFValue(na2, i, &val2);
195 if (val2 == 0.0)
196 return (NUMA *)ERROR_PTR("na2 has 0 element", __func__, nad);
197 }
198 }
199
200 /* If nad is not identical to na1, make it an identical copy */
201 if (!nad)
202 nad = numaCopy(na1);
203
204 for (i = 0; i < n; i++) {
205 numaGetFValue(nad, i, &val1);
206 numaGetFValue(na2, i, &val2);
207 switch (op) {
208 case L_ARITH_ADD:
209 numaSetValue(nad, i, val1 + val2);
210 break;
211 case L_ARITH_SUBTRACT:
212 numaSetValue(nad, i, val1 - val2);
213 break;
214 case L_ARITH_MULTIPLY:
215 numaSetValue(nad, i, val1 * val2);
216 break;
217 case L_ARITH_DIVIDE:
218 numaSetValue(nad, i, val1 / val2);
219 break;
220 default:
221 lept_stderr(" Unknown arith op: %d\n", op);
222 return nad;
223 }
224 }
225
226 return nad;
227 }
228
229
230 /*!
231 * \brief numaLogicalOp()
232 *
233 * \param[in] nad [optional] can be null or equal to na1 (in-place
234 * \param[in] na1
235 * \param[in] na2
236 * \param[in] op L_UNION, L_INTERSECTION, L_SUBTRACTION, L_EXCLUSIVE_OR
237 * \return nad always: operation applied to na1 and na2
238 *
239 * <pre>
240 * Notes:
241 * (1) The sizes of na1 and na2 must be equal.
242 * (2) nad can only be null or equal to na1.
243 * (3) This is intended for use with indicator arrays (0s and 1s).
244 * Input data is extracted as integers (0 == false, anything
245 * else == true); output results are 0 and 1.
246 * (4) L_SUBTRACTION is subtraction of val2 from val1. For bit logical
247 * arithmetic this is (val1 & ~val2), but because these values
248 * are integers, we use (val1 && !val2).
249 * </pre>
250 */
251 NUMA *
252 numaLogicalOp(NUMA *nad,
253 NUMA *na1,
254 NUMA *na2,
255 l_int32 op)
256 {
257 l_int32 i, n, val1, val2, val;
258
259 if (!na1 || !na2)
260 return (NUMA *)ERROR_PTR("na1, na2 not both defined", __func__, nad);
261 n = numaGetCount(na1);
262 if (n != numaGetCount(na2))
263 return (NUMA *)ERROR_PTR("na1, na2 sizes differ", __func__, nad);
264 if (nad && nad != na1)
265 return (NUMA *)ERROR_PTR("nad defined; not in-place", __func__, nad);
266 if (op != L_UNION && op != L_INTERSECTION &&
267 op != L_SUBTRACTION && op != L_EXCLUSIVE_OR)
268 return (NUMA *)ERROR_PTR("invalid op", __func__, nad);
269
270 /* If nad is not identical to na1, make it an identical copy */
271 if (!nad)
272 nad = numaCopy(na1);
273
274 for (i = 0; i < n; i++) {
275 numaGetIValue(nad, i, &val1);
276 numaGetIValue(na2, i, &val2);
277 val1 = (val1 == 0) ? 0 : 1;
278 val2 = (val2 == 0) ? 0 : 1;
279 switch (op) {
280 case L_UNION:
281 val = (val1 || val2) ? 1 : 0;
282 numaSetValue(nad, i, val);
283 break;
284 case L_INTERSECTION:
285 val = (val1 && val2) ? 1 : 0;
286 numaSetValue(nad, i, val);
287 break;
288 case L_SUBTRACTION:
289 val = (val1 && !val2) ? 1 : 0;
290 numaSetValue(nad, i, val);
291 break;
292 case L_EXCLUSIVE_OR:
293 val = (val1 != val2) ? 1 : 0;
294 numaSetValue(nad, i, val);
295 break;
296 default:
297 lept_stderr(" Unknown logical op: %d\n", op);
298 return nad;
299 }
300 }
301
302 return nad;
303 }
304
305
306 /*!
307 * \brief numaInvert()
308 *
309 * \param[in] nad [optional] can be null or equal to nas (in-place
310 * \param[in] nas
311 * \return nad always: 'inverts' nas
312 *
313 * <pre>
314 * Notes:
315 * (1) This is intended for use with indicator arrays (0s and 1s).
316 * It gives a boolean-type output, taking the input as
317 * an integer and inverting it:
318 * 0 --> 1
319 * anything else --> 0
320 * </pre>
321 */
322 NUMA *
323 numaInvert(NUMA *nad,
324 NUMA *nas)
325 {
326 l_int32 i, n, val;
327
328 if (!nas)
329 return (NUMA *)ERROR_PTR("nas not defined", __func__, nad);
330 if (nad && nad != nas)
331 return (NUMA *)ERROR_PTR("nad defined; not in-place", __func__, nad);
332
333 if (!nad)
334 nad = numaCopy(nas);
335 n = numaGetCount(nad);
336 for (i = 0; i < n; i++) {
337 numaGetIValue(nad, i, &val);
338 if (!val)
339 val = 1;
340 else
341 val = 0;
342 numaSetValue(nad, i, val);
343 }
344 return nad;
345 }
346
347
348 /*!
349 * \brief numaSimilar()
350 *
351 * \param[in] na1
352 * \param[in] na2
353 * \param[in] maxdiff use 0.0 for exact equality
354 * \param[out] psimilar 1 if similar; 0 if different
355 * \return 0 if OK, 1 on error
356 *
357 * <pre>
358 * Notes:
359 * (1) Float values can differ slightly due to roundoff and
360 * accumulated errors. Using %maxdiff > 0.0 allows similar
361 * arrays to be identified.
362 * </pre>
363 */
364 l_int32
365 numaSimilar(NUMA *na1,
366 NUMA *na2,
367 l_float32 maxdiff,
368 l_int32 *psimilar)
369 {
370 l_int32 i, n;
371 l_float32 val1, val2;
372
373 if (!psimilar)
374 return ERROR_INT("&similar not defined", __func__, 1);
375 *psimilar = 0;
376 if (!na1 || !na2)
377 return ERROR_INT("na1 and na2 not both defined", __func__, 1);
378 maxdiff = L_ABS(maxdiff);
379
380 n = numaGetCount(na1);
381 if (n != numaGetCount(na2)) return 0;
382
383 for (i = 0; i < n; i++) {
384 numaGetFValue(na1, i, &val1);
385 numaGetFValue(na2, i, &val2);
386 if (L_ABS(val1 - val2) > maxdiff) return 0;
387 }
388
389 *psimilar = 1;
390 return 0;
391 }
392
393
394 /*!
395 * \brief numaAddToNumber()
396 *
397 * \param[in] na source numa
398 * \param[in] index element to be changed
399 * \param[in] val new value to be added
400 * \return 0 if OK, 1 on error
401 *
402 * <pre>
403 * Notes:
404 * (1) This is useful for accumulating sums, regardless of the index
405 * order in which the values are made available.
406 * (2) Before use, the numa has to be filled up to %index. This would
407 * typically be used by creating the numa with the full sized
408 * array, initialized to 0.0, using numaMakeConstant().
409 * </pre>
410 */
411 l_ok
412 numaAddToNumber(NUMA *na,
413 l_int32 index,
414 l_float32 val)
415 {
416 l_int32 n;
417
418 if (!na)
419 return ERROR_INT("na not defined", __func__, 1);
420 if ((n = numaGetCount(na)) == 0)
421 return ERROR_INT("na is empty", __func__, 1);
422 if (index < 0 || index >= n) {
423 L_ERROR("index %d not in [0,...,%d]\n", __func__, index, n - 1);
424 return 1;
425 }
426
427 na->array[index] += val;
428 return 0;
429 }
430
431
432 /*----------------------------------------------------------------------*
433 * Simple extractions *
434 *----------------------------------------------------------------------*/
435 /*!
436 * \brief numaGetMin()
437 *
438 * \param[in] na source numa
439 * \param[out] pminval [optional] min value
440 * \param[out] piminloc [optional] index of min location
441 * \return 0 if OK; 1 on error
442 */
443 l_ok
444 numaGetMin(NUMA *na,
445 l_float32 *pminval,
446 l_int32 *piminloc)
447 {
448 l_int32 i, n, iminloc;
449 l_float32 val, minval;
450
451 if (!pminval && !piminloc)
452 return ERROR_INT("nothing to do", __func__, 1);
453 if (pminval) *pminval = 0.0;
454 if (piminloc) *piminloc = 0;
455 if (!na)
456 return ERROR_INT("na not defined", __func__, 1);
457 if ((n = numaGetCount(na)) == 0)
458 return ERROR_INT("na is empty", __func__, 1);
459
460 minval = +1000000000.;
461 iminloc = 0;
462 for (i = 0; i < n; i++) {
463 numaGetFValue(na, i, &val);
464 if (val < minval) {
465 minval = val;
466 iminloc = i;
467 }
468 }
469
470 if (pminval) *pminval = minval;
471 if (piminloc) *piminloc = iminloc;
472 return 0;
473 }
474
475
476 /*!
477 * \brief numaGetMax()
478 *
479 * \param[in] na source numa
480 * \param[out] pmaxval [optional] max value
481 * \param[out] pimaxloc [optional] index of max location
482 * \return 0 if OK; 1 on error
483 */
484 l_ok
485 numaGetMax(NUMA *na,
486 l_float32 *pmaxval,
487 l_int32 *pimaxloc)
488 {
489 l_int32 i, n, imaxloc;
490 l_float32 val, maxval;
491
492 if (!pmaxval && !pimaxloc)
493 return ERROR_INT("nothing to do", __func__, 1);
494 if (pmaxval) *pmaxval = 0.0;
495 if (pimaxloc) *pimaxloc = 0;
496 if (!na)
497 return ERROR_INT("na not defined", __func__, 1);
498 if ((n = numaGetCount(na)) == 0)
499 return ERROR_INT("na is empty", __func__, 1);
500
501 maxval = -1000000000.;
502 imaxloc = 0;
503 for (i = 0; i < n; i++) {
504 numaGetFValue(na, i, &val);
505 if (val > maxval) {
506 maxval = val;
507 imaxloc = i;
508 }
509 }
510
511 if (pmaxval) *pmaxval = maxval;
512 if (pimaxloc) *pimaxloc = imaxloc;
513 return 0;
514 }
515
516
517 /*!
518 * \brief numaGetSum()
519 *
520 * \param[in] na source numa
521 * \param[out] psum sum of values
522 * \return 0 if OK, 1 on error
523 */
524 l_ok
525 numaGetSum(NUMA *na,
526 l_float32 *psum)
527 {
528 l_int32 i, n;
529 l_float32 val, sum;
530
531 if (!psum)
532 return ERROR_INT("&sum not defined", __func__, 1);
533 *psum = 0;
534 if (!na)
535 return ERROR_INT("na not defined", __func__, 1);
536
537 if ((n = numaGetCount(na)) == 0)
538 return ERROR_INT("na is empty", __func__, 1);
539 sum = 0.0;
540 for (i = 0; i < n; i++) {
541 numaGetFValue(na, i, &val);
542 sum += val;
543 }
544 *psum = sum;
545 return 0;
546 }
547
548
549 /*!
550 * \brief numaGetPartialSums()
551 *
552 * \param[in] na source numa
553 * \return nasum, or NULL on error
554 *
555 * <pre>
556 * Notes:
557 * (1) nasum[i] is the sum for all j <= i of na[j].
558 * So nasum[0] = na[0].
559 * (2) If you want to generate a rank function, where rank[0] - 0.0,
560 * insert a 0.0 at the beginning of the nasum array.
561 * </pre>
562 */
563 NUMA *
564 numaGetPartialSums(NUMA *na)
565 {
566 l_int32 i, n;
567 l_float32 val, sum;
568 NUMA *nasum;
569
570 if (!na)
571 return (NUMA *)ERROR_PTR("na not defined", __func__, NULL);
572
573 if ((n = numaGetCount(na)) == 0)
574 L_WARNING("na is empty\n", __func__);
575 nasum = numaCreate(n);
576 sum = 0.0;
577 for (i = 0; i < n; i++) {
578 numaGetFValue(na, i, &val);
579 sum += val;
580 numaAddNumber(nasum, sum);
581 }
582 return nasum;
583 }
584
585
586 /*!
587 * \brief numaGetSumOnInterval()
588 *
589 * \param[in] na source numa
590 * \param[in] first beginning index
591 * \param[in] last final index; use -1 to go to the end
592 * \param[out] psum sum of values in the index interval range
593 * \return 0 if OK, 1 on error
594 */
595 l_ok
596 numaGetSumOnInterval(NUMA *na,
597 l_int32 first,
598 l_int32 last,
599 l_float32 *psum)
600 {
601 l_int32 i, n;
602 l_float32 val, sum;
603
604 if (!psum)
605 return ERROR_INT("&sum not defined", __func__, 1);
606 *psum = 0.0;
607 if (!na)
608 return ERROR_INT("na not defined", __func__, 1);
609
610 sum = 0.0;
611 if ((n = numaGetCount(na)) == 0)
612 return ERROR_INT("na is empty", __func__, 1);
613 if (first < 0) first = 0;
614 if (first >= n || last < -1) /* not an error */
615 return 0;
616 if (last == -1)
617 last = n - 1;
618 last = L_MIN(last, n - 1);
619
620 for (i = first; i <= last; i++) {
621 numaGetFValue(na, i, &val);
622 sum += val;
623 }
624 *psum = sum;
625 return 0;
626 }
627
628
629 /*!
630 * \brief numaHasOnlyIntegers()
631 *
632 * \param[in] na source numa
633 * \param[out] pallints 1 if all sampled values are ints; else 0
634 * \return 0 if OK, 1 on error
635 */
636 l_ok
637 numaHasOnlyIntegers(NUMA *na,
638 l_int32 *pallints)
639 {
640 l_int32 i, n;
641 l_float32 val;
642
643 if (!pallints)
644 return ERROR_INT("&allints not defined", __func__, 1);
645 *pallints = TRUE;
646 if (!na)
647 return ERROR_INT("na not defined", __func__, 1);
648
649 if ((n = numaGetCount(na)) == 0)
650 return ERROR_INT("na is empty", __func__, 1);
651 for (i = 0; i < n; i ++) {
652 numaGetFValue(na, i, &val);
653 if (val != (l_int32)val) {
654 *pallints = FALSE;
655 return 0;
656 }
657 }
658 return 0;
659 }
660
661
662 /*!
663 * \brief numaGetMean()
664 *
665 * \param[in] na source numa
666 * \param[out] pave average of values
667 * \return 0 if OK, 1 on error
668 */
669 l_ok
670 numaGetMean(NUMA *na,
671 l_float32 *pave)
672 {
673 l_int32 n;
674 l_float32 sum;
675
676 if (!pave)
677 return ERROR_INT("&ave not defined", __func__, 1);
678 *pave = 0;
679 if (!na)
680 return ERROR_INT("na not defined", __func__, 1);
681 if ((n = numaGetCount(na)) == 0)
682 return ERROR_INT("na is empty", __func__, 1);
683
684 numaGetSum(na, &sum);
685 *pave = sum / n;
686 return 0;
687 }
688
689 /*!
690 * \brief numaGetMeanAbsval()
691 *
692 * \param[in] na source numa
693 * \param[out] paveabs average of absolute values
694 * \return 0 if OK, 1 on error
695 */
696 l_ok
697 numaGetMeanAbsval(NUMA *na,
698 l_float32 *paveabs)
699 {
700 l_int32 n;
701 NUMA *na1;
702
703 if (!paveabs)
704 return ERROR_INT("&aveabs not defined", __func__, 1);
705 *paveabs = 0;
706 if (!na)
707 return ERROR_INT("na not defined", __func__, 1);
708 if ((n = numaGetCount(na)) == 0)
709 return ERROR_INT("na is empty", __func__, 1);
710
711 na1 = numaMakeAbsval(NULL, na);
712 numaGetMean(na1, paveabs);
713 numaDestroy(&na1);
714 return 0;
715 }
716
717
718 /*!
719 * \brief numaSubsample()
720 *
721 * \param[in] nas
722 * \param[in] subfactor subsample factor, >= 1
723 * \return nad evenly sampled values from nas, or NULL on error
724 */
725 NUMA *
726 numaSubsample(NUMA *nas,
727 l_int32 subfactor)
728 {
729 l_int32 i, n;
730 l_float32 val;
731 NUMA *nad;
732
733 if (!nas)
734 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
735 if (subfactor < 1)
736 return (NUMA *)ERROR_PTR("subfactor < 1", __func__, NULL);
737
738 nad = numaCreate(0);
739 if ((n = numaGetCount(nas)) == 0)
740 L_WARNING("nas is empty\n", __func__);
741 for (i = 0; i < n; i++) {
742 if (i % subfactor != 0) continue;
743 numaGetFValue(nas, i, &val);
744 numaAddNumber(nad, val);
745 }
746
747 return nad;
748 }
749
750
751 /*!
752 * \brief numaMakeDelta()
753 *
754 * \param[in] nas input numa
755 * \return numa of difference values val[i+1] - val[i],
756 * or NULL on error
757 */
758 NUMA *
759 numaMakeDelta(NUMA *nas)
760 {
761 l_int32 i, n;
762 l_float32 prev, cur;
763 NUMA *nad;
764
765 if (!nas)
766 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
767 if ((n = numaGetCount(nas)) < 2) {
768 L_WARNING("n < 2; returning empty numa\n", __func__);
769 return numaCreate(1);
770 }
771
772 nad = numaCreate(n - 1);
773 numaGetFValue(nas, 0, &prev);
774 for (i = 1; i < n; i++) {
775 numaGetFValue(nas, i, &cur);
776 numaAddNumber(nad, cur - prev);
777 prev = cur;
778 }
779 return nad;
780 }
781
782
783 /*!
784 * \brief numaMakeSequence()
785 *
786 * \param[in] startval
787 * \param[in] increment
788 * \param[in] size of sequence
789 * \return numa of sequence of evenly spaced values, or NULL on error
790 */
791 NUMA *
792 numaMakeSequence(l_float32 startval,
793 l_float32 increment,
794 l_int32 size)
795 {
796 l_int32 i;
797 l_float32 val;
798 NUMA *na;
799
800 if ((na = numaCreate(size)) == NULL)
801 return (NUMA *)ERROR_PTR("na not made", __func__, NULL);
802
803 for (i = 0; i < size; i++) {
804 val = startval + i * increment;
805 numaAddNumber(na, val);
806 }
807 return na;
808 }
809
810
811 /*!
812 * \brief numaMakeConstant()
813 *
814 * \param[in] val
815 * \param[in] size of numa
816 * \return numa of given size with all entries equal to 'val',
817 * or NULL on error
818 */
819 NUMA *
820 numaMakeConstant(l_float32 val,
821 l_int32 size)
822 {
823 return numaMakeSequence(val, 0.0, size);
824 }
825
826
827 /*!
828 * \brief numaMakeAbsval()
829 *
830 * \param[in] nad can be null for new array, or the same as nas for inplace
831 * \param[in] nas input numa
832 * \return nad with all numbers being the absval of the input,
833 * or NULL on error
834 */
835 NUMA *
836 numaMakeAbsval(NUMA *nad,
837 NUMA *nas)
838 {
839 l_int32 i, n;
840 l_float32 val;
841
842 if (!nas)
843 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
844 if (nad && nad != nas)
845 return (NUMA *)ERROR_PTR("nad and not in-place", __func__, NULL);
846
847 if (!nad)
848 nad = numaCopy(nas);
849 n = numaGetCount(nad);
850 for (i = 0; i < n; i++) {
851 val = nad->array[i];
852 nad->array[i] = L_ABS(val);
853 }
854
855 return nad;
856 }
857
858
859 /*!
860 * \brief numaAddBorder()
861 *
862 * \param[in] nas
863 * \param[in] left number of elements to add before the start
864 * \param[in] right number of elements to add after the end
865 * \param[in] val initialize border elements
866 * \return nad with added elements at left and right, or NULL on error
867 */
868 NUMA *
869 numaAddBorder(NUMA *nas,
870 l_int32 left,
871 l_int32 right,
872 l_float32 val)
873 {
874 l_int32 i, n, len;
875 l_float32 startx, delx;
876 l_float32 *fas, *fad;
877 NUMA *nad;
878
879 if (!nas)
880 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
881 if (left < 0) left = 0;
882 if (right < 0) right = 0;
883 if (left == 0 && right == 0)
884 return numaCopy(nas);
885
886 n = numaGetCount(nas);
887 len = n + left + right;
888 nad = numaMakeConstant(val, len);
889 numaGetParameters(nas, &startx, &delx);
890 numaSetParameters(nad, startx - delx * left, delx);
891 fas = numaGetFArray(nas, L_NOCOPY);
892 fad = numaGetFArray(nad, L_NOCOPY);
893 for (i = 0; i < n; i++)
894 fad[left + i] = fas[i];
895
896 return nad;
897 }
898
899
900 /*!
901 * \brief numaAddSpecifiedBorder()
902 *
903 * \param[in] nas
904 * \param[in] left number of elements to add before the start
905 * \param[in] right number of elements to add after the end
906 * \param[in] type L_CONTINUED_BORDER, L_MIRRORED_BORDER
907 * \return nad with added elements at left and right, or NULL on error
908 */
909 NUMA *
910 numaAddSpecifiedBorder(NUMA *nas,
911 l_int32 left,
912 l_int32 right,
913 l_int32 type)
914 {
915 l_int32 i, n;
916 l_float32 *fa;
917 NUMA *nad;
918
919 if (!nas)
920 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
921 if (left < 0) left = 0;
922 if (right < 0) right = 0;
923 if (left == 0 && right == 0)
924 return numaCopy(nas);
925 if (type != L_CONTINUED_BORDER && type != L_MIRRORED_BORDER)
926 return (NUMA *)ERROR_PTR("invalid type", __func__, NULL);
927 n = numaGetCount(nas);
928 if (type == L_MIRRORED_BORDER && (left > n || right > n))
929 return (NUMA *)ERROR_PTR("border too large", __func__, NULL);
930
931 nad = numaAddBorder(nas, left, right, 0);
932 n = numaGetCount(nad);
933 fa = numaGetFArray(nad, L_NOCOPY);
934 if (type == L_CONTINUED_BORDER) {
935 for (i = 0; i < left; i++)
936 fa[i] = fa[left];
937 for (i = n - right; i < n; i++)
938 fa[i] = fa[n - right - 1];
939 } else { /* type == L_MIRRORED_BORDER */
940 for (i = 0; i < left; i++)
941 fa[i] = fa[2 * left - 1 - i];
942 for (i = 0; i < right; i++)
943 fa[n - right + i] = fa[n - right - i - 1];
944 }
945
946 return nad;
947 }
948
949
950 /*!
951 * \brief numaRemoveBorder()
952 *
953 * \param[in] nas
954 * \param[in] left number of elements to remove from the start
955 * \param[in] right number of elements to remove up to the end
956 * \return nad with removed elements at left and right, or NULL on error
957 */
958 NUMA *
959 numaRemoveBorder(NUMA *nas,
960 l_int32 left,
961 l_int32 right)
962 {
963 l_int32 i, n, len;
964 l_float32 startx, delx;
965 l_float32 *fas, *fad;
966 NUMA *nad;
967
968 if (!nas)
969 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
970 if (left < 0) left = 0;
971 if (right < 0) right = 0;
972 if (left == 0 && right == 0)
973 return numaCopy(nas);
974
975 n = numaGetCount(nas);
976 if ((len = n - left - right) < 0)
977 return (NUMA *)ERROR_PTR("len < 0 after removal", __func__, NULL);
978 nad = numaMakeConstant(0, len);
979 numaGetParameters(nas, &startx, &delx);
980 numaSetParameters(nad, startx + delx * left, delx);
981 fas = numaGetFArray(nas, L_NOCOPY);
982 fad = numaGetFArray(nad, L_NOCOPY);
983 for (i = 0; i < len; i++)
984 fad[i] = fas[left + i];
985
986 return nad;
987 }
988
989
990 /*!
991 * \brief numaCountNonzeroRuns()
992 *
993 * \param[in] na e.g., of pixel counts in rows or columns
994 * \param[out] pcount number of nonzero runs
995 * \return 0 if OK, 1 on error
996 */
997 l_ok
998 numaCountNonzeroRuns(NUMA *na,
999 l_int32 *pcount)
1000 {
1001 l_int32 n, i, val, count, inrun;
1002
1003 if (!pcount)
1004 return ERROR_INT("&count not defined", __func__, 1);
1005 *pcount = 0;
1006 if (!na)
1007 return ERROR_INT("na not defined", __func__, 1);
1008 if ((n = numaGetCount(na)) == 0)
1009 return ERROR_INT("na is empty", __func__, 1);
1010
1011 count = 0;
1012 inrun = FALSE;
1013 for (i = 0; i < n; i++) {
1014 numaGetIValue(na, i, &val);
1015 if (!inrun && val > 0) {
1016 count++;
1017 inrun = TRUE;
1018 } else if (inrun && val == 0) {
1019 inrun = FALSE;
1020 }
1021 }
1022 *pcount = count;
1023 return 0;
1024 }
1025
1026
1027 /*!
1028 * \brief numaGetNonzeroRange()
1029 *
1030 * \param[in] na source numa
1031 * \param[in] eps largest value considered to be zero
1032 * \param[out] pfirst, plast interval of array indices
1033 * where values are nonzero
1034 * \return 0 if OK, 1 on error or if no nonzero range is found.
1035 */
1036 l_ok
1037 numaGetNonzeroRange(NUMA *na,
1038 l_float32 eps,
1039 l_int32 *pfirst,
1040 l_int32 *plast)
1041 {
1042 l_int32 n, i, found;
1043 l_float32 val;
1044
1045 if (pfirst) *pfirst = 0;
1046 if (plast) *plast = 0;
1047 if (!pfirst || !plast)
1048 return ERROR_INT("pfirst and plast not both defined", __func__, 1);
1049 if (!na)
1050 return ERROR_INT("na not defined", __func__, 1);
1051 if ((n = numaGetCount(na)) == 0)
1052 return ERROR_INT("na is empty", __func__, 1);
1053
1054 found = FALSE;
1055 for (i = 0; i < n; i++) {
1056 numaGetFValue(na, i, &val);
1057 if (val > eps) {
1058 found = TRUE;
1059 break;
1060 }
1061 }
1062 if (!found) {
1063 *pfirst = n - 1;
1064 *plast = 0;
1065 return 1;
1066 }
1067
1068 *pfirst = i;
1069 for (i = n - 1; i >= 0; i--) {
1070 numaGetFValue(na, i, &val);
1071 if (val > eps)
1072 break;
1073 }
1074 *plast = i;
1075 return 0;
1076 }
1077
1078
1079 /*!
1080 * \brief numaGetCountRelativeToZero()
1081 *
1082 * \param[in] na source numa
1083 * \param[in] type L_LESS_THAN_ZERO, L_EQUAL_TO_ZERO, L_GREATER_THAN_ZERO
1084 * \param[out] pcount count of values of given type
1085 * \return 0 if OK, 1 on error
1086 */
1087 l_ok
1088 numaGetCountRelativeToZero(NUMA *na,
1089 l_int32 type,
1090 l_int32 *pcount)
1091 {
1092 l_int32 n, i, count;
1093 l_float32 val;
1094
1095 if (!pcount)
1096 return ERROR_INT("&count not defined", __func__, 1);
1097 *pcount = 0;
1098 if (!na)
1099 return ERROR_INT("na not defined", __func__, 1);
1100 if ((n = numaGetCount(na)) == 0)
1101 return ERROR_INT("na is empty", __func__, 1);
1102
1103 for (i = 0, count = 0; i < n; i++) {
1104 numaGetFValue(na, i, &val);
1105 if (type == L_LESS_THAN_ZERO && val < 0.0)
1106 count++;
1107 else if (type == L_EQUAL_TO_ZERO && val == 0.0)
1108 count++;
1109 else if (type == L_GREATER_THAN_ZERO && val > 0.0)
1110 count++;
1111 }
1112
1113 *pcount = count;
1114 return 0;
1115 }
1116
1117
1118 /*!
1119 * \brief numaClipToInterval()
1120 *
1121 * \param[in] nas
1122 * \param[in] first >= 0; <= last
1123 * \param[in] last
1124 * \return numa with the same values as the input, but clipped
1125 * to the specified interval
1126 *
1127 * <pre>
1128 * Notes:
1129 * If you want the indices of the array values to be unchanged,
1130 * use first = 0.
1131 * Usage:
1132 * This is useful to clip a histogram that has a few nonzero
1133 * values to its nonzero range.
1134 * </pre>
1135 */
1136 NUMA *
1137 numaClipToInterval(NUMA *nas,
1138 l_int32 first,
1139 l_int32 last)
1140 {
1141 l_int32 n, i;
1142 l_float32 val, startx, delx;
1143 NUMA *nad;
1144
1145 if (!nas)
1146 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
1147 if ((n = numaGetCount(nas)) == 0)
1148 return (NUMA *)ERROR_PTR("nas is empty", __func__, NULL);
1149 if (first < 0 || first > last)
1150 return (NUMA *)ERROR_PTR("range not valid", __func__, NULL);
1151 if (first >= n)
1152 return (NUMA *)ERROR_PTR("no elements in range", __func__, NULL);
1153
1154 last = L_MIN(last, n - 1);
1155 if ((nad = numaCreate(last - first + 1)) == NULL)
1156 return (NUMA *)ERROR_PTR("nad not made", __func__, NULL);
1157 for (i = first; i <= last; i++) {
1158 numaGetFValue(nas, i, &val);
1159 numaAddNumber(nad, val);
1160 }
1161 numaGetParameters(nas, &startx, &delx);
1162 numaSetParameters(nad, startx + first * delx, delx);
1163 return nad;
1164 }
1165
1166
1167 /*!
1168 * \brief numaMakeThresholdIndicator()
1169 *
1170 * \param[in] nas input numa
1171 * \param[in] thresh threshold value
1172 * \param[in] type L_SELECT_IF_LT, L_SELECT_IF_GT,
1173 * L_SELECT_IF_LTE, L_SELECT_IF_GTE
1174 * \return nad : indicator array: values are 0 and 1
1175 *
1176 * <pre>
1177 * Notes:
1178 * (1) For each element in nas, if the constraint given by 'type'
1179 * correctly specifies its relation to thresh, a value of 1
1180 * is recorded in nad.
1181 * </pre>
1182 */
1183 NUMA *
1184 numaMakeThresholdIndicator(NUMA *nas,
1185 l_float32 thresh,
1186 l_int32 type)
1187 {
1188 l_int32 n, i, ival;
1189 l_float32 fval;
1190 NUMA *nai;
1191
1192 if (!nas)
1193 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
1194 if ((n = numaGetCount(nas)) == 0)
1195 return (NUMA *)ERROR_PTR("nas is empty", __func__, NULL);
1196
1197 nai = numaCreate(n);
1198 for (i = 0; i < n; i++) {
1199 numaGetFValue(nas, i, &fval);
1200 ival = 0;
1201 switch (type)
1202 {
1203 case L_SELECT_IF_LT:
1204 if (fval < thresh) ival = 1;
1205 break;
1206 case L_SELECT_IF_GT:
1207 if (fval > thresh) ival = 1;
1208 break;
1209 case L_SELECT_IF_LTE:
1210 if (fval <= thresh) ival = 1;
1211 break;
1212 case L_SELECT_IF_GTE:
1213 if (fval >= thresh) ival = 1;
1214 break;
1215 default:
1216 numaDestroy(&nai);
1217 return (NUMA *)ERROR_PTR("invalid type", __func__, NULL);
1218 }
1219 numaAddNumber(nai, ival);
1220 }
1221
1222 return nai;
1223 }
1224
1225
1226 /*!
1227 * \brief numaUniformSampling()
1228 *
1229 * \param[in] nas input numa
1230 * \param[in] nsamp number of samples
1231 * \return nad : resampled array, or NULL on error
1232 *
1233 * <pre>
1234 * Notes:
1235 * (1) This resamples the values in the array, using %nsamp
1236 * equal divisions.
1237 * </pre>
1238 */
1239 NUMA *
1240 numaUniformSampling(NUMA *nas,
1241 l_int32 nsamp)
1242 {
1243 l_int32 n, i, j, ileft, iright;
1244 l_float32 left, right, binsize, lfract, rfract, sum, startx, delx;
1245 l_float32 *array;
1246 NUMA *nad;
1247
1248 if (!nas)
1249 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
1250 if ((n = numaGetCount(nas)) == 0)
1251 return (NUMA *)ERROR_PTR("nas is empty", __func__, NULL);
1252 if (nsamp <= 0)
1253 return (NUMA *)ERROR_PTR("nsamp must be > 0", __func__, NULL);
1254
1255 nad = numaCreate(nsamp);
1256 array = numaGetFArray(nas, L_NOCOPY);
1257 binsize = (l_float32)n / (l_float32)nsamp;
1258 numaGetParameters(nas, &startx, &delx);
1259 numaSetParameters(nad, startx, binsize * delx);
1260 left = 0.0;
1261 for (i = 0; i < nsamp; i++) {
1262 sum = 0.0;
1263 right = left + binsize;
1264 ileft = (l_int32)left;
1265 lfract = 1.0f - left + ileft;
1266 if (lfract >= 1.0) /* on left bin boundary */
1267 lfract = 0.0;
1268 iright = (l_int32)right;
1269 rfract = right - iright;
1270 iright = L_MIN(iright, n - 1);
1271 if (ileft == iright) { /* both are within the same original sample */
1272 sum += (lfract + rfract - 1.0f) * array[ileft];
1273 } else {
1274 if (lfract > 0.0001) /* left fraction */
1275 sum += lfract * array[ileft];
1276 if (rfract > 0.0001) /* right fraction */
1277 sum += rfract * array[iright];
1278 for (j = ileft + 1; j < iright; j++) /* entire pixels */
1279 sum += array[j];
1280 }
1281
1282 numaAddNumber(nad, sum);
1283 left = right;
1284 }
1285 return nad;
1286 }
1287
1288
1289 /*!
1290 * \brief numaReverse()
1291 *
1292 * \param[in] nad [optional] can be null or equal to nas
1293 * \param[in] nas input numa
1294 * \return nad : reversed, or NULL on error
1295 *
1296 * <pre>
1297 * Notes:
1298 * (1) Usage:
1299 * numaReverse(nas, nas); // in-place
1300 * nad = numaReverse(NULL, nas); // makes a new one
1301 * </pre>
1302 */
1303 NUMA *
1304 numaReverse(NUMA *nad,
1305 NUMA *nas)
1306 {
1307 l_int32 n, i;
1308 l_float32 val1, val2;
1309
1310 if (!nas)
1311 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
1312 if (nad && nas != nad)
1313 return (NUMA *)ERROR_PTR("nad defined but != nas", __func__, NULL);
1314
1315 n = numaGetCount(nas);
1316 if (nad) { /* in-place */
1317 for (i = 0; i < n / 2; i++) {
1318 numaGetFValue(nad, i, &val1);
1319 numaGetFValue(nad, n - i - 1, &val2);
1320 numaSetValue(nad, i, val2);
1321 numaSetValue(nad, n - i - 1, val1);
1322 }
1323 } else {
1324 nad = numaCreate(n);
1325 for (i = n - 1; i >= 0; i--) {
1326 numaGetFValue(nas, i, &val1);
1327 numaAddNumber(nad, val1);
1328 }
1329 }
1330
1331 /* Reverse the startx and delx fields */
1332 nad->startx = nas->startx + (n - 1) * nas->delx;
1333 nad->delx = -nas->delx;
1334 return nad;
1335 }
1336
1337
1338 /*----------------------------------------------------------------------*
1339 * Signal feature extraction *
1340 *----------------------------------------------------------------------*/
1341 /*!
1342 * \brief numaLowPassIntervals()
1343 *
1344 * \param[in] nas input numa
1345 * \param[in] thresh threshold fraction of max; in [0.0 ... 1.0]
1346 * \param[in] maxn for normalizing; set maxn = 0.0 to use the max in nas
1347 * \return nad : interval abscissa pairs, or NULL on error
1348 *
1349 * <pre>
1350 * Notes:
1351 * (1) For each interval where the value is less than a specified
1352 * fraction of the maximum, this records the left and right "x"
1353 * value.
1354 * </pre>
1355 */
1356 NUMA *
1357 numaLowPassIntervals(NUMA *nas,
1358 l_float32 thresh,
1359 l_float32 maxn)
1360 {
1361 l_int32 n, i, inrun;
1362 l_float32 maxval, threshval, fval, startx, delx, x0, x1;
1363 NUMA *nad;
1364
1365 if (!nas)
1366 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
1367 if ((n = numaGetCount(nas)) == 0)
1368 return (NUMA *)ERROR_PTR("nas is empty", __func__, NULL);
1369 if (thresh < 0.0 || thresh > 1.0)
1370 return (NUMA *)ERROR_PTR("invalid thresh", __func__, NULL);
1371
1372 /* The input threshold is a fraction of the max.
1373 * The first entry in nad is the value of the max. */
1374 if (maxn == 0.0)
1375 numaGetMax(nas, &maxval, NULL);
1376 else
1377 maxval = maxn;
1378 numaGetParameters(nas, &startx, &delx);
1379 threshval = thresh * maxval;
1380 nad = numaCreate(0);
1381 numaAddNumber(nad, maxval);
1382
1383 /* Write pairs of pts (x0, x1) for the intervals */
1384 inrun = FALSE;
1385 for (i = 0; i < n; i++) {
1386 numaGetFValue(nas, i, &fval);
1387 if (fval < threshval && inrun == FALSE) { /* start a new run */
1388 inrun = TRUE;
1389 x0 = startx + i * delx;
1390 } else if (fval > threshval && inrun == TRUE) { /* end the run */
1391 inrun = FALSE;
1392 x1 = startx + i * delx;
1393 numaAddNumber(nad, x0);
1394 numaAddNumber(nad, x1);
1395 }
1396 }
1397 if (inrun == TRUE) { /* must end the last run */
1398 x1 = startx + (n - 1) * delx;
1399 numaAddNumber(nad, x0);
1400 numaAddNumber(nad, x1);
1401 }
1402
1403 return nad;
1404 }
1405
1406
1407 /*!
1408 * \brief numaThresholdEdges()
1409 *
1410 * \param[in] nas input numa
1411 * \param[in] thresh1 low threshold as fraction of max; in [0.0 ... 1.0]
1412 * \param[in] thresh2 high threshold as fraction of max; in [0.0 ... 1.0]
1413 * \param[in] maxn for normalizing; set maxn = 0.0 to use the max in nas
1414 * \return nad edge interval triplets, or NULL on error
1415 *
1416 * <pre>
1417 * Notes:
1418 * (1) For each edge interval, where the value is less
1419 * than %thresh1 on one side, greater than %thresh2 on
1420 * the other, and between these thresholds throughout the
1421 * interval, this records a triplet of values: the
1422 * 'left' and 'right' edges, and either +1 or -1, depending
1423 * on whether the edge is rising or falling.
1424 * (2) No assumption is made about the value outside the array,
1425 * so if the value at the array edge is between the threshold
1426 * values, it is not considered part of an edge. We start
1427 * looking for edge intervals only after leaving the thresholded
1428 * band.
1429 * </pre>
1430 */
1431 NUMA *
1432 numaThresholdEdges(NUMA *nas,
1433 l_float32 thresh1,
1434 l_float32 thresh2,
1435 l_float32 maxn)
1436 {
1437 l_int32 n, i, istart, inband, output, sign;
1438 l_int32 startbelow, below, above, belowlast, abovelast;
1439 l_float32 maxval, threshval1, threshval2, fval, startx, delx, x0, x1;
1440 NUMA *nad;
1441
1442 if (!nas)
1443 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
1444 if ((n = numaGetCount(nas)) == 0)
1445 return (NUMA *)ERROR_PTR("nas is empty", __func__, NULL);
1446 if (thresh1 < 0.0 || thresh1 > 1.0 || thresh2 < 0.0 || thresh2 > 1.0)
1447 return (NUMA *)ERROR_PTR("invalid thresholds", __func__, NULL);
1448 if (thresh2 < thresh1)
1449 return (NUMA *)ERROR_PTR("thresh2 < thresh1", __func__, NULL);
1450
1451 /* The input thresholds are fractions of the max.
1452 * The first entry in nad is the value of the max used
1453 * here for normalization. */
1454 if (maxn == 0.0)
1455 numaGetMax(nas, &maxval, NULL);
1456 else
1457 maxval = maxn;
1458 numaGetMax(nas, &maxval, NULL);
1459 numaGetParameters(nas, &startx, &delx);
1460 threshval1 = thresh1 * maxval;
1461 threshval2 = thresh2 * maxval;
1462 nad = numaCreate(0);
1463 numaAddNumber(nad, maxval);
1464
1465 /* Write triplets of pts (x0, x1, sign) for the edges.
1466 * First make sure we start search from outside the band.
1467 * Only one of {belowlast, abovelast} is true. */
1468 for (i = 0; i < n; i++) {
1469 istart = i;
1470 numaGetFValue(nas, i, &fval);
1471 belowlast = (fval < threshval1) ? TRUE : FALSE;
1472 abovelast = (fval > threshval2) ? TRUE : FALSE;
1473 if (belowlast == TRUE || abovelast == TRUE)
1474 break;
1475 }
1476 if (istart == n) /* no intervals found */
1477 return nad;
1478
1479 /* x0 and x1 can only be set from outside the edge.
1480 * They are the values just before entering the band,
1481 * and just after entering the band. We can jump through
1482 * the band, in which case they differ by one index in nas. */
1483 inband = FALSE;
1484 startbelow = belowlast; /* one of these is true */
1485 output = FALSE;
1486 x0 = startx + istart * delx;
1487 for (i = istart + 1; i < n; i++) {
1488 numaGetFValue(nas, i, &fval);
1489 below = (fval < threshval1) ? TRUE : FALSE;
1490 above = (fval > threshval2) ? TRUE : FALSE;
1491 if (!inband && belowlast && above) { /* full jump up */
1492 x1 = startx + i * delx;
1493 sign = 1;
1494 startbelow = FALSE; /* for the next transition */
1495 output = TRUE;
1496 } else if (!inband && abovelast && below) { /* full jump down */
1497 x1 = startx + i * delx;
1498 sign = -1;
1499 startbelow = TRUE; /* for the next transition */
1500 output = TRUE;
1501 } else if (inband && startbelow && above) { /* exit rising; success */
1502 x1 = startx + i * delx;
1503 sign = 1;
1504 inband = FALSE;
1505 startbelow = FALSE; /* for the next transition */
1506 output = TRUE;
1507 } else if (inband && !startbelow && below) {
1508 /* exit falling; success */
1509 x1 = startx + i * delx;
1510 sign = -1;
1511 inband = FALSE;
1512 startbelow = TRUE; /* for the next transition */
1513 output = TRUE;
1514 } else if (inband && !startbelow && above) { /* exit rising; failure */
1515 x0 = startx + i * delx;
1516 inband = FALSE;
1517 } else if (inband && startbelow && below) { /* exit falling; failure */
1518 x0 = startx + i * delx;
1519 inband = FALSE;
1520 } else if (!inband && !above && !below) { /* enter */
1521 inband = TRUE;
1522 startbelow = belowlast;
1523 } else if (!inband && (above || below)) { /* outside and remaining */
1524 x0 = startx + i * delx; /* update position */
1525 }
1526 belowlast = below;
1527 abovelast = above;
1528 if (output) { /* we have exited; save new x0 */
1529 numaAddNumber(nad, x0);
1530 numaAddNumber(nad, x1);
1531 numaAddNumber(nad, sign);
1532 output = FALSE;
1533 x0 = startx + i * delx;
1534 }
1535 }
1536
1537 return nad;
1538 }
1539
1540
1541 /*!
1542 * \brief numaGetSpanValues()
1543 *
1544 * \param[in] na numa that is output of numaLowPassIntervals()
1545 * \param[in] span span number, zero-based
1546 * \param[out] pstart [optional] location of start of transition
1547 * \param[out] pend [optional] location of end of transition
1548 * \return 0 if OK, 1 on error
1549 */
1550 l_int32
1551 numaGetSpanValues(NUMA *na,
1552 l_int32 span,
1553 l_int32 *pstart,
1554 l_int32 *pend)
1555 {
1556 l_int32 n, nspans;
1557
1558 if (!na)
1559 return ERROR_INT("na not defined", __func__, 1);
1560 if ((n = numaGetCount(na)) == 0)
1561 return ERROR_INT("na is empty", __func__, 1);
1562 if (n % 2 != 1)
1563 return ERROR_INT("n is not odd", __func__, 1);
1564 nspans = n / 2;
1565 if (nspans < 0 || span >= nspans)
1566 return ERROR_INT("invalid span", __func__, 1);
1567
1568 if (pstart) numaGetIValue(na, 2 * span + 1, pstart);
1569 if (pend) numaGetIValue(na, 2 * span + 2, pend);
1570 return 0;
1571 }
1572
1573
1574 /*!
1575 * \brief numaGetEdgeValues()
1576 *
1577 * \param[in] na numa that is output of numaThresholdEdges()
1578 * \param[in] edge edge number, zero-based
1579 * \param[out] pstart [optional] location of start of transition
1580 * \param[out] pend [optional] location of end of transition
1581 * \param[out] psign [optional] transition sign: +1 is rising,
1582 * -1 is falling
1583 * \return 0 if OK, 1 on error
1584 */
1585 l_int32
1586 numaGetEdgeValues(NUMA *na,
1587 l_int32 edge,
1588 l_int32 *pstart,
1589 l_int32 *pend,
1590 l_int32 *psign)
1591 {
1592 l_int32 n, nedges;
1593
1594 if (!na)
1595 return ERROR_INT("na not defined", __func__, 1);
1596 if ((n = numaGetCount(na)) == 0)
1597 return ERROR_INT("na is empty", __func__, 1);
1598 if (n % 3 != 1)
1599 return ERROR_INT("n % 3 is not 1", __func__, 1);
1600 nedges = (n - 1) / 3;
1601 if (edge < 0 || edge >= nedges)
1602 return ERROR_INT("invalid edge", __func__, 1);
1603
1604 if (pstart) numaGetIValue(na, 3 * edge + 1, pstart);
1605 if (pend) numaGetIValue(na, 3 * edge + 2, pend);
1606 if (psign) numaGetIValue(na, 3 * edge + 3, psign);
1607 return 0;
1608 }
1609
1610
1611 /*----------------------------------------------------------------------*
1612 * Interpolation *
1613 *----------------------------------------------------------------------*/
1614 /*!
1615 * \brief numaInterpolateEqxVal()
1616 *
1617 * \param[in] startx xval corresponding to first element in array
1618 * \param[in] deltax x increment between array elements
1619 * \param[in] nay numa of ordinate values, assumed equally spaced
1620 * \param[in] type L_LINEAR_INTERP, L_QUADRATIC_INTERP
1621 * \param[in] xval
1622 * \param[out] pyval interpolated value
1623 * \return 0 if OK, 1 on error e.g., if xval is outside range
1624 *
1625 * <pre>
1626 * Notes:
1627 * (1) Considering nay as a function of x, the x values
1628 * are equally spaced
1629 * (2) Caller should check for valid return.
1630 *
1631 * For linear Lagrangian interpolation (through 2 data pts):
1632 * y(x) = y1(x-x2)/(x1-x2) + y2(x-x1)/(x2-x1)
1633 *
1634 * For quadratic Lagrangian interpolation (through 3 data pts):
1635 * y(x) = y1(x-x2)(x-x3)/((x1-x2)(x1-x3)) +
1636 * y2(x-x1)(x-x3)/((x2-x1)(x2-x3)) +
1637 * y3(x-x1)(x-x2)/((x3-x1)(x3-x2))
1638 *
1639 * </pre>
1640 */
1641 l_ok
1642 numaInterpolateEqxVal(l_float32 startx,
1643 l_float32 deltax,
1644 NUMA *nay,
1645 l_int32 type,
1646 l_float32 xval,
1647 l_float32 *pyval)
1648 {
1649 l_int32 i, n, i1, i2, i3;
1650 l_float32 x1, x2, x3, fy1, fy2, fy3, d1, d2, d3, del, fi, maxx;
1651 l_float32 *fa;
1652
1653 if (!pyval)
1654 return ERROR_INT("&yval not defined", __func__, 1);
1655 *pyval = 0.0;
1656 if (!nay)
1657 return ERROR_INT("nay not defined", __func__, 1);
1658 if (deltax <= 0.0)
1659 return ERROR_INT("deltax not > 0", __func__, 1);
1660 if (type != L_LINEAR_INTERP && type != L_QUADRATIC_INTERP)
1661 return ERROR_INT("invalid interp type", __func__, 1);
1662 if ((n = numaGetCount(nay)) < 2)
1663 return ERROR_INT("not enough points", __func__, 1);
1664 if (type == L_QUADRATIC_INTERP && n == 2) {
1665 type = L_LINEAR_INTERP;
1666 L_WARNING("only 2 points; using linear interp\n", __func__);
1667 }
1668 maxx = startx + deltax * (n - 1);
1669 if (xval < startx || xval > maxx)
1670 return ERROR_INT("xval is out of bounds", __func__, 1);
1671
1672 fa = numaGetFArray(nay, L_NOCOPY);
1673 fi = (xval - startx) / deltax;
1674 i = (l_int32)fi;
1675 del = fi - i;
1676 if (del == 0.0) { /* no interpolation required */
1677 *pyval = fa[i];
1678 return 0;
1679 }
1680
1681 if (type == L_LINEAR_INTERP) {
1682 *pyval = fa[i] + del * (fa[i + 1] - fa[i]);
1683 return 0;
1684 }
1685
1686 /* Quadratic interpolation */
1687 d1 = d3 = 0.5f / (deltax * deltax);
1688 d2 = -2.f * d1;
1689 if (i == 0) {
1690 i1 = i;
1691 i2 = i + 1;
1692 i3 = i + 2;
1693 } else {
1694 i1 = i - 1;
1695 i2 = i;
1696 i3 = i + 1;
1697 }
1698 x1 = startx + i1 * deltax;
1699 x2 = startx + i2 * deltax;
1700 x3 = startx + i3 * deltax;
1701 fy1 = d1 * fa[i1];
1702 fy2 = d2 * fa[i2];
1703 fy3 = d3 * fa[i3];
1704 *pyval = fy1 * (xval - x2) * (xval - x3) +
1705 fy2 * (xval - x1) * (xval - x3) +
1706 fy3 * (xval - x1) * (xval - x2);
1707 return 0;
1708 }
1709
1710
1711 /*!
1712 * \brief numaInterpolateArbxVal()
1713 *
1714 * \param[in] nax numa of abscissa values
1715 * \param[in] nay numa of ordinate values, corresponding to nax
1716 * \param[in] type L_LINEAR_INTERP, L_QUADRATIC_INTERP
1717 * \param[in] xval
1718 * \param[out] pyval interpolated value
1719 * \return 0 if OK, 1 on error e.g., if xval is outside range
1720 *
1721 * <pre>
1722 * Notes:
1723 * (1) The values in nax must be sorted in increasing order.
1724 * If, additionally, they are equally spaced, you can use
1725 * numaInterpolateEqxVal().
1726 * (2) Caller should check for valid return.
1727 * (3) Uses lagrangian interpolation. See numaInterpolateEqxVal()
1728 * for formulas.
1729 * </pre>
1730 */
1731 l_ok
1732 numaInterpolateArbxVal(NUMA *nax,
1733 NUMA *nay,
1734 l_int32 type,
1735 l_float32 xval,
1736 l_float32 *pyval)
1737 {
1738 l_int32 i, im, nx, ny, i1, i2, i3;
1739 l_float32 delu, dell, fract, d1, d2, d3;
1740 l_float32 minx, maxx;
1741 l_float32 *fax, *fay;
1742
1743 if (!pyval)
1744 return ERROR_INT("&yval not defined", __func__, 1);
1745 *pyval = 0.0;
1746 if (!nax)
1747 return ERROR_INT("nax not defined", __func__, 1);
1748 if (!nay)
1749 return ERROR_INT("nay not defined", __func__, 1);
1750 if (type != L_LINEAR_INTERP && type != L_QUADRATIC_INTERP)
1751 return ERROR_INT("invalid interp type", __func__, 1);
1752 ny = numaGetCount(nay);
1753 nx = numaGetCount(nax);
1754 if (nx != ny)
1755 return ERROR_INT("nax and nay not same size arrays", __func__, 1);
1756 if (ny < 2)
1757 return ERROR_INT("not enough points", __func__, 1);
1758 if (type == L_QUADRATIC_INTERP && ny == 2) {
1759 type = L_LINEAR_INTERP;
1760 L_WARNING("only 2 points; using linear interp\n", __func__);
1761 }
1762 numaGetFValue(nax, 0, &minx);
1763 numaGetFValue(nax, nx - 1, &maxx);
1764 if (xval < minx || xval > maxx)
1765 return ERROR_INT("xval is out of bounds", __func__, 1);
1766
1767 fax = numaGetFArray(nax, L_NOCOPY);
1768 fay = numaGetFArray(nay, L_NOCOPY);
1769
1770 /* Linear search for interval. We are guaranteed
1771 * to either return or break out of the loop.
1772 * In addition, we are assured that fax[i] - fax[im] > 0.0 */
1773 if (xval == fax[0]) {
1774 *pyval = fay[0];
1775 return 0;
1776 }
1777 im = 0;
1778 dell = 0.0;
1779 for (i = 1; i < nx; i++) {
1780 delu = fax[i] - xval;
1781 if (delu >= 0.0) { /* we've passed it */
1782 if (delu == 0.0) {
1783 *pyval = fay[i];
1784 return 0;
1785 }
1786 im = i - 1;
1787 dell = xval - fax[im]; /* >= 0 */
1788 break;
1789 }
1790 }
1791 fract = dell / (fax[i] - fax[im]);
1792
1793 if (type == L_LINEAR_INTERP) {
1794 *pyval = fay[i] + fract * (fay[i + 1] - fay[i]);
1795 return 0;
1796 }
1797
1798 /* Quadratic interpolation */
1799 if (im == 0) {
1800 i1 = im;
1801 i2 = im + 1;
1802 i3 = im + 2;
1803 } else {
1804 i1 = im - 1;
1805 i2 = im;
1806 i3 = im + 1;
1807 }
1808 d1 = (fax[i1] - fax[i2]) * (fax[i1] - fax[i3]);
1809 d2 = (fax[i2] - fax[i1]) * (fax[i2] - fax[i3]);
1810 d3 = (fax[i3] - fax[i1]) * (fax[i3] - fax[i2]);
1811 *pyval = fay[i1] * (xval - fax[i2]) * (xval - fax[i3]) / d1 +
1812 fay[i2] * (xval - fax[i1]) * (xval - fax[i3]) / d2 +
1813 fay[i3] * (xval - fax[i1]) * (xval - fax[i2]) / d3;
1814 return 0;
1815 }
1816
1817
1818 /*!
1819 * \brief numaInterpolateEqxInterval()
1820 *
1821 * \param[in] startx xval corresponding to first element in nas
1822 * \param[in] deltax x increment between array elements in nas
1823 * \param[in] nasy numa of ordinate values, assumed equally spaced
1824 * \param[in] type L_LINEAR_INTERP, L_QUADRATIC_INTERP
1825 * \param[in] x0 start value of interval
1826 * \param[in] x1 end value of interval
1827 * \param[in] npts number of points to evaluate function in interval
1828 * \param[out] pnax [optional] array of x values in interval
1829 * \param[out] pnay array of y values in interval
1830 * \return 0 if OK, 1 on error
1831 *
1832 * <pre>
1833 * Notes:
1834 * (1) Considering nasy as a function of x, the x values
1835 * are equally spaced.
1836 * (2) This creates nay (and optionally nax) of interpolated
1837 * values over the specified interval (x0, x1).
1838 * (3) If the interval (x0, x1) lies partially outside the array
1839 * nasy (as interpreted by startx and deltax), it is an
1840 * error and returns 1.
1841 * (4) Note that deltax is the intrinsic x-increment for the input
1842 * array nasy, whereas delx is the intrinsic x-increment for the
1843 * output interpolated array nay.
1844 * </pre>
1845 */
1846 l_ok
1847 numaInterpolateEqxInterval(l_float32 startx,
1848 l_float32 deltax,
1849 NUMA *nasy,
1850 l_int32 type,
1851 l_float32 x0,
1852 l_float32 x1,
1853 l_int32 npts,
1854 NUMA **pnax,
1855 NUMA **pnay)
1856 {
1857 l_int32 i, n;
1858 l_float32 x, yval, maxx, delx;
1859 NUMA *nax = NULL, *nay;
1860
1861 if (pnax) *pnax = NULL;
1862 if (!pnay)
1863 return ERROR_INT("&nay not defined", __func__, 1);
1864 *pnay = NULL;
1865 if (!nasy)
1866 return ERROR_INT("nasy not defined", __func__, 1);
1867 if ((n = numaGetCount(nasy)) < 2)
1868 return ERROR_INT("n < 2", __func__, 1);
1869 if (deltax <= 0.0)
1870 return ERROR_INT("deltax not > 0", __func__, 1);
1871 if (type != L_LINEAR_INTERP && type != L_QUADRATIC_INTERP)
1872 return ERROR_INT("invalid interp type", __func__, 1);
1873 if (type == L_QUADRATIC_INTERP && n == 2) {
1874 type = L_LINEAR_INTERP;
1875 L_WARNING("only 2 points; using linear interp\n", __func__);
1876 }
1877 maxx = startx + deltax * (n - 1);
1878 if (x0 < startx || x1 > maxx || x1 <= x0)
1879 return ERROR_INT("[x0 ... x1] is not valid", __func__, 1);
1880 if (npts < 3)
1881 return ERROR_INT("npts < 3", __func__, 1);
1882 delx = (x1 - x0) / (l_float32)(npts - 1); /* delx is for output nay */
1883
1884 if ((nay = numaCreate(npts)) == NULL)
1885 return ERROR_INT("nay not made", __func__, 1);
1886 numaSetParameters(nay, x0, delx);
1887 *pnay = nay;
1888 if (pnax) {
1889 nax = numaCreate(npts);
1890 *pnax = nax;
1891 }
1892
1893 for (i = 0; i < npts; i++) {
1894 x = x0 + i * delx;
1895 if (pnax)
1896 numaAddNumber(nax, x);
1897 numaInterpolateEqxVal(startx, deltax, nasy, type, x, &yval);
1898 numaAddNumber(nay, yval);
1899 }
1900
1901 return 0;
1902 }
1903
1904
1905 /*!
1906 * \brief numaInterpolateArbxInterval()
1907 *
1908 * \param[in] nax numa of abscissa values
1909 * \param[in] nay numa of ordinate values, corresponding to nax
1910 * \param[in] type L_LINEAR_INTERP, L_QUADRATIC_INTERP
1911 * \param[in] x0 start value of interval
1912 * \param[in] x1 end value of interval
1913 * \param[in] npts number of points to evaluate function in interval
1914 * \param[out] pnadx [optional] array of x values in interval
1915 * \param[out] pnady array of y values in interval
1916 * \return 0 if OK, 1 on error e.g., if x0 or x1 is outside range
1917 *
1918 * <pre>
1919 * Notes:
1920 * (1) The values in nax must be sorted in increasing order.
1921 * If they are not sorted, we do it here, and complain.
1922 * (2) If the values in nax are equally spaced, you can use
1923 * numaInterpolateEqxInterval().
1924 * (3) Caller should check for valid return.
1925 * (4) We don't call numaInterpolateArbxVal() for each output
1926 * point, because that requires an O(n) search for
1927 * each point. Instead, we do a single O(n) pass through
1928 * nax, saving the indices to be used for each output yval.
1929 * (5) Uses lagrangian interpolation. See numaInterpolateEqxVal()
1930 * for formulas.
1931 * </pre>
1932 */
1933 l_ok
1934 numaInterpolateArbxInterval(NUMA *nax,
1935 NUMA *nay,
1936 l_int32 type,
1937 l_float32 x0,
1938 l_float32 x1,
1939 l_int32 npts,
1940 NUMA **pnadx,
1941 NUMA **pnady)
1942 {
1943 l_int32 i, im, j, nx, ny, i1, i2, i3, sorted;
1944 l_int32 *index;
1945 l_float32 del, xval, yval, excess, fract, minx, maxx, d1, d2, d3;
1946 l_float32 *fax, *fay;
1947 NUMA *nasx, *nasy, *nadx = NULL, *nady;
1948
1949 if (pnadx) *pnadx = NULL;
1950 if (!pnady)
1951 return ERROR_INT("&nady not defined", __func__, 1);
1952 *pnady = NULL;
1953 if (!nay)
1954 return ERROR_INT("nay not defined", __func__, 1);
1955 if (!nax)
1956 return ERROR_INT("nax not defined", __func__, 1);
1957 if (type != L_LINEAR_INTERP && type != L_QUADRATIC_INTERP)
1958 return ERROR_INT("invalid interp type", __func__, 1);
1959 if (x0 > x1)
1960 return ERROR_INT("x0 > x1", __func__, 1);
1961 ny = numaGetCount(nay);
1962 nx = numaGetCount(nax);
1963 if (nx != ny)
1964 return ERROR_INT("nax and nay not same size arrays", __func__, 1);
1965 if (ny < 2)
1966 return ERROR_INT("not enough points", __func__, 1);
1967 if (type == L_QUADRATIC_INTERP && ny == 2) {
1968 type = L_LINEAR_INTERP;
1969 L_WARNING("only 2 points; using linear interp\n", __func__);
1970 }
1971 numaGetMin(nax, &minx, NULL);
1972 numaGetMax(nax, &maxx, NULL);
1973 if (x0 < minx || x1 > maxx)
1974 return ERROR_INT("xval is out of bounds", __func__, 1);
1975
1976 /* Make sure that nax is sorted in increasing order */
1977 numaIsSorted(nax, L_SORT_INCREASING, &sorted);
1978 if (!sorted) {
1979 L_WARNING("we are sorting nax in increasing order\n", __func__);
1980 numaSortPair(nax, nay, L_SORT_INCREASING, &nasx, &nasy);
1981 } else {
1982 nasx = numaClone(nax);
1983 nasy = numaClone(nay);
1984 }
1985
1986 fax = numaGetFArray(nasx, L_NOCOPY);
1987 fay = numaGetFArray(nasy, L_NOCOPY);
1988
1989 /* Get array of indices into fax for interpolated locations */
1990 if ((index = (l_int32 *)LEPT_CALLOC(npts, sizeof(l_int32))) == NULL) {
1991 numaDestroy(&nasx);
1992 numaDestroy(&nasy);
1993 return ERROR_INT("ind not made", __func__, 1);
1994 }
1995 del = (x1 - x0) / (npts - 1.0f);
1996 for (i = 0, j = 0; j < nx && i < npts; i++) {
1997 xval = x0 + i * del;
1998 while (j < nx - 1 && xval > fax[j])
1999 j++;
2000 if (xval == fax[j])
2001 index[i] = L_MIN(j, nx - 1);
2002 else /* the index of fax[] is just below xval */
2003 index[i] = L_MAX(j - 1, 0);
2004 }
2005
2006 /* For each point to be interpolated, get the y value */
2007 nady = numaCreate(npts);
2008 *pnady = nady;
2009 if (pnadx) {
2010 nadx = numaCreate(npts);
2011 *pnadx = nadx;
2012 }
2013 for (i = 0; i < npts; i++) {
2014 xval = x0 + i * del;
2015 if (pnadx)
2016 numaAddNumber(nadx, xval);
2017 im = index[i];
2018 excess = xval - fax[im];
2019 if (excess == 0.0) {
2020 numaAddNumber(nady, fay[im]);
2021 continue;
2022 }
2023 fract = excess / (fax[im + 1] - fax[im]);
2024
2025 if (type == L_LINEAR_INTERP) {
2026 yval = fay[im] + fract * (fay[im + 1] - fay[im]);
2027 numaAddNumber(nady, yval);
2028 continue;
2029 }
2030
2031 /* Quadratic interpolation */
2032 if (im == 0) {
2033 i1 = im;
2034 i2 = im + 1;
2035 i3 = im + 2;
2036 } else {
2037 i1 = im - 1;
2038 i2 = im;
2039 i3 = im + 1;
2040 }
2041 d1 = (fax[i1] - fax[i2]) * (fax[i1] - fax[i3]);
2042 d2 = (fax[i2] - fax[i1]) * (fax[i2] - fax[i3]);
2043 d3 = (fax[i3] - fax[i1]) * (fax[i3] - fax[i2]);
2044 yval = fay[i1] * (xval - fax[i2]) * (xval - fax[i3]) / d1 +
2045 fay[i2] * (xval - fax[i1]) * (xval - fax[i3]) / d2 +
2046 fay[i3] * (xval - fax[i1]) * (xval - fax[i2]) / d3;
2047 numaAddNumber(nady, yval);
2048 }
2049
2050 LEPT_FREE(index);
2051 numaDestroy(&nasx);
2052 numaDestroy(&nasy);
2053 return 0;
2054 }
2055
2056
2057 /*----------------------------------------------------------------------*
2058 * Functions requiring interpolation *
2059 *----------------------------------------------------------------------*/
2060 /*!
2061 * \brief numaFitMax()
2062 *
2063 * \param[in] na numa of ordinate values, to fit a max to
2064 * \param[out] pmaxval max value
2065 * \param[in] naloc [optional] associated numa of abscissa values
2066 * \param[out] pmaxloc abscissa value that gives max value in na;
2067 * if naloc == null, this is given as an interpolated
2068 * index value
2069 * \return 0 if OK; 1 on error
2070 *
2071 * <pre>
2072 * Notes:
2073 * If %naloc is given, there is no requirement that the
2074 * data points are evenly spaced. Lagrangian interpolation
2075 * handles that. The only requirement is that the
2076 * data points are ordered so that the values in naloc
2077 * are either increasing or decreasing. We test to make
2078 * sure that the sizes of na and naloc are equal, and it
2079 * is assumed that the correspondences %na[i] as a function
2080 * of %naloc[i] are properly arranged for all i.
2081 *
2082 * The formula for Lagrangian interpolation through 3 data pts is:
2083 * y(x) = y1(x-x2)(x-x3)/((x1-x2)(x1-x3)) +
2084 * y2(x-x1)(x-x3)/((x2-x1)(x2-x3)) +
2085 * y3(x-x1)(x-x2)/((x3-x1)(x3-x2))
2086 *
2087 * Then the derivative, using the constants (c1,c2,c3) defined below,
2088 * is set to 0:
2089 * y'(x) = 2x(c1+c2+c3) - c1(x2+x3) - c2(x1+x3) - c3(x1+x2) = 0
2090 * </pre>
2091 */
2092 l_ok
2093 numaFitMax(NUMA *na,
2094 l_float32 *pmaxval,
2095 NUMA *naloc,
2096 l_float32 *pmaxloc)
2097 {
2098 l_float32 val;
2099 l_float32 smaxval; /* start value of maximum sample, before interpolating */
2100 l_int32 n, imaxloc;
2101 l_float32 x1, x2, x3, y1, y2, y3, c1, c2, c3, a, b, xmax, ymax;
2102
2103 if (pmaxval) *pmaxval = 0.0;
2104 if (pmaxloc) *pmaxloc = 0.0;
2105 if (!na)
2106 return ERROR_INT("na not defined", __func__, 1);
2107 if ((n = numaGetCount(na)) == 0)
2108 return ERROR_INT("na is empty", __func__, 1);
2109 if (!pmaxval)
2110 return ERROR_INT("&maxval not defined", __func__, 1);
2111 if (!pmaxloc)
2112 return ERROR_INT("&maxloc not defined", __func__, 1);
2113 if (naloc) {
2114 if (n != numaGetCount(naloc))
2115 return ERROR_INT("na and naloc of unequal size", __func__, 1);
2116 }
2117
2118 numaGetMax(na, &smaxval, &imaxloc);
2119
2120 /* Simple case: max is at end point */
2121 if (imaxloc == 0 || imaxloc == n - 1) {
2122 *pmaxval = smaxval;
2123 if (naloc) {
2124 numaGetFValue(naloc, imaxloc, &val);
2125 *pmaxloc = val;
2126 } else {
2127 *pmaxloc = imaxloc;
2128 }
2129 return 0;
2130 }
2131
2132 /* Interior point; use quadratic interpolation */
2133 y2 = smaxval;
2134 numaGetFValue(na, imaxloc - 1, &val);
2135 y1 = val;
2136 numaGetFValue(na, imaxloc + 1, &val);
2137 y3 = val;
2138 if (naloc) {
2139 numaGetFValue(naloc, imaxloc - 1, &val);
2140 x1 = val;
2141 numaGetFValue(naloc, imaxloc, &val);
2142 x2 = val;
2143 numaGetFValue(naloc, imaxloc + 1, &val);
2144 x3 = val;
2145 } else {
2146 x1 = imaxloc - 1;
2147 x2 = imaxloc;
2148 x3 = imaxloc + 1;
2149 }
2150
2151 /* Can't interpolate; just use the max val in na
2152 * and the corresponding one in naloc */
2153 if (x1 == x2 || x1 == x3 || x2 == x3) {
2154 *pmaxval = y2;
2155 *pmaxloc = x2;
2156 return 0;
2157 }
2158
2159 /* Use lagrangian interpolation; set dy/dx = 0 */
2160 c1 = y1 / ((x1 - x2) * (x1 - x3));
2161 c2 = y2 / ((x2 - x1) * (x2 - x3));
2162 c3 = y3 / ((x3 - x1) * (x3 - x2));
2163 a = c1 + c2 + c3;
2164 b = c1 * (x2 + x3) + c2 * (x1 + x3) + c3 * (x1 + x2);
2165 xmax = b / (2 * a);
2166 ymax = c1 * (xmax - x2) * (xmax - x3) +
2167 c2 * (xmax - x1) * (xmax - x3) +
2168 c3 * (xmax - x1) * (xmax - x2);
2169 *pmaxval = ymax;
2170 *pmaxloc = xmax;
2171
2172 return 0;
2173 }
2174
2175
2176 /*!
2177 * \brief numaDifferentiateInterval()
2178 *
2179 * \param[in] nax numa of abscissa values
2180 * \param[in] nay numa of ordinate values, corresponding to nax
2181 * \param[in] x0 start value of interval
2182 * \param[in] x1 end value of interval
2183 * \param[in] npts number of points to evaluate function in interval
2184 * \param[out] pnadx [optional] array of x values in interval
2185 * \param[out] pnady array of derivatives in interval
2186 * \return 0 if OK, 1 on error e.g., if x0 or x1 is outside range
2187 *
2188 * <pre>
2189 * Notes:
2190 * (1) The values in nax must be sorted in increasing order.
2191 * If they are not sorted, it is done in the interpolation
2192 * step, and a warning is issued.
2193 * (2) Caller should check for valid return.
2194 * </pre>
2195 */
2196 l_ok
2197 numaDifferentiateInterval(NUMA *nax,
2198 NUMA *nay,
2199 l_float32 x0,
2200 l_float32 x1,
2201 l_int32 npts,
2202 NUMA **pnadx,
2203 NUMA **pnady)
2204 {
2205 l_int32 i, nx, ny;
2206 l_float32 minx, maxx, der, invdel;
2207 l_float32 *fay;
2208 NUMA *nady, *naiy;
2209
2210 if (pnadx) *pnadx = NULL;
2211 if (!pnady)
2212 return ERROR_INT("&nady not defined", __func__, 1);
2213 *pnady = NULL;
2214 if (!nay)
2215 return ERROR_INT("nay not defined", __func__, 1);
2216 if (!nax)
2217 return ERROR_INT("nax not defined", __func__, 1);
2218 if (x0 > x1)
2219 return ERROR_INT("x0 > x1", __func__, 1);
2220 ny = numaGetCount(nay);
2221 nx = numaGetCount(nax);
2222 if (nx != ny)
2223 return ERROR_INT("nax and nay not same size arrays", __func__, 1);
2224 if (ny < 2)
2225 return ERROR_INT("not enough points", __func__, 1);
2226 numaGetMin(nax, &minx, NULL);
2227 numaGetMax(nax, &maxx, NULL);
2228 if (x0 < minx || x1 > maxx)
2229 return ERROR_INT("xval is out of bounds", __func__, 1);
2230 if (npts < 2)
2231 return ERROR_INT("npts < 2", __func__, 1);
2232
2233 /* Generate interpolated array over specified interval */
2234 if (numaInterpolateArbxInterval(nax, nay, L_LINEAR_INTERP, x0, x1,
2235 npts, pnadx, &naiy))
2236 return ERROR_INT("interpolation failed", __func__, 1);
2237
2238 nady = numaCreate(npts);
2239 *pnady = nady;
2240 invdel = 0.5f * ((l_float32)npts - 1.0f) / (x1 - x0);
2241 fay = numaGetFArray(naiy, L_NOCOPY);
2242
2243 /* Compute and save derivatives */
2244 der = 0.5f * invdel * (fay[1] - fay[0]);
2245 numaAddNumber(nady, der);
2246 for (i = 1; i < npts - 1; i++) {
2247 der = invdel * (fay[i + 1] - fay[i - 1]);
2248 numaAddNumber(nady, der);
2249 }
2250 der = 0.5f * invdel * (fay[npts - 1] - fay[npts - 2]);
2251 numaAddNumber(nady, der);
2252
2253 numaDestroy(&naiy);
2254 return 0;
2255 }
2256
2257
2258 /*!
2259 * \brief numaIntegrateInterval()
2260 *
2261 * \param[in] nax numa of abscissa values
2262 * \param[in] nay numa of ordinate values, corresponding to nax
2263 * \param[in] x0 start value of interval
2264 * \param[in] x1 end value of interval
2265 * \param[in] npts number of points to evaluate function in interval
2266 * \param[out] psum integral of function over interval
2267 * \return 0 if OK, 1 on error e.g., if x0 or x1 is outside range
2268 *
2269 * <pre>
2270 * Notes:
2271 * (1) The values in nax must be sorted in increasing order.
2272 * If they are not sorted, it is done in the interpolation
2273 * step, and a warning is issued.
2274 * (2) Caller should check for valid return.
2275 * </pre>
2276 */
2277 l_ok
2278 numaIntegrateInterval(NUMA *nax,
2279 NUMA *nay,
2280 l_float32 x0,
2281 l_float32 x1,
2282 l_int32 npts,
2283 l_float32 *psum)
2284 {
2285 l_int32 i, nx, ny;
2286 l_float32 minx, maxx, sum, del;
2287 l_float32 *fay;
2288 NUMA *naiy;
2289
2290 if (!psum)
2291 return ERROR_INT("&sum not defined", __func__, 1);
2292 *psum = 0.0;
2293 if (!nay)
2294 return ERROR_INT("nay not defined", __func__, 1);
2295 if (!nax)
2296 return ERROR_INT("nax not defined", __func__, 1);
2297 if (x0 > x1)
2298 return ERROR_INT("x0 > x1", __func__, 1);
2299 if (npts < 2)
2300 return ERROR_INT("npts < 2", __func__, 1);
2301 ny = numaGetCount(nay);
2302 nx = numaGetCount(nax);
2303 if (nx != ny)
2304 return ERROR_INT("nax and nay not same size arrays", __func__, 1);
2305 if (ny < 2)
2306 return ERROR_INT("not enough points", __func__, 1);
2307 numaGetMin(nax, &minx, NULL);
2308 numaGetMax(nax, &maxx, NULL);
2309 if (x0 < minx || x1 > maxx)
2310 return ERROR_INT("xval is out of bounds", __func__, 1);
2311
2312 /* Generate interpolated array over specified interval */
2313 if (numaInterpolateArbxInterval(nax, nay, L_LINEAR_INTERP, x0, x1,
2314 npts, NULL, &naiy))
2315 return ERROR_INT("interpolation failed", __func__, 1);
2316
2317 del = (x1 - x0) / ((l_float32)npts - 1.0f);
2318 fay = numaGetFArray(naiy, L_NOCOPY);
2319
2320 /* Compute integral (simple trapezoid) */
2321 sum = 0.5f * (fay[0] + fay[npts - 1]);
2322 for (i = 1; i < npts - 1; i++)
2323 sum += fay[i];
2324 *psum = del * sum;
2325
2326 numaDestroy(&naiy);
2327 return 0;
2328 }
2329
2330
2331 /*----------------------------------------------------------------------*
2332 * Sorting *
2333 *----------------------------------------------------------------------*/
2334 /*!
2335 * \brief numaSortGeneral()
2336 *
2337 * \param[in] na source numa
2338 * \param[out] pnasort [optional] sorted numa
2339 * \param[out] pnaindex [optional] index of elements in na associated
2340 * with each element of nasort
2341 * \param[out] pnainvert [optional] index of elements in nasort associated
2342 * with each element of na
2343 * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING
2344 * \param[in] sorttype L_SHELL_SORT or L_BIN_SORT
2345 * \return 0 if OK, 1 on error
2346 *
2347 * <pre>
2348 * Notes:
2349 * (1) Sorting can be confusing. Here's an array of five values with
2350 * the results shown for the 3 output arrays.
2351 *
2352 * na nasort naindex nainvert
2353 * -----------------------------------
2354 * 3 9 2 3
2355 * 4 6 3 2
2356 * 9 4 1 0
2357 * 6 3 0 1
2358 * 1 1 4 4
2359 *
2360 * Note that naindex is a LUT into na for the sorted array values,
2361 * and nainvert directly gives the sorted index values for the
2362 * input array. It is useful to view naindex is as a map:
2363 * 0 --> 2
2364 * 1 --> 3
2365 * 2 --> 1
2366 * 3 --> 0
2367 * 4 --> 4
2368 * and nainvert, the inverse of this map:
2369 * 0 --> 3
2370 * 1 --> 2
2371 * 2 --> 0
2372 * 3 --> 1
2373 * 4 --> 4
2374 *
2375 * We can write these relations symbolically as:
2376 * nasort[i] = na[naindex[i]]
2377 * na[i] = nasort[nainvert[i]]
2378 * </pre>
2379 */
2380 l_ok
2381 numaSortGeneral(NUMA *na,
2382 NUMA **pnasort,
2383 NUMA **pnaindex,
2384 NUMA **pnainvert,
2385 l_int32 sortorder,
2386 l_int32 sorttype)
2387 {
2388 l_int32 isize;
2389 l_float32 size;
2390 NUMA *naindex = NULL;
2391
2392 if (pnasort) *pnasort = NULL;
2393 if (pnaindex) *pnaindex = NULL;
2394 if (pnainvert) *pnainvert = NULL;
2395 if (!na)
2396 return ERROR_INT("na not defined", __func__, 1);
2397 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
2398 return ERROR_INT("invalid sort order", __func__, 1);
2399 if (sorttype != L_SHELL_SORT && sorttype != L_BIN_SORT)
2400 return ERROR_INT("invalid sort type", __func__, 1);
2401 if (!pnasort && !pnaindex && !pnainvert)
2402 return ERROR_INT("nothing to do", __func__, 1);
2403
2404 if (sorttype == L_BIN_SORT) {
2405 numaGetMax(na, &size, NULL);
2406 isize = (l_int32)size;
2407 if (isize > MaxInitPtraSize - 1) {
2408 L_WARNING("array too large; using shell sort\n", __func__);
2409 sorttype = L_SHELL_SORT;
2410 } else {
2411 naindex = numaGetBinSortIndex(na, sortorder);
2412 }
2413 }
2414
2415 if (sorttype == L_SHELL_SORT)
2416 naindex = numaGetSortIndex(na, sortorder);
2417
2418 if (pnasort)
2419 *pnasort = numaSortByIndex(na, naindex);
2420 if (pnainvert)
2421 *pnainvert = numaInvertMap(naindex);
2422 if (pnaindex)
2423 *pnaindex = naindex;
2424 else
2425 numaDestroy(&naindex);
2426 return 0;
2427 }
2428
2429
2430 /*!
2431 * \brief numaSortAutoSelect()
2432 *
2433 * \param[in] nas
2434 * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING
2435 * \return naout output sorted numa, or NULL on error
2436 *
2437 * <pre>
2438 * Notes:
2439 * (1) This does either a shell sort or a bin sort, depending on
2440 * the number of elements in nas and the dynamic range.
2441 * </pre>
2442 */
2443 NUMA *
2444 numaSortAutoSelect(NUMA *nas,
2445 l_int32 sortorder)
2446 {
2447 l_int32 type;
2448
2449 if (!nas)
2450 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
2451 if (numaGetCount(nas) == 0) {
2452 L_WARNING("nas is empty; returning copy\n", __func__);
2453 return numaCopy(nas);
2454 }
2455 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
2456 return (NUMA *)ERROR_PTR("invalid sort order", __func__, NULL);
2457
2458 type = numaChooseSortType(nas);
2459 if (type != L_SHELL_SORT && type != L_BIN_SORT)
2460 return (NUMA *)ERROR_PTR("invalid sort type", __func__, NULL);
2461
2462 if (type == L_BIN_SORT)
2463 return numaBinSort(nas, sortorder);
2464 else /* shell sort */
2465 return numaSort(NULL, nas, sortorder);
2466 }
2467
2468
2469 /*!
2470 * \brief numaSortIndexAutoSelect()
2471 *
2472 * \param[in] nas
2473 * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING
2474 * \return nad indices of nas, sorted by value in nas, or NULL on error
2475 *
2476 * <pre>
2477 * Notes:
2478 * (1) This does either a shell sort or a bin sort, depending on
2479 * the number of elements in nas and the dynamic range.
2480 * </pre>
2481 */
2482 NUMA *
2483 numaSortIndexAutoSelect(NUMA *nas,
2484 l_int32 sortorder)
2485 {
2486 l_int32 type;
2487
2488 if (!nas)
2489 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
2490 if (numaGetCount(nas) == 0) {
2491 L_WARNING("nas is empty; returning copy\n", __func__);
2492 return numaCopy(nas);
2493 }
2494 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
2495 return (NUMA *)ERROR_PTR("invalid sort order", __func__, NULL);
2496 type = numaChooseSortType(nas);
2497 if (type != L_SHELL_SORT && type != L_BIN_SORT)
2498 return (NUMA *)ERROR_PTR("invalid sort type", __func__, NULL);
2499
2500 if (type == L_BIN_SORT)
2501 return numaGetBinSortIndex(nas, sortorder);
2502 else /* shell sort */
2503 return numaGetSortIndex(nas, sortorder);
2504 }
2505
2506
2507 /*!
2508 * \brief numaChooseSortType()
2509 *
2510 * \param[in] nas to be sorted
2511 * \return sorttype L_SHELL_SORT or L_BIN_SORT, or UNDEF on error.
2512 *
2513 * <pre>
2514 * Notes:
2515 * (1) This selects either a shell sort or a bin sort, depending on
2516 * the number of elements in nas and the dynamic range.
2517 * (2) If there are negative values in nas, it selects shell sort.
2518 * </pre>
2519 */
2520 l_int32
2521 numaChooseSortType(NUMA *nas)
2522 {
2523 l_int32 n;
2524 l_float32 minval, maxval;
2525
2526 if (!nas)
2527 return ERROR_INT("nas not defined", __func__, UNDEF);
2528
2529 /* If small histogram or negative values; use shell sort */
2530 numaGetMin(nas, &minval, NULL);
2531 n = numaGetCount(nas);
2532 if (minval < 0.0 || n < 200)
2533 return L_SHELL_SORT;
2534
2535 /* If large maxval, use shell sort */
2536 numaGetMax(nas, &maxval, NULL);
2537 if (maxval > MaxInitPtraSize - 1)
2538 return L_SHELL_SORT;
2539
2540 /* Otherwise, need to compare nlog(n) with maxval.
2541 * The factor of 0.003 was determined by comparing times for
2542 * different histogram sizes and maxval. It is very small
2543 * because binsort is fast and shell sort gets slow for large n. */
2544 if (n * log((l_float32)n) < 0.003 * maxval)
2545 return L_SHELL_SORT;
2546 else
2547 return L_BIN_SORT;
2548 }
2549
2550
2551 /*!
2552 * \brief numaSort()
2553 *
2554 * \param[in] naout output numa; can be NULL or equal to nain
2555 * \param[in] nain input numa
2556 * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING
2557 * \return naout output sorted numa, or NULL on error
2558 *
2559 * <pre>
2560 * Notes:
2561 * (1) Set naout = nain for in-place; otherwise, set naout = NULL.
2562 * (2) Source: Shell sort, modified from K&R, 2nd edition, p.62.
2563 * Slow but simple O(n logn) sort.
2564 * </pre>
2565 */
2566 NUMA *
2567 numaSort(NUMA *naout,
2568 NUMA *nain,
2569 l_int32 sortorder)
2570 {
2571 l_int32 i, n, gap, j;
2572 l_float32 tmp;
2573 l_float32 *array;
2574
2575 if (!nain)
2576 return (NUMA *)ERROR_PTR("nain not defined", __func__, NULL);
2577 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
2578 return (NUMA *)ERROR_PTR("invalid sort order", __func__, NULL);
2579
2580 /* Make naout if necessary; otherwise do in-place */
2581 if (!naout)
2582 naout = numaCopy(nain);
2583 else if (nain != naout)
2584 return (NUMA *)ERROR_PTR("invalid: not in-place", __func__, NULL);
2585 if ((n = numaGetCount(naout)) == 0) {
2586 L_WARNING("naout is empty\n", __func__);
2587 return naout;
2588 }
2589 array = naout->array; /* operate directly on the array */
2590 n = numaGetCount(naout);
2591
2592 /* Shell sort */
2593 for (gap = n/2; gap > 0; gap = gap / 2) {
2594 for (i = gap; i < n; i++) {
2595 for (j = i - gap; j >= 0; j -= gap) {
2596 if ((sortorder == L_SORT_INCREASING &&
2597 array[j] > array[j + gap]) ||
2598 (sortorder == L_SORT_DECREASING &&
2599 array[j] < array[j + gap]))
2600 {
2601 tmp = array[j];
2602 array[j] = array[j + gap];
2603 array[j + gap] = tmp;
2604 }
2605 }
2606 }
2607 }
2608
2609 return naout;
2610 }
2611
2612
2613 /*!
2614 * \brief numaBinSort()
2615 *
2616 * \param[in] nas of non-negative integers with a max that can
2617 * not exceed (MaxInitPtraSize - 1)
2618 * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING
2619 * \return na sorted, or NULL on error
2620 *
2621 * <pre>
2622 * Notes:
2623 * (1) Because this uses a bin sort with buckets of size 1, it
2624 * is not appropriate for sorting either small arrays or
2625 * arrays containing very large integer values. For such
2626 * arrays, use a standard general sort function like
2627 * numaSort().
2628 * (2) You can use numaSortAutoSelect() to decide which sorting
2629 * method to use.
2630 * </pre>
2631 */
2632 NUMA *
2633 numaBinSort(NUMA *nas,
2634 l_int32 sortorder)
2635 {
2636 NUMA *nat, *nad;
2637
2638 if (!nas)
2639 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
2640 if (numaGetCount(nas) == 0) {
2641 L_WARNING("nas is empty; returning copy\n", __func__);
2642 return numaCopy(nas);
2643 }
2644 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
2645 return (NUMA *)ERROR_PTR("invalid sort order", __func__, NULL);
2646
2647 if ((nat = numaGetBinSortIndex(nas, sortorder)) == NULL)
2648 return (NUMA *)ERROR_PTR("bin sort failed", __func__, NULL);
2649 nad = numaSortByIndex(nas, nat);
2650 numaDestroy(&nat);
2651 return nad;
2652 }
2653
2654
2655 /*!
2656 * \brief numaGetSortIndex()
2657 *
2658 * \param[in] na source numa
2659 * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING
2660 * \return na giving an array of indices that would sort
2661 * the input array, or NULL on error
2662 */
2663 NUMA *
2664 numaGetSortIndex(NUMA *na,
2665 l_int32 sortorder)
2666 {
2667 l_int32 i, n, gap, j;
2668 l_float32 tmp;
2669 l_float32 *array; /* copy of input array */
2670 l_float32 *iarray; /* array of indices */
2671 NUMA *naisort;
2672
2673 if (!na)
2674 return (NUMA *)ERROR_PTR("na not defined", __func__, NULL);
2675 if (numaGetCount(na) == 0) {
2676 L_WARNING("na is empty\n", __func__);
2677 return numaCreate(1);
2678 }
2679 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
2680 return (NUMA *)ERROR_PTR("invalid sortorder", __func__, NULL);
2681
2682 n = numaGetCount(na);
2683 if ((array = numaGetFArray(na, L_COPY)) == NULL)
2684 return (NUMA *)ERROR_PTR("array not made", __func__, NULL);
2685 if ((iarray = (l_float32 *)LEPT_CALLOC(n, sizeof(l_float32))) == NULL) {
2686 LEPT_FREE(array);
2687 return (NUMA *)ERROR_PTR("iarray not made", __func__, NULL);
2688 }
2689 for (i = 0; i < n; i++)
2690 iarray[i] = i;
2691
2692 /* Shell sort */
2693 for (gap = n/2; gap > 0; gap = gap / 2) {
2694 for (i = gap; i < n; i++) {
2695 for (j = i - gap; j >= 0; j -= gap) {
2696 if ((sortorder == L_SORT_INCREASING &&
2697 array[j] > array[j + gap]) ||
2698 (sortorder == L_SORT_DECREASING &&
2699 array[j] < array[j + gap]))
2700 {
2701 tmp = array[j];
2702 array[j] = array[j + gap];
2703 array[j + gap] = tmp;
2704 tmp = iarray[j];
2705 iarray[j] = iarray[j + gap];
2706 iarray[j + gap] = tmp;
2707 }
2708 }
2709 }
2710 }
2711
2712 naisort = numaCreate(n);
2713 for (i = 0; i < n; i++)
2714 numaAddNumber(naisort, iarray[i]);
2715
2716 LEPT_FREE(array);
2717 LEPT_FREE(iarray);
2718 return naisort;
2719 }
2720
2721
2722 /*!
2723 * \brief numaGetBinSortIndex()
2724 *
2725 * \param[in] nas of non-negative integers with a max that can
2726 * not exceed (MaxInitPtraSize - 1)
2727 * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING
2728 * \return na sorted, or NULL on error
2729 *
2730 * <pre>
2731 * Notes:
2732 * (1) This creates an array (or lookup table) that contains
2733 * the sorted position of the elements in the input Numa.
2734 * (2) Because it uses a bin sort with buckets of size 1, it
2735 * is not appropriate for sorting either small arrays or
2736 * arrays containing very large integer values. For such
2737 * arrays, use a standard general sort function like
2738 * numaGetSortIndex().
2739 * (3) You can use numaSortIndexAutoSelect() to decide which
2740 * sorting method to use.
2741 * </pre>
2742 */
2743 NUMA *
2744 numaGetBinSortIndex(NUMA *nas,
2745 l_int32 sortorder)
2746 {
2747 l_int32 i, n, isize, ival, imax;
2748 l_float32 minsize, size;
2749 NUMA *na, *nai, *nad;
2750 L_PTRA *paindex;
2751
2752 if (!nas)
2753 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
2754 if (numaGetCount(nas) == 0) {
2755 L_WARNING("nas is empty\n", __func__);
2756 return numaCreate(1);
2757 }
2758 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
2759 return (NUMA *)ERROR_PTR("invalid sort order", __func__, NULL);
2760 numaGetMin(nas, &minsize, NULL);
2761 if (minsize < 0)
2762 return (NUMA *)ERROR_PTR("nas has negative numbers", __func__, NULL);
2763 numaGetMax(nas, &size, NULL);
2764 isize = (l_int32)size;
2765 if (isize > MaxInitPtraSize - 1) {
2766 L_ERROR("array too large: %d elements > max size = %d\n",
2767 __func__, isize, MaxInitPtraSize - 1);
2768 return NULL;
2769 }
2770
2771 /* Set up a ptra holding numa at indices for which there
2772 * are values in nas. Suppose nas has the value 230 at index
2773 * 7355. A numa holding the index 7355 is created and stored
2774 * at the ptra index 230. If there is another value of 230
2775 * in nas, its index is added to the same numa (at index 230
2776 * in the ptra). When finished, the ptra can be scanned for numa,
2777 * and the original indices in the nas can be read out. In this
2778 * way, the ptra effectively sorts the input numbers in the nas. */
2779 paindex = ptraCreate(isize + 1);
2780 n = numaGetCount(nas);
2781 for (i = 0; i < n; i++) {
2782 numaGetIValue(nas, i, &ival);
2783 nai = (NUMA *)ptraGetPtrToItem(paindex, ival);
2784 if (!nai) { /* make it; no shifting will occur */
2785 nai = numaCreate(1);
2786 ptraInsert(paindex, ival, nai, L_MIN_DOWNSHIFT);
2787 }
2788 numaAddNumber(nai, i);
2789 }
2790
2791 /* Sort by scanning the ptra, extracting numas and pulling
2792 * the (index into nas) numbers out of each numa, taken
2793 * successively in requested order. */
2794 ptraGetMaxIndex(paindex, &imax);
2795 nad = numaCreate(0);
2796 if (sortorder == L_SORT_INCREASING) {
2797 for (i = 0; i <= imax; i++) {
2798 na = (NUMA *)ptraRemove(paindex, i, L_NO_COMPACTION);
2799 if (!na) continue;
2800 numaJoin(nad, na, 0, -1);
2801 numaDestroy(&na);
2802 }
2803 } else { /* L_SORT_DECREASING */
2804 for (i = imax; i >= 0; i--) {
2805 na = (NUMA *)ptraRemoveLast(paindex);
2806 if (!na) break; /* they've all been removed */
2807 numaJoin(nad, na, 0, -1);
2808 numaDestroy(&na);
2809 }
2810 }
2811
2812 ptraDestroy(&paindex, FALSE, FALSE);
2813 return nad;
2814 }
2815
2816
2817 /*!
2818 * \brief numaSortByIndex()
2819 *
2820 * \param[in] nas
2821 * \param[in] naindex na that maps from the new numa to the input numa
2822 * \return nad sorted, or NULL on error
2823 */
2824 NUMA *
2825 numaSortByIndex(NUMA *nas,
2826 NUMA *naindex)
2827 {
2828 l_int32 i, n, ni, index;
2829 l_float32 val;
2830 NUMA *nad;
2831
2832 if (!nas)
2833 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
2834 if (!naindex)
2835 return (NUMA *)ERROR_PTR("naindex not defined", __func__, NULL);
2836 n = numaGetCount(nas);
2837 ni = numaGetCount(naindex);
2838 if (n != ni)
2839 return (NUMA *)ERROR_PTR("numa sizes differ", __func__, NULL);
2840 if (n == 0) {
2841 L_WARNING("nas is empty\n", __func__);
2842 return numaCopy(nas);
2843 }
2844
2845 nad = numaCreate(n);
2846 for (i = 0; i < n; i++) {
2847 numaGetIValue(naindex, i, &index);
2848 numaGetFValue(nas, index, &val);
2849 numaAddNumber(nad, val);
2850 }
2851
2852 return nad;
2853 }
2854
2855
2856 /*!
2857 * \brief numaIsSorted()
2858 *
2859 * \param[in] nas
2860 * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING
2861 * \param[out] psorted 1 if sorted; 0 if not
2862 * \return 1 if OK; 0 on error
2863 *
2864 * <pre>
2865 * Notes:
2866 * (1) This is a quick O(n) test if nas is sorted. It is useful
2867 * in situations where the array is likely to be already
2868 * sorted, and a sort operation can be avoided.
2869 * </pre>
2870 */
2871 l_int32
2872 numaIsSorted(NUMA *nas,
2873 l_int32 sortorder,
2874 l_int32 *psorted)
2875 {
2876 l_int32 i, n;
2877 l_float32 prevval, val;
2878
2879 if (!psorted)
2880 return ERROR_INT("&sorted not defined", __func__, 1);
2881 *psorted = FALSE;
2882 if (!nas)
2883 return ERROR_INT("nas not defined", __func__, 1);
2884 if ((n = numaGetCount(nas))== 0) {
2885 L_WARNING("nas is empty\n", __func__);
2886 *psorted = TRUE;
2887 return 0;
2888 }
2889 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
2890 return ERROR_INT("invalid sortorder", __func__, 1);
2891
2892 n = numaGetCount(nas);
2893 numaGetFValue(nas, 0, &prevval);
2894 for (i = 1; i < n; i++) {
2895 numaGetFValue(nas, i, &val);
2896 if ((sortorder == L_SORT_INCREASING && val < prevval) ||
2897 (sortorder == L_SORT_DECREASING && val > prevval))
2898 return 0;
2899 }
2900
2901 *psorted = TRUE;
2902 return 0;
2903 }
2904
2905
2906 /*!
2907 * \brief numaSortPair()
2908 *
2909 * \param[in] nax, nay input arrays
2910 * \param[in] sortorder L_SORT_INCREASING or L_SORT_DECREASING
2911 * \param[out] pnasx sorted
2912 * \param[out] pnasy sorted exactly in order of nasx
2913 * \return 0 if OK, 1 on error
2914 *
2915 * <pre>
2916 * Notes:
2917 * (1) This function sorts the two input arrays, nax and nay,
2918 * together, using nax as the key for sorting.
2919 * </pre>
2920 */
2921 l_ok
2922 numaSortPair(NUMA *nax,
2923 NUMA *nay,
2924 l_int32 sortorder,
2925 NUMA **pnasx,
2926 NUMA **pnasy)
2927 {
2928 l_int32 sorted;
2929 NUMA *naindex;
2930
2931 if (pnasx) *pnasx = NULL;
2932 if (pnasy) *pnasy = NULL;
2933 if (!pnasx || !pnasy)
2934 return ERROR_INT("&nasx and/or &nasy not defined", __func__, 1);
2935 if (!nax)
2936 return ERROR_INT("nax not defined", __func__, 1);
2937 if (!nay)
2938 return ERROR_INT("nay not defined", __func__, 1);
2939 if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
2940 return ERROR_INT("invalid sortorder", __func__, 1);
2941
2942 numaIsSorted(nax, sortorder, &sorted);
2943 if (sorted == TRUE) {
2944 *pnasx = numaCopy(nax);
2945 *pnasy = numaCopy(nay);
2946 } else {
2947 naindex = numaGetSortIndex(nax, sortorder);
2948 *pnasx = numaSortByIndex(nax, naindex);
2949 *pnasy = numaSortByIndex(nay, naindex);
2950 numaDestroy(&naindex);
2951 }
2952
2953 return 0;
2954 }
2955
2956
2957 /*!
2958 * \brief numaInvertMap()
2959 *
2960 * \param[in] nas
2961 * \return nad the inverted map, or NULL on error or if not invertible
2962 *
2963 * <pre>
2964 * Notes:
2965 * (1) This requires that nas contain each integer from 0 to n-1.
2966 * The array is typically an index array into a sort or permutation
2967 * of another array.
2968 * </pre>
2969 */
2970 NUMA *
2971 numaInvertMap(NUMA *nas)
2972 {
2973 l_int32 i, n, val, error;
2974 l_int32 *test;
2975 NUMA *nad;
2976
2977 if (!nas)
2978 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
2979 if ((n = numaGetCount(nas)) == 0) {
2980 L_WARNING("nas is empty\n", __func__);
2981 return numaCopy(nas);
2982 }
2983
2984 nad = numaMakeConstant(0.0, n);
2985 test = (l_int32 *)LEPT_CALLOC(n, sizeof(l_int32));
2986 error = 0;
2987 for (i = 0; i < n; i++) {
2988 numaGetIValue(nas, i, &val);
2989 if (val >= n) {
2990 error = 1;
2991 break;
2992 }
2993 numaReplaceNumber(nad, val, i);
2994 if (test[val] == 0) {
2995 test[val] = 1;
2996 } else {
2997 error = 1;
2998 break;
2999 }
3000 }
3001
3002 LEPT_FREE(test);
3003 if (error) {
3004 numaDestroy(&nad);
3005 return (NUMA *)ERROR_PTR("nas not invertible", __func__, NULL);
3006 }
3007
3008 return nad;
3009 }
3010
3011 /*!
3012 * \brief numaAddSorted()
3013 *
3014 * \param[in] na sorted input
3015 * \param[in] val value to be inserted in sorted order
3016 * \return 0 if OK, 1 on error
3017 *
3018 * <pre>
3019 * Notes:
3020 * (1) The input %na is sorted. This function determines the
3021 * sort order of %na and inserts %val into the array.
3022 * </pre>
3023 */
3024 l_ok
3025 numaAddSorted(NUMA *na,
3026 l_float32 val)
3027 {
3028 l_int32 index;
3029
3030 if (!na)
3031 return ERROR_INT("na not defined", __func__, 1);
3032
3033 if (numaFindSortedLoc(na, val, &index) == 1)
3034 return ERROR_INT("insert failure", __func__, 1);
3035 numaInsertNumber(na, index, val);
3036 return 0;
3037 }
3038
3039
3040 /*!
3041 * \brief numaFindSortedLoc()
3042 *
3043 * \param[in] na sorted input
3044 * \param[in] val value to be inserted in sorted order
3045 * \param[out] *ploc index location to insert @val
3046 * \return 0 if OK, 1 on error
3047 *
3048 * <pre>
3049 * Notes:
3050 * (1) The input %na is sorted. This determines the sort order of @na,
3051 * either increasing or decreasing, and does a binary search for the
3052 * location to insert %val into the array. The search is O(log n).
3053 * (2) The index returned is the location to insert into the array.
3054 * The value at the index, and all values to the right, are
3055 * moved to the right (increasing their index location by 1).
3056 * (3) If n is the size of %na, *ploc can be anything in [0 ... n].
3057 * if *ploc == 0, the value is inserted at the beginning of the
3058 * array; if *ploc == n, it is inserted at the end.
3059 * (4) If the size of %na is 1, insert with an increasing sort.
3060 * </pre>
3061 */
3062 l_ok
3063 numaFindSortedLoc(NUMA *na,
3064 l_float32 val,
3065 l_int32 *pindex)
3066 {
3067 l_int32 n, increasing, lindex, rindex, midindex;
3068 l_float32 val0, valn, valmid;
3069
3070 if (!pindex)
3071 return ERROR_INT("&index not defined", __func__, 1);
3072 *pindex = 0;
3073 if (!na)
3074 return ERROR_INT("na not defined", __func__, 1);
3075
3076 n = numaGetCount(na);
3077 if (n == 0) return 0;
3078 numaGetFValue(na, 0, &val0);
3079 if (n == 1) { /* use increasing sort order */
3080 if (val >= val0)
3081 *pindex = 1;
3082 return 0;
3083 }
3084
3085 /* ----------------- n >= 2 ----------------- */
3086 numaGetFValue(na, n - 1, &valn);
3087 increasing = (valn >= val0) ? 1 : 0; /* sort order */
3088
3089 /* Check if outside bounds of existing array */
3090 if (increasing) {
3091 if (val < val0) {
3092 *pindex = 0;
3093 return 0;
3094 } else if (val > valn) {
3095 *pindex = n;
3096 return 0;
3097 }
3098 } else { /* decreasing */
3099 if (val > val0) {
3100 *pindex = 0;
3101 return 0;
3102 } else if (val < valn) {
3103 *pindex = n;
3104 return 0;
3105 }
3106 }
3107
3108 /* Within bounds of existing array; search */
3109 lindex = 0;
3110 rindex = n - 1;
3111 while (1) {
3112 midindex = (lindex + rindex) / 2;
3113 if (midindex == lindex || midindex == rindex) break;
3114 numaGetFValue(na, midindex, &valmid);
3115 if (increasing) {
3116 if (val > valmid)
3117 lindex = midindex;
3118 else
3119 rindex = midindex;
3120 } else { /* decreasing */
3121 if (val > valmid)
3122 rindex = midindex;
3123 else
3124 lindex = midindex;
3125 }
3126 }
3127 *pindex = rindex;
3128 return 0;
3129 }
3130
3131
3132 /*----------------------------------------------------------------------*
3133 * Random permutation *
3134 *----------------------------------------------------------------------*/
3135 /*!
3136 * \brief numaPseudorandomSequence()
3137 *
3138 * \param[in] size of sequence
3139 * \param[in] seed for random number generation
3140 * \return na pseudorandom on [0,...,size - 1], or NULL on error
3141 *
3142 * <pre>
3143 * Notes:
3144 * (1) This uses the Durstenfeld shuffle.
3145 * See: http://en.wikipedia.org/wiki/Fisher–Yates_shuffle.
3146 * Result is a pseudorandom permutation of the sequence of integers
3147 * from 0 to size - 1.
3148 * </pre>
3149 */
3150 NUMA *
3151 numaPseudorandomSequence(l_int32 size,
3152 l_int32 seed)
3153 {
3154 l_int32 i, index, temp;
3155 l_int32 *array;
3156 NUMA *na;
3157
3158 if (size <= 0)
3159 return (NUMA *)ERROR_PTR("size <= 0", __func__, NULL);
3160
3161 if ((array = (l_int32 *)LEPT_CALLOC(size, sizeof(l_int32))) == NULL)
3162 return (NUMA *)ERROR_PTR("array not made", __func__, NULL);
3163 for (i = 0; i < size; i++)
3164 array[i] = i;
3165 srand(seed);
3166 for (i = size - 1; i > 0; i--) {
3167 index = (l_int32)((i + 1) * ((l_float64)rand() / (l_float64)RAND_MAX));
3168 index = L_MIN(index, i);
3169 temp = array[i];
3170 array[i] = array[index];
3171 array[index] = temp;
3172 }
3173
3174 na = numaCreateFromIArray(array, size);
3175 LEPT_FREE(array);
3176 return na;
3177 }
3178
3179
3180 /*!
3181 * \brief numaRandomPermutation()
3182 *
3183 * \param[in] nas input array
3184 * \param[in] seed for random number generation
3185 * \return nas randomly shuffled array, or NULL on error
3186 */
3187 NUMA *
3188 numaRandomPermutation(NUMA *nas,
3189 l_int32 seed)
3190 {
3191 l_int32 i, index, size;
3192 l_float32 val;
3193 NUMA *naindex, *nad;
3194
3195 if (!nas)
3196 return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
3197 if ((size = numaGetCount(nas)) == 0) {
3198 L_WARNING("nas is empty\n", __func__);
3199 return numaCopy(nas);
3200 }
3201
3202 naindex = numaPseudorandomSequence(size, seed);
3203 nad = numaCreate(size);
3204 for (i = 0; i < size; i++) {
3205 numaGetIValue(naindex, i, &index);
3206 numaGetFValue(nas, index, &val);
3207 numaAddNumber(nad, val);
3208 }
3209 numaDestroy(&naindex);
3210 return nad;
3211 }
3212
3213
3214 /*----------------------------------------------------------------------*
3215 * Functions requiring sorting *
3216 *----------------------------------------------------------------------*/
3217 /*!
3218 * \brief numaGetRankValue()
3219 *
3220 * \param[in] na source numa
3221 * \param[in] fract use 0.0 for smallest, 1.0 for largest
3222 * \param[in] nasort [optional] increasing sorted version of na
3223 * \param[in] usebins 0 for general sort; 1 for bin sort
3224 * \param[out] pval rank val
3225 * \return 0 if OK; 1 on error
3226 *
3227 * <pre>
3228 * Notes:
3229 * (1) Computes the rank value of a number in the %na, which is
3230 * the number that is a fraction %fract from the small
3231 * end of the sorted version of %na.
3232 * (2) If you do this multiple times for different rank values,
3233 * sort the array in advance and use that for %nasort;
3234 * if you're only calling this once, input %nasort == NULL.
3235 * (3) If %usebins == 1, this uses a bin sorting method.
3236 * Use this only where:
3237 * * the numbers are non-negative integers
3238 * * there are over 100 numbers
3239 * * the maximum value is less than about 50,000
3240 * (4) The advantage of using a bin sort is that it is O(n),
3241 * instead of O(nlogn) for general sort routines.
3242 * </pre>
3243 */
3244 l_ok
3245 numaGetRankValue(NUMA *na,
3246 l_float32 fract,
3247 NUMA *nasort,
3248 l_int32 usebins,
3249 l_float32 *pval)
3250 {
3251 l_int32 n, index;
3252 NUMA *nas;
3253
3254 if (!pval)
3255 return ERROR_INT("&val not defined", __func__, 1);
3256 *pval = 0.0; /* init */
3257 if (!na)
3258 return ERROR_INT("na not defined", __func__, 1);
3259 if ((n = numaGetCount(na)) == 0)
3260 return ERROR_INT("na empty", __func__, 1);
3261 if (fract < 0.0 || fract > 1.0)
3262 return ERROR_INT("fract not in [0.0 ... 1.0]", __func__, 1);
3263
3264 if (nasort) {
3265 nas = nasort;
3266 } else {
3267 if (usebins == 0)
3268 nas = numaSort(NULL, na, L_SORT_INCREASING);
3269 else
3270 nas = numaBinSort(na, L_SORT_INCREASING);
3271 if (!nas)
3272 return ERROR_INT("nas not made", __func__, 1);
3273 }
3274 index = (l_int32)(fract * (l_float32)(n - 1) + 0.5);
3275 numaGetFValue(nas, index, pval);
3276
3277 if (!nasort) numaDestroy(&nas);
3278 return 0;
3279 }
3280
3281
3282 /*!
3283 * \brief numaGetMedian()
3284 *
3285 * \param[in] na source numa
3286 * \param[out] pval median value
3287 * \return 0 if OK; 1 on error
3288 *
3289 * <pre>
3290 * Notes:
3291 * (1) Computes the median value of the numbers in the numa, by
3292 * sorting and finding the middle value in the sorted array.
3293 * </pre>
3294 */
3295 l_ok
3296 numaGetMedian(NUMA *na,
3297 l_float32 *pval)
3298 {
3299 if (!pval)
3300 return ERROR_INT("&val not defined", __func__, 1);
3301 *pval = 0.0; /* init */
3302 if (!na || numaGetCount(na) == 0)
3303 return ERROR_INT("na not defined or empty", __func__, 1);
3304
3305 return numaGetRankValue(na, 0.5, NULL, 0, pval);
3306 }
3307
3308
3309 /*!
3310 * \brief numaGetBinnedMedian()
3311 *
3312 * \param[in] na source numa
3313 * \param[out] pval integer median value
3314 * \return 0 if OK; 1 on error
3315 *
3316 * <pre>
3317 * Notes:
3318 * (1) Computes the median value of the numbers in the numa,
3319 * using bin sort and finding the middle value in the sorted array.
3320 * (2) See numaGetRankValue() for conditions on na for which
3321 * this should be used. Otherwise, use numaGetMedian().
3322 * </pre>
3323 */
3324 l_ok
3325 numaGetBinnedMedian(NUMA *na,
3326 l_int32 *pval)
3327 {
3328 l_int32 ret;
3329 l_float32 fval;
3330
3331 if (!pval)
3332 return ERROR_INT("&val not defined", __func__, 1);
3333 *pval = 0; /* init */
3334 if (!na || numaGetCount(na) == 0)
3335 return ERROR_INT("na not defined or empty", __func__, 1);
3336
3337 ret = numaGetRankValue(na, 0.5, NULL, 1, &fval);
3338 *pval = lept_roundftoi(fval);
3339 return ret;
3340 }
3341
3342
3343 /*!
3344 * \brief numaGetMeanDevFromMedian()
3345 *
3346 * \param[in] na source numa
3347 * \param[in] med median value
3348 * \param[out] pdev average absolute value deviation from median value
3349 * \return 0 if OK; 1 on error
3350 */
3351 l_ok
3352 numaGetMeanDevFromMedian(NUMA *na,
3353 l_float32 med,
3354 l_float32 *pdev)
3355 {
3356 l_int32 i, n;
3357 l_float32 val, dev;
3358
3359 if (!pdev)
3360 return ERROR_INT("&dev not defined", __func__, 1);
3361 *pdev = 0.0; /* init */
3362 if (!na)
3363 return ERROR_INT("na not defined", __func__, 1);
3364 if ((n = numaGetCount(na)) == 0)
3365 return ERROR_INT("na is empty", __func__, 1);
3366
3367 dev = 0.0;
3368 for (i = 0; i < n; i++) {
3369 numaGetFValue(na, i, &val);
3370 dev += L_ABS(val - med);
3371 }
3372 *pdev = dev / (l_float32)n;
3373 return 0;
3374 }
3375
3376
3377 /*!
3378 * \brief numaGetMedianDevFromMedian()
3379 *
3380 * \param[in] na source numa
3381 * \param[out] pmed [optional] median value
3382 * \param[out] pdev median deviation from median val
3383 * \return 0 if OK; 1 on error
3384 *
3385 * <pre>
3386 * Notes:
3387 * (1) Finds the median of the absolute value of the deviation from
3388 * the median value in the array. Why take the absolute value?
3389 * Consider the case where you have values equally distributed
3390 * about both sides of a median value. Without taking the absolute
3391 * value of the differences, you will get 0 for the deviation,
3392 * and this is not useful.
3393 * </pre>
3394 */
3395 l_ok
3396 numaGetMedianDevFromMedian(NUMA *na,
3397 l_float32 *pmed,
3398 l_float32 *pdev)
3399 {
3400 l_int32 n, i;
3401 l_float32 val, med;
3402 NUMA *nadev;
3403
3404 if (pmed) *pmed = 0.0;
3405 if (!pdev)
3406 return ERROR_INT("&dev not defined", __func__, 1);
3407 *pdev = 0.0;
3408 if (!na || numaGetCount(na) == 0)
3409 return ERROR_INT("na not defined or empty", __func__, 1);
3410
3411 numaGetMedian(na, &med);
3412 if (pmed) *pmed = med;
3413 n = numaGetCount(na);
3414 nadev = numaCreate(n);
3415 for (i = 0; i < n; i++) {
3416 numaGetFValue(na, i, &val);
3417 numaAddNumber(nadev, L_ABS(val - med));
3418 }
3419 numaGetMedian(nadev, pdev);
3420
3421 numaDestroy(&nadev);
3422 return 0;
3423 }
3424
3425
3426 /*!
3427 * \brief numaGetMode()
3428 *
3429 * \param[in] na source numa
3430 * \param[out] pval mode val
3431 * \param[out] pcount [optional] mode count
3432 * \return 0 if OK; 1 on error
3433 *
3434 * <pre>
3435 * Notes:
3436 * (1) Computes the mode value of the numbers in the numa, by
3437 * sorting and finding the value of the number with the
3438 * largest count.
3439 * (2) Optionally, also returns that count.
3440 * </pre>
3441 */
3442 l_ok
3443 numaGetMode(NUMA *na,
3444 l_float32 *pval,
3445 l_int32 *pcount)
3446 {
3447 l_int32 i, n, maxcount, prevcount;
3448 l_float32 val, maxval, prevval;
3449 l_float32 *array;
3450 NUMA *nasort;
3451
3452 if (pcount) *pcount = 0;
3453 if (!pval)
3454 return ERROR_INT("&val not defined", __func__, 1);
3455 *pval = 0.0;
3456 if (!na)
3457 return ERROR_INT("na not defined", __func__, 1);
3458 if ((n = numaGetCount(na)) == 0)
3459 return ERROR_INT("na is empty", __func__, 1);
3460
3461 if ((nasort = numaSort(NULL, na, L_SORT_DECREASING)) == NULL)
3462 return ERROR_INT("nas not made", __func__, 1);
3463 array = numaGetFArray(nasort, L_NOCOPY);
3464
3465 /* Initialize with array[0] */
3466 prevval = array[0];
3467 prevcount = 1;
3468 maxval = prevval;
3469 maxcount = prevcount;
3470
3471 /* Scan the sorted array, aggregating duplicates */
3472 for (i = 1; i < n; i++) {
3473 val = array[i];
3474 if (val == prevval) {
3475 prevcount++;
3476 } else { /* new value */
3477 if (prevcount > maxcount) { /* new max */
3478 maxcount = prevcount;
3479 maxval = prevval;
3480 }
3481 prevval = val;
3482 prevcount = 1;
3483 }
3484 }
3485
3486 /* Was the mode the last run of elements? */
3487 if (prevcount > maxcount) {
3488 maxcount = prevcount;
3489 maxval = prevval;
3490 }
3491
3492 *pval = maxval;
3493 if (pcount)
3494 *pcount = maxcount;
3495
3496 numaDestroy(&nasort);
3497 return 0;
3498 }
3499
3500
3501 /*----------------------------------------------------------------------*
3502 * Rearrangements *
3503 *----------------------------------------------------------------------*/
3504 /*!
3505 * \brief numaJoin()
3506 *
3507 * \param[in] nad dest numa; add to this one
3508 * \param[in] nas [optional] source numa; add from this one
3509 * \param[in] istart starting index in nas
3510 * \param[in] iend ending index in nas; use -1 to cat all
3511 * \return 0 if OK, 1 on error
3512 *
3513 * <pre>
3514 * Notes:
3515 * (1) istart < 0 is taken to mean 'read from the start' (istart = 0)
3516 * (2) iend < 0 means 'read to the end'
3517 * (3) if nas == NULL, this is a no-op
3518 * </pre>
3519 */
3520 l_ok
3521 numaJoin(NUMA *nad,
3522 NUMA *nas,
3523 l_int32 istart,
3524 l_int32 iend)
3525 {
3526 l_int32 n, i;
3527 l_float32 val;
3528
3529 if (!nad)
3530 return ERROR_INT("nad not defined", __func__, 1);
3531 if (!nas)
3532 return 0;
3533
3534 if (istart < 0)
3535 istart = 0;
3536 n = numaGetCount(nas);
3537 if (iend < 0 || iend >= n)
3538 iend = n - 1;
3539 if (istart > iend)
3540 return ERROR_INT("istart > iend; nothing to add", __func__, 1);
3541
3542 for (i = istart; i <= iend; i++) {
3543 numaGetFValue(nas, i, &val);
3544 numaAddNumber(nad, val);
3545 }
3546
3547 return 0;
3548 }
3549
3550
3551 /*!
3552 * \brief numaaJoin()
3553 *
3554 * \param[in] naad dest naa; add to this one
3555 * \param[in] naas [optional] source naa; add from this one
3556 * \param[in] istart starting index in nas
3557 * \param[in] iend ending index in naas; use -1 to cat all
3558 * \return 0 if OK, 1 on error
3559 *
3560 * <pre>
3561 * Notes:
3562 * (1) istart < 0 is taken to mean 'read from the start' (istart = 0)
3563 * (2) iend < 0 means 'read to the end'
3564 * (3) if naas == NULL, this is a no-op
3565 * </pre>
3566 */
3567 l_ok
3568 numaaJoin(NUMAA *naad,
3569 NUMAA *naas,
3570 l_int32 istart,
3571 l_int32 iend)
3572 {
3573 l_int32 n, i;
3574 NUMA *na;
3575
3576 if (!naad)
3577 return ERROR_INT("naad not defined", __func__, 1);
3578 if (!naas)
3579 return 0;
3580
3581 if (istart < 0)
3582 istart = 0;
3583 n = numaaGetCount(naas);
3584 if (iend < 0 || iend >= n)
3585 iend = n - 1;
3586 if (istart > iend)
3587 return ERROR_INT("istart > iend; nothing to add", __func__, 1);
3588
3589 for (i = istart; i <= iend; i++) {
3590 na = numaaGetNuma(naas, i, L_CLONE);
3591 numaaAddNuma(naad, na, L_INSERT);
3592 }
3593
3594 return 0;
3595 }
3596
3597
3598 /*!
3599 * \brief numaaFlattenToNuma()
3600 *
3601 * \param[in] naa
3602 * \return numa, or NULL on error
3603 *
3604 * <pre>
3605 * Notes:
3606 * (1) This 'flattens' the Numaa to a Numa, by joining successively
3607 * each Numa in the Numaa.
3608 * (2) It doesn't make any assumptions about the location of the
3609 * Numas in the Numaa array, unlike most Numaa functions.
3610 * (3) It leaves the input Numaa unchanged.
3611 * </pre>
3612 */
3613 NUMA *
3614 numaaFlattenToNuma(NUMAA *naa)
3615 {
3616 l_int32 i, nalloc;
3617 NUMA *na, *nad;
3618 NUMA **array;
3619
3620 if (!naa)
3621 return (NUMA *)ERROR_PTR("naa not defined", __func__, NULL);
3622
3623 nalloc = naa->nalloc;
3624 array = numaaGetPtrArray(naa);
3625 nad = numaCreate(0);
3626 for (i = 0; i < nalloc; i++) {
3627 na = array[i];
3628 if (!na) continue;
3629 numaJoin(nad, na, 0, -1);
3630 }
3631
3632 return nad;
3633 }
3634