comparison mupdf-source/thirdparty/leptonica/src/bytearray.c @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file bytearray.c
29 * <pre>
30 *
31 * Functions for handling byte arrays, in analogy with C++ 'strings'
32 *
33 * Creation, copy, clone, destruction
34 * L_BYTEA *l_byteaCreate()
35 * L_BYTEA *l_byteaInitFromMem()
36 * L_BYTEA *l_byteaInitFromFile()
37 * L_BYTEA *l_byteaInitFromStream()
38 * L_BYTEA *l_byteaCopy()
39 * void l_byteaDestroy()
40 *
41 * Accessors
42 * size_t l_byteaGetSize()
43 * l_uint8 *l_byteaGetData()
44 * l_uint8 *l_byteaCopyData()
45 *
46 * Appending
47 * l_int32 l_byteaAppendData()
48 * l_int32 l_byteaAppendString()
49 * static l_int32 l_byteaExtendArrayToSize()
50 *
51 * Join/Split
52 * l_int32 l_byteaJoin()
53 * l_int32 l_byteaSplit()
54 *
55 * Search
56 * l_int32 l_byteaFindEachSequence()
57 *
58 * Output to file
59 * l_int32 l_byteaWrite()
60 * l_int32 l_byteaWriteStream()
61 *
62 * The internal data array is always null-terminated, for ease of use
63 * in the event that it is an ascii string without null bytes.
64 * </pre>
65 */
66
67 #ifdef HAVE_CONFIG_H
68 #include <config_auto.h>
69 #endif /* HAVE_CONFIG_H */
70
71 #include <string.h>
72 #include "allheaders.h"
73 #include "array_internal.h"
74
/* Bounds on array size.  MaxArraySize is the largest byte count accepted
 * by the creation and extension functions in this file; InitialArraySize
 * is the capacity l_byteaCreate() falls back to when the requested size
 * is 0 or exceeds MaxArraySize. */
static const l_uint32 MaxArraySize = 1000000000; /* 10^9 bytes */
static const l_int32 InitialArraySize = 200; /*!< arbitrary default value */

/* Static function: grows the data array of an L_BYTEA; defined below
 * in the "Appending" section. */
static l_int32 l_byteaExtendArrayToSize(L_BYTEA *ba, size_t size);
81
82 /*---------------------------------------------------------------------*
83 * Creation, copy, clone, destruction *
84 *---------------------------------------------------------------------*/
85 /*!
86 * \brief l_byteaCreate()
87 *
88 * \param[in] nbytes determines initial size of data array
89 * \return l_bytea, or NULL on error
90 *
91 * <pre>
92 * Notes:
93 * (1) The allocated array is n + 1 bytes. This allows room
94 * for null termination.
95 * </pre>
96 */
97 L_BYTEA *
98 l_byteaCreate(size_t nbytes)
99 {
100 L_BYTEA *ba;
101
102 if (nbytes <= 0 || nbytes > MaxArraySize)
103 nbytes = InitialArraySize;
104 ba = (L_BYTEA *)LEPT_CALLOC(1, sizeof(L_BYTEA));
105 ba->data = (l_uint8 *)LEPT_CALLOC(nbytes + 1, sizeof(l_uint8));
106 if (!ba->data) {
107 l_byteaDestroy(&ba);
108 return (L_BYTEA *)ERROR_PTR("ba array not made", __func__, NULL);
109 }
110 ba->nalloc = nbytes + 1;
111 ba->refcount = 1;
112 return ba;
113 }
114
115
116 /*!
117 * \brief l_byteaInitFromMem()
118 *
119 * \param[in] data to be copied to the array
120 * \param[in] size amount of data
121 * \return l_bytea, or NULL on error
122 */
123 L_BYTEA *
124 l_byteaInitFromMem(const l_uint8 *data,
125 size_t size)
126 {
127 L_BYTEA *ba;
128
129 if (!data)
130 return (L_BYTEA *)ERROR_PTR("data not defined", __func__, NULL);
131 if (size <= 0)
132 return (L_BYTEA *)ERROR_PTR("no bytes to initialize", __func__, NULL);
133 if (size > MaxArraySize)
134 return (L_BYTEA *)ERROR_PTR("size is too big", __func__, NULL);
135
136 if ((ba = l_byteaCreate(size)) == NULL)
137 return (L_BYTEA *)ERROR_PTR("ba not made", __func__, NULL);
138 memcpy(ba->data, data, size);
139 ba->size = size;
140 return ba;
141 }
142
143
144 /*!
145 * \brief l_byteaInitFromFile()
146 *
147 * \param[in] fname
148 * \return l_bytea, or NULL on error
149 */
150 L_BYTEA *
151 l_byteaInitFromFile(const char *fname)
152 {
153 FILE *fp;
154 L_BYTEA *ba;
155
156 if (!fname)
157 return (L_BYTEA *)ERROR_PTR("fname not defined", __func__, NULL);
158
159 if ((fp = fopenReadStream(fname)) == NULL)
160 return (L_BYTEA *)ERROR_PTR_1("file stream not opened",
161 fname, __func__, NULL);
162 ba = l_byteaInitFromStream(fp);
163 fclose(fp);
164 if (!ba)
165 return (L_BYTEA *)ERROR_PTR_1("ba not made", fname, __func__, NULL);
166 return ba;
167 }
168
169
170 /*!
171 * \brief l_byteaInitFromStream()
172 *
173 * \param[in] fp file stream
174 * \return l_bytea, or NULL on error
175 */
176 L_BYTEA *
177 l_byteaInitFromStream(FILE *fp)
178 {
179 l_uint8 *data;
180 size_t nbytes;
181 L_BYTEA *ba;
182
183 if (!fp)
184 return (L_BYTEA *)ERROR_PTR("stream not defined", __func__, NULL);
185
186 if ((data = l_binaryReadStream(fp, &nbytes)) == NULL)
187 return (L_BYTEA *)ERROR_PTR("data not read", __func__, NULL);
188 if ((ba = l_byteaCreate(nbytes)) == NULL) {
189 LEPT_FREE(data);
190 return (L_BYTEA *)ERROR_PTR("ba not made", __func__, NULL);
191 }
192 memcpy(ba->data, data, nbytes);
193 ba->size = nbytes;
194 LEPT_FREE(data);
195 return ba;
196 }
197
198
199 /*!
200 * \brief l_byteaCopy()
201 *
202 * \param[in] bas source lba
203 * \param[in] copyflag L_COPY, L_CLONE
204 * \return clone or copy of bas, or NULL on error
205 *
206 * <pre>
207 * Notes:
208 * (1) If cloning, up the refcount and return a ptr to %bas.
209 * </pre>
210 */
211 L_BYTEA *
212 l_byteaCopy(L_BYTEA *bas,
213 l_int32 copyflag)
214 {
215 if (!bas)
216 return (L_BYTEA *)ERROR_PTR("bas not defined", __func__, NULL);
217
218 if (copyflag == L_CLONE) {
219 bas->refcount++;
220 return bas;
221 }
222
223 return l_byteaInitFromMem(bas->data, bas->size);
224 }
225
226
227 /*!
228 * \brief l_byteaDestroy()
229 *
230 * \param[in,out] pba will be set to null before returning
231 * \return void
232 *
233 * <pre>
234 * Notes:
235 * (1) Decrements the ref count and, if 0, destroys the lba.
236 * (2) Always nulls the input ptr.
237 * (3) If the data has been previously removed, the lba will
238 * have been nulled, so this will do nothing.
239 * </pre>
240 */
241 void
242 l_byteaDestroy(L_BYTEA **pba)
243 {
244 L_BYTEA *ba;
245
246 if (pba == NULL) {
247 L_WARNING("ptr address is null!\n", __func__);
248 return;
249 }
250
251 if ((ba = *pba) == NULL)
252 return;
253
254 /* Decrement the ref count. If it is 0, destroy the lba. */
255 if (--ba->refcount == 0) {
256 if (ba->data) LEPT_FREE(ba->data);
257 LEPT_FREE(ba);
258 }
259 *pba = NULL;
260 }
261
262
263 /*---------------------------------------------------------------------*
264 * Accessors *
265 *---------------------------------------------------------------------*/
266 /*!
267 * \brief l_byteaGetSize()
268 *
269 * \param[in] ba
270 * \return size of stored byte array, or 0 on error
271 */
272 size_t
273 l_byteaGetSize(L_BYTEA *ba)
274 {
275 if (!ba)
276 return ERROR_INT("ba not defined", __func__, 0);
277 return ba->size;
278 }
279
280
281 /*!
282 * \brief l_byteaGetData()
283 *
284 * \param[in] ba
285 * \param[out] psize size of data in lba
286 * \return ptr to existing data array, or NULL on error
287 *
288 * <pre>
289 * Notes:
290 * (1) The returned ptr is owned by %ba. Do not free it!
291 * </pre>
292 */
293 l_uint8 *
294 l_byteaGetData(L_BYTEA *ba,
295 size_t *psize)
296 {
297 if (!ba)
298 return (l_uint8 *)ERROR_PTR("ba not defined", __func__, NULL);
299 if (!psize)
300 return (l_uint8 *)ERROR_PTR("&size not defined", __func__, NULL);
301
302 *psize = ba->size;
303 return ba->data;
304 }
305
306
307 /*!
308 * \brief l_byteaCopyData()
309 *
310 * \param[in] ba
311 * \param[out] psize size of data in lba
312 * \return copy of data in use in the data array, or NULL on error.
313 *
314 * <pre>
315 * Notes:
316 * (1) The returned data is owned by the caller. The input %ba
317 * still owns the original data array.
318 * </pre>
319 */
320 l_uint8 *
321 l_byteaCopyData(L_BYTEA *ba,
322 size_t *psize)
323 {
324 l_uint8 *data;
325
326 if (!psize)
327 return (l_uint8 *)ERROR_PTR("&size not defined", __func__, NULL);
328 *psize = 0;
329 if (!ba)
330 return (l_uint8 *)ERROR_PTR("ba not defined", __func__, NULL);
331
332 data = l_byteaGetData(ba, psize);
333 return l_binaryCopy(data, *psize);
334 }
335
336
337 /*---------------------------------------------------------------------*
338 * Appending *
339 *---------------------------------------------------------------------*/
340 /*!
341 * \brief l_byteaAppendData()
342 *
343 * \param[in] ba
344 * \param[in] newdata byte array to be appended
345 * \param[in] newbytes size of data array
346 * \return 0 if OK, 1 on error
347 */
348 l_ok
349 l_byteaAppendData(L_BYTEA *ba,
350 const l_uint8 *newdata,
351 size_t newbytes)
352 {
353 size_t size, nalloc, reqsize;
354
355 if (!ba)
356 return ERROR_INT("ba not defined", __func__, 1);
357 if (!newdata)
358 return ERROR_INT("newdata not defined", __func__, 1);
359
360 size = l_byteaGetSize(ba);
361 reqsize = size + newbytes + 1;
362 nalloc = ba->nalloc;
363 if (nalloc < reqsize) {
364 if (l_byteaExtendArrayToSize(ba, 2 * reqsize))
365 return ERROR_INT("extension failed", __func__, 1);
366 }
367
368 memcpy(ba->data + size, newdata, newbytes);
369 ba->size += newbytes;
370 return 0;
371 }
372
373
374 /*!
375 * \brief l_byteaAppendString()
376 *
377 * \param[in] ba
378 * \param[in] str null-terminated string to be appended
379 * \return 0 if OK, 1 on error
380 */
381 l_ok
382 l_byteaAppendString(L_BYTEA *ba,
383 const char *str)
384 {
385 size_t size, len, nalloc, reqsize;
386
387 if (!ba)
388 return ERROR_INT("ba not defined", __func__, 1);
389 if (!str)
390 return ERROR_INT("str not defined", __func__, 1);
391
392 size = l_byteaGetSize(ba);
393 len = strlen(str);
394 reqsize = size + len + 1;
395 nalloc = ba->nalloc;
396 if (nalloc < reqsize) {
397 if (l_byteaExtendArrayToSize(ba, 2 * reqsize))
398 return ERROR_INT("extension failed", __func__, 1);
399 }
400
401 memcpy(ba->data + size, str, len);
402 ba->size += len;
403 return 0;
404 }
405
406
407 /*!
408 * \brief l_byteaExtendArrayToSize()
409 *
410 * \param[in] ba
411 * \param[in] size new size of lba data array
412 * \return 0 if OK; 1 on error
413 *
414 * <pre>
415 * Notes:
416 * (1) If necessary, reallocs the byte array to %size.
417 * (2) The max buffer size is 1 GB.
418 * </pre>
419 */
420 static l_int32
421 l_byteaExtendArrayToSize(L_BYTEA *ba,
422 size_t size)
423 {
424 if (!ba)
425 return ERROR_INT("ba not defined", __func__, 1);
426 if (ba->nalloc > MaxArraySize) /* belt & suspenders */
427 return ERROR_INT("ba has too many ptrs", __func__, 1);
428 if (size > MaxArraySize)
429 return ERROR_INT("size > 1 GB; too large", __func__, 1);
430 if (size <= ba->nalloc) {
431 L_INFO("size too small; no extension\n", __func__);
432 return 0;
433 }
434
435 if ((ba->data =
436 (l_uint8 *)reallocNew((void **)&ba->data, ba->nalloc, size)) == NULL)
437 return ERROR_INT("new array not returned", __func__, 1);
438 ba->nalloc = size;
439 return 0;
440 }
441
442
443 /*---------------------------------------------------------------------*
444 * String join/split *
445 *---------------------------------------------------------------------*/
446 /*!
447 * \brief l_byteaJoin()
448 *
449 * \param[in] ba1
450 * \param[in,out] pba2 data array is added to the one in ba1;
451 * then ba2 is destroyed and its pointer is nulled.
452 * \return 0 if OK, 1 on error
453 *
454 * <pre>
455 * Notes:
456 * (1) It is a no-op, not an error, for %ba2 to be null.
457 * </pre>
458 */
459 l_ok
460 l_byteaJoin(L_BYTEA *ba1,
461 L_BYTEA **pba2)
462 {
463 l_uint8 *data2;
464 size_t nbytes2;
465 L_BYTEA *ba2;
466
467 if (!ba1)
468 return ERROR_INT("ba1 not defined", __func__, 1);
469 if (!pba2)
470 return ERROR_INT("&ba2 not defined", __func__, 1);
471 if ((ba2 = *pba2) == NULL) return 0;
472
473 data2 = l_byteaGetData(ba2, &nbytes2);
474 l_byteaAppendData(ba1, data2, nbytes2);
475
476 l_byteaDestroy(pba2);
477 return 0;
478 }
479
480
481 /*!
482 * \brief l_byteaSplit()
483 *
484 * \param[in] ba1 lba to split; array bytes nulled beyond the split loc
485 * \param[in] splitloc location in ba1 to split; ba2 begins there
486 * \param[out] pba2 with data starting at splitloc
487 * \return 0 if OK, 1 on error
488 */
489 l_ok
490 l_byteaSplit(L_BYTEA *ba1,
491 size_t splitloc,
492 L_BYTEA **pba2)
493 {
494 l_uint8 *data1;
495 size_t nbytes1, nbytes2;
496
497 if (!pba2)
498 return ERROR_INT("&ba2 not defined", __func__, 1);
499 *pba2 = NULL;
500 if (!ba1)
501 return ERROR_INT("ba1 not defined", __func__, 1);
502
503 data1 = l_byteaGetData(ba1, &nbytes1);
504 if (splitloc >= nbytes1)
505 return ERROR_INT("splitloc invalid", __func__, 1);
506 nbytes2 = nbytes1 - splitloc;
507
508 /* Make the new lba */
509 *pba2 = l_byteaInitFromMem(data1 + splitloc, nbytes2);
510
511 /* Null the removed bytes in the input lba */
512 memset(data1 + splitloc, 0, nbytes2);
513 ba1->size = splitloc;
514 return 0;
515 }
516
517
518 /*---------------------------------------------------------------------*
519 * Search *
520 *---------------------------------------------------------------------*/
521 /*!
522 * \brief l_byteaFindEachSequence()
523 *
524 * \param[in] ba
525 * \param[in] sequence subarray of bytes to find in data
526 * \param[in] seqlen length of sequence, in bytes
527 * \param[out] pda byte positions of each occurrence of %sequence
528 * \return 0 if OK, 1 on error
529 */
530 l_ok
531 l_byteaFindEachSequence(L_BYTEA *ba,
532 const l_uint8 *sequence,
533 size_t seqlen,
534 L_DNA **pda)
535 {
536 l_uint8 *data;
537 size_t size;
538
539 if (!pda)
540 return ERROR_INT("&da not defined", __func__, 1);
541 *pda = NULL;
542 if (!ba)
543 return ERROR_INT("ba not defined", __func__, 1);
544 if (!sequence)
545 return ERROR_INT("sequence not defined", __func__, 1);
546
547 data = l_byteaGetData(ba, &size);
548 *pda = arrayFindEachSequence(data, size, sequence, seqlen);
549 return 0;
550 }
551
552
553 /*---------------------------------------------------------------------*
554 * Output to file *
555 *---------------------------------------------------------------------*/
556 /*!
557 * \brief l_byteaWrite()
558 *
559 * \param[in] fname output file
560 * \param[in] ba
561 * \param[in] startloc first byte to output
562 * \param[in] nbytes number of bytes to write; use 0 to write to
563 * the end of the data array
564 * \return 0 if OK, 1 on error
565 */
566 l_ok
567 l_byteaWrite(const char *fname,
568 L_BYTEA *ba,
569 size_t startloc,
570 size_t nbytes)
571 {
572 l_int32 ret;
573 FILE *fp;
574
575 if (!fname)
576 return ERROR_INT("fname not defined", __func__, 1);
577 if (!ba)
578 return ERROR_INT("ba not defined", __func__, 1);
579
580 if ((fp = fopenWriteStream(fname, "wb")) == NULL)
581 return ERROR_INT_1("stream not opened", fname, __func__, 1);
582 ret = l_byteaWriteStream(fp, ba, startloc, nbytes);
583 fclose(fp);
584 return ret;
585 }
586
587
588 /*!
589 * \brief l_byteaWriteStream()
590 *
591 * \param[in] fp file stream opened for binary write
592 * \param[in] ba
593 * \param[in] startloc first byte to output
594 * \param[in] nbytes number of bytes to write; use 0 to write to
595 * the end of the data array
596 * \return 0 if OK, 1 on error
597 */
598 l_ok
599 l_byteaWriteStream(FILE *fp,
600 L_BYTEA *ba,
601 size_t startloc,
602 size_t nbytes)
603 {
604 l_uint8 *data;
605 size_t size, maxbytes;
606
607 if (!fp)
608 return ERROR_INT("stream not defined", __func__, 1);
609 if (!ba)
610 return ERROR_INT("ba not defined", __func__, 1);
611
612 data = l_byteaGetData(ba, &size);
613 if (startloc >= size)
614 return ERROR_INT("invalid startloc", __func__, 1);
615 maxbytes = size - startloc;
616 nbytes = (nbytes == 0) ? maxbytes : L_MIN(nbytes, maxbytes);
617
618 fwrite(data + startloc, 1, nbytes, fp);
619 return 0;
620 }