Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/leptonica/src/encoding.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /*====================================================================* | |
| 2 - Copyright (C) 2001 Leptonica. All rights reserved. | |
| 3 - This software is distributed in the hope that it will be | |
| 4 - useful, but with NO WARRANTY OF ANY KIND. | |
| 5 - No author or distributor accepts responsibility to anyone for the | |
| 6 - consequences of using this software, or for whether it serves any | |
| 7 - particular purpose or works at all, unless he or she says so in | |
| 8 - writing. Everyone is granted permission to copy, modify and | |
| 9 - redistribute this source code, for commercial or non-commercial | |
| 10 - purposes, with the following restrictions: (1) the origin of this | |
| 11 - source code must not be misrepresented; (2) modified versions must | |
| 12 - be plainly marked as such; and (3) this notice may not be removed | |
| 13 - or altered from any source or modified source distribution. | |
| 14 *====================================================================*/ | |
| 15 | |
| 16 /* | |
| 17 * encoding.c | |
| 18 * | |
| 19 * Base64 | |
| 20 * char *encodeBase64() | |
| 21 * l_uint8 *decodeBase64() | |
| 22 * static l_int32 isBase64() | |
| 23 * static l_int32 *genReverseTab64() | |
| 24 * static void byteConvert3to4() | |
| 25 * static void byteConvert4to3() | |
| 26 * | |
| 27 * Ascii85 | |
| 28 * char *encodeAscii85() | |
| 29 * l_uint8 *decodeAscii85() | |
| 30 * static l_int32 convertChunkToAscii85() | |
| 31 * | |
| 32 * char *encodeAscii85WithComp() | |
| 33 * l_uint8 *decodeAscii85WithComp() | |
| 34 * | |
| 35 * String reformatting for base 64 encoded data | |
| 36 * char *reformatPacked64() | |
| 37 * | |
| 38 * Base64 encoding is useful for encoding binary data in a restricted set of | |
| 39 * 64 printable ascii symbols, that includes the 62 alphanumerics and '+' | |
| 40 * and '/'. Notably it does not include quotes, so that base64 encoded | |
| 41 * strings can be used in situations where quotes are used for formatting. | |
| 42 * 64 symbols was chosen because it is the smallest number that can be used | |
| 43 * in 4-for-3 byte encoding of binary data: | |
| 44 * log2(64) / log2(256) = 0.75 = 3/4 | |
| 45 * | |
| 46 * Ascii85 encoding is used in PostScript and some pdf files for | |
| 47 * representing binary data (for example, a compressed image) in printable | |
| 48 * ascii symbols. It has a dictionary of 85 symbols; 85 was chosen because | |
| 49 * it is the smallest number that can be used in 5-for-4 byte encoding | |
| 50 * of binary data (256 possible input values). This can be seen from | |
| 51 * the max information content in such a sequence: | |
| 52 * log2(84) / log2(256) = 0.799 < 4/5 | |
| 53 * log2(85) / log2(256) = 0.801 > 4/5 | |
| 54 */ | |
| 55 | |
| 56 #ifdef HAVE_CONFIG_H | |
| 57 #include <config_auto.h> | |
| 58 #endif /* HAVE_CONFIG_H */ | |
| 59 | |
| 60 #include <ctype.h> | |
| 61 #include <string.h> | |
| 62 #include "allheaders.h" | |
| 63 | |
| 64 /* Base64 encoding table in string representation */ | |
| 65 static const l_int32 MAX_BASE64_LINE = 72; /* max line length base64 */ | |
| 66 static const char *tablechar64 = | |
| 67 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
| 68 "abcdefghijklmnopqrstuvwxyz" | |
| 69 "0123456789+/"; | |
| 70 | |
| 71 static l_int32 isBase64(char); | |
| 72 static l_int32 *genReverseTab64(void); | |
| 73 static void byteConvert3to4(l_uint8 *in3, l_uint8 *out4); | |
| 74 static void byteConvert4to3(l_uint8 *in4, l_uint8 *out3); | |
| 75 | |
| 76 /* Ascii85 encoding */ | |
| 77 static const l_int32 MAX_ASCII85_LINE = 64; /* max line length ascii85 */ | |
| 78 static const l_uint32 power85[5] = {1, | |
| 79 85, | |
| 80 85 * 85, | |
| 81 85 * 85 * 85, | |
| 82 85 * 85 * 85 * 85}; | |
| 83 | |
| 84 static l_int32 convertChunkToAscii85(const l_uint8 *inarray, size_t insize, | |
| 85 l_int32 *pindex, char *outbuf, | |
| 86 l_int32 *pnbout); | |
| 87 | |
| 88 /*-------------------------------------------------------------* | |
| 89 * Utility for encoding and decoding data with base64 * | |
| 90 *-------------------------------------------------------------*/ | |
| 91 /*! | |
| 92 * \brief encodeBase64() | |
| 93 * | |
| 94 * \param[in] inarray input binary data | |
| 95 * \param[in] insize number of bytes in input array | |
| 96 * \param[out] poutsize number of bytes in output char array | |
| 97 * \return chara with MAX_BASE64_LINE characters + \n in each line | |
| 98 * | |
| 99 * <pre> | |
| 100 * Notes: | |
| 101 * (1) The input character data is unrestricted binary. | |
| 102 * The output encoded data consists of the 64 characters | |
| 103 * in the base64 set, plus newlines and the pad character '='. | |
| 104 * </pre> | |
| 105 */ | |
| 106 char * | |
| 107 encodeBase64(const l_uint8 *inarray, | |
| 108 l_int32 insize, | |
| 109 l_int32 *poutsize) | |
| 110 { | |
| 111 char *chara; | |
| 112 const l_uint8 *bytea; | |
| 113 l_uint8 array3[3], array4[4]; | |
| 114 l_int32 outsize, i, j, index, linecount; | |
| 115 | |
| 116 if (!poutsize) | |
| 117 return (char *)ERROR_PTR("&outsize not defined", __func__, NULL); | |
| 118 *poutsize = 0; | |
| 119 if (!inarray) | |
| 120 return (char *)ERROR_PTR("inarray not defined", __func__, NULL); | |
| 121 if (insize <= 0) | |
| 122 return (char *)ERROR_PTR("insize not > 0", __func__, NULL); | |
| 123 | |
| 124 /* The output array is padded to a multiple of 4 bytes, not | |
| 125 * counting the newlines. We just need to allocate a large | |
| 126 * enough array, and add 4 bytes to make sure it is big enough. */ | |
| 127 outsize = 4 * ((insize + 2) / 3); /* without newlines */ | |
| 128 outsize += outsize / MAX_BASE64_LINE + 4; /* with the newlines */ | |
| 129 if ((chara = (char *)LEPT_CALLOC(outsize, sizeof(char))) == NULL) | |
| 130 return (char *)ERROR_PTR("chara not made", __func__, NULL); | |
| 131 | |
| 132 /* Read all the input data, and convert in sets of 3 input | |
| 133 * bytes --> 4 output bytes. */ | |
| 134 i = index = linecount = 0; | |
| 135 bytea = inarray; | |
| 136 while (insize--) { | |
| 137 if (linecount == MAX_BASE64_LINE) { | |
| 138 chara[index++] = '\n'; | |
| 139 linecount = 0; | |
| 140 } | |
| 141 array3[i++] = *bytea++; | |
| 142 if (i == 3) { /* convert 3 to 4 and save */ | |
| 143 byteConvert3to4(array3, array4); | |
| 144 for (j = 0; j < 4; j++) | |
| 145 chara[index++] = tablechar64[array4[j]]; | |
| 146 i = 0; | |
| 147 linecount += 4; | |
| 148 } | |
| 149 } | |
| 150 | |
| 151 /* Suppose 1 or 2 bytes has been read but not yet processed. | |
| 152 * If 1 byte has been read, this will generate 2 bytes of | |
| 153 * output, with 6 bits to the first byte and 2 bits to the second. | |
| 154 * We will add two bytes of '=' for padding. | |
| 155 * If 2 bytes has been read, this will generate 3 bytes of output, | |
| 156 * with 6 bits to the first 2 bytes and 4 bits to the third, and | |
| 157 * we add a fourth padding byte ('='). */ | |
| 158 if (i > 0) { /* left-over 1 or 2 input bytes */ | |
| 159 for (j = i; j < 3; j++) | |
| 160 array3[j] = '\0'; /* zero the remaining input bytes */ | |
| 161 byteConvert3to4(array3, array4); | |
| 162 for (j = 0; j <= i; j++) | |
| 163 chara[index++] = tablechar64[array4[j]]; | |
| 164 for (j = i + 1; j < 4; j++) | |
| 165 chara[index++] = '='; | |
| 166 } | |
| 167 *poutsize = index; | |
| 168 | |
| 169 return chara; | |
| 170 } | |
| 171 | |
| 172 | |
| 173 /*! | |
| 174 * \brief decodeBase64() | |
| 175 * | |
| 176 * \param[in] inarray input encoded char data, with 72 chars/line) | |
| 177 * \param[in] insize number of bytes in input array | |
| 178 * \param[out] poutsize number of bytes in output byte array | |
| 179 * \return bytea decoded byte data, or NULL on error | |
| 180 * | |
| 181 * <pre> | |
| 182 * Notes: | |
| 183 * (1) The input character data should have only 66 different characters: | |
| 184 * The 64 character set for base64 encoding, plus the pad | |
| 185 * character '=' and newlines for formatting with fixed line | |
| 186 * lengths. If there are any other characters, the decoder | |
| 187 * will declare the input data to be invalid and return NULL. | |
| 188 * (2) The decoder ignores newlines and, for a valid input string, | |
| 189 * stops reading input when a pad byte is found. | |
| 190 * </pre> | |
| 191 */ | |
| 192 l_uint8 * | |
| 193 decodeBase64(const char *inarray, | |
| 194 l_int32 insize, | |
| 195 l_int32 *poutsize) | |
| 196 { | |
| 197 char inchar; | |
| 198 l_uint8 *bytea; | |
| 199 l_uint8 array3[3], array4[4]; | |
| 200 l_int32 *rtable64; | |
| 201 l_int32 i, j, outsize, in_index, out_index; | |
| 202 | |
| 203 if (!poutsize) | |
| 204 return (l_uint8 *)ERROR_PTR("&outsize not defined", __func__, NULL); | |
| 205 *poutsize = 0; | |
| 206 if (!inarray) | |
| 207 return (l_uint8 *)ERROR_PTR("inarray not defined", __func__, NULL); | |
| 208 if (insize <= 0) | |
| 209 return (l_uint8 *)ERROR_PTR("insize not > 0", __func__, NULL); | |
| 210 | |
| 211 /* Validate the input data */ | |
| 212 for (i = 0; i < insize; i++) { | |
| 213 inchar = inarray[i]; | |
| 214 if (inchar == '\n') continue; | |
| 215 if (isBase64(inchar) == 0 && inchar != '=') | |
| 216 return (l_uint8 *)ERROR_PTR("invalid char in inarray", | |
| 217 __func__, NULL); | |
| 218 } | |
| 219 | |
| 220 /* The input array typically is made with a newline every | |
| 221 * MAX_BASE64_LINE input bytes. However, as a printed string, the | |
| 222 * newlines would be stripped. So when we allocate the output | |
| 223 * array, assume the input array is all data, but strip | |
| 224 * out the newlines during decoding. This guarantees that | |
| 225 * the allocated array is large enough. */ | |
| 226 outsize = 3 * ((insize + 3) / 4) + 4; | |
| 227 if ((bytea = (l_uint8 *)LEPT_CALLOC(outsize, sizeof(l_uint8))) == NULL) | |
| 228 return (l_uint8 *)ERROR_PTR("bytea not made", __func__, NULL); | |
| 229 | |
| 230 /* The number of encoded input data bytes is always a multiple of 4. | |
| 231 * Read all the data, until you reach either the end or | |
| 232 * the first pad character '='. The data is processed in | |
| 233 * units of 4 input bytes, generating 3 output decoded bytes | |
| 234 * of binary data. Newlines are ignored. If there are no | |
| 235 * pad bytes, i == 0 at the end of this section. */ | |
| 236 rtable64 = genReverseTab64(); | |
| 237 i = in_index = out_index = 0; | |
| 238 for (in_index = 0; in_index < insize; in_index++) { | |
| 239 inchar = inarray[in_index]; | |
| 240 if (inchar == '\n') continue; | |
| 241 if (inchar == '=') break; | |
| 242 array4[i++] = rtable64[(unsigned char)inchar]; | |
| 243 if (i < 4) { | |
| 244 continue; | |
| 245 } else { /* i == 4; convert 4 to 3 and save */ | |
| 246 byteConvert4to3(array4, array3); | |
| 247 for (j = 0; j < 3; j++) | |
| 248 bytea[out_index++] = array3[j]; | |
| 249 i = 0; | |
| 250 } | |
| 251 } | |
| 252 | |
| 253 /* If i > 0, we ran into pad bytes ('='). If i == 2, there are | |
| 254 * two input pad bytes and one output data byte. If i == 3, | |
| 255 * there is one input pad byte and two output data bytes. */ | |
| 256 if (i > 0) { | |
| 257 for (j = i; j < 4; j++) | |
| 258 array4[j] = '\0'; /* zero the remaining input bytes */ | |
| 259 byteConvert4to3(array4, array3); | |
| 260 for (j = 0; j < i - 1; j++) | |
| 261 bytea[out_index++] = array3[j]; | |
| 262 } | |
| 263 *poutsize = out_index; | |
| 264 | |
| 265 LEPT_FREE(rtable64); | |
| 266 return bytea; | |
| 267 } | |
| 268 | |
| 269 | |
| 270 /*! | |
| 271 * \brief isBase64() | |
| 272 */ | |
| 273 static l_int32 | |
| 274 isBase64(char c) | |
| 275 { | |
| 276 return (isalnum(((int)c)) || ((c) == '+') || ((c) == '/')) ? 1 : 0; | |
| 277 } | |
| 278 | |
| 279 /*! | |
| 280 * \brief genReverseTab64() | |
| 281 */ | |
| 282 static l_int32 * | |
| 283 genReverseTab64(void) | |
| 284 { | |
| 285 l_int32 i; | |
| 286 l_int32 *rtable64; | |
| 287 | |
| 288 rtable64 = (l_int32 *)LEPT_CALLOC(128, sizeof(l_int32)); | |
| 289 for (i = 0; i < 64; i++) { | |
| 290 rtable64[(unsigned char)tablechar64[i]] = i; | |
| 291 } | |
| 292 return rtable64; | |
| 293 } | |
| 294 | |
| 295 /*! | |
| 296 * \brief byteConvert3to4() | |
| 297 */ | |
| 298 static void | |
| 299 byteConvert3to4(l_uint8 *in3, | |
| 300 l_uint8 *out4) | |
| 301 { | |
| 302 out4[0] = in3[0] >> 2; | |
| 303 out4[1] = ((in3[0] & 0x03) << 4) | (in3[1] >> 4); | |
| 304 out4[2] = ((in3[1] & 0x0f) << 2) | (in3[2] >> 6); | |
| 305 out4[3] = in3[2] & 0x3f; | |
| 306 return; | |
| 307 } | |
| 308 | |
| 309 /*! | |
| 310 * \brief byteConvert4to3() | |
| 311 */ | |
| 312 static void | |
| 313 byteConvert4to3(l_uint8 *in4, | |
| 314 l_uint8 *out3) | |
| 315 { | |
| 316 out3[0] = (in4[0] << 2) | (in4[1] >> 4); | |
| 317 out3[1] = ((in4[1] & 0x0f) << 4) | (in4[2] >> 2); | |
| 318 out3[2] = ((in4[2] & 0x03) << 6) | in4[3]; | |
| 319 return; | |
| 320 } | |
| 321 | |
| 322 | |
| 323 /*-------------------------------------------------------------* | |
| 324 * Utility for encoding and decoding data with ascii85 * | |
| 325 *-------------------------------------------------------------*/ | |
| 326 /*! | |
| 327 * \brief encodeAscii85() | |
| 328 * | |
| 329 * \param[in] inarray input data | |
| 330 * \param[in] insize number of bytes in input array | |
| 331 * \param[out] poutsize number of bytes in output char array | |
| 332 * \return chara with 64 characters + \n in each line | |
| 333 * | |
| 334 * <pre> | |
| 335 * Notes: | |
| 336 * (1) Ghostscript has a stack break if the last line of | |
| 337 * data only has a '>', so we avoid the problem by | |
| 338 * always putting '~>' on the last line. | |
| 339 * </pre> | |
| 340 */ | |
| 341 char * | |
| 342 encodeAscii85(const l_uint8 *inarray, | |
| 343 size_t insize, | |
| 344 size_t *poutsize) | |
| 345 { | |
| 346 char *chara; | |
| 347 char outbuf[8]; | |
| 348 l_int32 maxsize, i, index, linecount, nbout, eof; | |
| 349 size_t outindex; | |
| 350 | |
| 351 if (!poutsize) | |
| 352 return (char *)ERROR_PTR("&outsize not defined", __func__, NULL); | |
| 353 *poutsize = 0; | |
| 354 if (!inarray) | |
| 355 return (char *)ERROR_PTR("inarray not defined", __func__, NULL); | |
| 356 if (insize <= 0) | |
| 357 return (char *)ERROR_PTR("insize not > 0", __func__, NULL); | |
| 358 | |
| 359 /* Accumulate results in char array */ | |
| 360 maxsize = (l_int32)(80. + (insize * 5. / 4.) * | |
| 361 (1. + 2. / MAX_ASCII85_LINE)); | |
| 362 if ((chara = (char *)LEPT_CALLOC(maxsize, sizeof(char))) == NULL) | |
| 363 return (char *)ERROR_PTR("chara not made", __func__, NULL); | |
| 364 | |
| 365 linecount = 0; | |
| 366 index = 0; | |
| 367 outindex = 0; | |
| 368 while (1) { | |
| 369 eof = convertChunkToAscii85(inarray, insize, &index, outbuf, &nbout); | |
| 370 for (i = 0; i < nbout; i++) { | |
| 371 chara[outindex++] = outbuf[i]; | |
| 372 linecount++; | |
| 373 if (linecount >= MAX_ASCII85_LINE) { | |
| 374 chara[outindex++] = '\n'; | |
| 375 linecount = 0; | |
| 376 } | |
| 377 } | |
| 378 if (eof == TRUE) { | |
| 379 if (linecount != 0) | |
| 380 chara[outindex++] = '\n'; | |
| 381 chara[outindex++] = '~'; | |
| 382 chara[outindex++] = '>'; | |
| 383 chara[outindex++] = '\n'; | |
| 384 break; | |
| 385 } | |
| 386 } | |
| 387 | |
| 388 *poutsize = outindex; | |
| 389 return chara; | |
| 390 } | |
| 391 | |
| 392 | |
| 393 /*! | |
| 394 * \brief convertChunkToAscii85() | |
| 395 * | |
| 396 * \param[in] inarray input data | |
| 397 * \param[in] insize number of bytes in input array | |
| 398 * \param[out] pindex use and -- ptr | |
| 399 * \param[in] outbuf holds 8 ascii chars; we use no more than 7 | |
| 400 * \param[out] pnbsout number of bytes written to outbuf | |
| 401 * \return boolean for eof 0 if more data, 1 if end of file | |
| 402 * | |
| 403 * <pre> | |
| 404 * Notes: | |
| 405 * (1) Attempts to read 4 bytes and write 5. | |
| 406 * (2) Writes 1 byte if the value is 0. | |
| 407 * </pre> | |
| 408 */ | |
| 409 static l_int32 | |
| 410 convertChunkToAscii85(const l_uint8 *inarray, | |
| 411 size_t insize, | |
| 412 l_int32 *pindex, | |
| 413 char *outbuf, | |
| 414 l_int32 *pnbout) | |
| 415 { | |
| 416 l_uint8 inbyte; | |
| 417 l_uint32 inword, val; | |
| 418 l_int32 eof, index, nread, nbout, i; | |
| 419 | |
| 420 eof = FALSE; | |
| 421 index = *pindex; | |
| 422 nread = L_MIN(4, (insize - index)); | |
| 423 if (insize == index + nread) | |
| 424 eof = TRUE; | |
| 425 *pindex += nread; /* save new index */ | |
| 426 | |
| 427 /* Read input data and save in l_uint32 */ | |
| 428 inword = 0; | |
| 429 for (i = 0; i < nread; i++) { | |
| 430 inbyte = inarray[index + i]; | |
| 431 inword += (l_uint32)inbyte << (8 * (3 - i)); | |
| 432 } | |
| 433 | |
| 434 #if 0 | |
| 435 lept_stderr("index = %d, nread = %d\n", index, nread); | |
| 436 lept_stderr("inword = %x\n", inword); | |
| 437 lept_stderr("eof = %d\n", eof); | |
| 438 #endif | |
| 439 | |
| 440 /* Special case: output 1 byte only */ | |
| 441 if (inword == 0) { | |
| 442 outbuf[0] = 'z'; | |
| 443 nbout = 1; | |
| 444 } else { /* output nread + 1 bytes */ | |
| 445 for (i = 4; i >= 4 - nread; i--) { | |
| 446 val = inword / power85[i]; | |
| 447 outbuf[4 - i] = (l_uint8)(val + '!'); | |
| 448 inword -= val * power85[i]; | |
| 449 } | |
| 450 nbout = nread + 1; | |
| 451 } | |
| 452 *pnbout = nbout; | |
| 453 | |
| 454 return eof; | |
| 455 } | |
| 456 | |
| 457 | |
| 458 /*! | |
| 459 * \brief decodeAscii85() | |
| 460 * | |
| 461 * \param[in] inarray ascii85 input data | |
| 462 * \param[in] insize number of bytes in input array | |
| 463 * \param[out] poutsize number of bytes in output l_uint8 array | |
| 464 * \return outarray binary | |
| 465 * | |
| 466 * <pre> | |
| 467 * Notes: | |
| 468 * (1) We assume the data is properly encoded, so we do not check | |
| 469 * for invalid characters or the final '>' character. | |
| 470 * (2) We permit whitespace to be added to the encoding in an | |
| 471 * arbitrary way. | |
| 472 * </pre> | |
| 473 */ | |
| 474 l_uint8 * | |
| 475 decodeAscii85(const char *inarray, | |
| 476 size_t insize, | |
| 477 size_t *poutsize) | |
| 478 { | |
| 479 char inc; | |
| 480 const char *pin; | |
| 481 l_uint8 val; | |
| 482 l_uint8 *outa; | |
| 483 l_int32 maxsize, ocount, bytecount, index; | |
| 484 l_uint32 oword; | |
| 485 | |
| 486 if (!poutsize) | |
| 487 return (l_uint8 *)ERROR_PTR("&outsize not defined", __func__, NULL); | |
| 488 *poutsize = 0; | |
| 489 if (!inarray) | |
| 490 return (l_uint8 *)ERROR_PTR("inarray not defined", __func__, NULL); | |
| 491 if (insize <= 0) | |
| 492 return (l_uint8 *)ERROR_PTR("insize not > 0", __func__, NULL); | |
| 493 | |
| 494 /* Accumulate results in outa */ | |
| 495 maxsize = (l_int32)(80. + (insize * 4. / 5.)); /* plenty big */ | |
| 496 if ((outa = (l_uint8 *)LEPT_CALLOC(maxsize, sizeof(l_uint8))) == NULL) | |
| 497 return (l_uint8 *)ERROR_PTR("outa not made", __func__, NULL); | |
| 498 | |
| 499 pin = inarray; | |
| 500 ocount = 0; /* byte index into outa */ | |
| 501 oword = 0; | |
| 502 for (index = 0, bytecount = 0; index < insize; index++, pin++) { | |
| 503 inc = *pin; | |
| 504 | |
| 505 if (inc == ' ' || inc == '\t' || inc == '\n' || | |
| 506 inc == '\f' || inc == '\r' || inc == '\v') /* ignore white space */ | |
| 507 continue; | |
| 508 | |
| 509 val = inc - '!'; | |
| 510 if (val < 85) { | |
| 511 oword = oword * 85 + val; | |
| 512 if (bytecount < 4) { | |
| 513 bytecount++; | |
| 514 } else { /* we have all 5 input chars for the oword */ | |
| 515 outa[ocount] = (oword >> 24) & 0xff; | |
| 516 outa[ocount + 1] = (oword >> 16) & 0xff; | |
| 517 outa[ocount + 2] = (oword >> 8) & 0xff; | |
| 518 outa[ocount + 3] = oword & 0xff; | |
| 519 ocount += 4; | |
| 520 bytecount = 0; | |
| 521 oword = 0; | |
| 522 } | |
| 523 } else if (inc == 'z' && bytecount == 0) { | |
| 524 outa[ocount] = 0; | |
| 525 outa[ocount + 1] = 0; | |
| 526 outa[ocount + 2] = 0; | |
| 527 outa[ocount + 3] = 0; | |
| 528 ocount += 4; | |
| 529 } else if (inc == '~') { /* end of data */ | |
| 530 L_INFO(" %d extra bytes output\n", __func__, bytecount - 1); | |
| 531 switch (bytecount) { | |
| 532 case 0: /* normal eof */ | |
| 533 case 1: /* error */ | |
| 534 break; | |
| 535 case 2: /* 1 extra byte */ | |
| 536 oword = oword * power85[3] + 0xffffff; | |
| 537 outa[ocount] = (oword >> 24) & 0xff; | |
| 538 break; | |
| 539 case 3: /* 2 extra bytes */ | |
| 540 oword = oword * power85[2] + 0xffff; | |
| 541 outa[ocount] = (oword >> 24) & 0xff; | |
| 542 outa[ocount + 1] = (oword >> 16) & 0xff; | |
| 543 break; | |
| 544 case 4: /* 3 extra bytes */ | |
| 545 oword = oword * 85 + 0xff; | |
| 546 outa[ocount] = (oword >> 24) & 0xff; | |
| 547 outa[ocount + 1] = (oword >> 16) & 0xff; | |
| 548 outa[ocount + 2] = (oword >> 8) & 0xff; | |
| 549 break; | |
| 550 } | |
| 551 if (bytecount > 1) | |
| 552 ocount += (bytecount - 1); | |
| 553 break; | |
| 554 } | |
| 555 } | |
| 556 *poutsize = ocount; | |
| 557 | |
| 558 return outa; | |
| 559 } | |
| 560 | |
| 561 | |
| 562 /*! | |
| 563 * \brief encodeAscii85WithComp) | |
| 564 * | |
| 565 * \param[in] indata input binary data | |
| 566 * \param[in] insize number of bytes in input data | |
| 567 * \param[out] poutsize number of bytes in output string | |
| 568 * \return outstr with 64 characters + \n in each line | |
| 569 * | |
| 570 * <pre> | |
| 571 * Notes: | |
| 572 * (1) Compress the input data; then encode ascii85. For ascii | |
| 573 * input, a first compression step will significantly reduce | |
| 574 * the final encoded output size. | |
| 575 * </pre> | |
| 576 */ | |
| 577 char * | |
| 578 encodeAscii85WithComp(const l_uint8 *indata, | |
| 579 size_t insize, | |
| 580 size_t *poutsize) | |
| 581 { | |
| 582 char *outstr; | |
| 583 size_t size1; | |
| 584 l_uint8 *data1; | |
| 585 | |
| 586 if (!poutsize) | |
| 587 return (char *)ERROR_PTR("&outsize not defined", __func__, NULL); | |
| 588 *poutsize = 0; | |
| 589 if (!indata) | |
| 590 return (char *)ERROR_PTR("indata not defined", __func__, NULL); | |
| 591 | |
| 592 if ((data1 = zlibCompress(indata, insize, &size1)) == NULL) | |
| 593 return (char *)ERROR_PTR("data1 not made", __func__, NULL); | |
| 594 outstr = encodeAscii85(data1, size1, poutsize); | |
| 595 LEPT_FREE(data1); | |
| 596 return outstr; | |
| 597 } | |
| 598 | |
| 599 | |
| 600 /*! | |
| 601 * \brief decodeAscii85WithComp() | |
| 602 * | |
| 603 * \param[in] instr ascii85 input data string | |
| 604 * \param[in] insize number of bytes in input data | |
| 605 * \param[out] poutsize number of bytes in output binary data | |
| 606 * \return outdata binary data before compression and ascii85 encoding | |
| 607 * | |
| 608 * <pre> | |
| 609 * Notes: | |
| 610 * (1) We assume the input data has been zlib compressed and then | |
| 611 * properly encoded, so we reverse the procedure. This is the | |
| 612 * inverse of encodeAscii85WithComp(). | |
| 613 * (2) Set %insize == 0 to use strlen(%instr). | |
| 614 * </pre> | |
| 615 */ | |
| 616 l_uint8 * | |
| 617 decodeAscii85WithComp(const char *instr, | |
| 618 size_t insize, | |
| 619 size_t *poutsize) | |
| 620 { | |
| 621 size_t size1; | |
| 622 l_uint8 *data1, *outdata; | |
| 623 | |
| 624 if (!poutsize) | |
| 625 return (l_uint8 *)ERROR_PTR("&outsize not defined", __func__, NULL); | |
| 626 *poutsize = 0; | |
| 627 if (!instr) | |
| 628 return (l_uint8 *)ERROR_PTR("instr not defined", __func__, NULL); | |
| 629 | |
| 630 if (insize == 0) insize = strlen(instr); | |
| 631 if ((data1 = decodeAscii85(instr, insize, &size1)) == NULL) | |
| 632 return (l_uint8 *)ERROR_PTR("data1 not made", __func__, NULL); | |
| 633 outdata = zlibUncompress(data1, size1, poutsize); | |
| 634 LEPT_FREE(data1); | |
| 635 return outdata; | |
| 636 } | |
| 637 | |
| 638 | |
| 639 /*-------------------------------------------------------------* | |
| 640 * String reformatting for base 64 encoded data * | |
| 641 *-------------------------------------------------------------*/ | |
| 642 /*! | |
| 643 * \brief reformatPacked64() | |
| 644 * | |
| 645 * \param[in] inarray base64 encoded string with newlines | |
| 646 * \param[in] insize number of bytes in input array | |
| 647 * \param[in] leadspace number of spaces in each line before the data | |
| 648 * \param[in] linechars number of bytes of data in each line; multiple of 4 | |
| 649 * \param[in] addquotes 1 to add quotes to each line of data; 0 to skip | |
| 650 * \param[out] poutsize number of bytes in output char array | |
| 651 * \return outarray ascii | |
| 652 * | |
| 653 * <pre> | |
| 654 * Notes: | |
| 655 * (1) Each line in the output array has %leadspace space characters, | |
| 656 * followed optionally by a double-quote, followed by %linechars | |
| 657 * bytes of base64 data, followed optionally by a double-quote, | |
| 658 * followed by a newline. | |
| 659 * (2) This can be used to convert a base64 encoded string to a | |
| 660 * string formatted for inclusion in a C source file. | |
| 661 * </pre> | |
| 662 */ | |
| 663 char * | |
| 664 reformatPacked64(const char *inarray, | |
| 665 l_int32 insize, | |
| 666 l_int32 leadspace, | |
| 667 l_int32 linechars, | |
| 668 l_int32 addquotes, | |
| 669 l_int32 *poutsize) | |
| 670 { | |
| 671 char *flata, *outa; | |
| 672 l_int32 i, j, flatindex, flatsize, outindex, nlines, linewithpad, linecount; | |
| 673 | |
| 674 if (!poutsize) | |
| 675 return (char *)ERROR_PTR("&outsize not defined", __func__, NULL); | |
| 676 *poutsize = 0; | |
| 677 if (!inarray) | |
| 678 return (char *)ERROR_PTR("inarray not defined", __func__, NULL); | |
| 679 if (insize <= 0) | |
| 680 return (char *)ERROR_PTR("insize not > 0", __func__, NULL); | |
| 681 if (leadspace < 0) | |
| 682 return (char *)ERROR_PTR("leadspace must be >= 0", __func__, NULL); | |
| 683 if (linechars % 4) | |
| 684 return (char *)ERROR_PTR("linechars % 4 must be 0", __func__, NULL); | |
| 685 | |
| 686 /* Remove all white space */ | |
| 687 if ((flata = (char *)LEPT_CALLOC(insize, sizeof(char))) == NULL) | |
| 688 return (char *)ERROR_PTR("flata not made", __func__, NULL); | |
| 689 for (i = 0, flatindex = 0; i < insize; i++) { | |
| 690 if (isBase64(inarray[i]) || inarray[i] == '=') | |
| 691 flata[flatindex++] = inarray[i]; | |
| 692 } | |
| 693 | |
| 694 /* Generate output string */ | |
| 695 flatsize = flatindex; | |
| 696 nlines = (flatsize + linechars - 1) / linechars; | |
| 697 linewithpad = leadspace + linechars + 1; /* including newline */ | |
| 698 if (addquotes) linewithpad += 2; | |
| 699 if ((outa = (char *)LEPT_CALLOC((size_t)nlines * linewithpad, | |
| 700 sizeof(char))) == NULL) { | |
| 701 LEPT_FREE(flata); | |
| 702 return (char *)ERROR_PTR("outa not made", __func__, NULL); | |
| 703 } | |
| 704 for (j = 0, outindex = 0; j < leadspace; j++) | |
| 705 outa[outindex++] = ' '; | |
| 706 if (addquotes) outa[outindex++] = '"'; | |
| 707 for (i = 0, linecount = 0; i < flatsize; i++) { | |
| 708 if (linecount == linechars) { | |
| 709 if (addquotes) outa[outindex++] = '"'; | |
| 710 outa[outindex++] = '\n'; | |
| 711 for (j = 0; j < leadspace; j++) | |
| 712 outa[outindex++] = ' '; | |
| 713 if (addquotes) outa[outindex++] = '"'; | |
| 714 linecount = 0; | |
| 715 } | |
| 716 outa[outindex++] = flata[i]; | |
| 717 linecount++; | |
| 718 } | |
| 719 if (addquotes) outa[outindex++] = '"'; | |
| 720 *poutsize = outindex; | |
| 721 | |
| 722 LEPT_FREE(flata); | |
| 723 return outa; | |
| 724 } |
