Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/zlib/crc32.c @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* crc32.c -- compute the CRC-32 of a data stream | |
| 2 * Copyright (C) 1995-2022 Mark Adler | |
| 3 * For conditions of distribution and use, see copyright notice in zlib.h | |
| 4 * | |
| 5 * This interleaved implementation of a CRC makes use of pipelined multiple | |
| 6 * arithmetic-logic units, commonly found in modern CPU cores. It is due to | |
| 7 * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. | |
| 8 */ | |
| 9 | |
| 10 /* @(#) $Id$ */ | |
| 11 | |
| 12 /* | |
| 13 Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore | |
| 14 protection on the static variables used to control the first-use generation | |
| 15 of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should | |
| 16 first call get_crc_table() to initialize the tables before allowing more than | |
| 17 one thread to use crc32(). | |
| 18 | |
| 19 MAKECRCH can be #defined to write out crc32.h. A main() routine is also | |
| 20 produced, so that this one source file can be compiled to an executable. | |
| 21 */ | |
| 22 | |
| 23 #ifdef MAKECRCH | |
| 24 # include <stdio.h> | |
| 25 # ifndef DYNAMIC_CRC_TABLE | |
| 26 # define DYNAMIC_CRC_TABLE | |
| 27 # endif /* !DYNAMIC_CRC_TABLE */ | |
| 28 #endif /* MAKECRCH */ | |
| 29 | |
| 30 #include "zutil.h" /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */ | |
| 31 | |
| 32 /* | |
| 33 A CRC of a message is computed on N braids of words in the message, where | |
| 34 each word consists of W bytes (4 or 8). If N is 3, for example, then three | |
| 35 running sparse CRCs are calculated respectively on each braid, at these | |
| 36 indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ... | |
| 37 This is done starting at a word boundary, and continues until as many blocks | |
| 38 of N * W bytes as are available have been processed. The results are combined | |
| 39 into a single CRC at the end. For this code, N must be in the range 1..6 and | |
| 40 W must be 4 or 8. The upper limit on N can be increased if desired by adding | |
| 41 more #if blocks, extending the patterns apparent in the code. In addition, | |
| 42 crc32.h would need to be regenerated, if the maximum N value is increased. | |
| 43 | |
| 44 N and W are chosen empirically by benchmarking the execution time on a given | |
| 45 processor. The choices for N and W below were based on testing on Intel Kaby | |
| 46 Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64 | |
| 47 Octeon II processors. The Intel, AMD, and ARM processors were all fastest | |
| 48 with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4. | |
| 49 They were all tested with either gcc or clang, all using the -O3 optimization | |
| 50 level. Your mileage may vary. | |
| 51 */ | |
| 52 | |
| 53 /* Define N */ | |
| 54 #ifdef Z_TESTN | |
| 55 # define N Z_TESTN | |
| 56 #else | |
| 57 # define N 5 | |
| 58 #endif | |
| 59 #if N < 1 || N > 6 | |
| 60 # error N must be in 1..6 | |
| 61 #endif | |
| 62 | |
| 63 /* | |
| 64 z_crc_t must be at least 32 bits. z_word_t must be at least as long as | |
| 65 z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and | |
| 66 that bytes are eight bits. | |
| 67 */ | |
| 68 | |
| 69 /* | |
| 70 Define W and the associated z_word_t type. If W is not defined, then a | |
| 71 braided calculation is not used, and the associated tables and code are not | |
| 72 compiled. | |
| 73 */ | |
| 74 #ifdef Z_TESTW | |
| 75 # if Z_TESTW-1 != -1 | |
| 76 # define W Z_TESTW | |
| 77 # endif | |
| 78 #else | |
| 79 # ifdef MAKECRCH | |
| 80 # define W 8 /* required for MAKECRCH */ | |
| 81 # else | |
| 82 # if defined(__x86_64__) || defined(__aarch64__) | |
| 83 # define W 8 | |
| 84 # else | |
| 85 # define W 4 | |
| 86 # endif | |
| 87 # endif | |
| 88 #endif | |
| 89 #ifdef W | |
| 90 # if W == 8 && defined(Z_U8) | |
| 91 typedef Z_U8 z_word_t; | |
| 92 # elif defined(Z_U4) | |
| 93 # undef W | |
| 94 # define W 4 | |
| 95 typedef Z_U4 z_word_t; | |
| 96 # else | |
| 97 # undef W | |
| 98 # endif | |
| 99 #endif | |
| 100 | |
| 101 /* If available, use the ARM processor CRC32 instruction. */ | |
| 102 #if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8 | |
| 103 # define ARMCRC32 | |
| 104 #endif | |
| 105 | |
| 106 #if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE)) | |
| 107 /* | |
| 108 Swap the bytes in a z_word_t to convert between little and big endian. Any | |
| 109 self-respecting compiler will optimize this to a single machine byte-swap | |
| 110 instruction, if one is available. This assumes that word_t is either 32 bits | |
| 111 or 64 bits. | |
| 112 */ | |
| 113 local z_word_t byte_swap(z_word_t word) { | |
| 114 # if W == 8 | |
| 115 return | |
| 116 (word & 0xff00000000000000) >> 56 | | |
| 117 (word & 0xff000000000000) >> 40 | | |
| 118 (word & 0xff0000000000) >> 24 | | |
| 119 (word & 0xff00000000) >> 8 | | |
| 120 (word & 0xff000000) << 8 | | |
| 121 (word & 0xff0000) << 24 | | |
| 122 (word & 0xff00) << 40 | | |
| 123 (word & 0xff) << 56; | |
| 124 # else /* W == 4 */ | |
| 125 return | |
| 126 (word & 0xff000000) >> 24 | | |
| 127 (word & 0xff0000) >> 8 | | |
| 128 (word & 0xff00) << 8 | | |
| 129 (word & 0xff) << 24; | |
| 130 # endif | |
| 131 } | |
| 132 #endif | |
| 133 | |
| 134 #ifdef DYNAMIC_CRC_TABLE | |
| 135 /* ========================================================================= | |
| 136 * Table of powers of x for combining CRC-32s, filled in by make_crc_table() | |
| 137 * below. | |
| 138 */ | |
| 139 local z_crc_t FAR x2n_table[32]; | |
| 140 #else | |
| 141 /* ========================================================================= | |
| 142 * Tables for byte-wise and braided CRC-32 calculations, and a table of powers | |
| 143 * of x for combining CRC-32s, all made by make_crc_table(). | |
| 144 */ | |
| 145 # include "crc32.h" | |
| 146 #endif | |
| 147 | |
| 148 /* CRC polynomial. */ | |
| 149 #define POLY 0xedb88320 /* p(x) reflected, with x^32 implied */ | |
| 150 | |
| 151 /* | |
| 152 Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial, | |
| 153 reflected. For speed, this requires that a not be zero. | |
| 154 */ | |
| 155 local z_crc_t multmodp(z_crc_t a, z_crc_t b) { | |
| 156 z_crc_t m, p; | |
| 157 | |
| 158 m = (z_crc_t)1 << 31; | |
| 159 p = 0; | |
| 160 for (;;) { | |
| 161 if (a & m) { | |
| 162 p ^= b; | |
| 163 if ((a & (m - 1)) == 0) | |
| 164 break; | |
| 165 } | |
| 166 m >>= 1; | |
| 167 b = b & 1 ? (b >> 1) ^ POLY : b >> 1; | |
| 168 } | |
| 169 return p; | |
| 170 } | |
| 171 | |
| 172 /* | |
| 173 Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been | |
| 174 initialized. | |
| 175 */ | |
| 176 local z_crc_t x2nmodp(z_off64_t n, unsigned k) { | |
| 177 z_crc_t p; | |
| 178 | |
| 179 p = (z_crc_t)1 << 31; /* x^0 == 1 */ | |
| 180 while (n) { | |
| 181 if (n & 1) | |
| 182 p = multmodp(x2n_table[k & 31], p); | |
| 183 n >>= 1; | |
| 184 k++; | |
| 185 } | |
| 186 return p; | |
| 187 } | |
| 188 | |
| 189 #ifdef DYNAMIC_CRC_TABLE | |
| 190 /* ========================================================================= | |
| 191 * Build the tables for byte-wise and braided CRC-32 calculations, and a table | |
| 192 * of powers of x for combining CRC-32s. | |
| 193 */ | |
| 194 local z_crc_t FAR crc_table[256]; | |
| 195 #ifdef W | |
| 196 local z_word_t FAR crc_big_table[256]; | |
| 197 local z_crc_t FAR crc_braid_table[W][256]; | |
| 198 local z_word_t FAR crc_braid_big_table[W][256]; | |
| 199 local void braid(z_crc_t [][256], z_word_t [][256], int, int); | |
| 200 #endif | |
| 201 #ifdef MAKECRCH | |
| 202 local void write_table(FILE *, const z_crc_t FAR *, int); | |
| 203 local void write_table32hi(FILE *, const z_word_t FAR *, int); | |
| 204 local void write_table64(FILE *, const z_word_t FAR *, int); | |
| 205 #endif /* MAKECRCH */ | |
| 206 | |
| 207 /* | |
| 208 Define a once() function depending on the availability of atomics. If this is | |
| 209 compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in | |
| 210 multiple threads, and if atomics are not available, then get_crc_table() must | |
| 211 be called to initialize the tables and must return before any threads are | |
| 212 allowed to compute or combine CRCs. | |
| 213 */ | |
| 214 | |
| 215 /* Definition of once functionality. */ | |
| 216 typedef struct once_s once_t; | |
| 217 | |
| 218 /* Check for the availability of atomics. */ | |
| 219 #if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \ | |
| 220 !defined(__STDC_NO_ATOMICS__) | |
| 221 | |
| 222 #include <stdatomic.h> | |
| 223 | |
| 224 /* Structure for once(), which must be initialized with ONCE_INIT. */ | |
| 225 struct once_s { | |
| 226 atomic_flag begun; | |
| 227 atomic_int done; | |
| 228 }; | |
| 229 #define ONCE_INIT {ATOMIC_FLAG_INIT, 0} | |
| 230 | |
| 231 /* | |
| 232 Run the provided init() function exactly once, even if multiple threads | |
| 233 invoke once() at the same time. The state must be a once_t initialized with | |
| 234 ONCE_INIT. | |
| 235 */ | |
| 236 local void once(once_t *state, void (*init)(void)) { | |
| 237 if (!atomic_load(&state->done)) { | |
| 238 if (atomic_flag_test_and_set(&state->begun)) | |
| 239 while (!atomic_load(&state->done)) | |
| 240 ; | |
| 241 else { | |
| 242 init(); | |
| 243 atomic_store(&state->done, 1); | |
| 244 } | |
| 245 } | |
| 246 } | |
| 247 | |
| 248 #else /* no atomics */ | |
| 249 | |
| 250 /* Structure for once(), which must be initialized with ONCE_INIT. */ | |
| 251 struct once_s { | |
| 252 volatile int begun; | |
| 253 volatile int done; | |
| 254 }; | |
| 255 #define ONCE_INIT {0, 0} | |
| 256 | |
| 257 /* Test and set. Alas, not atomic, but tries to minimize the period of | |
| 258 vulnerability. */ | |
| 259 local int test_and_set(int volatile *flag) { | |
| 260 int was; | |
| 261 | |
| 262 was = *flag; | |
| 263 *flag = 1; | |
| 264 return was; | |
| 265 } | |
| 266 | |
| 267 /* Run the provided init() function once. This is not thread-safe. */ | |
| 268 local void once(once_t *state, void (*init)(void)) { | |
| 269 if (!state->done) { | |
| 270 if (test_and_set(&state->begun)) | |
| 271 while (!state->done) | |
| 272 ; | |
| 273 else { | |
| 274 init(); | |
| 275 state->done = 1; | |
| 276 } | |
| 277 } | |
| 278 } | |
| 279 | |
| 280 #endif | |
| 281 | |
| 282 /* State for once(). */ | |
| 283 local once_t made = ONCE_INIT; | |
| 284 | |
| 285 /* | |
| 286 Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: | |
| 287 x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. | |
| 288 | |
| 289 Polynomials over GF(2) are represented in binary, one bit per coefficient, | |
| 290 with the lowest powers in the most significant bit. Then adding polynomials | |
| 291 is just exclusive-or, and multiplying a polynomial by x is a right shift by | |
| 292 one. If we call the above polynomial p, and represent a byte as the | |
| 293 polynomial q, also with the lowest power in the most significant bit (so the | |
| 294 byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, | |
| 295 where a mod b means the remainder after dividing a by b. | |
| 296 | |
| 297 This calculation is done using the shift-register method of multiplying and | |
| 298 taking the remainder. The register is initialized to zero, and for each | |
| 299 incoming bit, x^32 is added mod p to the register if the bit is a one (where | |
| 300 x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x | |
| 301 (which is shifting right by one and adding x^32 mod p if the bit shifted out | |
| 302 is a one). We start with the highest power (least significant bit) of q and | |
| 303 repeat for all eight bits of q. | |
| 304 | |
| 305 The table is simply the CRC of all possible eight bit values. This is all the | |
| 306 information needed to generate CRCs on data a byte at a time for all | |
| 307 combinations of CRC register values and incoming bytes. | |
| 308 */ | |
| 309 | |
| 310 local void make_crc_table(void) { | |
| 311 unsigned i, j, n; | |
| 312 z_crc_t p; | |
| 313 | |
| 314 /* initialize the CRC of bytes tables */ | |
| 315 for (i = 0; i < 256; i++) { | |
| 316 p = i; | |
| 317 for (j = 0; j < 8; j++) | |
| 318 p = p & 1 ? (p >> 1) ^ POLY : p >> 1; | |
| 319 crc_table[i] = p; | |
| 320 #ifdef W | |
| 321 crc_big_table[i] = byte_swap(p); | |
| 322 #endif | |
| 323 } | |
| 324 | |
| 325 /* initialize the x^2^n mod p(x) table */ | |
| 326 p = (z_crc_t)1 << 30; /* x^1 */ | |
| 327 x2n_table[0] = p; | |
| 328 for (n = 1; n < 32; n++) | |
| 329 x2n_table[n] = p = multmodp(p, p); | |
| 330 | |
| 331 #ifdef W | |
| 332 /* initialize the braiding tables -- needs x2n_table[] */ | |
| 333 braid(crc_braid_table, crc_braid_big_table, N, W); | |
| 334 #endif | |
| 335 | |
| 336 #ifdef MAKECRCH | |
| 337 { | |
| 338 /* | |
| 339 The crc32.h header file contains tables for both 32-bit and 64-bit | |
| 340 z_word_t's, and so requires a 64-bit type be available. In that case, | |
| 341 z_word_t must be defined to be 64-bits. This code then also generates | |
| 342 and writes out the tables for the case that z_word_t is 32 bits. | |
| 343 */ | |
| 344 #if !defined(W) || W != 8 | |
| 345 # error Need a 64-bit integer type in order to generate crc32.h. | |
| 346 #endif | |
| 347 FILE *out; | |
| 348 int k, n; | |
| 349 z_crc_t ltl[8][256]; | |
| 350 z_word_t big[8][256]; | |
| 351 | |
| 352 out = fopen("crc32.h", "w"); | |
| 353 if (out == NULL) return; | |
| 354 | |
| 355 /* write out little-endian CRC table to crc32.h */ | |
| 356 fprintf(out, | |
| 357 "/* crc32.h -- tables for rapid CRC calculation\n" | |
| 358 " * Generated automatically by crc32.c\n */\n" | |
| 359 "\n" | |
| 360 "local const z_crc_t FAR crc_table[] = {\n" | |
| 361 " "); | |
| 362 write_table(out, crc_table, 256); | |
| 363 fprintf(out, | |
| 364 "};\n"); | |
| 365 | |
| 366 /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ | |
| 367 fprintf(out, | |
| 368 "\n" | |
| 369 "#ifdef W\n" | |
| 370 "\n" | |
| 371 "#if W == 8\n" | |
| 372 "\n" | |
| 373 "local const z_word_t FAR crc_big_table[] = {\n" | |
| 374 " "); | |
| 375 write_table64(out, crc_big_table, 256); | |
| 376 fprintf(out, | |
| 377 "};\n"); | |
| 378 | |
| 379 /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ | |
| 380 fprintf(out, | |
| 381 "\n" | |
| 382 "#else /* W == 4 */\n" | |
| 383 "\n" | |
| 384 "local const z_word_t FAR crc_big_table[] = {\n" | |
| 385 " "); | |
| 386 write_table32hi(out, crc_big_table, 256); | |
| 387 fprintf(out, | |
| 388 "};\n" | |
| 389 "\n" | |
| 390 "#endif\n"); | |
| 391 | |
| 392 /* write out braid tables for each value of N */ | |
| 393 for (n = 1; n <= 6; n++) { | |
| 394 fprintf(out, | |
| 395 "\n" | |
| 396 "#if N == %d\n", n); | |
| 397 | |
| 398 /* compute braid tables for this N and 64-bit word_t */ | |
| 399 braid(ltl, big, n, 8); | |
| 400 | |
| 401 /* write out braid tables for 64-bit z_word_t to crc32.h */ | |
| 402 fprintf(out, | |
| 403 "\n" | |
| 404 "#if W == 8\n" | |
| 405 "\n" | |
| 406 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); | |
| 407 for (k = 0; k < 8; k++) { | |
| 408 fprintf(out, " {"); | |
| 409 write_table(out, ltl[k], 256); | |
| 410 fprintf(out, "}%s", k < 7 ? ",\n" : ""); | |
| 411 } | |
| 412 fprintf(out, | |
| 413 "};\n" | |
| 414 "\n" | |
| 415 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); | |
| 416 for (k = 0; k < 8; k++) { | |
| 417 fprintf(out, " {"); | |
| 418 write_table64(out, big[k], 256); | |
| 419 fprintf(out, "}%s", k < 7 ? ",\n" : ""); | |
| 420 } | |
| 421 fprintf(out, | |
| 422 "};\n"); | |
| 423 | |
| 424 /* compute braid tables for this N and 32-bit word_t */ | |
| 425 braid(ltl, big, n, 4); | |
| 426 | |
| 427 /* write out braid tables for 32-bit z_word_t to crc32.h */ | |
| 428 fprintf(out, | |
| 429 "\n" | |
| 430 "#else /* W == 4 */\n" | |
| 431 "\n" | |
| 432 "local const z_crc_t FAR crc_braid_table[][256] = {\n"); | |
| 433 for (k = 0; k < 4; k++) { | |
| 434 fprintf(out, " {"); | |
| 435 write_table(out, ltl[k], 256); | |
| 436 fprintf(out, "}%s", k < 3 ? ",\n" : ""); | |
| 437 } | |
| 438 fprintf(out, | |
| 439 "};\n" | |
| 440 "\n" | |
| 441 "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); | |
| 442 for (k = 0; k < 4; k++) { | |
| 443 fprintf(out, " {"); | |
| 444 write_table32hi(out, big[k], 256); | |
| 445 fprintf(out, "}%s", k < 3 ? ",\n" : ""); | |
| 446 } | |
| 447 fprintf(out, | |
| 448 "};\n" | |
| 449 "\n" | |
| 450 "#endif\n" | |
| 451 "\n" | |
| 452 "#endif\n"); | |
| 453 } | |
| 454 fprintf(out, | |
| 455 "\n" | |
| 456 "#endif\n"); | |
| 457 | |
| 458 /* write out zeros operator table to crc32.h */ | |
| 459 fprintf(out, | |
| 460 "\n" | |
| 461 "local const z_crc_t FAR x2n_table[] = {\n" | |
| 462 " "); | |
| 463 write_table(out, x2n_table, 32); | |
| 464 fprintf(out, | |
| 465 "};\n"); | |
| 466 fclose(out); | |
| 467 } | |
| 468 #endif /* MAKECRCH */ | |
| 469 } | |
| 470 | |
| 471 #ifdef MAKECRCH | |
| 472 | |
| 473 /* | |
| 474 Write the 32-bit values in table[0..k-1] to out, five per line in | |
| 475 hexadecimal separated by commas. | |
| 476 */ | |
| 477 local void write_table(FILE *out, const z_crc_t FAR *table, int k) { | |
| 478 int n; | |
| 479 | |
| 480 for (n = 0; n < k; n++) | |
| 481 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", | |
| 482 (unsigned long)(table[n]), | |
| 483 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); | |
| 484 } | |
| 485 | |
| 486 /* | |
| 487 Write the high 32-bits of each value in table[0..k-1] to out, five per line | |
| 488 in hexadecimal separated by commas. | |
| 489 */ | |
| 490 local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) { | |
| 491 int n; | |
| 492 | |
| 493 for (n = 0; n < k; n++) | |
| 494 fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", | |
| 495 (unsigned long)(table[n] >> 32), | |
| 496 n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); | |
| 497 } | |
| 498 | |
| 499 /* | |
| 500 Write the 64-bit values in table[0..k-1] to out, three per line in | |
| 501 hexadecimal separated by commas. This assumes that if there is a 64-bit | |
| 502 type, then there is also a long long integer type, and it is at least 64 | |
| 503 bits. If not, then the type cast and format string can be adjusted | |
| 504 accordingly. | |
| 505 */ | |
| 506 local void write_table64(FILE *out, const z_word_t FAR *table, int k) { | |
| 507 int n; | |
| 508 | |
| 509 for (n = 0; n < k; n++) | |
| 510 fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", | |
| 511 (unsigned long long)(table[n]), | |
| 512 n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); | |
| 513 } | |
| 514 | |
| 515 /* Actually do the deed. */ | |
| 516 int main(void) { | |
| 517 make_crc_table(); | |
| 518 return 0; | |
| 519 } | |
| 520 | |
| 521 #endif /* MAKECRCH */ | |
| 522 | |
| 523 #ifdef W | |
| 524 /* | |
| 525 Generate the little and big-endian braid tables for the given n and z_word_t | |
| 526 size w. Each array must have room for w blocks of 256 elements. | |
| 527 */ | |
| 528 local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) { | |
| 529 int k; | |
| 530 z_crc_t i, p, q; | |
| 531 for (k = 0; k < w; k++) { | |
| 532 p = x2nmodp((n * w + 3 - k) << 3, 0); | |
| 533 ltl[k][0] = 0; | |
| 534 big[w - 1 - k][0] = 0; | |
| 535 for (i = 1; i < 256; i++) { | |
| 536 ltl[k][i] = q = multmodp(i << 24, p); | |
| 537 big[w - 1 - k][i] = byte_swap(q); | |
| 538 } | |
| 539 } | |
| 540 } | |
| 541 #endif | |
| 542 | |
| 543 #endif /* DYNAMIC_CRC_TABLE */ | |
| 544 | |
| 545 /* ========================================================================= | |
| 546 * This function can be used by asm versions of crc32(), and to force the | |
| 547 * generation of the CRC tables in a threaded application. | |
| 548 */ | |
| 549 const z_crc_t FAR * ZEXPORT get_crc_table(void) { | |
| 550 #ifdef DYNAMIC_CRC_TABLE | |
| 551 once(&made, make_crc_table); | |
| 552 #endif /* DYNAMIC_CRC_TABLE */ | |
| 553 return (const z_crc_t FAR *)crc_table; | |
| 554 } | |
| 555 | |
| 556 /* ========================================================================= | |
| 557 * Use ARM machine instructions if available. This will compute the CRC about | |
| 558 * ten times faster than the braided calculation. This code does not check for | |
| 559 * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will | |
| 560 * only be defined if the compilation specifies an ARM processor architecture | |
| 561 * that has the instructions. For example, compiling with -march=armv8.1-a or | |
| 562 * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 | |
| 563 * instructions. | |
| 564 */ | |
| 565 #ifdef ARMCRC32 | |
| 566 | |
| 567 /* | |
| 568 Constants empirically determined to maximize speed. These values are from | |
| 569 measurements on a Cortex-A57. Your mileage may vary. | |
| 570 */ | |
| 571 #define Z_BATCH 3990 /* number of words in a batch */ | |
| 572 #define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ | |
| 573 #define Z_BATCH_MIN 800 /* fewest words in a final batch */ | |
| 574 | |
| 575 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, | |
| 576 z_size_t len) { | |
| 577 z_crc_t val; | |
| 578 z_word_t crc1, crc2; | |
| 579 const z_word_t *word; | |
| 580 z_word_t val0, val1, val2; | |
| 581 z_size_t last, last2, i; | |
| 582 z_size_t num; | |
| 583 | |
| 584 /* Return initial CRC, if requested. */ | |
| 585 if (buf == Z_NULL) return 0; | |
| 586 | |
| 587 #ifdef DYNAMIC_CRC_TABLE | |
| 588 once(&made, make_crc_table); | |
| 589 #endif /* DYNAMIC_CRC_TABLE */ | |
| 590 | |
| 591 /* Pre-condition the CRC */ | |
| 592 crc = (~crc) & 0xffffffff; | |
| 593 | |
| 594 /* Compute the CRC up to a word boundary. */ | |
| 595 while (len && ((z_size_t)buf & 7) != 0) { | |
| 596 len--; | |
| 597 val = *buf++; | |
| 598 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); | |
| 599 } | |
| 600 | |
| 601 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ | |
| 602 word = (z_word_t const *)buf; | |
| 603 num = len >> 3; | |
| 604 len &= 7; | |
| 605 | |
| 606 /* Do three interleaved CRCs to realize the throughput of one crc32x | |
| 607 instruction per cycle. Each CRC is calculated on Z_BATCH words. The | |
| 608 three CRCs are combined into a single CRC after each set of batches. */ | |
| 609 while (num >= 3 * Z_BATCH) { | |
| 610 crc1 = 0; | |
| 611 crc2 = 0; | |
| 612 for (i = 0; i < Z_BATCH; i++) { | |
| 613 val0 = word[i]; | |
| 614 val1 = word[i + Z_BATCH]; | |
| 615 val2 = word[i + 2 * Z_BATCH]; | |
| 616 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); | |
| 617 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); | |
| 618 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); | |
| 619 } | |
| 620 word += 3 * Z_BATCH; | |
| 621 num -= 3 * Z_BATCH; | |
| 622 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; | |
| 623 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; | |
| 624 } | |
| 625 | |
| 626 /* Do one last smaller batch with the remaining words, if there are enough | |
| 627 to pay for the combination of CRCs. */ | |
| 628 last = num / 3; | |
| 629 if (last >= Z_BATCH_MIN) { | |
| 630 last2 = last << 1; | |
| 631 crc1 = 0; | |
| 632 crc2 = 0; | |
| 633 for (i = 0; i < last; i++) { | |
| 634 val0 = word[i]; | |
| 635 val1 = word[i + last]; | |
| 636 val2 = word[i + last2]; | |
| 637 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); | |
| 638 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); | |
| 639 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); | |
| 640 } | |
| 641 word += 3 * last; | |
| 642 num -= 3 * last; | |
| 643 val = x2nmodp(last, 6); | |
| 644 crc = multmodp(val, crc) ^ crc1; | |
| 645 crc = multmodp(val, crc) ^ crc2; | |
| 646 } | |
| 647 | |
| 648 /* Compute the CRC on any remaining words. */ | |
| 649 for (i = 0; i < num; i++) { | |
| 650 val0 = word[i]; | |
| 651 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); | |
| 652 } | |
| 653 word += num; | |
| 654 | |
| 655 /* Complete the CRC on any remaining bytes. */ | |
| 656 buf = (const unsigned char FAR *)word; | |
| 657 while (len) { | |
| 658 len--; | |
| 659 val = *buf++; | |
| 660 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); | |
| 661 } | |
| 662 | |
| 663 /* Return the CRC, post-conditioned. */ | |
| 664 return crc ^ 0xffffffff; | |
| 665 } | |
| 666 | |
| 667 #else | |
| 668 | |
| 669 #ifdef W | |
| 670 | |
| 671 /* | |
| 672 Return the CRC of the W bytes in the word_t data, taking the | |
| 673 least-significant byte of the word as the first byte of data, without any pre | |
| 674 or post conditioning. This is used to combine the CRCs of each braid. | |
| 675 */ | |
| 676 local z_crc_t crc_word(z_word_t data) { | |
| 677 int k; | |
| 678 for (k = 0; k < W; k++) | |
| 679 data = (data >> 8) ^ crc_table[data & 0xff]; | |
| 680 return (z_crc_t)data; | |
| 681 } | |
| 682 | |
| 683 local z_word_t crc_word_big(z_word_t data) { | |
| 684 int k; | |
| 685 for (k = 0; k < W; k++) | |
| 686 data = (data << 8) ^ | |
| 687 crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; | |
| 688 return data; | |
| 689 } | |
| 690 | |
| 691 #endif | |
| 692 | |
| 693 /* ========================================================================= */ | |
| 694 unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, | |
| 695 z_size_t len) { | |
| 696 /* Return initial CRC, if requested. */ | |
| 697 if (buf == Z_NULL) return 0; | |
| 698 | |
| 699 #ifdef DYNAMIC_CRC_TABLE | |
| 700 once(&made, make_crc_table); | |
| 701 #endif /* DYNAMIC_CRC_TABLE */ | |
| 702 | |
| 703 /* Pre-condition the CRC */ | |
| 704 crc = (~crc) & 0xffffffff; | |
| 705 | |
| 706 #ifdef W | |
| 707 | |
| 708 /* If provided enough bytes, do a braided CRC calculation. */ | |
| 709 if (len >= N * W + W - 1) { | |
| 710 z_size_t blks; | |
| 711 z_word_t const *words; | |
| 712 unsigned endian; | |
| 713 int k; | |
| 714 | |
| 715 /* Compute the CRC up to a z_word_t boundary. */ | |
| 716 while (len && ((z_size_t)buf & (W - 1)) != 0) { | |
| 717 len--; | |
| 718 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
| 719 } | |
| 720 | |
| 721 /* Compute the CRC on as many N z_word_t blocks as are available. */ | |
| 722 blks = len / (N * W); | |
| 723 len -= blks * N * W; | |
| 724 words = (z_word_t const *)buf; | |
| 725 | |
| 726 /* Do endian check at execution time instead of compile time, since ARM | |
| 727 processors can change the endianness at execution time. If the | |
| 728 compiler knows what the endianness will be, it can optimize out the | |
| 729 check and the unused branch. */ | |
| 730 endian = 1; | |
| 731 if (*(unsigned char *)&endian) { | |
| 732 /* Little endian. */ | |
| 733 | |
| 734 z_crc_t crc0; | |
| 735 z_word_t word0; | |
| 736 #if N > 1 | |
| 737 z_crc_t crc1; | |
| 738 z_word_t word1; | |
| 739 #if N > 2 | |
| 740 z_crc_t crc2; | |
| 741 z_word_t word2; | |
| 742 #if N > 3 | |
| 743 z_crc_t crc3; | |
| 744 z_word_t word3; | |
| 745 #if N > 4 | |
| 746 z_crc_t crc4; | |
| 747 z_word_t word4; | |
| 748 #if N > 5 | |
| 749 z_crc_t crc5; | |
| 750 z_word_t word5; | |
| 751 #endif | |
| 752 #endif | |
| 753 #endif | |
| 754 #endif | |
| 755 #endif | |
| 756 | |
| 757 /* Initialize the CRC for each braid. */ | |
| 758 crc0 = crc; | |
| 759 #if N > 1 | |
| 760 crc1 = 0; | |
| 761 #if N > 2 | |
| 762 crc2 = 0; | |
| 763 #if N > 3 | |
| 764 crc3 = 0; | |
| 765 #if N > 4 | |
| 766 crc4 = 0; | |
| 767 #if N > 5 | |
| 768 crc5 = 0; | |
| 769 #endif | |
| 770 #endif | |
| 771 #endif | |
| 772 #endif | |
| 773 #endif | |
| 774 | |
| 775 /* | |
| 776 Process the first blks-1 blocks, computing the CRCs on each braid | |
| 777 independently. | |
| 778 */ | |
| 779 while (--blks) { | |
| 780 /* Load the word for each braid into registers. */ | |
| 781 word0 = crc0 ^ words[0]; | |
| 782 #if N > 1 | |
| 783 word1 = crc1 ^ words[1]; | |
| 784 #if N > 2 | |
| 785 word2 = crc2 ^ words[2]; | |
| 786 #if N > 3 | |
| 787 word3 = crc3 ^ words[3]; | |
| 788 #if N > 4 | |
| 789 word4 = crc4 ^ words[4]; | |
| 790 #if N > 5 | |
| 791 word5 = crc5 ^ words[5]; | |
| 792 #endif | |
| 793 #endif | |
| 794 #endif | |
| 795 #endif | |
| 796 #endif | |
| 797 words += N; | |
| 798 | |
| 799 /* Compute and update the CRC for each word. The loop should | |
| 800 get unrolled. */ | |
| 801 crc0 = crc_braid_table[0][word0 & 0xff]; | |
| 802 #if N > 1 | |
| 803 crc1 = crc_braid_table[0][word1 & 0xff]; | |
| 804 #if N > 2 | |
| 805 crc2 = crc_braid_table[0][word2 & 0xff]; | |
| 806 #if N > 3 | |
| 807 crc3 = crc_braid_table[0][word3 & 0xff]; | |
| 808 #if N > 4 | |
| 809 crc4 = crc_braid_table[0][word4 & 0xff]; | |
| 810 #if N > 5 | |
| 811 crc5 = crc_braid_table[0][word5 & 0xff]; | |
| 812 #endif | |
| 813 #endif | |
| 814 #endif | |
| 815 #endif | |
| 816 #endif | |
| 817 for (k = 1; k < W; k++) { | |
| 818 crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; | |
| 819 #if N > 1 | |
| 820 crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; | |
| 821 #if N > 2 | |
| 822 crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; | |
| 823 #if N > 3 | |
| 824 crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; | |
| 825 #if N > 4 | |
| 826 crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; | |
| 827 #if N > 5 | |
| 828 crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; | |
| 829 #endif | |
| 830 #endif | |
| 831 #endif | |
| 832 #endif | |
| 833 #endif | |
| 834 } | |
| 835 } | |
| 836 | |
| 837 /* | |
| 838 Process the last block, combining the CRCs of the N braids at the | |
| 839 same time. | |
| 840 */ | |
| 841 crc = crc_word(crc0 ^ words[0]); | |
| 842 #if N > 1 | |
| 843 crc = crc_word(crc1 ^ words[1] ^ crc); | |
| 844 #if N > 2 | |
| 845 crc = crc_word(crc2 ^ words[2] ^ crc); | |
| 846 #if N > 3 | |
| 847 crc = crc_word(crc3 ^ words[3] ^ crc); | |
| 848 #if N > 4 | |
| 849 crc = crc_word(crc4 ^ words[4] ^ crc); | |
| 850 #if N > 5 | |
| 851 crc = crc_word(crc5 ^ words[5] ^ crc); | |
| 852 #endif | |
| 853 #endif | |
| 854 #endif | |
| 855 #endif | |
| 856 #endif | |
| 857 words += N; | |
| 858 } | |
| 859 else { | |
| 860 /* Big endian. */ | |
| 861 | |
| 862 z_word_t crc0, word0, comb; | |
| 863 #if N > 1 | |
| 864 z_word_t crc1, word1; | |
| 865 #if N > 2 | |
| 866 z_word_t crc2, word2; | |
| 867 #if N > 3 | |
| 868 z_word_t crc3, word3; | |
| 869 #if N > 4 | |
| 870 z_word_t crc4, word4; | |
| 871 #if N > 5 | |
| 872 z_word_t crc5, word5; | |
| 873 #endif | |
| 874 #endif | |
| 875 #endif | |
| 876 #endif | |
| 877 #endif | |
| 878 | |
| 879 /* Initialize the CRC for each braid. */ | |
| 880 crc0 = byte_swap(crc); | |
| 881 #if N > 1 | |
| 882 crc1 = 0; | |
| 883 #if N > 2 | |
| 884 crc2 = 0; | |
| 885 #if N > 3 | |
| 886 crc3 = 0; | |
| 887 #if N > 4 | |
| 888 crc4 = 0; | |
| 889 #if N > 5 | |
| 890 crc5 = 0; | |
| 891 #endif | |
| 892 #endif | |
| 893 #endif | |
| 894 #endif | |
| 895 #endif | |
| 896 | |
| 897 /* | |
| 898 Process the first blks-1 blocks, computing the CRCs on each braid | |
| 899 independently. | |
| 900 */ | |
| 901 while (--blks) { | |
| 902 /* Load the word for each braid into registers. */ | |
| 903 word0 = crc0 ^ words[0]; | |
| 904 #if N > 1 | |
| 905 word1 = crc1 ^ words[1]; | |
| 906 #if N > 2 | |
| 907 word2 = crc2 ^ words[2]; | |
| 908 #if N > 3 | |
| 909 word3 = crc3 ^ words[3]; | |
| 910 #if N > 4 | |
| 911 word4 = crc4 ^ words[4]; | |
| 912 #if N > 5 | |
| 913 word5 = crc5 ^ words[5]; | |
| 914 #endif | |
| 915 #endif | |
| 916 #endif | |
| 917 #endif | |
| 918 #endif | |
| 919 words += N; | |
| 920 | |
| 921 /* Compute and update the CRC for each word. The loop should | |
| 922 get unrolled. */ | |
| 923 crc0 = crc_braid_big_table[0][word0 & 0xff]; | |
| 924 #if N > 1 | |
| 925 crc1 = crc_braid_big_table[0][word1 & 0xff]; | |
| 926 #if N > 2 | |
| 927 crc2 = crc_braid_big_table[0][word2 & 0xff]; | |
| 928 #if N > 3 | |
| 929 crc3 = crc_braid_big_table[0][word3 & 0xff]; | |
| 930 #if N > 4 | |
| 931 crc4 = crc_braid_big_table[0][word4 & 0xff]; | |
| 932 #if N > 5 | |
| 933 crc5 = crc_braid_big_table[0][word5 & 0xff]; | |
| 934 #endif | |
| 935 #endif | |
| 936 #endif | |
| 937 #endif | |
| 938 #endif | |
| 939 for (k = 1; k < W; k++) { | |
| 940 crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; | |
| 941 #if N > 1 | |
| 942 crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; | |
| 943 #if N > 2 | |
| 944 crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; | |
| 945 #if N > 3 | |
| 946 crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; | |
| 947 #if N > 4 | |
| 948 crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; | |
| 949 #if N > 5 | |
| 950 crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; | |
| 951 #endif | |
| 952 #endif | |
| 953 #endif | |
| 954 #endif | |
| 955 #endif | |
| 956 } | |
| 957 } | |
| 958 | |
| 959 /* | |
| 960 Process the last block, combining the CRCs of the N braids at the | |
| 961 same time. | |
| 962 */ | |
| 963 comb = crc_word_big(crc0 ^ words[0]); | |
| 964 #if N > 1 | |
| 965 comb = crc_word_big(crc1 ^ words[1] ^ comb); | |
| 966 #if N > 2 | |
| 967 comb = crc_word_big(crc2 ^ words[2] ^ comb); | |
| 968 #if N > 3 | |
| 969 comb = crc_word_big(crc3 ^ words[3] ^ comb); | |
| 970 #if N > 4 | |
| 971 comb = crc_word_big(crc4 ^ words[4] ^ comb); | |
| 972 #if N > 5 | |
| 973 comb = crc_word_big(crc5 ^ words[5] ^ comb); | |
| 974 #endif | |
| 975 #endif | |
| 976 #endif | |
| 977 #endif | |
| 978 #endif | |
| 979 words += N; | |
| 980 crc = byte_swap(comb); | |
| 981 } | |
| 982 | |
| 983 /* | |
| 984 Update the pointer to the remaining bytes to process. | |
| 985 */ | |
| 986 buf = (unsigned char const *)words; | |
| 987 } | |
| 988 | |
| 989 #endif /* W */ | |
| 990 | |
| 991 /* Complete the computation of the CRC on any remaining bytes. */ | |
| 992 while (len >= 8) { | |
| 993 len -= 8; | |
| 994 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
| 995 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
| 996 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
| 997 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
| 998 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
| 999 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
| 1000 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
| 1001 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
| 1002 } | |
| 1003 while (len) { | |
| 1004 len--; | |
| 1005 crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; | |
| 1006 } | |
| 1007 | |
| 1008 /* Return the CRC, post-conditioned. */ | |
| 1009 return crc ^ 0xffffffff; | |
| 1010 } | |
| 1011 | |
| 1012 #endif | |
| 1013 | |
| 1014 /* ========================================================================= */ | |
| 1015 unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf, | |
| 1016 uInt len) { | |
| 1017 return crc32_z(crc, buf, len); | |
| 1018 } | |
| 1019 | |
| 1020 /* ========================================================================= */ | |
| 1021 uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) { | |
| 1022 #ifdef DYNAMIC_CRC_TABLE | |
| 1023 once(&made, make_crc_table); | |
| 1024 #endif /* DYNAMIC_CRC_TABLE */ | |
| 1025 return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); | |
| 1026 } | |
| 1027 | |
| 1028 /* ========================================================================= */ | |
| 1029 uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) { | |
| 1030 return crc32_combine64(crc1, crc2, (z_off64_t)len2); | |
| 1031 } | |
| 1032 | |
| 1033 /* ========================================================================= */ | |
| 1034 uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) { | |
| 1035 #ifdef DYNAMIC_CRC_TABLE | |
| 1036 once(&made, make_crc_table); | |
| 1037 #endif /* DYNAMIC_CRC_TABLE */ | |
| 1038 return x2nmodp(len2, 3); | |
| 1039 } | |
| 1040 | |
| 1041 /* ========================================================================= */ | |
| 1042 uLong ZEXPORT crc32_combine_gen(z_off_t len2) { | |
| 1043 return crc32_combine_gen64((z_off64_t)len2); | |
| 1044 } | |
| 1045 | |
| 1046 /* ========================================================================= */ | |
| 1047 uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) { | |
| 1048 return multmodp(op, crc1) ^ (crc2 & 0xffffffff); | |
| 1049 } |
