Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/brotli/c/enc/cluster_inc.h @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* NOLINT(build/header_guard) */ | |
| 2 /* Copyright 2013 Google Inc. All Rights Reserved. | |
| 3 | |
| 4 Distributed under MIT license. | |
| 5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | |
| 6 */ | |
| 7 | |
| 8 /* template parameters: FN, CODE */ | |
| 9 | |
| 10 #define HistogramType FN(Histogram) | |
| 11 | |
| 12 /* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if | |
| 13 it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */ | |
| 14 BROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)( | |
| 15 const HistogramType* out, HistogramType* tmp, const uint32_t* cluster_size, | |
| 16 uint32_t idx1, uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs, | |
| 17 size_t* num_pairs) CODE({ | |
| 18 BROTLI_BOOL is_good_pair = BROTLI_FALSE; | |
| 19 HistogramPair p; | |
| 20 p.idx1 = p.idx2 = 0; | |
| 21 p.cost_diff = p.cost_combo = 0; | |
| 22 if (idx1 == idx2) { | |
| 23 return; | |
| 24 } | |
| 25 if (idx2 < idx1) { | |
| 26 uint32_t t = idx2; | |
| 27 idx2 = idx1; | |
| 28 idx1 = t; | |
| 29 } | |
| 30 p.idx1 = idx1; | |
| 31 p.idx2 = idx2; | |
| 32 p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]); | |
| 33 p.cost_diff -= out[idx1].bit_cost_; | |
| 34 p.cost_diff -= out[idx2].bit_cost_; | |
| 35 | |
| 36 if (out[idx1].total_count_ == 0) { | |
| 37 p.cost_combo = out[idx2].bit_cost_; | |
| 38 is_good_pair = BROTLI_TRUE; | |
| 39 } else if (out[idx2].total_count_ == 0) { | |
| 40 p.cost_combo = out[idx1].bit_cost_; | |
| 41 is_good_pair = BROTLI_TRUE; | |
| 42 } else { | |
| 43 double threshold = *num_pairs == 0 ? 1e99 : | |
| 44 BROTLI_MAX(double, 0.0, pairs[0].cost_diff); | |
| 45 double cost_combo; | |
| 46 *tmp = out[idx1]; | |
| 47 FN(HistogramAddHistogram)(tmp, &out[idx2]); | |
| 48 cost_combo = FN(BrotliPopulationCost)(tmp); | |
| 49 if (cost_combo < threshold - p.cost_diff) { | |
| 50 p.cost_combo = cost_combo; | |
| 51 is_good_pair = BROTLI_TRUE; | |
| 52 } | |
| 53 } | |
| 54 if (is_good_pair) { | |
| 55 p.cost_diff += p.cost_combo; | |
| 56 if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) { | |
| 57 /* Replace the top of the queue if needed. */ | |
| 58 if (*num_pairs < max_num_pairs) { | |
| 59 pairs[*num_pairs] = pairs[0]; | |
| 60 ++(*num_pairs); | |
| 61 } | |
| 62 pairs[0] = p; | |
| 63 } else if (*num_pairs < max_num_pairs) { | |
| 64 pairs[*num_pairs] = p; | |
| 65 ++(*num_pairs); | |
| 66 } | |
| 67 } | |
| 68 }) | |
| 69 | |
| 70 BROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out, | |
| 71 HistogramType* tmp, | |
| 72 uint32_t* cluster_size, | |
| 73 uint32_t* symbols, | |
| 74 uint32_t* clusters, | |
| 75 HistogramPair* pairs, | |
| 76 size_t num_clusters, | |
| 77 size_t symbols_size, | |
| 78 size_t max_clusters, | |
| 79 size_t max_num_pairs) CODE({ | |
| 80 double cost_diff_threshold = 0.0; | |
| 81 size_t min_cluster_size = 1; | |
| 82 size_t num_pairs = 0; | |
| 83 | |
| 84 { | |
| 85 /* We maintain a vector of histogram pairs, with the property that the pair | |
| 86 with the maximum bit cost reduction is the first. */ | |
| 87 size_t idx1; | |
| 88 for (idx1 = 0; idx1 < num_clusters; ++idx1) { | |
| 89 size_t idx2; | |
| 90 for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) { | |
| 91 FN(BrotliCompareAndPushToQueue)(out, tmp, cluster_size, clusters[idx1], | |
| 92 clusters[idx2], max_num_pairs, &pairs[0], &num_pairs); | |
| 93 } | |
| 94 } | |
| 95 } | |
| 96 | |
| 97 while (num_clusters > min_cluster_size) { | |
| 98 uint32_t best_idx1; | |
| 99 uint32_t best_idx2; | |
| 100 size_t i; | |
| 101 if (pairs[0].cost_diff >= cost_diff_threshold) { | |
| 102 cost_diff_threshold = 1e99; | |
| 103 min_cluster_size = max_clusters; | |
| 104 continue; | |
| 105 } | |
| 106 /* Take the best pair from the top of heap. */ | |
| 107 best_idx1 = pairs[0].idx1; | |
| 108 best_idx2 = pairs[0].idx2; | |
| 109 FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]); | |
| 110 out[best_idx1].bit_cost_ = pairs[0].cost_combo; | |
| 111 cluster_size[best_idx1] += cluster_size[best_idx2]; | |
| 112 for (i = 0; i < symbols_size; ++i) { | |
| 113 if (symbols[i] == best_idx2) { | |
| 114 symbols[i] = best_idx1; | |
| 115 } | |
| 116 } | |
| 117 for (i = 0; i < num_clusters; ++i) { | |
| 118 if (clusters[i] == best_idx2) { | |
| 119 memmove(&clusters[i], &clusters[i + 1], | |
| 120 (num_clusters - i - 1) * sizeof(clusters[0])); | |
| 121 break; | |
| 122 } | |
| 123 } | |
| 124 --num_clusters; | |
| 125 { | |
| 126 /* Remove pairs intersecting the just combined best pair. */ | |
| 127 size_t copy_to_idx = 0; | |
| 128 for (i = 0; i < num_pairs; ++i) { | |
| 129 HistogramPair* p = &pairs[i]; | |
| 130 if (p->idx1 == best_idx1 || p->idx2 == best_idx1 || | |
| 131 p->idx1 == best_idx2 || p->idx2 == best_idx2) { | |
| 132 /* Remove invalid pair from the queue. */ | |
| 133 continue; | |
| 134 } | |
| 135 if (HistogramPairIsLess(&pairs[0], p)) { | |
| 136 /* Replace the top of the queue if needed. */ | |
| 137 HistogramPair front = pairs[0]; | |
| 138 pairs[0] = *p; | |
| 139 pairs[copy_to_idx] = front; | |
| 140 } else { | |
| 141 pairs[copy_to_idx] = *p; | |
| 142 } | |
| 143 ++copy_to_idx; | |
| 144 } | |
| 145 num_pairs = copy_to_idx; | |
| 146 } | |
| 147 | |
| 148 /* Push new pairs formed with the combined histogram to the heap. */ | |
| 149 for (i = 0; i < num_clusters; ++i) { | |
| 150 FN(BrotliCompareAndPushToQueue)(out, tmp, cluster_size, best_idx1, | |
| 151 clusters[i], max_num_pairs, &pairs[0], &num_pairs); | |
| 152 } | |
| 153 } | |
| 154 return num_clusters; | |
| 155 }) | |
| 156 | |
| 157 /* What is the bit cost of moving histogram from cur_symbol to candidate. */ | |
| 158 BROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)( | |
| 159 const HistogramType* histogram, const HistogramType* candidate, | |
| 160 HistogramType* tmp) CODE({ | |
| 161 if (histogram->total_count_ == 0) { | |
| 162 return 0.0; | |
| 163 } else { | |
| 164 *tmp = *histogram; | |
| 165 FN(HistogramAddHistogram)(tmp, candidate); | |
| 166 return FN(BrotliPopulationCost)(tmp) - candidate->bit_cost_; | |
| 167 } | |
| 168 }) | |
| 169 | |
| 170 /* Find the best 'out' histogram for each of the 'in' histograms. | |
| 171 When called, clusters[0..num_clusters) contains the unique values from | |
| 172 symbols[0..in_size), but this property is not preserved in this function. | |
| 173 Note: we assume that out[]->bit_cost_ is already up-to-date. */ | |
| 174 BROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in, | |
| 175 size_t in_size, const uint32_t* clusters, size_t num_clusters, | |
| 176 HistogramType* out, HistogramType* tmp, uint32_t* symbols) CODE({ | |
| 177 size_t i; | |
| 178 for (i = 0; i < in_size; ++i) { | |
| 179 uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1]; | |
| 180 double best_bits = | |
| 181 FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out], tmp); | |
| 182 size_t j; | |
| 183 for (j = 0; j < num_clusters; ++j) { | |
| 184 const double cur_bits = | |
| 185 FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]], tmp); | |
| 186 if (cur_bits < best_bits) { | |
| 187 best_bits = cur_bits; | |
| 188 best_out = clusters[j]; | |
| 189 } | |
| 190 } | |
| 191 symbols[i] = best_out; | |
| 192 } | |
| 193 | |
| 194 /* Recompute each out based on raw and symbols. */ | |
| 195 for (i = 0; i < num_clusters; ++i) { | |
| 196 FN(HistogramClear)(&out[clusters[i]]); | |
| 197 } | |
| 198 for (i = 0; i < in_size; ++i) { | |
| 199 FN(HistogramAddHistogram)(&out[symbols[i]], &in[i]); | |
| 200 } | |
| 201 }) | |
| 202 | |
| 203 /* Reorders elements of the out[0..length) array and changes values in | |
| 204 symbols[0..length) array in the following way: | |
| 205 * when called, symbols[] contains indexes into out[], and has N unique | |
| 206 values (possibly N < length) | |
| 207 * on return, symbols'[i] = f(symbols[i]) and | |
| 208 out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length, | |
| 209 where f is a bijection between the range of symbols[] and [0..N), and | |
| 210 the first occurrences of values in symbols'[i] come in consecutive | |
| 211 increasing order. | |
| 212 Returns N, the number of unique values in symbols[]. */ | |
| 213 BROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m, | |
| 214 HistogramType* out, uint32_t* symbols, size_t length) CODE({ | |
| 215 static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX; | |
| 216 uint32_t* new_index = BROTLI_ALLOC(m, uint32_t, length); | |
| 217 uint32_t next_index; | |
| 218 HistogramType* tmp; | |
| 219 size_t i; | |
| 220 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(new_index)) return 0; | |
| 221 for (i = 0; i < length; ++i) { | |
| 222 new_index[i] = kInvalidIndex; | |
| 223 } | |
| 224 next_index = 0; | |
| 225 for (i = 0; i < length; ++i) { | |
| 226 if (new_index[symbols[i]] == kInvalidIndex) { | |
| 227 new_index[symbols[i]] = next_index; | |
| 228 ++next_index; | |
| 229 } | |
| 230 } | |
| 231 /* TODO(eustas): by using idea of "cycle-sort" we can avoid allocation of | |
| 232 tmp and reduce the number of copying by the factor of 2. */ | |
| 233 tmp = BROTLI_ALLOC(m, HistogramType, next_index); | |
| 234 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(tmp)) return 0; | |
| 235 next_index = 0; | |
| 236 for (i = 0; i < length; ++i) { | |
| 237 if (new_index[symbols[i]] == next_index) { | |
| 238 tmp[next_index] = out[symbols[i]]; | |
| 239 ++next_index; | |
| 240 } | |
| 241 symbols[i] = new_index[symbols[i]]; | |
| 242 } | |
| 243 BROTLI_FREE(m, new_index); | |
| 244 for (i = 0; i < next_index; ++i) { | |
| 245 out[i] = tmp[i]; | |
| 246 } | |
| 247 BROTLI_FREE(m, tmp); | |
| 248 return next_index; | |
| 249 }) | |
| 250 | |
| 251 BROTLI_INTERNAL void FN(BrotliClusterHistograms)( | |
| 252 MemoryManager* m, const HistogramType* in, const size_t in_size, | |
| 253 size_t max_histograms, HistogramType* out, size_t* out_size, | |
| 254 uint32_t* histogram_symbols) CODE({ | |
| 255 uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size); | |
| 256 uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size); | |
| 257 size_t num_clusters = 0; | |
| 258 const size_t max_input_histograms = 64; | |
| 259 size_t pairs_capacity = max_input_histograms * max_input_histograms / 2; | |
| 260 /* For the first pass of clustering, we allow all pairs. */ | |
| 261 HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1); | |
| 262 /* TODO(eustas): move to "persistent" arena? */ | |
| 263 HistogramType* tmp = BROTLI_ALLOC(m, HistogramType, 1); | |
| 264 size_t i; | |
| 265 | |
| 266 if (BROTLI_IS_OOM(m) || BROTLI_IS_NULL(cluster_size) || | |
| 267 BROTLI_IS_NULL(clusters) || BROTLI_IS_NULL(pairs)|| BROTLI_IS_NULL(tmp)) { | |
| 268 return; | |
| 269 } | |
| 270 | |
| 271 for (i = 0; i < in_size; ++i) { | |
| 272 cluster_size[i] = 1; | |
| 273 } | |
| 274 | |
| 275 for (i = 0; i < in_size; ++i) { | |
| 276 out[i] = in[i]; | |
| 277 out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]); | |
| 278 histogram_symbols[i] = (uint32_t)i; | |
| 279 } | |
| 280 | |
| 281 for (i = 0; i < in_size; i += max_input_histograms) { | |
| 282 size_t num_to_combine = | |
| 283 BROTLI_MIN(size_t, in_size - i, max_input_histograms); | |
| 284 size_t num_new_clusters; | |
| 285 size_t j; | |
| 286 for (j = 0; j < num_to_combine; ++j) { | |
| 287 clusters[num_clusters + j] = (uint32_t)(i + j); | |
| 288 } | |
| 289 num_new_clusters = | |
| 290 FN(BrotliHistogramCombine)(out, tmp, cluster_size, | |
| 291 &histogram_symbols[i], | |
| 292 &clusters[num_clusters], pairs, | |
| 293 num_to_combine, num_to_combine, | |
| 294 max_histograms, pairs_capacity); | |
| 295 num_clusters += num_new_clusters; | |
| 296 } | |
| 297 | |
| 298 { | |
| 299 /* For the second pass, we limit the total number of histogram pairs. | |
| 300 After this limit is reached, we only keep searching for the best pair. */ | |
| 301 size_t max_num_pairs = BROTLI_MIN(size_t, | |
| 302 64 * num_clusters, (num_clusters / 2) * num_clusters); | |
| 303 BROTLI_ENSURE_CAPACITY( | |
| 304 m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1); | |
| 305 if (BROTLI_IS_OOM(m)) return; | |
| 306 | |
| 307 /* Collapse similar histograms. */ | |
| 308 num_clusters = FN(BrotliHistogramCombine)(out, tmp, cluster_size, | |
| 309 histogram_symbols, clusters, | |
| 310 pairs, num_clusters, in_size, | |
| 311 max_histograms, max_num_pairs); | |
| 312 } | |
| 313 BROTLI_FREE(m, pairs); | |
| 314 BROTLI_FREE(m, cluster_size); | |
| 315 /* Find the optimal map from original histograms to the final ones. */ | |
| 316 FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters, | |
| 317 out, tmp, histogram_symbols); | |
| 318 BROTLI_FREE(m, tmp); | |
| 319 BROTLI_FREE(m, clusters); | |
| 320 /* Convert the context map to a canonical form. */ | |
| 321 *out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size); | |
| 322 if (BROTLI_IS_OOM(m)) return; | |
| 323 }) | |
| 324 | |
| 325 #undef HistogramType |
