Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/zxing-cpp/core/src/GlobalHistogramBinarizer.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/zxing-cpp/core/src/GlobalHistogramBinarizer.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,168 @@ +/* +* Copyright 2016 Nu-book Inc. +* Copyright 2016 ZXing authors +*/ +// SPDX-License-Identifier: Apache-2.0 + +#include "GlobalHistogramBinarizer.h" + +#include "BitMatrix.h" +#include "Pattern.h" +#include "ZXConfig.h" + +#include <algorithm> +#include <array> +#include <utility> + +namespace ZXing { + +static constexpr int LUMINANCE_BITS = 5; +static constexpr int LUMINANCE_SHIFT = 8 - LUMINANCE_BITS; +static constexpr int LUMINANCE_BUCKETS = 1 << LUMINANCE_BITS; + +using Histogram = std::array<uint16_t, LUMINANCE_BUCKETS>; + +GlobalHistogramBinarizer::GlobalHistogramBinarizer(const ImageView& buffer) : BinaryBitmap(buffer) {} + +GlobalHistogramBinarizer::~GlobalHistogramBinarizer() = default; + +using ImageLineView = Range<StrideIter<const uint8_t*>>; + +inline ImageLineView RowView(const ImageView& iv, int row) +{ + return {{iv.data(0, row), iv.pixStride()}, {iv.data(iv.width(), row), iv.pixStride()}}; +} + +static void ThresholdSharpened(const ImageLineView in, int threshold, std::vector<uint8_t>& out) +{ + out.resize(in.size()); + auto i = in.begin(); + auto o = out.begin(); + + *o++ = (*i++ <= threshold) * BitMatrix::SET_V; + for (auto end = in.end() - 1; i != end; ++i) + *o++ = ((-i[-1] + (int(i[0]) * 4) - i[1]) / 2 <= threshold) * BitMatrix::SET_V; + *o++ = (*i++ <= threshold) * BitMatrix::SET_V; +} + +static auto GenHistogram(const ImageLineView line) +{ + // This code causes about 20% of the total runtime on an AVX2 system for a EAN13 search on Lum input data. + // Trying to increase the performance by performing 2 or 4 "parallel" histograms helped nothing. + Histogram res = {}; + for (auto pix : line) + res[pix >> LUMINANCE_SHIFT]++; + return res; +} + +// Return -1 on error +static int EstimateBlackPoint(const Histogram& buckets) +{ + // Find the tallest peak in the histogram. 
+ auto firstPeakPos = std::max_element(buckets.begin(), buckets.end()); + int firstPeak = narrow_cast<int>(firstPeakPos - buckets.begin()); + int firstPeakSize = *firstPeakPos; + int maxBucketCount = firstPeakSize; + + // Find the second-tallest peak which is somewhat far from the tallest peak. + int secondPeak = 0; + int secondPeakScore = 0; + for (int x = 0; x < Size(buckets); x++) { + int distanceToBiggest = x - firstPeak; + // Encourage more distant second peaks by multiplying by square of distance. + int score = buckets[x] * distanceToBiggest * distanceToBiggest; + if (score > secondPeakScore) { + secondPeak = x; + secondPeakScore = score; + } + } + + // Make sure firstPeak corresponds to the black peak. + if (firstPeak > secondPeak) { + std::swap(firstPeak, secondPeak); + } + + // If there is too little contrast in the image to pick a meaningful black point, throw rather + // than waste time trying to decode the image, and risk false positives. + if (secondPeak - firstPeak <= LUMINANCE_BUCKETS / 16) { + return -1; + } + + // Find a valley between them that is low and closer to the white peak. 
+ int bestValley = secondPeak - 1; + int bestValleyScore = -1; + for (int x = secondPeak - 1; x > firstPeak; x--) { + int fromFirst = x - firstPeak; + int score = fromFirst * fromFirst * (secondPeak - x) * (maxBucketCount - buckets[x]); + if (score > bestValleyScore) { + bestValley = x; + bestValleyScore = score; + } + } + + return bestValley << LUMINANCE_SHIFT; +} + +bool GlobalHistogramBinarizer::getPatternRow(int row, int rotation, PatternRow& res) const +{ + auto buffer = _buffer.rotated(rotation); + auto lineView = RowView(buffer, row); + + if (buffer.width() < 3) + return false; // special casing the code below for a width < 3 makes no sense + +#if defined(__AVX__) // or defined(__ARM_NEON) + // If we are extracting a column (instead of a row), we run into cache misses on every pixel access both + // during the histogram calculation and during the sharpen+threshold operation. Additionally, if we + // perform the ThresholdSharpened function on pixStride==1 data, the auto-vectorizer makes that part + // 8x faster on an AVX2 cpu which easily recovers the extra cost that we pay for the copying. + ZX_THREAD_LOCAL std::vector<uint8_t> line; + if (std::abs(buffer.pixStride()) > 4) { + line.resize(lineView.size()); + std::copy(lineView.begin(), lineView.end(), line.begin()); + lineView = {{line.data(), 1}, {line.data() + line.size(), 1}}; + } +#endif + + auto threshold = EstimateBlackPoint(GenHistogram(lineView)) - 1; + if (threshold <= 0) + return false; + + ZX_THREAD_LOCAL std::vector<uint8_t> binarized; + // the optimizer can generate a specialized version for pixStride==1 (non-rotated input) that is about 8x faster on AVX2 hardware + if (lineView.begin().stride == 1) + ThresholdSharpened(lineView, threshold, binarized); + else + ThresholdSharpened(lineView, threshold, binarized); + GetPatternRow(Range(binarized), res); + + return true; +} + +// Does not sharpen the data, as this call is intended to only be used by 2D Readers. 
+std::shared_ptr<const BitMatrix> +GlobalHistogramBinarizer::getBlackMatrix() const +{ + // Quickly calculates the histogram by sampling four rows from the image. This proved to be + // more robust on the blackbox tests than sampling a diagonal as we used to do. + Histogram localBuckets = {}; + { + for (int y = 1; y < 5; y++) { + int row = height() * y / 5; + const uint8_t* luminances = _buffer.data(0, row); + int right = (width() * 4) / 5; + for (int x = width() / 5; x < right; x++) + localBuckets[luminances[x] >> LUMINANCE_SHIFT]++; + } + } + + int blackPoint = EstimateBlackPoint(localBuckets); + if (blackPoint <= 0) + return {}; + + + + return std::make_shared<const BitMatrix>(binarize(blackPoint)); +} + +} // ZXing
