Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/thirdparty/zxing-cpp/core/src/Content.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/thirdparty/zxing-cpp/core/src/Content.cpp Mon Sep 15 11:43:07 2025 +0200 @@ -0,0 +1,260 @@ +/* + * Copyright 2022 Axel Waggershauser +*/ +// SPDX-License-Identifier: Apache-2.0 + +#include "Content.h" + +#include "CharacterSet.h" +#include "ECI.h" +#include "HRI.h" +#include "TextDecoder.h" +#include "Utf.h" +#include "ZXAlgorithms.h" + +#if !defined(ZXING_READERS) && !defined(ZXING_WRITERS) +#include "Version.h" +#endif + +#include <cctype> + +namespace ZXing { + +std::string ToString(ContentType type) +{ + const char* t2s[] = {"Text", "Binary", "Mixed", "GS1", "ISO15434", "UnknownECI"}; + return t2s[static_cast<int>(type)]; +} + +template <typename FUNC> +void Content::ForEachECIBlock(FUNC func) const +{ + ECI defaultECI = hasECI ? ECI::ISO8859_1 : ECI::Unknown; + if (encodings.empty()) + func(defaultECI, 0, Size(bytes)); + else if (encodings.front().pos != 0) + func(defaultECI, 0, encodings.front().pos); + + for (int i = 0; i < Size(encodings); ++i) { + auto [eci, start] = encodings[i]; + int end = i + 1 == Size(encodings) ? Size(bytes) : encodings[i + 1].pos; + + if (start != end) + func(eci, start, end); + } +} + +void Content::switchEncoding(ECI eci, bool isECI) +{ + // remove all non-ECI entries on first ECI entry + if (isECI && !hasECI) + encodings.clear(); + if (isECI || !hasECI) + encodings.push_back({eci, Size(bytes)}); + + hasECI |= isECI; +} + +Content::Content() {} + +Content::Content(ByteArray&& bytes, SymbologyIdentifier si) : bytes(std::move(bytes)), symbology(si) {} + +void Content::switchEncoding(CharacterSet cs) +{ + switchEncoding(ToECI(cs), false); +} + +void Content::append(const Content& other) +{ + if (!hasECI && other.hasECI) + encodings.clear(); + if (other.hasECI || !hasECI) + for (auto& e : other.encodings) + encodings.push_back({e.eci, Size(bytes) + e.pos}); + append(other.bytes); + + hasECI |= other.hasECI; +} + +void Content::erase(int pos, int n) +{ + bytes.erase(bytes.begin() + pos, bytes.begin() + pos + n); + for (auto& e : encodings) + if (e.pos > pos) + pos -= n; +} + +void Content::insert(int pos, const std::string& str) +{ + bytes.insert(bytes.begin() + pos, str.begin(), str.end()); + for (auto& e : encodings) + if (e.pos > pos) + pos += Size(str); +} + +bool Content::canProcess() const +{ + return std::all_of(encodings.begin(), encodings.end(), [](Encoding e) { return CanProcess(e.eci); }); +} + +std::string Content::render(bool withECI) const +{ + if (empty() || !canProcess()) + return {}; + +#ifdef ZXING_READERS + std::string res; + if (withECI) + res = symbology.toString(true); + ECI lastECI = ECI::Unknown; + auto fallbackCS = defaultCharset; + if (!hasECI && fallbackCS == CharacterSet::Unknown) + fallbackCS = guessEncoding(); + + ForEachECIBlock([&](ECI eci, int begin, int end) { + // first determine how to decode the content (choose character set) + // * eci == ECI::Unknown implies !hasECI and we guess + // * if !IsText(eci) the ToCharcterSet(eci) will return Unknown and we decode as binary + CharacterSet cs = eci == ECI::Unknown ? fallbackCS : ToCharacterSet(eci); + + if (withECI) { + // then find the eci to report back in the ECI designator + if (IsText(ToECI(cs))) // everything decoded as text is reported as utf8 + eci = ECI::UTF8; + else if (eci == ECI::Unknown) // implies !hasECI and fallbackCS is Unknown or Binary + eci = ECI::Binary; + + if (lastECI != eci) + res += ToString(eci); + lastECI = eci; + + std::string tmp; + TextDecoder::Append(tmp, bytes.data() + begin, end - begin, cs); + for (auto c : tmp) { + res += c; + if (c == '\\') // in the ECI protocol a '\' has to be doubled + res += c; + } + } else { + TextDecoder::Append(res, bytes.data() + begin, end - begin, cs); + } + }); + + return res; +#else + //TODO: replace by proper construction from encoded data from within zint + return std::string(bytes.asString()); +#endif +} + +std::string Content::text(TextMode mode) const +{ + switch (mode) { + case TextMode::Plain: return render(false); + case TextMode::ECI: return render(true); + case TextMode::HRI: + switch (type()) { +#ifdef ZXING_READERS + case ContentType::GS1: { + auto plain = render(false); + auto hri = HRIFromGS1(plain); + return hri.empty() ? plain : hri; + } + case ContentType::ISO15434: return HRIFromISO15434(render(false)); + case ContentType::Text: return render(false); +#endif + default: return text(TextMode::Escaped); + } + case TextMode::Hex: return ToHex(bytes); + case TextMode::Escaped: return EscapeNonGraphical(render(false)); + } + + return {}; // silence compiler warning +} + +std::wstring Content::utfW() const +{ + return FromUtf8(render(false)); +} + +ByteArray Content::bytesECI() const +{ + if (empty()) + return {}; + + std::string res = symbology.toString(true); + + ForEachECIBlock([&](ECI eci, int begin, int end) { + if (hasECI) + res += ToString(eci); + + for (int i = begin; i != end; ++i) { + char c = static_cast<char>(bytes[i]); + res += c; + if (c == '\\') // in the ECI protocol a '\' has to be doubled + res += c; + } + }); + + return ByteArray(res); +} + +CharacterSet Content::guessEncoding() const +{ +#ifdef ZXING_READERS + // assemble all blocks with unknown encoding + ByteArray input; + ForEachECIBlock([&](ECI eci, int begin, int end) { + if (eci == ECI::Unknown) + input.insert(input.end(), bytes.begin() + begin, bytes.begin() + end); + }); + + if (input.empty()) + return CharacterSet::Unknown; + + return TextDecoder::GuessEncoding(input.data(), input.size(), CharacterSet::ISO8859_1); +#else + return CharacterSet::Unknown; +#endif +} + +ContentType Content::type() const +{ +#ifdef ZXING_READERS + if (empty()) + return ContentType::Text; + + if (!canProcess()) + return ContentType::UnknownECI; + + if (symbology.aiFlag == AIFlag::GS1) + return ContentType::GS1; + + // check for the absolut minimum of a ISO 15434 conforming message ("[)>" + RS + digit + digit) + if (bytes.size() > 6 && bytes.asString(0, 4) == "[)>\x1E" && std::isdigit(bytes[4]) && std::isdigit(bytes[5])) + return ContentType::ISO15434; + + ECI fallback = ToECI(guessEncoding()); + std::vector<bool> binaryECIs; + ForEachECIBlock([&](ECI eci, int begin, int end) { + if (eci == ECI::Unknown) + eci = fallback; + binaryECIs.push_back((!IsText(eci) + || (ToInt(eci) > 0 && ToInt(eci) < 28 && ToInt(eci) != 25 + && std::any_of(bytes.begin() + begin, bytes.begin() + end, + [](auto c) { return c < 0x20 && c != 0x9 && c != 0xa && c != 0xd; })))); + }); + + if (!Contains(binaryECIs, true)) + return ContentType::Text; + if (!Contains(binaryECIs, false)) + return ContentType::Binary; + + return ContentType::Mixed; +#else + //TODO: replace by proper construction from encoded data from within zint + return ContentType::Text; +#endif +} + +} // namespace ZXing
