Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/zxing-cpp/core/src/Content.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 /* | |
| 2 * Copyright 2022 Axel Waggershauser | |
| 3 */ | |
| 4 // SPDX-License-Identifier: Apache-2.0 | |
| 5 | |
| 6 #include "Content.h" | |
| 7 | |
| 8 #include "CharacterSet.h" | |
| 9 #include "ECI.h" | |
| 10 #include "HRI.h" | |
| 11 #include "TextDecoder.h" | |
| 12 #include "Utf.h" | |
| 13 #include "ZXAlgorithms.h" | |
| 14 | |
| 15 #if !defined(ZXING_READERS) && !defined(ZXING_WRITERS) | |
| 16 #include "Version.h" | |
| 17 #endif | |
| 18 | |
| 19 #include <cctype> | |
| 20 | |
| 21 namespace ZXing { | |
| 22 | |
| 23 std::string ToString(ContentType type) | |
| 24 { | |
| 25 const char* t2s[] = {"Text", "Binary", "Mixed", "GS1", "ISO15434", "UnknownECI"}; | |
| 26 return t2s[static_cast<int>(type)]; | |
| 27 } | |
| 28 | |
| 29 template <typename FUNC> | |
| 30 void Content::ForEachECIBlock(FUNC func) const | |
| 31 { | |
| 32 ECI defaultECI = hasECI ? ECI::ISO8859_1 : ECI::Unknown; | |
| 33 if (encodings.empty()) | |
| 34 func(defaultECI, 0, Size(bytes)); | |
| 35 else if (encodings.front().pos != 0) | |
| 36 func(defaultECI, 0, encodings.front().pos); | |
| 37 | |
| 38 for (int i = 0; i < Size(encodings); ++i) { | |
| 39 auto [eci, start] = encodings[i]; | |
| 40 int end = i + 1 == Size(encodings) ? Size(bytes) : encodings[i + 1].pos; | |
| 41 | |
| 42 if (start != end) | |
| 43 func(eci, start, end); | |
| 44 } | |
| 45 } | |
| 46 | |
| 47 void Content::switchEncoding(ECI eci, bool isECI) | |
| 48 { | |
| 49 // remove all non-ECI entries on first ECI entry | |
| 50 if (isECI && !hasECI) | |
| 51 encodings.clear(); | |
| 52 if (isECI || !hasECI) | |
| 53 encodings.push_back({eci, Size(bytes)}); | |
| 54 | |
| 55 hasECI |= isECI; | |
| 56 } | |
| 57 | |
| 58 Content::Content() {} | |
| 59 | |
| 60 Content::Content(ByteArray&& bytes, SymbologyIdentifier si) : bytes(std::move(bytes)), symbology(si) {} | |
| 61 | |
| 62 void Content::switchEncoding(CharacterSet cs) | |
| 63 { | |
| 64 switchEncoding(ToECI(cs), false); | |
| 65 } | |
| 66 | |
| 67 void Content::append(const Content& other) | |
| 68 { | |
| 69 if (!hasECI && other.hasECI) | |
| 70 encodings.clear(); | |
| 71 if (other.hasECI || !hasECI) | |
| 72 for (auto& e : other.encodings) | |
| 73 encodings.push_back({e.eci, Size(bytes) + e.pos}); | |
| 74 append(other.bytes); | |
| 75 | |
| 76 hasECI |= other.hasECI; | |
| 77 } | |
| 78 | |
| 79 void Content::erase(int pos, int n) | |
| 80 { | |
| 81 bytes.erase(bytes.begin() + pos, bytes.begin() + pos + n); | |
| 82 for (auto& e : encodings) | |
| 83 if (e.pos > pos) | |
| 84 pos -= n; | |
| 85 } | |
| 86 | |
| 87 void Content::insert(int pos, const std::string& str) | |
| 88 { | |
| 89 bytes.insert(bytes.begin() + pos, str.begin(), str.end()); | |
| 90 for (auto& e : encodings) | |
| 91 if (e.pos > pos) | |
| 92 pos += Size(str); | |
| 93 } | |
| 94 | |
| 95 bool Content::canProcess() const | |
| 96 { | |
| 97 return std::all_of(encodings.begin(), encodings.end(), [](Encoding e) { return CanProcess(e.eci); }); | |
| 98 } | |
| 99 | |
| 100 std::string Content::render(bool withECI) const | |
| 101 { | |
| 102 if (empty() || !canProcess()) | |
| 103 return {}; | |
| 104 | |
| 105 #ifdef ZXING_READERS | |
| 106 std::string res; | |
| 107 if (withECI) | |
| 108 res = symbology.toString(true); | |
| 109 ECI lastECI = ECI::Unknown; | |
| 110 auto fallbackCS = defaultCharset; | |
| 111 if (!hasECI && fallbackCS == CharacterSet::Unknown) | |
| 112 fallbackCS = guessEncoding(); | |
| 113 | |
| 114 ForEachECIBlock([&](ECI eci, int begin, int end) { | |
| 115 // first determine how to decode the content (choose character set) | |
| 116 // * eci == ECI::Unknown implies !hasECI and we guess | |
| 117 // * if !IsText(eci) the ToCharcterSet(eci) will return Unknown and we decode as binary | |
| 118 CharacterSet cs = eci == ECI::Unknown ? fallbackCS : ToCharacterSet(eci); | |
| 119 | |
| 120 if (withECI) { | |
| 121 // then find the eci to report back in the ECI designator | |
| 122 if (IsText(ToECI(cs))) // everything decoded as text is reported as utf8 | |
| 123 eci = ECI::UTF8; | |
| 124 else if (eci == ECI::Unknown) // implies !hasECI and fallbackCS is Unknown or Binary | |
| 125 eci = ECI::Binary; | |
| 126 | |
| 127 if (lastECI != eci) | |
| 128 res += ToString(eci); | |
| 129 lastECI = eci; | |
| 130 | |
| 131 std::string tmp; | |
| 132 TextDecoder::Append(tmp, bytes.data() + begin, end - begin, cs); | |
| 133 for (auto c : tmp) { | |
| 134 res += c; | |
| 135 if (c == '\\') // in the ECI protocol a '\' has to be doubled | |
| 136 res += c; | |
| 137 } | |
| 138 } else { | |
| 139 TextDecoder::Append(res, bytes.data() + begin, end - begin, cs); | |
| 140 } | |
| 141 }); | |
| 142 | |
| 143 return res; | |
| 144 #else | |
| 145 //TODO: replace by proper construction from encoded data from within zint | |
| 146 return std::string(bytes.asString()); | |
| 147 #endif | |
| 148 } | |
| 149 | |
| 150 std::string Content::text(TextMode mode) const | |
| 151 { | |
| 152 switch (mode) { | |
| 153 case TextMode::Plain: return render(false); | |
| 154 case TextMode::ECI: return render(true); | |
| 155 case TextMode::HRI: | |
| 156 switch (type()) { | |
| 157 #ifdef ZXING_READERS | |
| 158 case ContentType::GS1: { | |
| 159 auto plain = render(false); | |
| 160 auto hri = HRIFromGS1(plain); | |
| 161 return hri.empty() ? plain : hri; | |
| 162 } | |
| 163 case ContentType::ISO15434: return HRIFromISO15434(render(false)); | |
| 164 case ContentType::Text: return render(false); | |
| 165 #endif | |
| 166 default: return text(TextMode::Escaped); | |
| 167 } | |
| 168 case TextMode::Hex: return ToHex(bytes); | |
| 169 case TextMode::Escaped: return EscapeNonGraphical(render(false)); | |
| 170 } | |
| 171 | |
| 172 return {}; // silence compiler warning | |
| 173 } | |
| 174 | |
| 175 std::wstring Content::utfW() const | |
| 176 { | |
| 177 return FromUtf8(render(false)); | |
| 178 } | |
| 179 | |
| 180 ByteArray Content::bytesECI() const | |
| 181 { | |
| 182 if (empty()) | |
| 183 return {}; | |
| 184 | |
| 185 std::string res = symbology.toString(true); | |
| 186 | |
| 187 ForEachECIBlock([&](ECI eci, int begin, int end) { | |
| 188 if (hasECI) | |
| 189 res += ToString(eci); | |
| 190 | |
| 191 for (int i = begin; i != end; ++i) { | |
| 192 char c = static_cast<char>(bytes[i]); | |
| 193 res += c; | |
| 194 if (c == '\\') // in the ECI protocol a '\' has to be doubled | |
| 195 res += c; | |
| 196 } | |
| 197 }); | |
| 198 | |
| 199 return ByteArray(res); | |
| 200 } | |
| 201 | |
| 202 CharacterSet Content::guessEncoding() const | |
| 203 { | |
| 204 #ifdef ZXING_READERS | |
| 205 // assemble all blocks with unknown encoding | |
| 206 ByteArray input; | |
| 207 ForEachECIBlock([&](ECI eci, int begin, int end) { | |
| 208 if (eci == ECI::Unknown) | |
| 209 input.insert(input.end(), bytes.begin() + begin, bytes.begin() + end); | |
| 210 }); | |
| 211 | |
| 212 if (input.empty()) | |
| 213 return CharacterSet::Unknown; | |
| 214 | |
| 215 return TextDecoder::GuessEncoding(input.data(), input.size(), CharacterSet::ISO8859_1); | |
| 216 #else | |
| 217 return CharacterSet::Unknown; | |
| 218 #endif | |
| 219 } | |
| 220 | |
| 221 ContentType Content::type() const | |
| 222 { | |
| 223 #ifdef ZXING_READERS | |
| 224 if (empty()) | |
| 225 return ContentType::Text; | |
| 226 | |
| 227 if (!canProcess()) | |
| 228 return ContentType::UnknownECI; | |
| 229 | |
| 230 if (symbology.aiFlag == AIFlag::GS1) | |
| 231 return ContentType::GS1; | |
| 232 | |
| 233 // check for the absolut minimum of a ISO 15434 conforming message ("[)>" + RS + digit + digit) | |
| 234 if (bytes.size() > 6 && bytes.asString(0, 4) == "[)>\x1E" && std::isdigit(bytes[4]) && std::isdigit(bytes[5])) | |
| 235 return ContentType::ISO15434; | |
| 236 | |
| 237 ECI fallback = ToECI(guessEncoding()); | |
| 238 std::vector<bool> binaryECIs; | |
| 239 ForEachECIBlock([&](ECI eci, int begin, int end) { | |
| 240 if (eci == ECI::Unknown) | |
| 241 eci = fallback; | |
| 242 binaryECIs.push_back((!IsText(eci) | |
| 243 || (ToInt(eci) > 0 && ToInt(eci) < 28 && ToInt(eci) != 25 | |
| 244 && std::any_of(bytes.begin() + begin, bytes.begin() + end, | |
| 245 [](auto c) { return c < 0x20 && c != 0x9 && c != 0xa && c != 0xd; })))); | |
| 246 }); | |
| 247 | |
| 248 if (!Contains(binaryECIs, true)) | |
| 249 return ContentType::Text; | |
| 250 if (!Contains(binaryECIs, false)) | |
| 251 return ContentType::Binary; | |
| 252 | |
| 253 return ContentType::Mixed; | |
| 254 #else | |
| 255 //TODO: replace by proper construction from encoded data from within zint | |
| 256 return ContentType::Text; | |
| 257 #endif | |
| 258 } | |
| 259 | |
| 260 } // namespace ZXing |
