comparison mupdf-source/thirdparty/zxing-cpp/core/src/Content.cpp @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
comparison
equal deleted inserted replaced
1:1d09e1dec1d9 2:b50eed0cc0ef
1 /*
2 * Copyright 2022 Axel Waggershauser
3 */
4 // SPDX-License-Identifier: Apache-2.0
5
6 #include "Content.h"
7
8 #include "CharacterSet.h"
9 #include "ECI.h"
10 #include "HRI.h"
11 #include "TextDecoder.h"
12 #include "Utf.h"
13 #include "ZXAlgorithms.h"
14
15 #if !defined(ZXING_READERS) && !defined(ZXING_WRITERS)
16 #include "Version.h"
17 #endif
18
19 #include <cctype>
20
21 namespace ZXing {
22
23 std::string ToString(ContentType type)
24 {
25 const char* t2s[] = {"Text", "Binary", "Mixed", "GS1", "ISO15434", "UnknownECI"};
26 return t2s[static_cast<int>(type)];
27 }
28
29 template <typename FUNC>
30 void Content::ForEachECIBlock(FUNC func) const
31 {
32 ECI defaultECI = hasECI ? ECI::ISO8859_1 : ECI::Unknown;
33 if (encodings.empty())
34 func(defaultECI, 0, Size(bytes));
35 else if (encodings.front().pos != 0)
36 func(defaultECI, 0, encodings.front().pos);
37
38 for (int i = 0; i < Size(encodings); ++i) {
39 auto [eci, start] = encodings[i];
40 int end = i + 1 == Size(encodings) ? Size(bytes) : encodings[i + 1].pos;
41
42 if (start != end)
43 func(eci, start, end);
44 }
45 }
46
47 void Content::switchEncoding(ECI eci, bool isECI)
48 {
49 // remove all non-ECI entries on first ECI entry
50 if (isECI && !hasECI)
51 encodings.clear();
52 if (isECI || !hasECI)
53 encodings.push_back({eci, Size(bytes)});
54
55 hasECI |= isECI;
56 }
57
58 Content::Content() {}
59
60 Content::Content(ByteArray&& bytes, SymbologyIdentifier si) : bytes(std::move(bytes)), symbology(si) {}
61
62 void Content::switchEncoding(CharacterSet cs)
63 {
64 switchEncoding(ToECI(cs), false);
65 }
66
67 void Content::append(const Content& other)
68 {
69 if (!hasECI && other.hasECI)
70 encodings.clear();
71 if (other.hasECI || !hasECI)
72 for (auto& e : other.encodings)
73 encodings.push_back({e.eci, Size(bytes) + e.pos});
74 append(other.bytes);
75
76 hasECI |= other.hasECI;
77 }
78
79 void Content::erase(int pos, int n)
80 {
81 bytes.erase(bytes.begin() + pos, bytes.begin() + pos + n);
82 for (auto& e : encodings)
83 if (e.pos > pos)
84 pos -= n;
85 }
86
87 void Content::insert(int pos, const std::string& str)
88 {
89 bytes.insert(bytes.begin() + pos, str.begin(), str.end());
90 for (auto& e : encodings)
91 if (e.pos > pos)
92 pos += Size(str);
93 }
94
95 bool Content::canProcess() const
96 {
97 return std::all_of(encodings.begin(), encodings.end(), [](Encoding e) { return CanProcess(e.eci); });
98 }
99
100 std::string Content::render(bool withECI) const
101 {
102 if (empty() || !canProcess())
103 return {};
104
105 #ifdef ZXING_READERS
106 std::string res;
107 if (withECI)
108 res = symbology.toString(true);
109 ECI lastECI = ECI::Unknown;
110 auto fallbackCS = defaultCharset;
111 if (!hasECI && fallbackCS == CharacterSet::Unknown)
112 fallbackCS = guessEncoding();
113
114 ForEachECIBlock([&](ECI eci, int begin, int end) {
115 // first determine how to decode the content (choose character set)
116 // * eci == ECI::Unknown implies !hasECI and we guess
117 // * if !IsText(eci) the ToCharcterSet(eci) will return Unknown and we decode as binary
118 CharacterSet cs = eci == ECI::Unknown ? fallbackCS : ToCharacterSet(eci);
119
120 if (withECI) {
121 // then find the eci to report back in the ECI designator
122 if (IsText(ToECI(cs))) // everything decoded as text is reported as utf8
123 eci = ECI::UTF8;
124 else if (eci == ECI::Unknown) // implies !hasECI and fallbackCS is Unknown or Binary
125 eci = ECI::Binary;
126
127 if (lastECI != eci)
128 res += ToString(eci);
129 lastECI = eci;
130
131 std::string tmp;
132 TextDecoder::Append(tmp, bytes.data() + begin, end - begin, cs);
133 for (auto c : tmp) {
134 res += c;
135 if (c == '\\') // in the ECI protocol a '\' has to be doubled
136 res += c;
137 }
138 } else {
139 TextDecoder::Append(res, bytes.data() + begin, end - begin, cs);
140 }
141 });
142
143 return res;
144 #else
145 //TODO: replace by proper construction from encoded data from within zint
146 return std::string(bytes.asString());
147 #endif
148 }
149
150 std::string Content::text(TextMode mode) const
151 {
152 switch (mode) {
153 case TextMode::Plain: return render(false);
154 case TextMode::ECI: return render(true);
155 case TextMode::HRI:
156 switch (type()) {
157 #ifdef ZXING_READERS
158 case ContentType::GS1: {
159 auto plain = render(false);
160 auto hri = HRIFromGS1(plain);
161 return hri.empty() ? plain : hri;
162 }
163 case ContentType::ISO15434: return HRIFromISO15434(render(false));
164 case ContentType::Text: return render(false);
165 #endif
166 default: return text(TextMode::Escaped);
167 }
168 case TextMode::Hex: return ToHex(bytes);
169 case TextMode::Escaped: return EscapeNonGraphical(render(false));
170 }
171
172 return {}; // silence compiler warning
173 }
174
175 std::wstring Content::utfW() const
176 {
177 return FromUtf8(render(false));
178 }
179
180 ByteArray Content::bytesECI() const
181 {
182 if (empty())
183 return {};
184
185 std::string res = symbology.toString(true);
186
187 ForEachECIBlock([&](ECI eci, int begin, int end) {
188 if (hasECI)
189 res += ToString(eci);
190
191 for (int i = begin; i != end; ++i) {
192 char c = static_cast<char>(bytes[i]);
193 res += c;
194 if (c == '\\') // in the ECI protocol a '\' has to be doubled
195 res += c;
196 }
197 });
198
199 return ByteArray(res);
200 }
201
202 CharacterSet Content::guessEncoding() const
203 {
204 #ifdef ZXING_READERS
205 // assemble all blocks with unknown encoding
206 ByteArray input;
207 ForEachECIBlock([&](ECI eci, int begin, int end) {
208 if (eci == ECI::Unknown)
209 input.insert(input.end(), bytes.begin() + begin, bytes.begin() + end);
210 });
211
212 if (input.empty())
213 return CharacterSet::Unknown;
214
215 return TextDecoder::GuessEncoding(input.data(), input.size(), CharacterSet::ISO8859_1);
216 #else
217 return CharacterSet::Unknown;
218 #endif
219 }
220
221 ContentType Content::type() const
222 {
223 #ifdef ZXING_READERS
224 if (empty())
225 return ContentType::Text;
226
227 if (!canProcess())
228 return ContentType::UnknownECI;
229
230 if (symbology.aiFlag == AIFlag::GS1)
231 return ContentType::GS1;
232
233 // check for the absolut minimum of a ISO 15434 conforming message ("[)>" + RS + digit + digit)
234 if (bytes.size() > 6 && bytes.asString(0, 4) == "[)>\x1E" && std::isdigit(bytes[4]) && std::isdigit(bytes[5]))
235 return ContentType::ISO15434;
236
237 ECI fallback = ToECI(guessEncoding());
238 std::vector<bool> binaryECIs;
239 ForEachECIBlock([&](ECI eci, int begin, int end) {
240 if (eci == ECI::Unknown)
241 eci = fallback;
242 binaryECIs.push_back((!IsText(eci)
243 || (ToInt(eci) > 0 && ToInt(eci) < 28 && ToInt(eci) != 25
244 && std::any_of(bytes.begin() + begin, bytes.begin() + end,
245 [](auto c) { return c < 0x20 && c != 0x9 && c != 0xa && c != 0xd; }))));
246 });
247
248 if (!Contains(binaryECIs, true))
249 return ContentType::Text;
250 if (!Contains(binaryECIs, false))
251 return ContentType::Binary;
252
253 return ContentType::Mixed;
254 #else
255 //TODO: replace by proper construction from encoded data from within zint
256 return ContentType::Text;
257 #endif
258 }
259
260 } // namespace ZXing