Mercurial > hgrepos > Python2 > PyMuPDF
comparison mupdf-source/thirdparty/tesseract/sw.cpp @ 2:b50eed0cc0ef upstream
ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4.
The directory name has changed: no version number in the expanded directory now.
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:43:07 +0200 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 1:1d09e1dec1d9 | 2:b50eed0cc0ef |
|---|---|
| 1 void build(Solution &s) | |
| 2 { | |
| 3 auto &tess = s.addProject("google.tesseract", "main"); | |
| 4 tess += Git("https://github.com/tesseract-ocr/tesseract", "", "{v}"); | |
| 5 | |
| 6 auto cppstd = cpp17; | |
| 7 | |
| 8 auto &libtesseract = tess.addTarget<LibraryTarget>("libtesseract"); | |
| 9 { | |
| 10 libtesseract.setChecks("libtesseract"); | |
| 11 | |
| 12 libtesseract.PackageDefinitions = true; | |
| 13 | |
| 14 libtesseract += cppstd; | |
| 15 | |
| 16 libtesseract += "TESS_API"_api; | |
| 17 libtesseract += "include/.*"_rr; | |
| 18 libtesseract += "src/.+/.*"_rr; | |
| 19 libtesseract -= "src/lstm/.*\\.cc"_rr; | |
| 20 libtesseract -= "src/training/.*"_rr; | |
| 21 | |
| 22 libtesseract.Public += "include"_idir; | |
| 23 libtesseract.Protected += | |
| 24 "src/ccmain"_id, | |
| 25 "src/api"_id, | |
| 26 "src/dict"_id, | |
| 27 "src/viewer"_id, | |
| 28 "src/wordrec"_id, | |
| 29 "src/ccstruct"_id, | |
| 30 "src/cutil"_id, | |
| 31 "src/textord"_id, | |
| 32 "src/ccutil"_id, | |
| 33 "src/lstm"_id, | |
| 34 "src/classify"_id, | |
| 35 "src/arch"_id, | |
| 36 "src/training"_id; | |
| 37 | |
| 38 if (libtesseract.getCompilerType() == CompilerType::MSVC || | |
| 39 libtesseract.getCompilerType() == CompilerType::ClangCl) | |
| 40 { | |
| 41 libtesseract += "__SSE4_1__"_def; | |
| 42 libtesseract.CompileOptions.push_back("-arch:AVX2"); | |
| 43 | |
| 44 // openmp | |
| 45 //if (libtesseract.getOptions()["openmp"] == "true") | |
| 46 if (0) | |
| 47 { | |
| 48 if (libtesseract.getCompilerType() == CompilerType::MSVC) | |
| 49 libtesseract.CompileOptions.push_back("-openmp"); | |
| 50 else | |
| 51 libtesseract.CompileOptions.push_back("-fopenmp"); | |
| 52 libtesseract += "_OPENMP=201107"_def; | |
| 53 if (libtesseract.getBuildSettings().Native.ConfigurationType == ConfigurationType::Debug) | |
| 54 libtesseract += "vcompd.lib"_slib; | |
| 55 else | |
| 56 libtesseract += "vcomp.lib"_slib; | |
| 57 } | |
| 58 } | |
| 59 | |
| 60 auto win_or_mingw = | |
| 61 libtesseract.getBuildSettings().TargetOS.Type == OSType::Windows || | |
| 62 libtesseract.getBuildSettings().TargetOS.Type == OSType::Mingw | |
| 63 ; | |
| 64 | |
| 65 // check fma flags | |
| 66 libtesseract -= "src/arch/dotproductfma.cpp"; | |
| 67 // check arch (arm) | |
| 68 libtesseract -= "src/arch/dotproductneon.cpp"; | |
| 69 | |
| 70 if (libtesseract.getBuildSettings().TargetOS.Type != OSType::Windows && | |
| 71 libtesseract.getBuildSettings().TargetOS.Arch != ArchType::aarch64) | |
| 72 { | |
| 73 libtesseract["src/arch/dotproductavx.cpp"].args.push_back("-mavx"); | |
| 74 libtesseract["src/arch/dotproductavx512.cpp"].args.push_back("-mavx512f"); | |
| 75 libtesseract["src/arch/dotproductsse.cpp"].args.push_back("-msse4.1"); | |
| 76 libtesseract["src/arch/intsimdmatrixsse.cpp"].args.push_back("-msse4.1"); | |
| 77 libtesseract["src/arch/intsimdmatrixavx2.cpp"].args.push_back("-mavx2"); | |
| 78 } | |
| 79 if (!win_or_mingw) | |
| 80 { | |
| 81 #if SW_MODULE_ABI_VERSION > 29 | |
| 82 if (!libtesseract.getBuildSettings().TargetOS.Android) | |
| 83 #endif | |
| 84 libtesseract += "pthread"_slib; | |
| 85 } | |
| 86 if (libtesseract.getBuildSettings().TargetOS.Arch == ArchType::aarch64) | |
| 87 { | |
| 88 libtesseract += "src/arch/dotproductneon.cpp"; | |
| 89 } | |
| 90 | |
| 91 libtesseract.Public += "HAVE_CONFIG_H"_d; | |
| 92 libtesseract.Public += "_SILENCE_STDEXT_HASH_DEPRECATION_WARNINGS=1"_d; | |
| 93 libtesseract.Public += "HAVE_LIBARCHIVE"_d; | |
| 94 | |
| 95 libtesseract.Public += "org.sw.demo.danbloomberg.leptonica"_dep; | |
| 96 libtesseract.Public += "org.sw.demo.libarchive.libarchive"_dep; | |
| 97 | |
| 98 if (win_or_mingw) | |
| 99 { | |
| 100 libtesseract.Public += "ws2_32.lib"_slib; | |
| 101 libtesseract.Protected += "NOMINMAX"_def; | |
| 102 } | |
| 103 | |
| 104 if (libtesseract.getCompilerType() == CompilerType::MSVC) | |
| 105 libtesseract.Protected.CompileOptions.push_back("-utf-8"); | |
| 106 | |
| 107 libtesseract.Variables["TESSERACT_MAJOR_VERSION"] = libtesseract.Variables["PACKAGE_MAJOR_VERSION"]; | |
| 108 libtesseract.Variables["TESSERACT_MINOR_VERSION"] = libtesseract.Variables["PACKAGE_MINOR_VERSION"]; | |
| 109 libtesseract.Variables["TESSERACT_MICRO_VERSION"] = libtesseract.Variables["PACKAGE_PATCH_VERSION"]; | |
| 110 libtesseract.Variables["TESSERACT_VERSION_STR"] = "master"; | |
| 111 libtesseract.configureFile("include/tesseract/version.h.in", "tesseract/version.h"); | |
| 112 } | |
| 113 | |
| 114 // | |
| 115 auto &tesseract = tess.addExecutable("tesseract"); | |
| 116 { | |
| 117 tesseract += cppstd; | |
| 118 tesseract += "src/tesseract.cpp"; | |
| 119 tesseract += libtesseract; | |
| 120 } | |
| 121 | |
| 122 auto &svpaint = tess.addExecutable("svpaint"); | |
| 123 { | |
| 124 svpaint += cppstd; | |
| 125 svpaint += "src/svpaint.cpp"; | |
| 126 svpaint += libtesseract; | |
| 127 } | |
| 128 | |
| 129 auto &training = tess.addDirectory("training"); | |
| 130 | |
| 131 // | |
| 132 auto &common_training = training.addLibrary("common_training"); | |
| 133 { | |
| 134 common_training += "TESS_COMMON_TRAINING_API"_api; | |
| 135 common_training += cppstd; | |
| 136 common_training += "src/training/common/.*"_rr; | |
| 137 common_training.Public += "src/training/common"_idir; | |
| 138 common_training.Public += libtesseract; | |
| 139 } | |
| 140 | |
| 141 // | |
| 142 auto &unicharset_training = training.addLibrary("unicharset_training"); | |
| 143 { | |
| 144 unicharset_training += "TESS_UNICHARSET_TRAINING_API"_api; | |
| 145 unicharset_training += cppstd; | |
| 146 unicharset_training += "src/training/unicharset/.*"_rr; | |
| 147 unicharset_training.Public += "src/training/unicharset"_idir; | |
| 148 unicharset_training.Public += common_training; | |
| 149 unicharset_training.Public += "org.sw.demo.unicode.icu.i18n"_dep; | |
| 150 | |
| 151 auto win_or_mingw = | |
| 152 unicharset_training.getBuildSettings().TargetOS.Type == OSType::Windows || | |
| 153 unicharset_training.getBuildSettings().TargetOS.Type == OSType::Mingw | |
| 154 ; | |
| 155 if (!win_or_mingw) | |
| 156 unicharset_training += "pthread"_slib; | |
| 157 } | |
| 158 | |
| 159 // | |
| 160 #define ADD_EXE(n, ...) \ | |
| 161 auto &n = training.addExecutable(#n); \ | |
| 162 n += cppstd; \ | |
| 163 n += "src/training/" #n ".*"_rr; \ | |
| 164 n.Public += __VA_ARGS__; \ | |
| 165 n | |
| 166 | |
| 167 ADD_EXE(ambiguous_words, common_training); | |
| 168 ADD_EXE(classifier_tester, common_training); | |
| 169 ADD_EXE(combine_lang_model, unicharset_training); | |
| 170 ADD_EXE(combine_tessdata, common_training); | |
| 171 ADD_EXE(cntraining, common_training); | |
| 172 ADD_EXE(dawg2wordlist, common_training); | |
| 173 ADD_EXE(mftraining, common_training) += "src/training/mergenf.*"_rr; | |
| 174 ADD_EXE(shapeclustering, common_training); | |
| 175 ADD_EXE(unicharset_extractor, unicharset_training); | |
| 176 ADD_EXE(wordlist2dawg, common_training); | |
| 177 ADD_EXE(lstmeval, unicharset_training); | |
| 178 ADD_EXE(lstmtraining, unicharset_training); | |
| 179 ADD_EXE(set_unicharset_properties, unicharset_training); | |
| 180 ADD_EXE(merge_unicharsets, common_training); | |
| 181 | |
| 182 // | |
| 183 auto &pango_training = training.addLibrary("pango_training"); | |
| 184 { | |
| 185 pango_training += "TESS_PANGO_TRAINING_API"_api; | |
| 186 pango_training += cppstd; | |
| 187 pango_training += "src/training/pango/.*"_rr; | |
| 188 pango_training.Public += "src/training/pango"_idir; | |
| 189 pango_training.Public += unicharset_training; | |
| 190 pango_training.Public += "org.sw.demo.gnome.pango.pangocairo"_dep; | |
| 191 } | |
| 192 | |
| 193 ADD_EXE(text2image, pango_training); | |
| 194 { | |
| 195 text2image += cppstd; | |
| 196 text2image += | |
| 197 "src/training/degradeimage.cpp", | |
| 198 "src/training/degradeimage.h", | |
| 199 "src/training/text2image.cpp" | |
| 200 ; | |
| 201 } | |
| 202 | |
| 203 if (!s.getExternalVariables()["with-tests"]) | |
| 204 return; | |
| 205 | |
| 206 // tests | |
| 207 { | |
| 208 auto &test = tess.addDirectory("test"); | |
| 209 test.Scope = TargetScope::Test; | |
| 210 | |
| 211 String skipped_tests_str; | |
| 212 if (s.getExternalVariables()["skip-tests"]) | |
| 213 skipped_tests_str = s.getExternalVariables()["skip-tests"].getValue(); | |
| 214 auto skipped_tests = split_string(skipped_tests_str, ","); | |
| 215 | |
| 216 auto add_test = [&test, &s, &cppstd, &libtesseract, &pango_training, &skipped_tests](const String &name) -> decltype(auto) | |
| 217 { | |
| 218 auto &t = test.addTarget<ExecutableTarget>(name); | |
| 219 t += cppstd; | |
| 220 t += FileRegex("unittest", name + "_test.*", false); | |
| 221 t += "unittest"_idir; | |
| 222 | |
| 223 t += "SW_TESTING"_def; | |
| 224 | |
| 225 auto datadir = test.SourceDir / "tessdata_unittest"; | |
| 226 if (s.getExternalVariables()["test-data-dir"]) | |
| 227 datadir = fs::current_path() / s.getExternalVariables()["test-data-dir"].getValue(); | |
| 228 t += Definition("TESSBIN_DIR=\"" + ""s + "\""); | |
| 229 | |
| 230 t += Definition("TESTING_DIR=\"" + to_printable_string(normalize_path(test.SourceDir / "test/testing")) + "\""); | |
| 231 t += Definition("TESTDATA_DIR=\"" + to_printable_string(normalize_path(test.SourceDir / "test/testdata")) + "\""); | |
| 232 | |
| 233 t += Definition("LANGDATA_DIR=\"" + to_printable_string(normalize_path(datadir / "langdata_lstm")) + "\""); | |
| 234 t += Definition("TESSDATA_DIR=\"" + to_printable_string(normalize_path(datadir / "tessdata")) + "\""); | |
| 235 t += Definition("TESSDATA_BEST_DIR=\"" + to_printable_string(normalize_path(datadir / "tessdata_best")) + "\""); | |
| 236 | |
| 237 // we push all deps to all tests to simplify things | |
| 238 t += pango_training; | |
| 239 t += "org.sw.demo.google.googletest.gmock.main"_dep; | |
| 240 t += "org.sw.demo.google.googletest.gtest.main"_dep; | |
| 241 | |
| 242 if (t.getCompilerType() == CompilerType::MSVC) | |
| 243 t.CompileOptions.push_back("-utf-8"); | |
| 244 | |
| 245 auto win_or_mingw = | |
| 246 t.getBuildSettings().TargetOS.Type == OSType::Windows || | |
| 247 t.getBuildSettings().TargetOS.Type == OSType::Mingw | |
| 248 ; | |
| 249 if (!win_or_mingw) | |
| 250 t += "pthread"_slib; | |
| 251 | |
| 252 auto tst = libtesseract.addTest(t, name); | |
| 253 for (auto &st : skipped_tests) | |
| 254 { | |
| 255 std::regex r(st); | |
| 256 if (std::regex_match(name, r)) | |
| 257 { | |
| 258 tst.skip(true); | |
| 259 break; | |
| 260 } | |
| 261 } | |
| 262 | |
| 263 return t; | |
| 264 }; | |
| 265 | |
| 266 Strings tests | |
| 267 { | |
| 268 "apiexample", | |
| 269 "applybox", | |
| 270 "baseapi", | |
| 271 "baseapi_thread", | |
| 272 "bitvector", | |
| 273 "capiexample", | |
| 274 "capiexample_c", | |
| 275 "cleanapi", | |
| 276 "colpartition", | |
| 277 "commandlineflags", | |
| 278 "denorm", | |
| 279 "equationdetect", | |
| 280 "fileio", | |
| 281 "heap", | |
| 282 "imagedata", | |
| 283 "indexmapbidi", | |
| 284 "intfeaturemap", | |
| 285 "intsimdmatrix", | |
| 286 "lang_model", | |
| 287 "layout", | |
| 288 "ligature_table", | |
| 289 "linlsq", | |
| 290 "list", | |
| 291 "lstm_recode", | |
| 292 "lstm_squashed", | |
| 293 "lstm", | |
| 294 "lstmtrainer", | |
| 295 "loadlang", | |
| 296 "mastertrainer", | |
| 297 "matrix", | |
| 298 "networkio", | |
| 299 "normstrngs", | |
| 300 "nthitem", | |
| 301 "osd", | |
| 302 "pagesegmode", | |
| 303 "pango_font_info", | |
| 304 "paragraphs", | |
| 305 "params_model", | |
| 306 "progress", | |
| 307 "qrsequence", | |
| 308 "recodebeam", | |
| 309 "rect", | |
| 310 "resultiterator", | |
| 311 "scanutils", | |
| 312 "shapetable", | |
| 313 "stats", | |
| 314 "stringrenderer", | |
| 315 "stridemap", | |
| 316 "tablefind", | |
| 317 "tablerecog", | |
| 318 "tabvector", | |
| 319 "textlineprojection", | |
| 320 "tfile", | |
| 321 "unichar", | |
| 322 "unicharcompress", | |
| 323 "unicharset", | |
| 324 "validate_grapheme", | |
| 325 "validate_indic", | |
| 326 "validate_khmer", | |
| 327 "validate_myanmar", | |
| 328 "validator", | |
| 329 }; | |
| 330 for (auto t : tests) | |
| 331 add_test(t); | |
| 332 auto &dt = add_test("dawg"); | |
| 333 dt += Definition("wordlist2dawg_prog=\"" + to_printable_string(normalize_path(wordlist2dawg.getOutputFile())) + "\""); | |
| 334 dt += Definition("dawg2wordlist_prog=\"" + to_printable_string(normalize_path(dawg2wordlist.getOutputFile())) + "\""); | |
| 335 | |
| 336 auto &tw = add_test("tatweel"); | |
| 337 tw += "unittest/util/.*"_rr; | |
| 338 tw += "unittest/third_party/.*"_rr; | |
| 339 tw -= "unittest/third_party/googletest/.*"_rr; | |
| 340 } | |
| 341 } | |
| 342 | |
| 343 void check(Checker &c) | |
| 344 { | |
| 345 auto &s = c.addSet("libtesseract"); | |
| 346 s.checkFunctionExists("getline"); | |
| 347 s.checkIncludeExists("dlfcn.h"); | |
| 348 s.checkIncludeExists("inttypes.h"); | |
| 349 s.checkIncludeExists("memory.h"); | |
| 350 s.checkIncludeExists("stdint.h"); | |
| 351 s.checkIncludeExists("stdlib.h"); | |
| 352 s.checkIncludeExists("string.h"); | |
| 353 s.checkIncludeExists("sys/stat.h"); | |
| 354 s.checkIncludeExists("sys/types.h"); | |
| 355 s.checkIncludeExists("tiffio.h"); | |
| 356 s.checkIncludeExists("unistd.h"); | |
| 357 s.checkTypeSize("long long int"); | |
| 358 s.checkTypeSize("size_t"); | |
| 359 s.checkTypeSize("void *"); | |
| 360 s.checkTypeSize("wchar_t"); | |
| 361 { | |
| 362 auto &c = s.checkSymbolExists("snprintf"); | |
| 363 c.Parameters.Includes.push_back("stdio.h"); | |
| 364 } | |
| 365 } | |
| 366 |
