diff mupdf-source/thirdparty/tesseract/.github/workflows/cmake.yml @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/.github/workflows/cmake.yml	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,154 @@
+name: cmake
+# cmake build of tesseract and training tools on ubuntu and macOS homebrew using Ninja.
+# test command line version of tesseract. run basicapitest.
+on:
+  #push:
+  schedule:
+    - cron: 0 21 * * *
+
+jobs:
+  basictests:
+    name: ${{ matrix.config.name }}
+    runs-on: ${{ matrix.config.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - { name: macos-14-clang-15-cmake, os: macos-14, cxx: clang++ } # default
+          - { name: macos-14-gcc-14-cmake, os: macos-14, cxx: g++-14 } #installed
+          - { name: macos-15-clang-cmake, os: macos-15, cxx: clang++ } # default
+
+          - { name: ubuntu-22.04-clang-15-cmake, os: ubuntu-22.04, cxx: clang++-15 } #installed
+
+          - { name: ubuntu-22.04-gcc-12-cmake, os: ubuntu-22.04, cxx: g++-12 } #installed
+          - { name: ubuntu-22.04-gcc-11-cmake, os: ubuntu-22.04, cxx: g++-11 } #installed
+          - { name: ubuntu-20.04-gcc-10-cmake, os: ubuntu-20.04, cxx: g++-10 } #installed
+          - { name: ubuntu-20.04-gcc-9-cmake, os: ubuntu-20.04, cxx: g++-9 } #installed
+
+    steps:
+      - name: Install compilers on Linux
+        run: |
+             sudo apt-get update
+             sudo apt-get install ${{ matrix.config.cxx }} -y
+        if: runner.os == 'Linux'
+
+      - name: Install dependencies on Linux
+        run: |
+           sudo apt-get install autoconf libleptonica-dev -y
+           sudo apt-get install libarchive-dev libcurl4-openssl-dev -y
+           sudo apt-get install libpango1.0-dev -y
+           sudo apt-get install cabextract -y
+           sudo apt-get install ninja-build -y
+           cmake --version
+        if: runner.os == 'Linux'
+
+      - name: Install dependencies on macOS
+        run: |
+           brew install autoconf automake
+           brew install leptonica
+           # brew install libarchive
+           brew install pango
+           brew install icu4c && brew link icu4c
+           brew install cabextract
+           brew install ninja
+           ninja --version
+           cmake --version
+           clang++ --version
+           g++ --version
+        if: runner.os == 'macOS'
+
+      - name: Checkout Source
+        uses: actions/checkout@v4
+        with:
+             submodules: recursive
+
+      - name: Configure Tesseract (Linux)
+        run: |
+             mkdir build
+             mkdir inst
+             cmake \
+               -S . \
+               -B build \
+               -G Ninja \
+               -DCMAKE_BUILD_TYPE=Release \
+               -DOPENMP_BUILD=OFF \
+               -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \
+               -DCMAKE_INSTALL_PREFIX:PATH=inst
+        if: runner.os == 'Linux'
+
+      - name: Configure Tesseract (macOS)
+        shell: bash
+        run: |
+             set -e
+             export PKG_CONFIG_PATH=$(brew --prefix)/opt/icu4c/lib/pkgconfig:$(brew --prefix)/opt/libarchive/lib/pkgconfig:/$(brew --prefix)/opt/libffi/lib/pkgconfig:$PKG_CONFIG_PATH
+             export LDFLAGS="-L/usr/local/opt/icu4c/lib"
+             export CPPFLAGS="-I/usr/local/opt/icu4c/include"
+             mkdir build
+             mkdir inst
+             cmake \
+               -S . \
+               -B build \
+               -G Ninja \
+               -DCMAKE_BUILD_TYPE=Release \
+               -DOPENMP_BUILD=OFF \
+               -DCMAKE_CXX_COMPILER=${{ matrix.config.cxx }} \
+               -DCMAKE_INSTALL_PREFIX:PATH=inst
+        if: runner.os == 'macOS'
+
+      - name: Build Tesseract
+        run: |
+             cmake --build build --config Release --target install
+
+      - name: Display Tesseract Version
+        run: |
+             build/inst/bin/tesseract -v
+
+      - name: Display Training Tools Version
+        run: |
+             build/inst/bin/lstmtraining -v
+             build/inst/bin/text2image -v
+
+      - name: Download fonts, tessdata and langdata required for tests
+        run: |
+             git clone https://github.com/egorpugin/tessdata tessdata_unittest
+             cp tessdata_unittest/fonts/* test/testing/
+             mv tessdata_unittest/* ../
+
+      - name: List languages in different tessdata-dir
+        run: |
+             build/inst/bin/tesseract  --list-langs --tessdata-dir ../tessdata
+             build/inst/bin/tesseract  --list-langs --tessdata-dir ../tessdata_best
+             build/inst/bin/tesseract  --list-langs --tessdata-dir ../tessdata_fast
+
+      - name: Run Tesseract on test images in different languages
+        run: |
+             build/inst/bin/tesseract test/testing/phototest.tif - --oem 1  --tessdata-dir ../tessdata
+             build/inst/bin/tesseract test/testing/raaj.tif - -l hin --oem 1   --tessdata-dir ../tessdata
+             build/inst/bin/tesseract test/testing/viet.tif - -l vie --oem 1   --tessdata-dir ../tessdata
+             build/inst/bin/tesseract test/testing/hebrew.png - -l heb --oem 1   --tessdata-dir ../tessdata
+             build/inst/bin/tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
+             build/inst/bin/tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6  --tessdata-dir ../tessdata
+
+      - name: Build and run basicapitest (Linux)
+        run: |
+             export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$PKG_CONFIG_PATH"
+             cd test
+             ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libarchive libcurl) -pthread -std=c++17
+             ./basicapitest
+        if: runner.os == 'Linux'
+
+      - name: Build and run basicapitest (macOS)
+        run: |
+             export "PKG_CONFIG_PATH=$GITHUB_WORKSPACE/build/inst/lib/pkgconfig/:$(brew --prefix)/opt/libarchive/lib/pkgconfig:$(brew --prefix)/Library/Homebrew/os/mac/pkgconfig/11:$PKG_CONFIG_PATH"
+             cd test
+             ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp "-I$GITHUB_WORKSPACE/build/inst/include" "-L$GITHUB_WORKSPACE/build/inst/lib" $(pkg-config --cflags --libs tesseract lept libcurl) -pthread -std=c++17
+             ./basicapitest
+        if: runner.os == 'macOS'
+
+      - name: Display Compiler Version
+        run: |
+             ${{ matrix.config.cxx }} --version
+             pwd
+             ls -la
+             # git log -3 --pretty=format:'%h %ad %s | %an'
+        if: always()