diff mupdf-source/thirdparty/tesseract/.github/workflows/autotools.yml @ 2:b50eed0cc0ef upstream

ADD: MuPDF v1.26.7: the MuPDF source as downloaded by a default build of PyMuPDF 1.26.4. The directory name has changed: no version number in the expanded directory now.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:43:07 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mupdf-source/thirdparty/tesseract/.github/workflows/autotools.yml	Mon Sep 15 11:43:07 2025 +0200
@@ -0,0 +1,129 @@
+name: autotools
+# Autotools build of Tesseract and its training tools on Ubuntu.
+# Runs the command-line tests, basicapitest and the unit tests; configured with '--disable-openmp'.
+on:
+  # push:
+  schedule:
+    - cron: '0 20 * * *'
+jobs:
+
+  linux:  # one job per entry of matrix.config below
+    runs-on: ${{ matrix.config.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - { name: ubuntu-22.04-clang-15-autotools, os: ubuntu-22.04, cxx: clang++-15 } # installed
+          - { name: ubuntu-22.04-gcc-12-autotools, os: ubuntu-22.04, cxx: g++-12 } # installed
+          - { name: ubuntu-22.04-gcc-11-autotools, os: ubuntu-22.04, cxx: g++-11 } # installed
+          # ubuntu-20.04 hosted runners are retired; g++-10/g++-9 come from apt ("Install Compiler" step).
+          - { name: ubuntu-22.04-gcc-10-autotools, os: ubuntu-22.04, cxx: g++-10 }
+          - { name: ubuntu-22.04-gcc-9-autotools, os: ubuntu-22.04, cxx: g++-9 }
+
+    steps:
+    - uses: actions/checkout@v4  # check out the repository including nested submodules
+      with:
+        submodules: 'recursive'
+
+    - name: Download fonts, tessdata and langdata required for tests
+      run: |  # fonts are copied to test/testing/, the rest is moved to the parent directory
+        git clone https://github.com/egorpugin/tessdata tessdata_unittest
+        cp tessdata_unittest/fonts/* test/testing/
+        mv tessdata_unittest/* ../
+
+    - name: Install Compiler  # the apt package name is the matrix entry's cxx value
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y ${{ matrix.config.cxx }}
+
+    - name: Install dependencies  # build and test packages installed through apt
+      run: |
+        sudo apt-get install autoconf libleptonica-dev -y
+        sudo apt-get install libpango1.0-dev -y
+        sudo apt-get install cabextract libarchive-dev -y
+        sudo apt-get install libcurl4-openssl-dev libcurl4 curl -y
+
+    - name: Setup Tesseract  # runs the repository's autogen.sh
+      run: |
+        ./autogen.sh
+
+    - name: Configure Tesseract  # no shared libs, no OpenMP, no docs; compiler taken from the matrix
+      run: |
+        ./configure '--disable-shared' '--disable-openmp' '--disable-doc' 'CXX=${{ matrix.config.cxx }}' 'CXXFLAGS=-g -O2'
+
+    - name: Make and Install Tesseract  # parallel build, then install as root
+      run: |
+        make -j 8
+        sudo make install
+
+    - name: Make and Install Training Tools  # builds the 'training' target, then installs it
+      run: |
+        make training -j 8
+        sudo make install training-install
+
+    - name: Make and run Unit Tests  # 'make check'
+      run: |
+        make check
+
+    - name: Display Version for tesseract, lstmtraining, text2image
+      if: success() || failure()
+      run: |
+        tesseract -v
+        lstmtraining -v
+        text2image -v
+
+    - name: List languages in different test tessdata-dir
+      run: |  # one listing per data directory placed next to the checkout
+        tesseract  --list-langs --tessdata-dir ../tessdata
+        tesseract  --list-langs --tessdata-dir ../tessdata_best
+        tesseract  --list-langs --tessdata-dir ../tessdata_fast
+
+    - name: Run Tesseract on test images in different languages
+      run: |  # OCR output goes to stdout ('-'); every call uses --oem 1
+        tesseract test/testing/phototest.tif - --oem 1  --tessdata-dir ../tessdata
+        tesseract test/testing/raaj.tif - -l hin --oem 1   --tessdata-dir ../tessdata
+        tesseract test/testing/viet.tif - -l vie --oem 1   --tessdata-dir ../tessdata
+        tesseract test/testing/hebrew.png - -l heb --oem 1   --tessdata-dir ../tessdata
+        tesseract test/testing/eurotext.tif - -l fra --oem 1 --tessdata-dir ../tessdata_best
+        tesseract test/testing/arabic.tif - -l ara --oem 1 --psm 6  --tessdata-dir ../tessdata
+
+    - name: Run Tesseract basicapitest  # compiles testing/basicapitest.cpp with the matrix compiler and runs it
+      run: |
+        export "PKG_CONFIG_PATH=/usr/local/lib/pkgconfig"
+        cd test
+        ${{ matrix.config.cxx }} -o basicapitest testing/basicapitest.cpp -I/usr/local/include -L/usr/local/lib `pkg-config --cflags --libs tesseract lept ` -pthread -std=c++17
+        ./basicapitest
+
+    - name: Setup for Tesseract benchmark using image from issue 263 fifteen times in a list file
+      run: |  # downloads the image, writes its name 15 times to benchmarks.list, prints machine info
+        wget -O i263_speed.jpg https://cloud.githubusercontent.com/assets/9968625/13674495/ac261db4-e6ab-11e5-9b4a-ad91d5b4ff87.jpg
+        printf 'i263_speed.jpg\n%.0s' {1..15} > benchmarks.list
+        lscpu
+        free
+        tesseract -v
+
+    - name: Run Tesseract using image from issue 263 with tessdata_fast
+      run: |  # only the timing is of interest; OCR output is discarded
+        time tesseract benchmarks.list - --tessdata-dir ../tessdata_fast > /dev/null 2>&1
+        echo "tessdata_fast - disable-openmp"
+
+    - name: Run Tesseract using image from issue 263 with tessdata_best
+      run: |  # only the timing is of interest; OCR output is discarded
+        time tesseract benchmarks.list - --tessdata-dir ../tessdata_best > /dev/null 2>&1
+        echo "tessdata_best - disable-openmp"
+
+    - name: Run Tesseract using image from issue 263 with tessdata
+      run: |  # only the timing is of interest; OCR output is discarded
+        time tesseract benchmarks.list - --tessdata-dir ../tessdata > /dev/null 2>&1
+        echo "tessdata - disable-openmp"
+
+    - name: Display Compiler Version  # also prints the three most recent commits
+      if: always()
+      run: |
+        ${{ matrix.config.cxx }} --version
+        git log -3 --pretty=format:'%h %ad %s | %an'
+
+    - name: Display Unit Tests Report  # prints test-suite.log even when earlier steps failed
+      if: always()
+      run: |
+        cat test-suite.log