Mercurial > hgrepos > Python2 > PyMuPDF
diff mupdf-source/scripts/pdftohtml.sh @ 3:2c135c81b16c
MERGE: upstream PyMuPDF 1.26.4 with MuPDF 1.26.7
| author | Franz Glasner <fzglas.hg@dom66.de> |
|---|---|
| date | Mon, 15 Sep 2025 11:44:09 +0200 |
| parents | b50eed0cc0ef |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mupdf-source/scripts/pdftohtml.sh Mon Sep 15 11:44:09 2025 +0200 @@ -0,0 +1,63 @@ +#!/bin/bash +# +# Convert PDF to hybrid HTML output with images and line art rendered to a +# background image, and text overlaid on top as absolutely positioned HTML +# text. + +input=$1 +out=${2:-out} +fmt=${3:-png} +dpi=${4:-96} + +scale=$(expr 72 '*' $dpi / 96) + +if test -f "$1" +then + echo Processing "$input" out=$out fmt=$fmt dpi=$dpi +else + echo "usage: pdftohtml.sh input.pdf output-stem image-format dpi" + echo " example: pdftohtml.sh input.pdf output png 96" + exit +fi + +title=$(basename "$input" | sed 's/.pdf$//') + +mutool convert -Oresolution=$dpi -o $out.html "$input" + +sed -i -e "/<head>/a<title>$title</title>" $out.html +sed -i -e "/^<div/s/page\([0-9]*\)\" style=\"/page\1\" style=\"background-image:url('$out\1.$fmt');/" $out.html + +mutool draw -K -r$dpi -o$out%d.png "$input" + +echo Converting to $fmt +for png in $out*.png +do + xxx=$(basename $png .png).$fmt + case $fmt in + png) + if command -v optipng >/dev/null + then + optipng -silent -strip all $png + fi + ;; + jpg) + if command -v mozjpeg >/dev/null + then + mozjpeg -outfile $xxx $png + else + convert -format $fmt $png $xxx + fi + ;; + webp) + if command -v cwebp >/dev/null + then + cwebp -quiet -o $xxx $png + else + convert -format $fmt $png $xxx + fi + ;; + *) + convert -format $fmt $png $xxx + ;; + esac +done
