comparison tests/test_extractimage.py @ 1:1d09e1dec1d9 upstream

ADD: PyMuPDF v1.26.4: the original sdist. It does not yet contain MuPDF. This normally will be downloaded when building PyMuPDF.
author Franz Glasner <fzglas.hg@dom66.de>
date Mon, 15 Sep 2025 11:37:51 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 1:1d09e1dec1d9
1 """
2 Extract images from a PDF file, confirm number of images found.
3 """
4 import os
5 import pymupdf
6
# Absolute path of the directory that contains this test module.
scriptdir = os.path.dirname(os.path.abspath(__file__))
# PDF fixture that test_extract_image() scans for image objects.
filename = os.path.join(scriptdir, "resources", "joined.pdf")
# Tally that test_extract_image() must arrive at for the fixture above.
known_image_count = 21
10
11
def test_extract_image():
    """Tally the image objects of the fixture PDF and compare with the known total."""
    doc = pymupdf.open(filename)

    def is_image(xref):
        # Element 1 of the xref_get_key() result is compared with the PDF
        # name "/Image".
        return doc.xref_get_key(xref, "Subtype")[1] == "/Image"

    # NOTE(review): the tally is seeded with 1 and the scan excludes
    # doc.xref_length() - 1; known_image_count is compared against exactly
    # this arithmetic, so both are deliberately left untouched - confirm
    # against the fixture before changing either.
    image_count = 1
    for xref in filter(is_image, range(1, doc.xref_length() - 1)):
        # Only results that come back as a dict are counted.
        if isinstance(doc.extract_image(xref), dict):
            image_count += 1

    assert image_count == known_image_count  # total known for this fixture file
24
def test_2348():
    """Check that extract_image() reports the format of inserted images.

    A two-page PDF is built with a JPEG inserted on the first page and a PNG
    on the second. It is saved with ez_save(), reopened, and the 'ext' entry
    of the first image extracted from each page is compared with 'jpeg' and
    'png' respectively.

    Both documents are handled by ``with`` blocks so they are closed even if
    a call or an assertion fails; previously the reopened document was never
    closed.
    """
    pdf_path = f'{scriptdir}/test_2348.pdf'
    rect = pymupdf.Rect(20, 20, 480, 820)
    # One source image per page, in page order.
    sources = (
        f'{scriptdir}/resources/nur-ruhig.jpg',
        f'{scriptdir}/resources/img-transparent.png',
    )

    # Build and save the two-page document.
    with pymupdf.open() as document:
        for source in sources:
            page = document.new_page(width=500, height=842)
            page.insert_image(rect, filename=source)
        document.ez_save(pdf_path)

    # Reopen the saved file and read back the extension of each page's
    # first listed image.
    with pymupdf.open(pdf_path) as document:
        extensions = []
        for page_number in range(len(sources)):
            imlist = document[page_number].get_images()
            # Element 0 of an image entry is passed to extract_image().
            image = document.extract_image(imlist[0][0])
            extensions.append(image['ext'])
    jpeg_extension, png_extension = extensions

    print(f'jpeg_extension={jpeg_extension!r} png_extension={png_extension!r}')
    assert jpeg_extension == 'jpeg'
    assert png_extension == 'png'
51
def test_delete_image():
    """Delete the first image listed on the first page of the fixture PDF.

    The test contains no assertion; it passes when none of the calls raises.
    """
    fixture = os.path.abspath(f'{__file__}/../../tests/resources/test_delete_image.pdf')
    doc = pymupdf.open(fixture)
    first_page = doc[0]
    # Element 0 of the first get_images() entry is the value handed to
    # delete_image().
    first_entry = first_page.get_images()[0]
    first_page.delete_image(first_entry[0])