Skip to content

Commit

Permalink
TST: Allow loading truncated images if required (#2586)
Browse files Browse the repository at this point in the history
* TST: Allow loading truncated images if required
  • Loading branch information
stefan6419846 authored Apr 6, 2024
1 parent 956fd03 commit ae0d27b
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 15 deletions.
22 changes: 22 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,25 @@ def test_csv_consistency():

# Ensure the urls are unique
assert len(pdfs) == len({pdf["url"] for pdf in pdfs})


class PILContext:
    """Context manager that temporarily lets PIL/Pillow load truncated images.

    On entry the current ``ImageFile.LOAD_TRUNCATED_IMAGES`` value is
    remembered and the flag is switched on; on exit the remembered value is
    written back, whether or not the managed block raised.
    """

    def __init__(self):
        # Placeholder until ``__enter__`` records the real Pillow setting.
        self._saved_load_truncated_images = False

    def __enter__(self):
        # Imported inside the method, so Pillow is only needed once the
        # context is actually entered.
        from PIL import ImageFile as image_file

        # Remember the current setting, then permit incomplete image data.
        self._saved_load_truncated_images = image_file.LOAD_TRUNCATED_IMAGES
        image_file.LOAD_TRUNCATED_IMAGES = True
        return self

    def __exit__(self, type_, value, traceback):
        from PIL import ImageFile as image_file

        # Put back whatever was configured before the context was entered.
        image_file.LOAD_TRUNCATED_IMAGES = self._saved_load_truncated_images

        # A falsy result lets an in-flight exception propagate; True is
        # returned only when the block finished without raising.
        return None if type_ else True
17 changes: 9 additions & 8 deletions tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)
from pypdf.generic import ArrayObject, DictionaryObject, NameObject, NumberObject

from . import get_data_from_url
from . import PILContext, get_data_from_url
from .test_encryption import HAS_AES
from .test_images import image_similarity

Expand Down Expand Up @@ -371,13 +371,14 @@ def test_tiff_predictor():
@pytest.mark.enable_socket()
def test_rgba():
"""Decode rgb with transparency"""
reader = PdfReader(BytesIO(get_data_from_url(name="tika-972174.pdf")))
data = reader.pages[0].images[0]
assert ".jp2" in data.name
similarity = image_similarity(
data.image, BytesIO(get_data_from_url(name="tika-972174_p0-im0.png"))
)
assert similarity > 0.99
with PILContext():
reader = PdfReader(BytesIO(get_data_from_url(name="tika-972174.pdf")))
data = reader.pages[0].images[0]
assert ".jp2" in data.name
similarity = image_similarity(
data.image, BytesIO(get_data_from_url(name="tika-972174_p0-im0.png"))
)
assert similarity > 0.99


@pytest.mark.enable_socket()
Expand Down
15 changes: 8 additions & 7 deletions tests/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
read_object,
)

from . import get_data_from_url, normalize_warnings
from . import PILContext, get_data_from_url, normalize_warnings

TESTS_ROOT = Path(__file__).parent.resolve()
PROJECT_ROOT = TESTS_ROOT.parent
Expand Down Expand Up @@ -672,12 +672,13 @@ def test_image_extraction(url, name):
if not root.exists():
root.mkdir()

for page in reader.pages:
for image in page.images:
filename = root / image.name
with open(filename, "wb") as img:
img.write(image.data)
images_extracted.append(filename)
with PILContext():
for page in reader.pages:
for image in page.images:
filename = root / image.name
with open(filename, "wb") as img:
img.write(image.data)
images_extracted.append(filename)

# Cleanup
do_cleanup = True # set this to False for manual inspection
Expand Down

0 comments on commit ae0d27b

Please sign in to comment.