From 0987346bb145e97b11b9ecebed8aff9f5206a75c Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 6 Aug 2023 08:52:23 +0200 Subject: [PATCH] BUG: JPX image without ColorSpace (#2062) Closes #2061 --- pypdf/filters.py | 7 +++++++ tests/test_filters.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/pypdf/filters.py b/pypdf/filters.py index 82d2e0c9b..b2bffa385 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -721,6 +721,8 @@ def _get_imagemode( Image mode not taking into account mask(transparency) ColorInversion is required (like for some DeviceCMYK) """ + if isinstance(color_space, NullObject): + return "", False if isinstance(color_space, str): pass elif not isinstance(color_space, list): @@ -931,6 +933,9 @@ def _handle_jpx( extension = ".jp2" # mime_type = "image/x-jp2" img1 = Image.open(BytesIO(data), formats=("JPEG2000",)) mode, invert_color = _get_imagemode(color_space, colors, mode) + if mode == "": + mode = cast(mode_str_type, img1.mode) + invert_color = mode in ("CMYK",) if img1.mode == "RGBA" and mode == "RGB": mode = "RGBA" # we need to convert to the good mode @@ -1028,6 +1033,8 @@ def _handle_jpx( False, ) else: + if mode == "": + raise PdfReadError(f"ColorSpace field not found in {x_object_obj}") img, image_format, extension, invert_color = ( Image.frombytes(mode, size, data), "PNG", diff --git a/tests/test_filters.py b/tests/test_filters.py index 54afcfc0e..3bd9422d1 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -626,3 +626,18 @@ def test_singleton_device(): name = "pypdf_with_arr_deviceRGB.pdf" reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name))) reader.pages[0].images[0] + + +@pytest.mark.enable_socket() +def test_jpx_no_spacecode(): + """From #2061""" + url = "https://github.com/py-pdf/pypdf/files/12253581/tt2.pdf" + name = "jpx_no_spacecode.pdf" + reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name))) + im = reader.pages[0].images[0] + # create an object without filter and without colorspace + # just for coverage + del im.indirect_reference.get_object()["/Filter"] + with pytest.raises(PdfReadError) as exc: + reader.pages[0].images[0] + assert exc.value.args[0].startswith("ColorSpace field not found")