From 37b6b2eaaccc36392c4efe85ccb771a24e8c2adb Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Thu, 3 Aug 2023 20:18:46 +0200 Subject: [PATCH 1/3] BUG : JPX image with no ColorSpace closes #2061 --- pypdf/filters.py | 7 +++++++ tests/test_filters.py | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/pypdf/filters.py b/pypdf/filters.py index 82d2e0c9b..e2d77b3dc 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -721,6 +721,8 @@ def _get_imagemode( Image mode not taking into account mask(transparency) ColorInversion is required (like for some DeviceCMYK) """ + if isinstance(color_space, NullObject): + return "", False if isinstance(color_space, str): pass elif not isinstance(color_space, list): @@ -931,6 +933,9 @@ def _handle_jpx( extension = ".jp2" # mime_type = "image/x-jp2" img1 = Image.open(BytesIO(data), formats=("JPEG2000",)) mode, invert_color = _get_imagemode(color_space, colors, mode) + if mode == "": + mode = img1.mode + invert_color = mode in ("CMYK",) if img1.mode == "RGBA" and mode == "RGB": mode = "RGBA" # we need to convert to the good mode @@ -1028,6 +1033,8 @@ def _handle_jpx( False, ) else: + if mode == "": + raise PdfReadError(f"ColorSpace field not found in {x_object_obj}") img, image_format, extension, invert_color = ( Image.frombytes(mode, size, data), "PNG", diff --git a/tests/test_filters.py b/tests/test_filters.py index 54afcfc0e..ca3d62c3f 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -626,3 +626,12 @@ def test_singleton_device(): name = "pypdf_with_arr_deviceRGB.pdf" reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name))) reader.pages[0].images[0] + + +@pytest.mark.enable_socket() +def test_jpx_no_spacecode(): + """From #2061""" + url = "https://github.com/py-pdf/pypdf/files/12253581/tt2.pdf" + name = "jpx_no_spacecode.pdf" + reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name))) + reader.pages[0].images[0] From 
d6e60cc68dea8d4739afb4b343e41c5510d6883b Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Thu, 3 Aug 2023 20:39:15 +0200 Subject: [PATCH 2/3] mypy --- pypdf/filters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/filters.py b/pypdf/filters.py index e2d77b3dc..b2bffa385 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -934,7 +934,7 @@ def _handle_jpx( img1 = Image.open(BytesIO(data), formats=("JPEG2000",)) mode, invert_color = _get_imagemode(color_space, colors, mode) if mode == "": - mode = img1.mode + mode = cast(mode_str_type, img1.mode) invert_color = mode in ("CMYK",) if img1.mode == "RGBA" and mode == "RGB": mode = "RGBA" From 224d8b5f099175e0a9b3e10e54be79e2f3eea9d5 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Thu, 3 Aug 2023 22:25:11 +0200 Subject: [PATCH 3/3] coverage --- tests/test_filters.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_filters.py b/tests/test_filters.py index ca3d62c3f..3bd9422d1 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -634,4 +634,10 @@ def test_jpx_no_spacecode(): url = "https://github.com/py-pdf/pypdf/files/12253581/tt2.pdf" name = "jpx_no_spacecode.pdf" reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name))) - reader.pages[0].images[0] + im = reader.pages[0].images[0] + # create an object without filter and without colorspace + # just for coverage + del im.indirect_reference.get_object()["/Filter"] + with pytest.raises(PdfReadError) as exc: + reader.pages[0].images[0] + assert exc.value.args[0].startswith("ColorSpace field not found")