From 0ca4d37a01b529377c0af6c72ebc5847b6a3fa45 Mon Sep 17 00:00:00 2001 From: pubpub-zz <4083478+pubpub-zz@users.noreply.github.com> Date: Sun, 10 Sep 2023 13:49:01 +0200 Subject: [PATCH] BUG: Cope with indirect objects in filters and remove deprecated code (#2177) closes #2158 closes #2159 --- pypdf/filters.py | 91 ++++++++++++++++++++----------------------- tests/test_filters.py | 9 ++--- 2 files changed, 46 insertions(+), 54 deletions(-) diff --git a/pypdf/filters.py b/pypdf/filters.py index f308a9010..59599e9f1 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -43,6 +43,7 @@ from ._utils import ( b_, deprecate_with_replacement, + deprecation_no_replacement, logger_warning, ord_, ) @@ -53,7 +54,7 @@ from .constants import ImageAttributes as IA from .constants import LzwFilterParameters as LZW from .constants import StreamAttributes as SA -from .errors import PdfReadError, PdfStreamError +from .errors import DeprecationError, PdfReadError, PdfStreamError from .generic import ( ArrayObject, DictionaryObject, @@ -93,7 +94,7 @@ class FlateDecode: @staticmethod def decode( data: bytes, - decode_parms: Union[None, ArrayObject, DictionaryObject] = None, + decode_parms: Optional[DictionaryObject] = None, **kwargs: Any, ) -> bytes: """ @@ -113,17 +114,15 @@ def decode( if "decodeParms" in kwargs: # deprecated deprecate_with_replacement("decodeParms", "parameters", "4.0.0") decode_parms = kwargs["decodeParms"] + if isinstance(decode_parms, ArrayObject): # type: ignore + raise DeprecationError("decode_parms as ArrayObject is depreciated") + str_data = decompress(data) predictor = 1 if decode_parms: try: - if isinstance(decode_parms, ArrayObject): - for decode_parm in decode_parms: - if "/Predictor" in decode_parm: - predictor = decode_parm["/Predictor"] - else: - predictor = decode_parms.get("/Predictor", 1) + predictor = decode_parms.get("/Predictor", 1) except (AttributeError, TypeError): # Type Error is NullObject pass # Usually an array with a null object was read 
# predictor 1 == no predictor @@ -131,24 +130,21 @@ def decode( # The /Columns param. has 1 as the default value; see ISO 32000, # §7.4.4.3 LZWDecode and FlateDecode Parameters, Table 8 DEFAULT_BITS_PER_COMPONENT = 8 - if isinstance(decode_parms, ArrayObject): + try: + columns = cast(int, decode_parms[LZW.COLUMNS].get_object()) # type: ignore + except (TypeError, KeyError): columns = 1 - bits_per_component = DEFAULT_BITS_PER_COMPONENT - for decode_parm in decode_parms: - if "/Columns" in decode_parm: - columns = decode_parm["/Columns"] - if LZW.BITS_PER_COMPONENT in decode_parm: - bits_per_component = decode_parm[LZW.BITS_PER_COMPONENT] - else: - columns = ( - 1 if decode_parms is None else decode_parms.get(LZW.COLUMNS, 1) - ) - colors = 1 if decode_parms is None else decode_parms.get(LZW.COLORS, 1) - bits_per_component = ( - decode_parms.get(LZW.BITS_PER_COMPONENT, DEFAULT_BITS_PER_COMPONENT) - if decode_parms - else DEFAULT_BITS_PER_COMPONENT + try: + colors = cast(int, decode_parms[LZW.COLORS].get_object()) # type: ignore + except (TypeError, KeyError): + colors = 1 + try: + bits_per_component = cast( + int, + decode_parms[LZW.BITS_PER_COMPONENT].get_object(), # type: ignore ) + except (TypeError, KeyError): + bits_per_component = DEFAULT_BITS_PER_COMPONENT # PNG predictor can vary by row and so is the lead byte on each row rowlength = ( @@ -259,7 +255,7 @@ class ASCIIHexDecode: @staticmethod def decode( data: Union[str, bytes], - decode_parms: Union[None, ArrayObject, DictionaryObject] = None, + decode_parms: Optional[DictionaryObject] = None, **kwargs: Any, ) -> bytes: """ @@ -278,9 +274,8 @@ def decode( Raises: PdfStreamError: """ - if "decodeParms" in kwargs: # deprecated - deprecate_with_replacement("decodeParms", "parameters", "4.0.0") - decode_parms = kwargs["decodeParms"] # noqa: F841 + # decode_parms is unused here + if isinstance(data, str): data = data.encode() retval = b"" @@ -321,7 +316,7 @@ class RunLengthDecode: @staticmethod def decode( data: 
bytes, - decode_parms: Union[None, ArrayObject, DictionaryObject] = None, + decode_parms: Optional[DictionaryObject] = None, **kwargs: Any, ) -> bytes: """ @@ -337,9 +332,8 @@ def decode( Raises: PdfStreamError: """ - if "decodeParms" in kwargs: # deprecated - deprecate_with_replacement("decodeParms", "parameters", "4.0.0") - decode_parms = kwargs["decodeParms"] # noqa: F841 + # decode_parms is unused here + lst = [] index = 0 while True: @@ -453,7 +447,7 @@ def decode(self) -> str: @staticmethod def decode( data: bytes, - decode_parms: Union[None, ArrayObject, DictionaryObject] = None, + decode_parms: Optional[DictionaryObject] = None, **kwargs: Any, ) -> str: """ @@ -466,9 +460,8 @@ def decode( Returns: decoded data. """ - if "decodeParms" in kwargs: # deprecated - deprecate_with_replacement("decodeParms", "parameters", "4.0.0") - decode_parms = kwargs["decodeParms"] # noqa: F841 + # decode_parms is unused here + return LZWDecode.Decoder(data).decode() @@ -478,12 +471,11 @@ class ASCII85Decode: @staticmethod def decode( data: Union[str, bytes], - decode_parms: Union[None, ArrayObject, DictionaryObject] = None, + decode_parms: Optional[DictionaryObject] = None, **kwargs: Any, ) -> bytes: - if "decodeParms" in kwargs: # deprecated - deprecate_with_replacement("decodeParms", "parameters", "4.0.0") - decode_parms = kwargs["decodeParms"] # noqa: F841 + # decode_parms is unused here + if isinstance(data, str): data = data.encode("ascii") group_index = b = 0 @@ -511,12 +503,10 @@ class DCTDecode: @staticmethod def decode( data: bytes, - decode_parms: Union[None, ArrayObject, DictionaryObject] = None, + decode_parms: Optional[DictionaryObject] = None, **kwargs: Any, ) -> bytes: - if "decodeParms" in kwargs: # deprecated - deprecate_with_replacement("decodeParms", "parameters", "4.0.0") - decode_parms = kwargs["decodeParms"] # noqa: F841 + # decode_parms is unused here return data @@ -524,12 +514,10 @@ class JPXDecode: @staticmethod def decode( data: bytes, - 
decode_parms: Union[None, ArrayObject, DictionaryObject] = None, + decode_parms: Optional[DictionaryObject] = None, **kwargs: Any, ) -> bytes: - if "decodeParms" in kwargs: # deprecated - deprecate_with_replacement("decodeParms", "parameters", "4.0.0") - decode_parms = kwargs["decodeParms"] # noqa: F841 + # decode_parms is unused here return data @@ -591,13 +579,18 @@ def _get_parameters( @staticmethod def decode( data: bytes, - decode_parms: Union[None, ArrayObject, DictionaryObject] = None, + decode_parms: Optional[DictionaryObject] = None, height: int = 0, **kwargs: Any, ) -> bytes: + # decode_parms is unused here if "decodeParms" in kwargs: # deprecated deprecate_with_replacement("decodeParms", "parameters", "4.0.0") decode_parms = kwargs["decodeParms"] + if isinstance(decode_parms, ArrayObject): # deprecated + deprecation_no_replacement( + "decode_parms being an ArrayObject", removed_in="3.15.5" + ) parms = CCITTFaxDecode._get_parameters(decode_parms, height) img_size = len(data) diff --git a/tests/test_filters.py b/tests/test_filters.py index 9268186aa..d2765c86e 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -10,7 +10,7 @@ from PIL import Image from pypdf import PdfReader -from pypdf.errors import PdfReadError, PdfStreamError +from pypdf.errors import DeprecationError, PdfReadError, PdfStreamError from pypdf.filters import ( ASCII85Decode, ASCIIHexDecode, @@ -69,16 +69,15 @@ def test_flatedecode_unsupported_predictor(): codec.decode(codec.encode(s), DictionaryObject({"/Predictor": predictor})) -@pytest.mark.parametrize( - "params", [ArrayObject([]), ArrayObject([{"/Predictor": 1}]), "a"] -) +@pytest.mark.parametrize("params", [ArrayObject([]), ArrayObject([{"/Predictor": 1}])]) def test_flate_decode_decompress_with_array_params(params): """FlateDecode decode() method works correctly with array parameters.""" codec = FlateDecode() s = "" s = s.encode() encoded = codec.encode(s) - assert codec.decode(encoded, params) == s + with 
pytest.raises(DeprecationError): + assert codec.decode(encoded, params) == s @pytest.mark.parametrize(