diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dd3f32cb5..546a42b8f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,29 +27,29 @@ repos: - id: black args: [--target-version, py36] - repo: https://github.com/asottile/blacken-docs - rev: 1.15.0 + rev: 1.16.0 hooks: - id: blacken-docs additional_dependencies: [black==22.1.0] exclude: "docs/user/robustness.md" - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: 'v0.0.280' + rev: 'v0.0.285' hooks: - id: ruff args: ['--fix'] - repo: https://github.com/asottile/pyupgrade - rev: v3.9.0 + rev: v3.10.1 hooks: - id: pyupgrade args: [--py36-plus] - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 args: ["--ignore", "E,W,F"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.4.1' + rev: 'v1.5.1' hooks: - id: mypy files: ^pypdf/.* diff --git a/docs/user/encryption-decryption.md b/docs/user/encryption-decryption.md index 6ab5899f4..c8a8700cd 100644 --- a/docs/user/encryption-decryption.md +++ b/docs/user/encryption-decryption.md @@ -1,7 +1,7 @@ # Encryption and Decryption of PDFs -PDF encryption makes use of [`RC4`](https://en.wikipedia.org/wiki/RC4) and -[`AES`](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) algorithms +PDF encryption makes use of [`RC4`](https://en.wikipedia.org/wiki/RC4) and +[`AES`](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) algorithms with different key length. `pypdf` supports all of them until `PDF-2.0`, which is the latest PDF standard. diff --git a/pypdf/_page.py b/pypdf/_page.py index 124a025e1..6fb5feb92 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -703,7 +703,7 @@ def rotation(self) -> int: return rotate_obj if isinstance(rotate_obj, int) else rotate_obj.get_object() @rotation.setter - def rotation(self, r: Union[int, float]) -> None: + def rotation(self, r: float) -> None: self[NameObject(PG.ROTATE)] = NumberObject((((int(r) + 45) // 90) * 90) % 360) def transfer_rotation_to_content(self) -> None: diff --git a/pypdf/filters.py b/pypdf/filters.py index 303838114..a90e5651b 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -656,7 +656,7 @@ def decode( return tiff_header + data -def decode_stream_data(stream: Any) -> Union[str, bytes]: # utils.StreamObject +def decode_stream_data(stream: Any) -> bytes: # utils.StreamObject """ Decode the stream data based on the specified filters. diff --git a/pypdf/generic/_base.py b/pypdf/generic/_base.py index ac023af13..c26bf0af3 100644 --- a/pypdf/generic/_base.py +++ b/pypdf/generic/_base.py @@ -312,7 +312,7 @@ def get_object(self) -> Optional["PdfObject"]: def __repr__(self) -> str: return f"IndirectObject({self.idnum!r}, {self.generation!r}, {id(self.pdf)})" - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: return ( other is not None and isinstance(other, IndirectObject) @@ -321,7 +321,7 @@ def __eq__(self, other: Any) -> bool: and self.pdf is other.pdf ) - def __ne__(self, other: Any) -> bool: + def __ne__(self, other: object) -> bool: return not self.__eq__(other) def write_to_stream( diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 03cd848f5..5cf235517 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -785,7 +785,7 @@ def _reset_node_tree_relationship(child_obj: Any) -> None: class StreamObject(DictionaryObject): def __init__(self) -> None: - self.__data: Optional[str] = None + self._data: bytes = b"" self.decoded_self: Optional[DecodedStreamObject] = None def _clone( @@ -833,14 +833,6 @@ def decodedSelf(self, value: "DecodedStreamObject") -> None: # deprecated deprecation_with_replacement("decodedSelf", "decoded_self", "3.0.0") self.decoded_self = value - @property - def _data(self) -> Any: - return self.__data - - @_data.setter - def _data(self, value: Any) -> None: - self.__data = value - def write_to_stream( self, stream: StreamType, encryption_key: Union[None, str, bytes] = None ) -> None: @@ -1181,8 +1173,8 @@ def _read_inline_image(self, stream: StreamType) -> Dict[str, Any]: data.write(info) return {"settings": settings, "data": data.getvalue()} - @property - def _data(self) -> bytes: + @property # type: ignore + def _data(self) -> bytes: # type: ignore new_data = BytesIO() for operands, operator in self.operations: if operator == b"INLINE IMAGE": diff --git a/pypdf/pagerange.py b/pypdf/pagerange.py index 66169e61e..bd52e801b 100644 --- a/pypdf/pagerange.py +++ b/pypdf/pagerange.py @@ -12,7 +12,7 @@ from .errors import ParseError _INT_RE = r"(0|-?[1-9]\d*)" # A decimal int, don't allow "-0". -PAGE_RANGE_RE = "^({int}|({int}?(:{int}?(:{int}?)?)))$".format(int=_INT_RE) +PAGE_RANGE_RE = f"^({_INT_RE}|({_INT_RE}?(:{_INT_RE}?(:{_INT_RE}?)?)))$" # groups: 12 34 5 6 7 8 @@ -127,7 +127,7 @@ def indices(self, n: int) -> Tuple[int, int, int]: """ return self._slice.indices(n) - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: if not isinstance(other, PageRange): return False return self._slice == other._slice diff --git a/pypdf/papersizes.py b/pypdf/papersizes.py index 51aa2de59..105a641d8 100644 --- a/pypdf/papersizes.py +++ b/pypdf/papersizes.py @@ -1,8 +1,11 @@ """Helper to get paper sizes.""" -from collections import namedtuple +from typing import NamedTuple -Dimensions = namedtuple("Dimensions", ["width", "height"]) + +class Dimensions(NamedTuple): + width: int + height: int class PaperSize: diff --git a/pyproject.toml b/pyproject.toml index da821b5f2..efbf681b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -194,6 +194,9 @@ ignore = [ "PD011", # Use `.to_numpy()` instead of `.values` "FA102", # Missing `from __future__ import annotations`, but uses PEP 604 union "PERF203", # `try`-`except` within a loop incurs performance overhead + "PYI042", # Type alias `mode_str_type` should be CamelCase + # Ruff bug + "PT014", # Duplicate of test case at index 1 in `@pytest_mark.parametrize` ] [tool.ruff.per-file-ignores] diff --git a/requirements/ci-3.11.txt b/requirements/ci-3.11.txt index 85a7fb8f4..973294f32 100644 --- a/requirements/ci-3.11.txt +++ b/requirements/ci-3.11.txt @@ -6,9 +6,9 @@ # attrs==23.1.0 # via flake8-bugbear -coverage==7.2.7 +coverage==7.3.0 # via -r requirements/ci.in -flake8==6.0.0 +flake8==6.1.0 # via # -r requirements/ci.in # flake8-bugbear @@ -25,7 +25,7 @@ iniconfig==2.0.0 # via pytest mccabe==0.7.0 # via flake8 -mypy==1.4.1 +mypy==1.5.1 # via -r requirements/ci.in mypy-extensions==1.0.0 # via mypy @@ -39,13 +39,13 @@ pluggy==1.2.0 # via pytest py-cpuinfo==9.0.0 # via pytest-benchmark -pycodestyle==2.10.0 +pycodestyle==2.11.0 # via # flake8 # flake8-print pycryptodome==3.18.0 # via -r requirements/ci.in -pyflakes==3.0.1 +pyflakes==3.1.0 # via flake8 pytest==7.4.0 # via @@ -59,9 +59,9 @@ pytest-socket==0.6.0 # via -r requirements/ci.in pytest-timeout==2.1.0 # via -r requirements/ci.in -ruff==0.0.280 +ruff==0.0.285 # via -r requirements/ci.in -typeguard==4.0.1 +typeguard==4.1.2 # via -r requirements/ci.in types-dataclasses==0.6.6 # via -r requirements/ci.in diff --git a/tests/test_generic.py b/tests/test_generic.py index fe91b7184..bdb244abc 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -426,7 +426,7 @@ class Tst: # to replace pdf # TODO: What should happen with the stream? assert do == {"/S": "/GoTo"} if length in (6, 10): - assert b"BT /F1" in do._StreamObject__data + assert b"BT /F1" in do._data raise PdfReadError("__ALLGOOD__") assert should_fail ^ (exc.value.args[0] == "__ALLGOOD__") diff --git a/tests/test_reader.py b/tests/test_reader.py index 87c5a688f..82761d51c 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -1458,4 +1458,4 @@ def test_issue_140(): name = "issue-140.pdf" b = get_data_from_url(url, name=name) reader = PdfReader(BytesIO(b)) - assert (len(reader.pages) == 54) + assert len(reader.pages) == 54 diff --git a/tests/test_text_extraction.py b/tests/test_text_extraction.py index 83e05bc64..de39c1ace 100644 --- a/tests/test_text_extraction.py +++ b/tests/test_text_extraction.py @@ -57,20 +57,19 @@ def test_multi_language(visitor_text): assert "حَبيبي" in reader.pages[3].extract_text(visitor_text=visitor_text) -@pytest.mark.parametrize(("file_name", "constraints"), - [ - ("inkscape-abc.pdf", - {"A": lambda x, y: - 0 < x < 94 and - 189 < y < 283, # In upper left - "B": lambda x, y: - 94 < x < 189 and - 94 < y < 189, # In the center - "C": lambda x, y: - 189 < x < 283 and - 0 < y < 94} # In lower right - ) - ]) +@pytest.mark.parametrize( + ("file_name", "constraints"), + [ + ( + "inkscape-abc.pdf", + { + "A": lambda x, y: 0 < x < 94 and 189 < y < 283, # In upper left + "B": lambda x, y: 94 < x < 189 and 94 < y < 189, # In the center + "C": lambda x, y: 189 < x < 283 and 0 < y < 94, + }, # In lower right + ) + ], +) def test_visitor_text_matrices(file_name, constraints): """ Checks if the matrices given to the visitor_text function when calling