From dfb3c03cfd4bff4c53e27af4501444f07983b6fb Mon Sep 17 00:00:00 2001 From: exiledkingcc Date: Sat, 7 Oct 2023 15:47:17 +0800 Subject: [PATCH] DEV: Unify mypy options and warn redundant workarounds (#2223) --- .github/workflows/github-ci.yaml | 18 ++++----------- .pre-commit-config.yaml | 2 +- pypdf/_crypt_providers/_cryptography.py | 10 ++++---- pypdf/_crypt_providers/_fallback.py | 4 ++-- pypdf/_crypt_providers/_pycryptodome.py | 6 ++--- pypdf/_encryption.py | 2 +- pypdf/_merger.py | 2 +- pypdf/_page.py | 5 ++-- pypdf/_protocols.py | 4 ++-- pypdf/_reader.py | 14 ++++++------ pypdf/_utils.py | 8 +++---- pypdf/_writer.py | 29 ++++++++++++------------ pypdf/_xobj_image_helpers.py | 6 ++--- pypdf/annotations/_markup_annotations.py | 4 ++-- pypdf/filters.py | 8 ++++--- pypdf/generic/_data_structures.py | 2 +- pypdf/generic/_rectangle.py | 2 +- pypdf/types.py | 6 ++--- pyproject.toml | 10 ++++++++ tests/test_cmap.py | 4 ++-- 20 files changed, 74 insertions(+), 72 deletions(-) diff --git a/.github/workflows/github-ci.yaml b/.github/workflows/github-ci.yaml index 3f9420a73..12cd35c86 100644 --- a/.github/workflows/github-ci.yaml +++ b/.github/workflows/github-ci.yaml @@ -22,14 +22,12 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] use-crypto-lib: ["cryptography"] include: - python-version: "3.6" use-crypto-lib: "pycryptodome" - - python-version: "3.9" - use-crypto-lib: "pycryptodome" - - python-version: "3.10" + - python-version: "3.7" use-crypto-lib: "" steps: - name: Update APT packages @@ -94,9 +92,6 @@ jobs: - name: Test with pytest run: | python -m coverage run --parallel-mode -m pytest tests -vv - - name: Test with mypy - run : | - mypy pypdf --show-error-codes --disallow-untyped-defs --disallow-incomplete-defs - name: Upload coverage data uses: actions/upload-artifact@v3 with: @@ -112,12 +107,6 @@ jobs: uses: actions/checkout@v4 with: submodules: 'recursive' - - name: Cache Downloaded Files - id: cache-downloaded-files - uses: actions/cache@v3 - with: - path: '**/tests/pdf_cache/*' - key: cache-downloaded-files - name: Setup Python 3.11 uses: actions/setup-python@v4 with: @@ -137,6 +126,9 @@ jobs: run: | echo `ruff --version` ruff . + - name: Test with mypy + run : | + mypy pypdf package: name: Build & verify package diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5f5c8c125..312bc8dfc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -48,5 +48,5 @@ repos: rev: 'v1.5.1' hooks: - id: mypy + additional_dependencies: [types-Pillow==10.0.0.2] files: ^pypdf/.* - args: [--ignore-missing-imports] diff --git a/pypdf/_crypt_providers/_cryptography.py b/pypdf/_crypt_providers/_cryptography.py index cd991535e..b8515cd44 100644 --- a/pypdf/_crypt_providers/_cryptography.py +++ b/pypdf/_crypt_providers/_cryptography.py @@ -27,11 +27,11 @@ import secrets -from cryptography import __version__ # type: ignore[import] -from cryptography.hazmat.primitives import padding # type: ignore[import] -from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4 # type: ignore[import] -from cryptography.hazmat.primitives.ciphers.base import Cipher # type: ignore[import] -from cryptography.hazmat.primitives.ciphers.modes import CBC, ECB # type: ignore[import] +from cryptography import __version__ +from cryptography.hazmat.primitives import padding +from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4 +from cryptography.hazmat.primitives.ciphers.base import Cipher +from cryptography.hazmat.primitives.ciphers.modes import CBC, ECB from pypdf._crypt_providers._base import CryptBase diff --git a/pypdf/_crypt_providers/_fallback.py b/pypdf/_crypt_providers/_fallback.py index 3b52fbd32..631fec195 100644 --- a/pypdf/_crypt_providers/_fallback.py +++ b/pypdf/_crypt_providers/_fallback.py @@ -34,7 +34,7 @@ crypt_provider = ("local_crypt_fallback", "0.0.0") -class CryptRC4(CryptBase): # type: ignore +class CryptRC4(CryptBase): def __init__(self, key: bytes) -> None: self.s = bytearray(range(256)) j = 0 @@ -58,7 +58,7 @@ def decrypt(self, data: bytes) -> bytes: return self.encrypt(data) -class CryptAES(CryptBase): # type: ignore +class CryptAES(CryptBase): def __init__(self, key: bytes) -> None: pass diff --git a/pypdf/_crypt_providers/_pycryptodome.py b/pypdf/_crypt_providers/_pycryptodome.py index cd0b70b58..30a13e18a 100644 --- a/pypdf/_crypt_providers/_pycryptodome.py +++ b/pypdf/_crypt_providers/_pycryptodome.py @@ -27,9 +27,9 @@ import secrets -from Crypto import __version__ # type: ignore[import] -from Crypto.Cipher import AES, ARC4 # type: ignore[import] -from Crypto.Util.Padding import pad # type: ignore[import] +from Crypto import __version__ +from Crypto.Cipher import AES, ARC4 +from Crypto.Util.Padding import pad from pypdf._crypt_providers._base import CryptBase diff --git a/pypdf/_encryption.py b/pypdf/_encryption.py index 26065b8e7..66adcc7c0 100644 --- a/pypdf/_encryption.py +++ b/pypdf/_encryption.py @@ -86,7 +86,7 @@ def encrypt_object(self, obj: PdfObject) -> PdfObject: obj2[key] = self.encrypt_object(value) obj = obj2 elif isinstance(obj, ArrayObject): - obj = ArrayObject(self.encrypt_object(x) for x in obj) # type: ignore + obj = ArrayObject(self.encrypt_object(x) for x in obj) return obj def decrypt_object(self, obj: PdfObject) -> PdfObject: diff --git a/pypdf/_merger.py b/pypdf/_merger.py index 924f18495..ab0d3b25f 100644 --- a/pypdf/_merger.py +++ b/pypdf/_merger.py @@ -642,7 +642,7 @@ def find_outline_item( if isinstance(oi_enum, list): # oi_enum is still an inner node # (OutlineType, if recursive types were supported by mypy) - res = self.find_outline_item(outline_item, oi_enum) # type: ignore + res = self.find_outline_item(outline_item, oi_enum) if res: # deprecated return [i] + res elif ( diff --git a/pypdf/_page.py b/pypdf/_page.py index 520d71653..2e48927a9 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -1080,8 +1080,7 @@ def _merge_page( if PG.ANNOTS in page: annots = page[PG.ANNOTS] if isinstance(annots, ArrayObject): - for ref in annots: - new_annots.append(ref) + new_annots.extend(annots) for res in ( RES.EXT_G_STATE, @@ -2323,7 +2322,7 @@ def _get_fonts(self) -> Tuple[Set[str], Set[str]]: assert isinstance(obj, DictionaryObject) fonts: Set[str] = set() embedded: Set[str] = set() - fonts, embedded = _get_fonts_walk(cast(DictionaryObject, obj), fonts, embedded) + fonts, embedded = _get_fonts_walk(obj, fonts, embedded) unembedded = fonts - embedded return embedded, unembedded diff --git a/pypdf/_protocols.py b/pypdf/_protocols.py index c6f2bbebd..de0fa78ce 100644 --- a/pypdf/_protocols.py +++ b/pypdf/_protocols.py @@ -5,9 +5,9 @@ try: # Python 3.8+: https://peps.python.org/pep-0586 - from typing import Protocol # type: ignore[attr-defined] + from typing import Protocol except ImportError: - from typing_extensions import Protocol # type: ignore[assignment,misc] + from typing_extensions import Protocol # type: ignore[assignment] from ._utils import StrByteType, StreamType diff --git a/pypdf/_reader.py b/pypdf/_reader.py index 75738fd17..7871ce143 100644 --- a/pypdf/_reader.py +++ b/pypdf/_reader.py @@ -320,7 +320,7 @@ def __init__( self._page_id2num: Optional[ Dict[Any, Any] ] = None # map page indirect_reference number to Page Number - if hasattr(stream, "mode") and "b" not in stream.mode: # type: ignore + if hasattr(stream, "mode") and "b" not in stream.mode: logger_warning( "PdfReader stream/file object is not in binary mode. " "It may not be read correctly.", @@ -1035,7 +1035,7 @@ def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]: except KeyError: if self.strict: raise PdfReadError(f"Outline Entry Missing /Title attribute: {node!r}") - title = "" # type: ignore + title = "" if "/A" in node: # Action, PDFv1.7 Section 12.6 (only type GoTo supported) @@ -1074,7 +1074,7 @@ def _build_outline_item(self, node: DictionaryObject) -> Optional[Destination]: f"Removed unexpected destination {dest!r} from destination", __name__, ) - outline_item = self._build_destination(title, None) # type: ignore + outline_item = self._build_destination(title, None) # if outline item created, add color, format, and child count if present if outline_item: @@ -1229,7 +1229,7 @@ def _flatten( self.flattened_pages = [] if PA.TYPE in pages: - t = pages[PA.TYPE] # type: ignore + t = pages[PA.TYPE] # if pdf has no type, considered as a page if /Kids is missing elif PA.KIDS not in pages: t = "/Page" @@ -1358,7 +1358,7 @@ def get_object( idnum, generation = self.read_object_header(self.stream) except Exception: if hasattr(self.stream, "getbuffer"): - buf = bytes(self.stream.getbuffer()) # type: ignore + buf = bytes(self.stream.getbuffer()) else: p = self.stream.tell() self.stream.seek(0, 0) @@ -1412,7 +1412,7 @@ def get_object( ) else: if hasattr(self.stream, "getbuffer"): - buf = bytes(self.stream.getbuffer()) # type: ignore + buf = bytes(self.stream.getbuffer()) else: p = self.stream.tell() self.stream.seek(0, 0) @@ -1705,7 +1705,7 @@ def _read_standard_xref_table(self, stream: StreamType) -> None: except Exception: # if something wrong occurred if hasattr(stream, "getbuffer"): - buf = bytes(stream.getbuffer()) # type: ignore + buf = bytes(stream.getbuffer()) else: p = stream.tell() stream.seek(0, 0) diff --git a/pypdf/_utils.py b/pypdf/_utils.py index 0bc12c0bf..b9493439c 100644 --- a/pypdf/_utils.py +++ b/pypdf/_utils.py @@ -53,7 +53,7 @@ try: # Python 3.10+: https://www.python.org/dev/peps/pep-0484/ - from typing import TypeAlias # type: ignore[attr-defined] + from typing import TypeAlias except ImportError: from typing_extensions import TypeAlias @@ -461,9 +461,9 @@ def deprecation_bookmark(**aliases: str) -> Callable: outline = a collection of outline items. """ - def decoration(func: Callable) -> Any: # type: ignore + def decoration(func: Callable) -> Any: @functools.wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> Any: # type: ignore + def wrapper(*args: Any, **kwargs: Any) -> Any: rename_kwargs(func.__name__, kwargs, aliases, fail=True) return func(*args, **kwargs) @@ -472,7 +472,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: # type: ignore return decoration -def rename_kwargs( # type: ignore +def rename_kwargs( func_name: str, kwargs: Dict[str, Any], aliases: Dict[str, str], fail: bool = False ) -> None: """ diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 11a0b77e6..0545e8200 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -278,7 +278,7 @@ def get_object( return self._objects[indirect_reference - 1] if indirect_reference.pdf != self: raise ValueError("pdf must be self") - return self._objects[indirect_reference.idnum - 1] # type: ignore + return self._objects[indirect_reference.idnum - 1] def getObject(self, ido: Union[int, IndirectObject]) -> PdfObject: # deprecated """ @@ -301,7 +301,7 @@ def _replace_object( return self._objects[indirect_reference - 1] if indirect_reference.pdf != self: raise ValueError("pdf must be self") - return self._objects[indirect_reference.idnum - 1] # type: ignore + return self._objects[indirect_reference.idnum - 1] def _add_page( self, @@ -327,8 +327,8 @@ def _add_page( if page_org.pdf is not None: other = page_org.pdf.pdf_header if isinstance(other, str): - other = other.encode() # type: ignore - self.pdf_header = _get_max_pdf_version_header(self.pdf_header, other) # type: ignore + other = other.encode() + self.pdf_header = _get_max_pdf_version_header(self.pdf_header, other) page[NameObject(PA.PARENT)] = self._pages pages = cast(DictionaryObject, self.get_object(self._pages)) assert page.indirect_reference is not None @@ -372,7 +372,7 @@ def set_need_appearances_writer(self, state: bool = True) -> None: @property def viewer_preferences(self) -> Optional[ViewerPreferences]: """Returns the existing ViewerPreferences as an overloaded dictionary.""" - o = cast(DictionaryObject, self._root_object).get(CD.VIEWER_PREFERENCES, None) + o = self._root_object.get(CD.VIEWER_PREFERENCES, None) if o is None: return None o = o.get_object() @@ -624,7 +624,7 @@ def open_destination( return create_string_object(str(oa)) elif isinstance(oa, ArrayObject): try: - page, typ = oa[0:2] # type: ignore + page, typ = oa[0:2] array = oa[2:] fit = Fit(typ, tuple(array)) return Destination("OpenAction", page, fit) @@ -1153,7 +1153,7 @@ def _flatten( for attr in inheritable_page_attributes: if attr in pages: inherit[attr] = pages[attr] - for page in cast(ArrayObject, cast(DictionaryObject, pages)[PA.KIDS]): + for page in cast(ArrayObject, pages[PA.KIDS]): addt = {} if isinstance(page, IndirectObject): addt["indirect_reference"] = page @@ -1192,7 +1192,7 @@ def clone_document_from_reader( if TK.INFO in reader.trailer: self._info = reader.trailer[TK.INFO].clone(self).indirect_reference # type: ignore try: - self._ID = cast(ArrayObject, reader.trailer[TK.ID].clone(self)) # type: ignore + self._ID = cast(ArrayObject, reader.trailer[TK.ID].clone(self)) except KeyError: pass if callable(after_page_append): @@ -1333,7 +1333,7 @@ def encrypt( def write_stream(self, stream: StreamType) -> None: if hasattr(stream, "mode") and "b" not in stream.mode: logger_warning( - f"File <{stream.name}> to write to is not in binary mode. " # type: ignore + f"File <{stream.name}> to write to is not in binary mode. " "It may not be written to correctly.", __name__, ) @@ -2205,9 +2205,9 @@ def clean(content: ContentStream, images: List[str], forms: List[str]) -> None: del content.operations[i] elif operator == b"Do": if ( - cast(ObjectDeletionFlag, to_delete) & ObjectDeletionFlag.IMAGES + to_delete & ObjectDeletionFlag.IMAGES and operands[0] in images - or cast(ObjectDeletionFlag, to_delete) & ObjectDeletionFlag.TEXT + or to_delete & ObjectDeletionFlag.TEXT and operands[0] in forms ): del content.operations[i] @@ -2234,7 +2234,7 @@ def clean_forms( try: content: Any = None if ( - cast(ObjectDeletionFlag, to_delete) & ObjectDeletionFlag.IMAGES + to_delete & ObjectDeletionFlag.IMAGES and o["/Subtype"] == "/Image" ): content = NullObject() @@ -2276,9 +2276,9 @@ def clean_forms( content = ContentStream(content, page) images, forms = clean_forms(page, []) - clean(cast(ContentStream, content), images, forms) + clean(content, images, forms) if isinstance(page["/Contents"], ArrayObject): - for o in cast(ArrayObject, page["/Contents"]): + for o in page["/Contents"]: self._objects[o.idnum - 1] = NullObject() try: self._objects[ @@ -3244,7 +3244,6 @@ def _insert_filtered_annotations( p = self._get_cloned_page(d[0], pages, reader) if p is not None: anc = ano.clone(self, ignore_fields=("/D",)) - anc = cast("DictionaryObject", anc) cast("DictionaryObject", anc["/A"])[ NameObject("/D") ] = ArrayObject([p] + d[1:]) diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index 22e3438ae..bf3c869f7 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -15,11 +15,11 @@ ) try: - from typing import Literal, TypeAlias # type: ignore[attr-defined] + from typing import Literal, TypeAlias except ImportError: # PEP 586 introduced typing.Literal with Python 3.8 # For older Python versions, the backport typing_extensions is necessary: - from typing_extensions import Literal, TypeAlias # type: ignore[misc, assignment] + from typing_extensions import Literal, TypeAlias # type: ignore[assignment] try: @@ -94,7 +94,7 @@ def _get_imagemode( mode_map.get(color_space) # type: ignore or list(mode_map.values())[color_components] or prev_mode - ) # type: ignore + ) return mode, mode == "CMYK" diff --git a/pypdf/annotations/_markup_annotations.py b/pypdf/annotations/_markup_annotations.py index e4233412d..09c9c256e 100644 --- a/pypdf/annotations/_markup_annotations.py +++ b/pypdf/annotations/_markup_annotations.py @@ -15,11 +15,11 @@ from ._base import NO_FLAGS, AnnotationDictionary try: - from typing import TypeAlias # type: ignore[attr-defined] + from typing import TypeAlias except ImportError: # PEP 613 introduced typing.TypeAlias with Python 3.10 # For older Python versions, the backport typing_extensions is necessary: - from typing_extensions import TypeAlias # type: ignore[misc] + from typing_extensions import TypeAlias Vertex: TypeAlias = Tuple[float, float] diff --git a/pypdf/filters.py b/pypdf/filters.py index 59599e9f1..b6d198f55 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -114,7 +114,7 @@ def decode( if "decodeParms" in kwargs: # deprecated deprecate_with_replacement("decodeParms", "parameters", "4.0.0") decode_parms = kwargs["decodeParms"] - if isinstance(decode_parms, ArrayObject): # type: ignore + if isinstance(decode_parms, ArrayObject): raise DeprecationError("decode_parms as ArrayObject is depreciated") str_data = decompress(data) @@ -162,7 +162,9 @@ def decode( str_data = bytes(str_data) # PNG prediction: elif 10 <= predictor <= 15: - str_data = FlateDecode._decode_png_prediction(str_data, columns, rowlength) # type: ignore + str_data = FlateDecode._decode_png_prediction( + str_data, columns, rowlength + ) else: # unsupported predictor raise PdfReadError(f"Unsupported flatedecode predictor {predictor!r}") @@ -677,7 +679,7 @@ def decode_stream_data(stream: Any) -> Union[bytes, str]: # utils.StreamObject if filter_type in (FT.FLATE_DECODE, FTA.FL): data = FlateDecode.decode(data, params) elif filter_type in (FT.ASCII_HEX_DECODE, FTA.AHx): - data = ASCIIHexDecode.decode(data) # type: ignore + data = ASCIIHexDecode.decode(data) elif filter_type in (FT.RUN_LENGTH_DECODE, FTA.RL): data = RunLengthDecode.decode(data) elif filter_type in (FT.LZW_DECODE, FTA.LZW): diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 88a17d85a..86970bb81 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -115,7 +115,7 @@ def clone( arr.append(data.clone(pdf_dest, force_duplicate, ignore_fields)) else: arr.append(data) - return cast("ArrayObject", arr) + return arr def items(self) -> Iterable[Any]: """Emulate DictionaryObject.items for a list (index, object).""" diff --git a/pypdf/generic/_rectangle.py b/pypdf/generic/_rectangle.py index 676450d2a..2d93d7717 100644 --- a/pypdf/generic/_rectangle.py +++ b/pypdf/generic/_rectangle.py @@ -188,7 +188,7 @@ def getUpperRight( def setLowerLeft(self, value: Tuple[float, float]) -> None: # deprecated deprecation_with_replacement("setLowerLeft", "lower_left", "3.0.0") - self.lower_left = value # type: ignore + self.lower_left = value def setLowerRight(self, value: Tuple[float, float]) -> None: # deprecated deprecation_with_replacement("setLowerRight", "lower_right", "3.0.0") diff --git a/pypdf/types.py b/pypdf/types.py index 4f91614c6..04d52c36c 100644 --- a/pypdf/types.py +++ b/pypdf/types.py @@ -4,13 +4,13 @@ try: # Python 3.8+: https://peps.python.org/pep-0586 - from typing import Literal # type: ignore[attr-defined] + from typing import Literal except ImportError: - from typing_extensions import Literal # type: ignore[misc, assignment] + from typing_extensions import Literal # type: ignore[assignment] try: # Python 3.10+: https://www.python.org/dev/peps/pep-0484/ - from typing import TypeAlias # type: ignore[attr-defined] + from typing import TypeAlias except ImportError: from typing_extensions import TypeAlias diff --git a/pyproject.toml b/pyproject.toml index 7af17e9d1..86095ac69 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -220,3 +220,13 @@ max-statements = 176 # Recommended: 50 pre-summary-newline = true wrap-summaries = 0 wrap-descriptions = 0 + +[tool.mypy] +show_error_codes = true +ignore_missing_imports = true +check_untyped_defs = true +disallow_untyped_defs = true +disallow_incomplete_defs = true +warn_redundant_casts = true +warn_unused_ignores = true +warn_unused_configs = true diff --git a/tests/test_cmap.py b/tests/test_cmap.py index 9d231cc49..09cac1bb5 100644 --- a/tests/test_cmap.py +++ b/tests/test_cmap.py @@ -53,8 +53,8 @@ def test_text_extraction_slow(caplog, url: str, name: str, strict: bool): [ # bfchar_on_2_chars: issue #1293 ( - "https://github.com/xyegithub/myBlog/raw/main/posts/c94b2364/paper_pdfs/ImageClassification/" - "2007%2CASurveyofImageClassificationBasedTechniques.pdf", + "https://raw.githubusercontent.com/xyegithub/myBlog/12127c712ac2008782616c743224b187a4069477/posts/" + "c94b2364/paper_pdfs/ImageClassification/2007%2CASurveyofImageClassificationBasedTechniques.pdf", "ASurveyofImageClassificationBasedTechniques.pdf", False, ),