Skip to content

Commit

Permalink
MAINT: Reduce usage of b_ (#2100)
Browse files Browse the repository at this point in the history
When we know the parameter is UTF-8-encodable and longer than two characters, pypdf doesn't need to use `b_`; it can call `.encode()` directly.
  • Loading branch information
MartinThoma authored Aug 20, 2023
1 parent b39ac96 commit 3033122
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 27 deletions.
5 changes: 2 additions & 3 deletions pypdf/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@
from ._utils import (
StrByteType,
StreamType,
b_,
deprecate_no_replacement,
deprecation_no_replacement,
deprecation_with_replacement,
Expand Down Expand Up @@ -1257,7 +1256,7 @@ def _get_object_from_stream(
assert cast(str, obj_stm["/Type"]) == "/ObjStm"
# /N is the number of indirect objects in the stream
assert idx < obj_stm["/N"]
stream_data = BytesIO(b_(obj_stm.get_data())) # type: ignore
stream_data = BytesIO(obj_stm.get_data())
for i in range(obj_stm["/N"]): # type: ignore
read_non_whitespace(stream_data)
stream_data.seek(-1, 1)
Expand Down Expand Up @@ -1868,7 +1867,7 @@ def _read_pdf15_xref_stream(
xrefstream = cast(ContentStream, read_object(stream, self))
assert cast(str, xrefstream["/Type"]) == "/XRef"
self.cache_indirect_object(generation, idnum, xrefstream)
stream_data = BytesIO(b_(xrefstream.get_data()))
stream_data = BytesIO(xrefstream.get_data())
# Index pairs specify the subsections in the dictionary. If
# none create one subsection that spans everything.
idx_pairs = xrefstream.get("/Index", [0, xrefstream.get("/Size")])
Expand Down
12 changes: 6 additions & 6 deletions pypdf/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@ def add_attachment(self, filename: str, data: Union[str, bytes]) -> None:
# endobj

file_entry = DecodedStreamObject()
file_entry.set_data(data)
file_entry.set_data(b_(data))
file_entry.update({NameObject(PA.TYPE): NameObject("/EmbeddedFile")})

# The Filespec entry
Expand Down Expand Up @@ -1363,7 +1363,7 @@ def _write_pdf_structure(self, stream: StreamType) -> List[int]:
if obj is not None:
idnum = i + 1
object_positions.append(stream.tell())
stream.write(b_(str(idnum)) + b" 0 obj\n")
stream.write(f"{idnum} 0 obj\n".encode())
if self._encryption and obj != self._encrypt_entry:
obj = self._encryption.encrypt_object(obj, idnum, 0)
obj.write_to_stream(stream)
Expand All @@ -1373,10 +1373,10 @@ def _write_pdf_structure(self, stream: StreamType) -> List[int]:
def _write_xref_table(self, stream: StreamType, object_positions: List[int]) -> int:
xref_location = stream.tell()
stream.write(b"xref\n")
stream.write(b_(f"0 {len(self._objects) + 1}\n"))
stream.write(b_(f"{0:0>10} {65535:0>5} f \n"))
stream.write(f"0 {len(self._objects) + 1}\n".encode())
stream.write(f"{0:0>10} {65535:0>5} f \n".encode())
for offset in object_positions:
stream.write(b_(f"{offset:0>10} {0:0>5} n \n"))
stream.write(f"{offset:0>10} {0:0>5} n \n".encode())
return xref_location

def _write_trailer(self, stream: StreamType, xref_location: int) -> None:
Expand All @@ -1401,7 +1401,7 @@ def _write_trailer(self, stream: StreamType, xref_location: int) -> None:
if self._encrypt_entry:
trailer[NameObject(TK.ENCRYPT)] = self._encrypt_entry.indirect_reference
trailer.write_to_stream(stream)
stream.write(b_(f"\nstartxref\n{xref_location}\n%%EOF\n")) # eof
stream.write(f"\nstartxref\n{xref_location}\n%%EOF\n".encode()) # eof

def add_metadata(self, infos: Dict[str, Any]) -> None:
"""
Expand Down
2 changes: 0 additions & 2 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
from typing import Any, Dict, List, Optional, Tuple, Union, cast

from ._utils import (
b_,
deprecate_with_replacement,
logger_warning,
ord_,
Expand Down Expand Up @@ -1035,7 +1034,6 @@ def _handle_jpx(
else:
extension = ".png" # mime_type = "image/png"
image_format = "PNG"
data = b_(data)
img = Image.open(BytesIO(data), formats=("TIFF", "PNG"))
elif lfilters == FT.DCT_DECODE:
img, image_format, extension = Image.open(BytesIO(data)), "JPEG", ".jpg"
Expand Down
8 changes: 4 additions & 4 deletions pypdf/generic/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def write_to_stream(
deprecate_no_replacement(
"the encryption_key parameter of write_to_stream", "5.0.0"
)
stream.write(b_(f"{self.idnum} {self.generation} R"))
stream.write(f"{self.idnum} {self.generation} R".encode())

def writeToStream(
self, stream: StreamType, encryption_key: Union[None, str, bytes]
Expand Down Expand Up @@ -602,10 +602,10 @@ def write_to_stream(
for c in bytearr:
if not chr(c).isalnum() and c != b" ":
# This:
# stream.write(b_(rf"\{c:0>3o}"))
# stream.write(rf"\{c:0>3o}".encode())
# gives
# https://github.com/davidhalter/parso/issues/207
stream.write(b_("\\%03o" % c))
stream.write(("\\%03o" % c).encode())
else:
stream.write(b_(chr(c)))
stream.write(b")")
Expand Down Expand Up @@ -648,7 +648,7 @@ def write_to_stream(
deprecate_no_replacement(
"the encryption_key parameter of write_to_stream", "5.0.0"
)
stream.write(self.renumber()) # b_(renumber(self)))
stream.write(self.renumber())

def writeToStream(
self, stream: StreamType, encryption_key: Union[None, str, bytes]
Expand Down
26 changes: 14 additions & 12 deletions pypdf/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,7 @@ def _clone(

def hash_value_data(self) -> bytes:
data = super().hash_value_data()
data += b_(self._data)
data += self._data
return data

@property
Expand Down Expand Up @@ -906,10 +906,10 @@ def flate_encode(self, level: int = -1) -> "EncodedStreamObject":


class DecodedStreamObject(StreamObject):
def get_data(self) -> Any:
def get_data(self) -> bytes:
return self._data

def set_data(self, data: Any) -> Any:
def set_data(self, data: bytes) -> None:
self._data = data

def getData(self) -> Any: # deprecated
Expand All @@ -935,7 +935,7 @@ def decodedSelf(self, value: DecodedStreamObject) -> None: # deprecated
deprecation_with_replacement("decodedSelf", "decoded_self", "3.0.0")
self.decoded_self = value

def get_data(self) -> Union[None, str, bytes]:
def get_data(self) -> bytes:
from ..filters import decode_stream_data

if self.decoded_self is not None:
Expand All @@ -956,7 +956,7 @@ def getData(self) -> Union[None, str, bytes]: # deprecated
deprecation_with_replacement("getData", "get_data", "3.0.0")
return self.get_data()

def set_data(self, data: Any) -> None: # deprecated
def set_data(self, data: bytes) -> None: # deprecated
from ..filters import FlateDecode

if self.get(SA.FILTER, "") == FT.FLATE_DECODE:
Expand Down Expand Up @@ -996,14 +996,14 @@ def __init__(
if isinstance(stream, ArrayObject):
data = b""
for s in stream:
data += b_(s.get_object().get_data())
data += s.get_object().get_data()
if len(data) == 0 or data[-1] != b"\n":
data += b"\n"
stream_bytes = BytesIO(data)
else:
stream_data = stream.get_data()
assert stream_data is not None
stream_data_bytes = b_(stream_data)
stream_data_bytes = b_(stream_data) # this is necessary
stream_bytes = BytesIO(stream_data_bytes)
self.forced_encoding = forced_encoding
self.__parse_content_stream(stream_bytes)
Expand Down Expand Up @@ -1194,8 +1194,8 @@ def _data(self) -> bytes: # type: ignore
return new_data.getvalue()

@_data.setter
def _data(self, value: Union[str, bytes]) -> None:
self.__parse_content_stream(BytesIO(b_(value)))
def _data(self, value: bytes) -> None:
self.__parse_content_stream(BytesIO(value))


def read_object(
Expand Down Expand Up @@ -1276,10 +1276,12 @@ def __init__(self, data: DictionaryObject) -> None:
if isinstance(self.get("/V"), EncodedStreamObject):
d = cast(EncodedStreamObject, self[NameObject("/V")]).get_data()
if isinstance(d, bytes):
d = d.decode()
d_str = d.decode()
elif d is None:
d = ""
self[NameObject("/V")] = TextStringObject(d)
d_str = ""
else:
raise Exception("Should never happen")
self[NameObject("/V")] = TextStringObject(d_str)

# TABLE 8.69 Entries common to all field dictionaries
@property
Expand Down

0 comments on commit 3033122

Please sign in to comment.