Skip to content

Commit

Permalink
PI: optimize _decode_png_prediction (#2068)
Browse files Browse the repository at this point in the history
This commit reduces the runtime of tests/test_page.py::test_image_new_property from 8.98s to 5.69s on my machine, making it about 1.5x faster.

On another machine, a speedup of 1.47x to 1.78x was measured (OLD: 22s - 25s, NEW: 14s - 15s)
  • Loading branch information
exiledkingcc authored Aug 10, 2023
1 parent ff89ea3 commit f0781db
Showing 1 changed file with 36 additions and 17 deletions.
53 changes: 36 additions & 17 deletions pypdf/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
deprecate_with_replacement,
logger_warning,
ord_,
paeth_predictor,
)
from .constants import CcittFaxDecodeParameters as CCITT
from .constants import ColorSpaces
Expand Down Expand Up @@ -182,17 +181,15 @@ def decode(
return str_data

@staticmethod
def _decode_png_prediction(data: str, columns: int, rowlength: int) -> bytes:
output = BytesIO()
def _decode_png_prediction(data: bytes, columns: int, rowlength: int) -> bytes:
# PNG prediction can vary from row to row
if len(data) % rowlength != 0:
raise PdfReadError("Image data is not rectangular")
output = []
prev_rowdata = (0,) * rowlength
bpp = (rowlength - 1) // columns # recomputed locally to not change params
for row in range(len(data) // rowlength):
rowdata = [
ord_(x) for x in data[(row * rowlength) : ((row + 1) * rowlength)]
]
for row in range(0, len(data), rowlength):
rowdata: List[int] = list(data[row : row + rowlength])
filter_byte = rowdata[0]

if filter_byte == 0:
Expand All @@ -204,25 +201,47 @@ def _decode_png_prediction(data: str, columns: int, rowlength: int) -> bytes:
for i in range(1, rowlength):
rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
elif filter_byte == 3:
for i in range(1, rowlength):
left = rowdata[i - bpp] if i > bpp else 0
floor = math.floor(left + prev_rowdata[i]) / 2
rowdata[i] = (rowdata[i] + int(floor)) % 256
for i in range(1, bpp + 1):
# left = 0
floor = prev_rowdata[i] // 2
rowdata[i] = (rowdata[i] + floor) % 256
for i in range(bpp + 1, rowlength):
left = rowdata[i - bpp]
floor = (left + prev_rowdata[i]) // 2
rowdata[i] = (rowdata[i] + floor) % 256
elif filter_byte == 4:
for i in range(1, rowlength):
left = rowdata[i - bpp] if i > bpp else 0
for i in range(1, bpp + 1):
# left = 0
up = prev_rowdata[i]
# up_left = 0
paeth = up
rowdata[i] = (rowdata[i] + paeth) % 256
for i in range(bpp + 1, rowlength):
left = rowdata[i - bpp]
up = prev_rowdata[i]
up_left = prev_rowdata[i - bpp] if i > bpp else 0
paeth = paeth_predictor(left, up, up_left)
up_left = prev_rowdata[i - bpp]

p = left + up - up_left
dist_left = abs(p - left)
dist_up = abs(p - up)
dist_up_left = abs(p - up_left)

if dist_left <= dist_up and dist_left <= dist_up_left:
paeth = left
elif dist_up <= dist_up_left:
paeth = up
else:
paeth = up_left

rowdata[i] = (rowdata[i] + paeth) % 256
else:
# unsupported PNG filter
raise PdfReadError(
f"Unsupported PNG filter {filter_byte!r}"
) # pragma: no cover
prev_rowdata = tuple(rowdata)
output.write(bytearray(rowdata[1:]))
return output.getvalue()
output.extend(rowdata[1:])
return bytes(output)

@staticmethod
def encode(data: bytes, level: int = -1) -> bytes:
Expand Down

0 comments on commit f0781db

Please sign in to comment.