Skip to content

Commit

Permalink
bang on literal detection bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
rocky committed Apr 24, 2022
1 parent 9a464b5 commit 067ef52
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 87 deletions.
95 changes: 12 additions & 83 deletions decompyle3/scanners/scanner37.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2016-2019, 2021 by Rocky Bernstein
# Copyright (c) 2016-2019, 2021-2022 by Rocky Bernstein
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
Expand All @@ -13,7 +13,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Python 3.7 bytecode decompiler scanner
Python 3.7 bytecode decompiler scanner.
Does some additional massaging of xdis-disassembled instructions to
make things easier for decompilation.
Expand All @@ -22,100 +22,28 @@
scanner routine for Python 3.
"""

from decompyle3.scanners.scanner37base import Scanner37Base
from typing import Tuple

from decompyle3.scanner import Token
from decompyle3.scanners.scanner37base import Scanner37Base

# bytecode verification, verify(), uses JUMP_OPs from here
from xdis.opcodes import opcode_37 as opc

# bytecode verification, verify(), uses JUMP_OPS from here
JUMP_OPs = opc.JUMP_OPS

CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")


class Scanner37(Scanner37Base):
def __init__(self, show_asm=None, debug=False, is_pypy=False):
Scanner37Base.__init__(self, (3, 7), show_asm, is_pypy)
def __init__(self, show_asm=None, debug="", is_pypy=False):
Scanner37Base.__init__(self, (3, 7), show_asm, debug, is_pypy)
self.debug = debug
return

pass

def bound_collection(self, tokens: list, t: Token, i: int, collection_type: str):
    """
    Try to bracket the operands of the collection-building token ``t``
    (found at ``tokens[i]``) with marker tokens.

    ``t.attr`` is used as the number of operand tokens that precede ``t``;
    for "CONST_DICT" one more token is included (see below).

    Returns a new list:

    * ``tokens[: i + 1]`` when ``t.attr`` is less than 5, or when any
      operand token is not a LOAD_CONST, LOAD_FAST, LOAD_GLOBAL or
      LOAD_NAME;
    * otherwise the tokens before the first operand, followed by a
      COLLECTION_START token, one ADD_VALUE token per operand, and a
      ``BUILD_<collection_type>`` token carrying the fields of ``t``.
    """
    operand_count = t.attr
    assert isinstance(operand_count, int)

    assert operand_count <= i

    # Short collections are not worth rewriting.
    if operand_count < 5:
        return tokens[: i + 1]

    if collection_type == "CONST_DICT":
        # Constant dictionaries work via BUILD_CONST_KEY_MAP and handle
        # the values() like sets and lists.  However the keys() are a
        # LOAD_CONST of the keys, so one more token belongs to the
        # collection.
        operand_count += 1

    first = i - operand_count
    operand_range = range(first, i)

    simple_loads = ("LOAD_CONST", "LOAD_FAST", "LOAD_GLOBAL", "LOAD_NAME")
    if any(tokens[k].kind not in simple_loads for k in operand_range):
        return tokens[: i + 1]

    kind_index = CONST_COLLECTIONS.index(collection_type)

    # Every operand is a plain load: emit a boundary marker, then the
    # operands renamed to ADD_VALUE, then the build instruction itself.
    result = tokens[:first]
    first_offset = tokens[first].offset
    result.append(
        Token(
            opname="COLLECTION_START",
            attr=kind_index,
            pattr=collection_type,
            offset=f"{first_offset}_0",
            has_arg=True,
            opc=self.opc,
            has_extended_arg=False,
        )
    )
    result.extend(
        Token(
            opname="ADD_VALUE",
            attr=tokens[k].attr,
            pattr=tokens[k].pattr,
            offset=tokens[k].offset,
            has_arg=True,
            linestart=tokens[k].linestart,
            opc=self.opc,
            has_extended_arg=False,
        )
        for k in operand_range
    )
    result.append(
        Token(
            opname=f"BUILD_{collection_type}",
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            has_arg=t.has_arg,
            linestart=t.linestart,
            opc=t.opc,
            has_extended_arg=False,
        )
    )
    return result

def ingest(self, co, classname=None, code_objects={}, show_asm=None) -> tuple:
def ingest(
self, co, classname=None, code_objects={}, show_asm=None
) -> Tuple[list, dict]:
tokens, customize = Scanner37Base.ingest(
self, co, classname, code_objects, show_asm
)
Expand All @@ -133,12 +61,13 @@ def ingest(self, co, classname=None, code_objects={}, show_asm=None) -> tuple:
else t.kind.split("_")[1]
)
new_tokens = self.bound_collection(
tokens, t, i, f"CONST_{collection_type}"
tokens, new_tokens, t, i, f"CONST_{collection_type}"
)
continue

# The lowest bit of flags indicates whether the
# var-keyword argument is placed at the top of the stack
elif t.op == self.opc.CALL_FUNCTION_EX and t.attr & 1:
if t.op == self.opc.CALL_FUNCTION_EX and t.attr & 1:
t.kind = "CALL_FUNCTION_EX_KW"
pass
elif t.op == self.opc.BUILD_STRING:
Expand Down
77 changes: 77 additions & 0 deletions decompyle3/scanners/scanner37base.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
globals().update(op3.opmap)


CONST_COLLECTIONS = ("CONST_LIST", "CONST_SET", "CONST_DICT")


class Scanner37Base(Scanner):
def __init__(self, version: Tuple[int], show_asm=None, debug="", is_pypy=False):
super(Scanner37Base, self).__init__(version, show_asm, is_pypy)
Expand Down Expand Up @@ -183,6 +186,80 @@ def __init__(self, version: Tuple[int], show_asm=None, debug="", is_pypy=False):
# self.varargs_ops = frozenset(self.opc.hasvargs)
return

def bound_collection(
    self, tokens: list, next_tokens: list, t: Token, i: int, collection_type: str
) -> list:
    """
    Try to bracket the operands of the collection-building token ``t``
    (found at ``tokens[i]``) with marker tokens.

    ``tokens`` is the token list being scanned and ``next_tokens`` is the
    output list built so far; neither list is modified.  ``t.attr`` is
    used as the operand count; for "CONST_DICT" one more token is counted
    (see below).

    Returns a new list:

    * ``next_tokens + [t]`` when fewer than 5 tokens are involved, when
      the operands would start before the beginning of ``tokens``, or
      when any operand token is not a LOAD_CONST, LOAD_FAST, LOAD_GLOBAL
      or LOAD_NAME;
    * otherwise ``next_tokens`` without its last ``count`` entries,
      followed by a COLLECTION_START token, one ADD_VALUE token per
      operand, and a ``BUILD_<collection_type>`` token carrying the
      fields of ``t``.
    """
    count = t.attr
    assert isinstance(count, int)

    assert count <= i

    if collection_type == "CONST_DICT":
        # Constant dictionaries work via BUILD_CONST_KEY_MAP and
        # handle the values() like sets and lists.
        # However the keys() are a LOAD_CONST of the keys.
        # Adjust the count to account for this.
        count += 1

    # For small collections don't bother.
    if count < 5:
        return next_tokens + [t]

    collection_start = i - count

    # The assertion above was made before the CONST_DICT adjustment, so
    # the start index can still be -1 here.  A negative index would wrap
    # around and read tokens from the end of the list; treat that as
    # "not a collection we can rewrite".
    if collection_start < 0:
        return next_tokens + [t]

    for j in range(collection_start, i):
        if tokens[j].kind not in (
            "LOAD_CONST",
            "LOAD_FAST",
            "LOAD_GLOBAL",
            "LOAD_NAME",
        ):
            return next_tokens + [t]

    collection_enum = CONST_COLLECTIONS.index(collection_type)

    # If we get here, all of the operand instructions before tokens[i]
    # are simple loads.  Add a boundary marker and change each load to
    # ADD_VALUE.
    new_tokens = next_tokens[:-count]
    start_offset = tokens[collection_start].offset
    new_tokens.append(
        Token(
            opname="COLLECTION_START",
            attr=collection_enum,
            pattr=collection_type,
            offset=f"{start_offset}_0",
            has_arg=True,
            opc=self.opc,
            has_extended_arg=False,
        )
    )
    for j in range(collection_start, i):
        new_tokens.append(
            Token(
                opname="ADD_VALUE",
                attr=tokens[j].attr,
                pattr=tokens[j].pattr,
                offset=tokens[j].offset,
                has_arg=True,
                linestart=tokens[j].linestart,
                opc=self.opc,
                has_extended_arg=False,
            )
        )
    new_tokens.append(
        Token(
            opname=f"BUILD_{collection_type}",
            attr=t.attr,
            pattr=t.pattr,
            offset=t.offset,
            has_arg=t.has_arg,
            linestart=t.linestart,
            opc=t.opc,
            has_extended_arg=False,
        )
    )
    return new_tokens

def ingest(self, co, classname=None, code_objects={}, show_asm=None):
"""
Pick out tokens from a decompyle3 code object, and transform them,
Expand Down
9 changes: 7 additions & 2 deletions decompyle3/scanners/scanner38.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def ingest(
print(jump_back_targets)
loop_ends: List[int] = []
next_end = tokens[len(tokens) - 1].off2int() + 10

new_tokens = []
for i, token in enumerate(tokens):
opname = token.kind
offset = token.offset
Expand All @@ -81,6 +83,8 @@ def ingest(
else tokens[len(tokens) - 1].off2int() + 10
)

# things that smash new_tokens like BUILD_LIST have to come first.

if offset in jump_back_targets:
next_end = off2int(jump_back_targets[offset], prefer_last=False)
if self.debug:
Expand All @@ -97,6 +101,7 @@ def ingest(
if opname == "JUMP_ABSOLUTE" and jump_target <= next_end:
# Not a forward-enough jump to break out of the next loop, so continue.
# FIXME: Do we need "continue" detection?
new_tokens.append(token)
continue

# We also want to avoid confusing BREAK_LOOPS with parts of the
Expand Down Expand Up @@ -136,8 +141,8 @@ def ingest(
):
token.kind = "BREAK_LOOP"
pass
pass
return tokens, customize
new_tokens.append(token)
return new_tokens, customize


if __name__ == "__main__":
Expand Down
12 changes: 10 additions & 2 deletions decompyle3/semantics/make_function36.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,16 @@ def build_param(ast, name, default, annotation=None):
# FIXME: handle free_tup, ann_dict, and default_tup
if kw_dict:
assert kw_dict == "dict"
defaults = [self.traverse(n, indent="") for n in kw_dict[:-2]]
names = eval(self.traverse(kw_dict[-2]))
const_list = kw_dict[0]
if kw_dict[0] == "const_list":
add_consts = const_list[1]
assert add_consts == "add_consts"
names = add_consts[-1].attr
defaults = [v.pattr for v in add_consts[:-1]]
else:
defaults = [self.traverse(n, indent="") for n in kw_dict[:-2]]
names = eval(self.traverse(kw_dict[-2]))

assert len(defaults) == len(names)
# FIXME: possibly handle line breaks
for i, n in enumerate(names):
Expand Down

0 comments on commit 067ef52

Please sign in to comment.