Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove dynamic return address concept #1716

Merged
merged 5 commits into from
Aug 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 0 additions & 23 deletions capa/features/address.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,29 +116,6 @@ def __lt__(self, other):
return (self.thread, self.id) < (other.thread, other.id)


class DynamicReturnAddress(Address):
    """
    an address from a dynamic analysis trace: a call plus that call's return address.

    instances hash, compare equal, and order by the pair (call, return_address).
    """

    def __init__(self, call: DynamicCallAddress, return_address: int):
        # negative return addresses are rejected before any state is stored.
        assert return_address >= 0
        self.return_address = return_address
        self.call = call

    def _key(self):
        # the single tuple that drives hashing, equality, and ordering.
        return (self.call, self.return_address)

    def __repr__(self):
        return f"{self.call}, dynamic-call(return-address: 0x{self.return_address:x})"

    def __hash__(self):
        return hash(self._key())

    def __eq__(self, other):
        # comparison is only defined against other DynamicReturnAddress instances.
        assert isinstance(other, DynamicReturnAddress)
        return self._key() == other._key()

    def __lt__(self, other):
        # ordering is only defined against other DynamicReturnAddress instances.
        assert isinstance(other, DynamicReturnAddress)
        return self._key() < other._key()


class RelativeVirtualAddress(int, Address):
"""a memory address relative to a base address"""

Expand Down
9 changes: 4 additions & 5 deletions capa/features/extractors/cape/call.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import capa.features.extractors.cape.process
from capa.features.insn import API, Number
from capa.features.common import String, Feature
from capa.features.address import Address, DynamicReturnAddress
from capa.features.address import Address
from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -44,14 +44,13 @@ def extract_call_features(
calls: List[Dict[str, Any]] = process["calls"]
call = calls[ch.address.id]
assert call["thread_id"] == str(th.address.tid)
caller = DynamicReturnAddress(call=ch.address, return_address=int(call["caller"], 16))
# list similar to disassembly: arguments right-to-left, call
for arg in call["arguments"][::-1]:
try:
yield Number(int(arg["value"], 16)), caller
yield Number(int(arg["value"], 16)), ch.address
except ValueError:
yield String(arg["value"]), caller
yield API(call["api"]), caller
yield String(arg["value"]), ch.address
yield API(call["api"]), ch.address


def extract_features(
Expand Down
34 changes: 5 additions & 29 deletions capa/features/freeze/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,6 @@ def from_capa(cls, a: capa.features.address.Address) -> "Address":
elif isinstance(a, capa.features.address.DynamicCallAddress):
return cls(type=AddressType.CALL, value=(a.thread.process.ppid, a.thread.process.pid, a.thread.tid, a.id))

elif isinstance(a, capa.features.address.DynamicReturnAddress):
return cls(
type=AddressType.DYNAMIC,
value=(
a.call.thread.process.ppid,
a.call.thread.process.pid,
a.call.thread.tid,
a.call.id,
a.return_address,
),
)

elif a == capa.features.address.NO_ADDRESS or isinstance(a, capa.features.address._NoAddress):
return cls(type=AddressType.NO_ADDRESS, value=None)

Expand Down Expand Up @@ -159,19 +147,6 @@ def to_capa(self) -> capa.features.address.Address:
id=id_,
)

elif self.type is AddressType.DYNAMIC:
assert isinstance(self.value, tuple)
ppid, pid, tid, id_, return_address = self.value
return capa.features.address.DynamicReturnAddress(
call=capa.features.address.DynamicCallAddress(
thread=capa.features.address.ThreadAddress(
process=capa.features.address.ProcessAddress(ppid=ppid, pid=pid), tid=tid
),
id=id_,
),
return_address=return_address,
)

elif self.type is AddressType.NO_ADDRESS:
return capa.features.address.NO_ADDRESS

Expand Down Expand Up @@ -233,8 +208,10 @@ class ThreadFeature(HashableModel):
class CallFeature(HashableModel):
"""
args:
call: the call id to which this feature belongs.
address: the address at which this feature is found (its dynamic return address).
call: the address of the call to which this feature belongs.
address: the address at which this feature is found.

call != address for consistency with Process and Thread.
"""

call: Address
Expand Down Expand Up @@ -279,8 +256,7 @@ class InstructionFeature(HashableModel):
instruction: the address of the instruction to which this feature belongs.
address: the address at which this feature is found.

instruction != address because, e.g., the feature may be found *within* the scope (basic block),
versus right at its starting address.
instruction != address for consistency with Function and BasicBlock.
"""

instruction: Address
Expand Down
10 changes: 6 additions & 4 deletions capa/render/verbose.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,8 @@ def format_address(address: frz.Address) -> str:
return f"token({capa.helpers.hex(token)})+{capa.helpers.hex(offset)}"
elif address.type == frz.AddressType.DYNAMIC:
assert isinstance(address.value, tuple)
id_, return_address = address.value
assert isinstance(id_, int)
assert isinstance(return_address, int)
return f"event: {id_}, retaddr: 0x{return_address:x}"
ppid, pid, tid, id_, return_address = address.value
return f"process ppid: {ppid}, process pid: {pid}, thread id: {tid}, call: {id_}, return address: {capa.helpers.hex(return_address)}"
elif address.type == frz.AddressType.PROCESS:
assert isinstance(address.value, tuple)
ppid, pid = address.value
Expand All @@ -71,6 +69,10 @@ def format_address(address: frz.Address) -> str:
tid = address.value
assert isinstance(tid, int)
return f"thread id: {tid}"
elif address.type == frz.AddressType.CALL:
assert isinstance(address.value, tuple)
ppid, pid, tid, id_ = address.value
return f"process ppid: {ppid}, process pid: {pid}, thread id: {tid}, call: {id_}"
elif address.type == frz.AddressType.NO_ADDRESS:
return "global"
else:
Expand Down
4 changes: 2 additions & 2 deletions scripts/show-features.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,8 @@ def print_dynamic_features(processes, extractor: DynamicFeatureExtractor):
continue

if isinstance(feature, API):
assert isinstance(addr, capa.features.address.DynamicReturnAddress)
apis.append((addr.call.id, str(feature.value)))
assert isinstance(addr, capa.features.address.DynamicCallAddress)
apis.append((addr.id, str(feature.value)))

if isinstance(feature, (Number, String)):
arguments.append(str(feature.value))
Expand Down
59 changes: 10 additions & 49 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
BBHandle,
CallHandle,
InsnHandle,
SampleHashes,
ThreadHandle,
ProcessHandle,
FunctionHandle,
Expand Down Expand Up @@ -364,8 +363,18 @@ def get_data_path_by_name(name) -> Path:
/ "data"
/ "dynamic"
/ "cape"
/ "v2.2"
/ "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz"
)
elif name.startswith("d46900"):
return (
CD
/ "data"
/ "dynamic"
/ "cape"
/ "v2.2"
/ "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz"
)
elif name.startswith("ea2876"):
return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_"
else:
Expand Down Expand Up @@ -653,54 +662,6 @@ def parametrize(params, values, **kwargs):
return pytest.mark.parametrize(params, values, ids=ids, **kwargs)


# each entry pairs a feature extractor with the SampleHashes (md5, sha1, sha256)
# that the extractor is expected to report for its sample.
# NOTE: the extractors are constructed right here in the list literal,
# i.e. when this assignment is executed, not lazily when an entry is used.
EXTRACTOR_HASHING_TESTS = [
# viv extractor
(
get_viv_extractor(get_data_path_by_name("mimikatz")),
SampleHashes(
md5="5f66b82558ca92e54e77f216ef4c066c",
sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38",
sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d",
),
),
# PE extractor
(
get_pefile_extractor(get_data_path_by_name("mimikatz")),
SampleHashes(
md5="5f66b82558ca92e54e77f216ef4c066c",
sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38",
sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d",
),
),
# dnFile extractor
(
get_dnfile_extractor(get_data_path_by_name("b9f5b")),
SampleHashes(
md5="b9f5bd514485fb06da39beff051b9fdc",
sha1="c72a2e50410475a51d897d29ffbbaf2103754d53",
sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1",
),
),
# dotnet File
(
get_dotnetfile_extractor(get_data_path_by_name("b9f5b")),
SampleHashes(
md5="b9f5bd514485fb06da39beff051b9fdc",
sha1="c72a2e50410475a51d897d29ffbbaf2103754d53",
sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1",
),
),
# cape extractor
(
get_cape_extractor(get_data_path_by_name("0000a657")),
SampleHashes(
md5="e2147b5333879f98d515cd9aa905d489",
sha1="ad4d520fb7792b4a5701df973d6bd8a6cbfbb57f",
sha256="0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82",
),
),
]

DYNAMIC_FEATURE_PRESENCE_TESTS = sorted(
[
# file/string
Expand Down
48 changes: 42 additions & 6 deletions tests/test_extractor_hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,48 @@
logger = logging.getLogger(__name__)


@fixtures.parametrize("extractor,hashes", fixtures.EXTRACTOR_HASHING_TESTS)
def test_hash_extraction(extractor, hashes):
    """each parametrized extractor must report exactly the expected sample hashes."""
    actual = extractor.get_sample_hashes()
    assert actual == hashes
def test_viv_hash_extraction():
    """the viv extractor reports the known hashes for the "mimikatz" test file."""
    sample = fixtures.get_data_path_by_name("mimikatz")
    actual = fixtures.get_viv_extractor(sample).get_sample_hashes()
    expected = SampleHashes(
        md5="5f66b82558ca92e54e77f216ef4c066c",
        sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38",
        sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d",
    )
    assert actual == expected


def test_pefile_hash_extraction():
    """the pefile extractor reports the known hashes for the "mimikatz" test file."""
    sample = fixtures.get_data_path_by_name("mimikatz")
    actual = fixtures.get_pefile_extractor(sample).get_sample_hashes()
    expected = SampleHashes(
        md5="5f66b82558ca92e54e77f216ef4c066c",
        sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38",
        sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d",
    )
    assert actual == expected


def test_dnfile_hash_extraction():
    """the dnfile extractor reports the known hashes for the "b9f5b" test file."""
    sample = fixtures.get_data_path_by_name("b9f5b")
    actual = fixtures.get_dnfile_extractor(sample).get_sample_hashes()
    expected = SampleHashes(
        md5="b9f5bd514485fb06da39beff051b9fdc",
        sha1="c72a2e50410475a51d897d29ffbbaf2103754d53",
        sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1",
    )
    assert actual == expected


def test_dotnetfile_hash_extraction():
    """the dotnetfile extractor reports the known hashes for the "b9f5b" test file."""
    sample = fixtures.get_data_path_by_name("b9f5b")
    actual = fixtures.get_dotnetfile_extractor(sample).get_sample_hashes()
    expected = SampleHashes(
        md5="b9f5bd514485fb06da39beff051b9fdc",
        sha1="c72a2e50410475a51d897d29ffbbaf2103754d53",
        sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1",
    )
    assert actual == expected


def test_cape_hash_extraction():
    """the cape extractor reports the known hashes for the "0000a657" test file."""
    sample = fixtures.get_data_path_by_name("0000a657")
    actual = fixtures.get_cape_extractor(sample).get_sample_hashes()
    expected = SampleHashes(
        md5="e2147b5333879f98d515cd9aa905d489",
        sha1="ad4d520fb7792b4a5701df973d6bd8a6cbfbb57f",
        sha256="0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82",
    )
    assert actual == expected


# We need to skip the binja test if we cannot import binaryninja, e.g., in GitHub CI.
Expand Down
58 changes: 58 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.
import gzip
import json
import textwrap
from pathlib import Path

import pytest
import fixtures
Expand Down Expand Up @@ -582,3 +584,59 @@ def test_main_rd():
assert capa.main.main([path, "-j"]) == 0
assert capa.main.main([path, "-q"]) == 0
assert capa.main.main([path]) == 0


def extract_cape_report(tmp_path: Path, gz: Path) -> Path:
    """
    decompress a gzip-compressed report into `tmp_path` and return the new file's path.

    args:
      tmp_path: existing directory that receives the decompressed `report.json`.
      gz: path to the gzip-compressed report.

    returns: the path `tmp_path / "report.json"`.
    """
    import shutil

    report = tmp_path / "report.json"
    # stream the decompression in chunks so that neither the compressed input
    # nor the decompressed report has to be held in memory in full.
    with gzip.open(gz, "rb") as src, report.open("wb") as dst:
        shutil.copyfileobj(src, dst)
    return report


def test_main_cape1(tmp_path):
# end-to-end check: run capa.main.main against an extracted CAPE report using a
# rules directory that contains a single rule, and expect exit code 0 each time.
path = extract_cape_report(tmp_path, fixtures.get_data_path_by_name("0000a657"))

# TODO(williballenthin): use default rules set
# https://github.com/mandiant/capa/pull/1696
# until then, write one rule (with both static and dynamic scopes) into a
# fresh directory under tmp_path and pass that directory via "-r".
rules = tmp_path / "rules"
rules.mkdir()
(rules / "create-or-open-registry-key.yml").write_text(
textwrap.dedent(
"""
rule:
meta:
name: create or open registry key
authors:
- testing
scopes:
static: instruction
dynamic: call
features:
- or:
- api: advapi32.RegOpenKey
- api: advapi32.RegOpenKeyEx
- api: advapi32.RegCreateKey
- api: advapi32.RegCreateKeyEx
- api: advapi32.RegOpenCurrentUser
- api: advapi32.RegOpenKeyTransacted
- api: advapi32.RegOpenUserClassesRoot
- api: advapi32.RegCreateKeyTransacted
- api: ZwOpenKey
- api: ZwOpenKeyEx
- api: ZwCreateKey
- api: ZwOpenKeyTransacted
- api: ZwOpenKeyTransactedEx
- api: ZwCreateKeyTransacted
- api: NtOpenKey
- api: NtCreateKey
- api: SHRegOpenUSKey
- api: SHRegCreateUSKey
- api: RtlCreateRegistryKey
"""
)
)

# same report and rules each time; only the output flag varies (none, -q, -j, -v, -vv).
assert capa.main.main([str(path), "-r", str(rules)]) == 0
assert capa.main.main([str(path), "-q", "-r", str(rules)]) == 0
assert capa.main.main([str(path), "-j", "-r", str(rules)]) == 0
assert capa.main.main([str(path), "-v", "-r", str(rules)]) == 0
assert capa.main.main([str(path), "-vv", "-r", str(rules)]) == 0
Loading