diff --git a/capa/features/address.py b/capa/features/address.py index 4df11f892..800cefcd3 100644 --- a/capa/features/address.py +++ b/capa/features/address.py @@ -116,29 +116,6 @@ def __lt__(self, other): return (self.thread, self.id) < (other.thread, other.id) -class DynamicReturnAddress(Address): - """an address from a dynamic analysis trace""" - - def __init__(self, call: DynamicCallAddress, return_address: int): - assert return_address >= 0 - self.call = call - self.return_address = return_address - - def __repr__(self): - return f"{self.call}, dynamic-call(return-address: 0x{self.return_address:x})" - - def __hash__(self): - return hash((self.call, self.return_address)) - - def __eq__(self, other): - assert isinstance(other, DynamicReturnAddress) - return (self.call, self.return_address) == (other.call, other.return_address) - - def __lt__(self, other): - assert isinstance(other, DynamicReturnAddress) - return (self.call, self.return_address) < (other.call, other.return_address) - - class RelativeVirtualAddress(int, Address): """a memory address relative to a base address""" diff --git a/capa/features/extractors/cape/call.py b/capa/features/extractors/cape/call.py index 405902da3..8e2167304 100644 --- a/capa/features/extractors/cape/call.py +++ b/capa/features/extractors/cape/call.py @@ -15,7 +15,7 @@ import capa.features.extractors.cape.process from capa.features.insn import API, Number from capa.features.common import String, Feature -from capa.features.address import Address, DynamicReturnAddress +from capa.features.address import Address from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle logger = logging.getLogger(__name__) @@ -44,14 +44,13 @@ def extract_call_features( calls: List[Dict[str, Any]] = process["calls"] call = calls[ch.address.id] assert call["thread_id"] == str(th.address.tid) - caller = DynamicReturnAddress(call=ch.address, return_address=int(call["caller"], 16)) # list similar to disassembly: 
arguments right-to-left, call for arg in call["arguments"][::-1]: try: - yield Number(int(arg["value"], 16)), caller + yield Number(int(arg["value"], 16)), ch.address except ValueError: - yield String(arg["value"]), caller - yield API(call["api"]), caller + yield String(arg["value"]), ch.address + yield API(call["api"]), ch.address def extract_features( diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index 7af642f6a..7b56751b0 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -84,18 +84,6 @@ def from_capa(cls, a: capa.features.address.Address) -> "Address": elif isinstance(a, capa.features.address.DynamicCallAddress): return cls(type=AddressType.CALL, value=(a.thread.process.ppid, a.thread.process.pid, a.thread.tid, a.id)) - elif isinstance(a, capa.features.address.DynamicReturnAddress): - return cls( - type=AddressType.DYNAMIC, - value=( - a.call.thread.process.ppid, - a.call.thread.process.pid, - a.call.thread.tid, - a.call.id, - a.return_address, - ), - ) - elif a == capa.features.address.NO_ADDRESS or isinstance(a, capa.features.address._NoAddress): return cls(type=AddressType.NO_ADDRESS, value=None) @@ -159,19 +147,6 @@ def to_capa(self) -> capa.features.address.Address: id=id_, ) - elif self.type is AddressType.DYNAMIC: - assert isinstance(self.value, tuple) - ppid, pid, tid, id_, return_address = self.value - return capa.features.address.DynamicReturnAddress( - call=capa.features.address.DynamicCallAddress( - thread=capa.features.address.ThreadAddress( - process=capa.features.address.ProcessAddress(ppid=ppid, pid=pid), tid=tid - ), - id=id_, - ), - return_address=return_address, - ) - elif self.type is AddressType.NO_ADDRESS: return capa.features.address.NO_ADDRESS @@ -233,8 +208,10 @@ class ThreadFeature(HashableModel): class CallFeature(HashableModel): """ args: - call: the call id to which this feature belongs. 
- address: the address at which this feature is found (it's dynamic return address). + call: the address of the call to which this feature belongs. + address: the address at which this feature is found. + + call != address for consistency with Process and Thread. """ call: Address @@ -279,8 +256,7 @@ class InstructionFeature(HashableModel): instruction: the address of the instruction to which this feature belongs. address: the address at which this feature is found. - instruction != address because, e.g., the feature may be found *within* the scope (basic block), - versus right at its starting address. + instruction != address for consistency with Function and BasicBlock. """ instruction: Address diff --git a/capa/render/verbose.py b/capa/render/verbose.py index a5787f920..77392cf92 100644 --- a/capa/render/verbose.py +++ b/capa/render/verbose.py @@ -56,10 +56,8 @@ def format_address(address: frz.Address) -> str: return f"token({capa.helpers.hex(token)})+{capa.helpers.hex(offset)}" elif address.type == frz.AddressType.DYNAMIC: assert isinstance(address.value, tuple) - id_, return_address = address.value - assert isinstance(id_, int) - assert isinstance(return_address, int) - return f"event: {id_}, retaddr: 0x{return_address:x}" + ppid, pid, tid, id_, return_address = address.value + return f"process ppid: {ppid}, process pid: {pid}, thread id: {tid}, call: {id_}, return address: {capa.helpers.hex(return_address)}" elif address.type == frz.AddressType.PROCESS: assert isinstance(address.value, tuple) ppid, pid = address.value @@ -71,6 +69,10 @@ def format_address(address: frz.Address) -> str: tid = address.value assert isinstance(tid, int) return f"thread id: {tid}" + elif address.type == frz.AddressType.CALL: + assert isinstance(address.value, tuple) + ppid, pid, tid, id_ = address.value + return f"process ppid: {ppid}, process pid: {pid}, thread id: {tid}, call: {id_}" elif address.type == frz.AddressType.NO_ADDRESS: return "global" else: diff --git 
a/scripts/show-features.py b/scripts/show-features.py index 8f2e87679..d909d95b7 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -273,8 +273,8 @@ def print_dynamic_features(processes, extractor: DynamicFeatureExtractor): continue if isinstance(feature, API): - assert isinstance(addr, capa.features.address.DynamicReturnAddress) - apis.append((addr.call.id, str(feature.value))) + assert isinstance(addr, capa.features.address.DynamicCallAddress) + apis.append((addr.id, str(feature.value))) if isinstance(feature, (Number, String)): arguments.append(str(feature.value)) diff --git a/tests/fixtures.py b/tests/fixtures.py index af6e5f8f1..2bf81e67d 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -42,7 +42,6 @@ BBHandle, CallHandle, InsnHandle, - SampleHashes, ThreadHandle, ProcessHandle, FunctionHandle, @@ -364,8 +363,18 @@ def get_data_path_by_name(name) -> Path: / "data" / "dynamic" / "cape" + / "v2.2" / "0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82.json.gz" ) + elif name.startswith("d46900"): + return ( + CD + / "data" + / "dynamic" + / "cape" + / "v2.2" + / "d46900384c78863420fb3e297d0a2f743cd2b6b3f7f82bf64059a168e07aceb7.json.gz" + ) elif name.startswith("ea2876"): return CD / "data" / "ea2876e9175410b6f6719f80ee44b9553960758c7d0f7bed73c0fe9a78d8e669.dll_" else: @@ -653,54 +662,6 @@ def parametrize(params, values, **kwargs): return pytest.mark.parametrize(params, values, ids=ids, **kwargs) -EXTRACTOR_HASHING_TESTS = [ - # viv extractor - ( - get_viv_extractor(get_data_path_by_name("mimikatz")), - SampleHashes( - md5="5f66b82558ca92e54e77f216ef4c066c", - sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38", - sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d", - ), - ), - # PE extractor - ( - get_pefile_extractor(get_data_path_by_name("mimikatz")), - SampleHashes( - md5="5f66b82558ca92e54e77f216ef4c066c", - sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38", - 
sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d", - ), - ), - # dnFile extractor - ( - get_dnfile_extractor(get_data_path_by_name("b9f5b")), - SampleHashes( - md5="b9f5bd514485fb06da39beff051b9fdc", - sha1="c72a2e50410475a51d897d29ffbbaf2103754d53", - sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1", - ), - ), - # dotnet File - ( - get_dotnetfile_extractor(get_data_path_by_name("b9f5b")), - SampleHashes( - md5="b9f5bd514485fb06da39beff051b9fdc", - sha1="c72a2e50410475a51d897d29ffbbaf2103754d53", - sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1", - ), - ), - # cape extractor - ( - get_cape_extractor(get_data_path_by_name("0000a657")), - SampleHashes( - md5="e2147b5333879f98d515cd9aa905d489", - sha1="ad4d520fb7792b4a5701df973d6bd8a6cbfbb57f", - sha256="0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82", - ), - ), -] - DYNAMIC_FEATURE_PRESENCE_TESTS = sorted( [ # file/string diff --git a/tests/test_extractor_hashing.py b/tests/test_extractor_hashing.py index 9bb2fe5e1..4fa10a202 100644 --- a/tests/test_extractor_hashing.py +++ b/tests/test_extractor_hashing.py @@ -16,12 +16,48 @@ logger = logging.getLogger(__name__) -@fixtures.parametrize( - "extractor,hashes", - fixtures.EXTRACTOR_HASHING_TESTS, -) -def test_hash_extraction(extractor, hashes): - assert extractor.get_sample_hashes() == hashes +def test_viv_hash_extraction(): + assert fixtures.get_viv_extractor(fixtures.get_data_path_by_name("mimikatz")).get_sample_hashes() == SampleHashes( + md5="5f66b82558ca92e54e77f216ef4c066c", + sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38", + sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d", + ) + + +def test_pefile_hash_extraction(): + assert fixtures.get_pefile_extractor( + fixtures.get_data_path_by_name("mimikatz") + ).get_sample_hashes() == SampleHashes( + md5="5f66b82558ca92e54e77f216ef4c066c", + sha1="e4f82e4d7f22938dc0a0ff8a4a7ad2a763643d38", + 
sha256="131314a6f6d1d263c75b9909586b3e1bd837036329ace5e69241749e861ac01d", + ) + + +def test_dnfile_hash_extraction(): + assert fixtures.get_dnfile_extractor(fixtures.get_data_path_by_name("b9f5b")).get_sample_hashes() == SampleHashes( + md5="b9f5bd514485fb06da39beff051b9fdc", + sha1="c72a2e50410475a51d897d29ffbbaf2103754d53", + sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1", + ) + + +def test_dotnetfile_hash_extraction(): + assert fixtures.get_dotnetfile_extractor( + fixtures.get_data_path_by_name("b9f5b") + ).get_sample_hashes() == SampleHashes( + md5="b9f5bd514485fb06da39beff051b9fdc", + sha1="c72a2e50410475a51d897d29ffbbaf2103754d53", + sha256="34acc4c0b61b5ce0b37c3589f97d1f23e6d84011a241e6f85683ee517ce786f1", + ) + + +def test_cape_hash_extraction(): + assert fixtures.get_cape_extractor(fixtures.get_data_path_by_name("0000a657")).get_sample_hashes() == SampleHashes( + md5="e2147b5333879f98d515cd9aa905d489", + sha1="ad4d520fb7792b4a5701df973d6bd8a6cbfbb57f", + sha256="0000a65749f5902c4d82ffa701198038f0b4870b00a27cfca109f8f933476d82", + ) # We need to skip the binja test if we cannot import binaryninja, e.g., in GitHub CI. diff --git a/tests/test_main.py b/tests/test_main.py index da592dc45..d09f33975 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -6,8 +6,10 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
+import gzip import json import textwrap +from pathlib import Path import pytest import fixtures @@ -582,3 +584,59 @@ def test_main_rd(): assert capa.main.main([path, "-j"]) == 0 assert capa.main.main([path, "-q"]) == 0 assert capa.main.main([path]) == 0 + + +def extract_cape_report(tmp_path: Path, gz: Path) -> Path: + report = tmp_path / "report.json" + report.write_bytes(gzip.decompress(gz.read_bytes())) + return report + + +def test_main_cape1(tmp_path): + path = extract_cape_report(tmp_path, fixtures.get_data_path_by_name("0000a657")) + + # TODO(williballenthin): use default rules set + # https://github.com/mandiant/capa/pull/1696 + rules = tmp_path / "rules" + rules.mkdir() + (rules / "create-or-open-registry-key.yml").write_text( + textwrap.dedent( + """ + rule: + meta: + name: create or open registry key + authors: + - testing + scopes: + static: instruction + dynamic: call + features: + - or: + - api: advapi32.RegOpenKey + - api: advapi32.RegOpenKeyEx + - api: advapi32.RegCreateKey + - api: advapi32.RegCreateKeyEx + - api: advapi32.RegOpenCurrentUser + - api: advapi32.RegOpenKeyTransacted + - api: advapi32.RegOpenUserClassesRoot + - api: advapi32.RegCreateKeyTransacted + - api: ZwOpenKey + - api: ZwOpenKeyEx + - api: ZwCreateKey + - api: ZwOpenKeyTransacted + - api: ZwOpenKeyTransactedEx + - api: ZwCreateKeyTransacted + - api: NtOpenKey + - api: NtCreateKey + - api: SHRegOpenUSKey + - api: SHRegCreateUSKey + - api: RtlCreateRegistryKey + """ + ) + ) + + assert capa.main.main([str(path), "-r", str(rules)]) == 0 + assert capa.main.main([str(path), "-q", "-r", str(rules)]) == 0 + assert capa.main.main([str(path), "-j", "-r", str(rules)]) == 0 + assert capa.main.main([str(path), "-v", "-r", str(rules)]) == 0 + assert capa.main.main([str(path), "-vv", "-r", str(rules)]) == 0