Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a script for upgrading rules into the new dynamic format #1696

Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
50b5750
initial draft commit
yelhamer Aug 9, 2023
7c8f0ae
update changelog
yelhamer Aug 9, 2023
cd776ea
upse uppercase for constants
yelhamer Aug 9, 2023
dce17f2
fix typo in script naming
yelhamer Aug 9, 2023
4392c2a
finished-up script
yelhamer Aug 9, 2023
1be7035
removed old script with wrong name from the PR
yelhamer Aug 9, 2023
3368e1e
added type annotation and logging
yelhamer Aug 13, 2023
f9946df
Update scripts/upgrade-legacy-rules.py
yelhamer Aug 14, 2023
90601a6
Merge remote-tracking branch 'parentrepo/dynamic-feature-extraction' …
yelhamer Aug 15, 2023
6428de2
upgrade-legacy-rules.py: refactor code and fix missing features
yelhamer Aug 15, 2023
1daff9e
Merge branch 'upgrade-legacy-rules-script' of https://github.com/yelh…
yelhamer Aug 15, 2023
dabef68
removed newline identation
yelhamer Aug 16, 2023
fb62d88
Update scripts/upgrade-legacy-rules.py
yelhamer Aug 16, 2023
c0f7ac8
added corner-case for `or` statements
yelhamer Aug 16, 2023
81a592d
use %s instead of f-string in logging
yelhamer Aug 16, 2023
8fd88cb
perserve numbers' hexadecimal representation
yelhamer Aug 16, 2023
82553e3
Merge branch 'upgrade-legacy-rules-script' of https://github.com/yelh…
yelhamer Aug 16, 2023
10d852d
add double quotes around strings
yelhamer Aug 16, 2023
cc9975d
handle escape characters
yelhamer Aug 17, 2023
dd14824
fixed several issues
yelhamer Aug 17, 2023
1170c55
remove debugging statements
yelhamer Aug 17, 2023
dc151c0
bugfix for or ceng statements
yelhamer Aug 17, 2023
3175725
update
yelhamer Aug 17, 2023
2ac4423
fix author's quoting
yelhamer Aug 20, 2023
a9c049e
fix strings issue
yelhamer Aug 20, 2023
7a68a18
fix autor's quotes
yelhamer Aug 21, 2023
06eaafd
other fixes
yelhamer Aug 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
- Add dynamic capability extraction @yelhamer
- Add support for mixed-scopes rules @yelhamer
- Add a call scope @yelhamer
- Add a rule migration script @yelhamer
yelhamer marked this conversation as resolved.
Show resolved Hide resolved

### Breaking Changes

Expand Down
145 changes: 145 additions & 0 deletions scripts/upgrage-legacy-rules.py
yelhamer marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
#!/usr/bin/env python3
# Copyright (C) 2023 Mandiant, Inc. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at: [package root]/LICENSE.txt
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and limitations under the License.


import sys
import argparse
import textwrap
from typing import List, Optional
from pathlib import Path

import yaml

from capa.main import collect_rule_file_paths
from capa.rules import Rule
from capa.features.address import NO_ADDRESS

# feature keys that rec_features_list copies into the dynamic flavor of a rule.
dynamic_features = ("api", "string", "substring", "number", "description", "regex", "match", "os")
# keys that rec_features_list and upgrade_rule hand to rec_bool as logical statements
# ("N or more" statements are recognized separately, by their suffix).
engine_words = ("and", "or", "optional", "not")
williballenthin marked this conversation as resolved.
Show resolved Hide resolved
# keys that rec_features_list and upgrade_rule hand to rec_scope as subscopes.
static_scopes = ("function", "basic block", "instruction")
# rec_features_list also treats these keys as subscopes; rec_scope emits its dynamic counterpart under "thread".
dynamic_scopes = ("thread",)
yelhamer marked this conversation as resolved.
Show resolved Hide resolved


def rec_features_list(static, context=False):
    """
    takes in a list of static features, and returns it alongside a list of dynamic-only features

    args:
      static: list of feature nodes, each a mapping such as {"api": ...} or {"and": [...]}.
      context: False at the top level, "s" while building the static flavor of a subscope,
        "d" while building its dynamic flavor.

    returns:
      a (static, dynamic) tuple: `static` is the input list, untouched; `dynamic` is a new
      list with the nodes (or their rewritten variants) that are kept for dynamic analysis.

    raises:
      ValueError: if a node is empty, or if a list value sits under a key that is neither
        a scope nor a logical statement.
    """
    dynamic = []
    for node in static:
        if not node:
            # previously an empty node silently reused the key/value of the prior node.
            raise ValueError(f"empty feature node: {node}")

        # classify the node by its feature key rather than by whichever key comes last:
        # a sibling `description` entry must not make a static-only feature look dynamic.
        key = next((k for k in node if k != "description"), "description")
        value = node[key]

        if isinstance(value, list):
            # is either subscope or ceng
            if key in (*static_scopes, *dynamic_scopes):
                # is subscope
                stat, dyn = rec_scope(key, value, context)
                if not context and dyn:
                    # top level: accept either the static subscope or its dynamic counterpart
                    dynamic.append({"or": [stat, dyn]})
                elif context == "d" and dyn:
                    dynamic.append(dyn)
            elif key in engine_words or key.endswith("or more"):
                # is ceng
                stat, dyn = rec_bool(key, value, context)
                if dyn:
                    dynamic.append(dyn)
            else:
                raise ValueError(f"key: {key}, value: {value}")

        if key.startswith("count"):
            # count(api(foo)) -> api: judge the feature that is being counted
            key = key.split("(")[1].split(")")[0]
        if key in dynamic_features:
            dynamic.append(node)
    return static, dynamic


def rec_scope(key, value, context=False):
    """
    takes in a static subscope, and returns it alongside its dynamic counterpart.

    args:
      key: the subscope's name, e.g. "function".
      value: list of feature nodes found under the subscope.
      context: accepted for signature parity with the other rec_* helpers; not used here.

    returns:
      a (static, dynamic) tuple of mappings: `static` is keyed by `key`, `dynamic` is keyed
      by "thread". `dynamic` is an empty mapping when no dynamic content was produced, so
      that callers can test it for truthiness (same convention as rec_bool).
    """
    # several children, or an instruction subscope, are processed as an `and` statement.
    # (the original also tested `key not in engine_words`, which is always true for "instruction".)
    if len(value) > 1 or key == "instruction":
        static, _ = rec_bool("and", value, "s")
        _, dynamic = rec_bool("and", value, "d")
    else:
        static, _ = rec_features_list(value, "s")
        _, dynamic = rec_features_list(value, "d")

    if not dynamic:
        # without this guard {"thread": {}} / {"thread": []} is truthy and callers would
        # emit a dynamic subscope that matches nothing.
        return {key: static}, {}
    # NOTE(review): `static`/`dynamic` are a mapping in the first branch and a list in the second;
    # confirm which shape the rule writer expects under a subscope.
    return {key: static}, {"thread": dynamic}


def rec_bool(key, value, context=False):
    """
    takes in a capa logical statement and returns a static and dynamic variation of it.

    args:
      key: the statement's name, e.g. "and", "or", "2 or more".
      value: list of child feature nodes.
      context: forwarded unchanged to rec_features_list.

    returns:
      a (static, dynamic) tuple of mappings: `static` is always {key: value}; `dynamic` is
      {key: <dynamic children>} or an empty mapping when there is no dynamic variation.
    """
    stat, dyn = rec_features_list(value, context)
    if key == "and":
        # an `and` only has a dynamic variation when the dynamic children carry the same
        # keys as the static ones. compare sorted tuples of keys: dict_keys views order by
        # subset, which is only a partial order and therefore not safe to sort on.
        static_keys = sorted(tuple(node) for node in stat)
        dynamic_keys = sorted(tuple(node) for node in dyn)
        if static_keys != dynamic_keys:
            return {key: value}, {}
    if dyn:
        return {key: value}, {key: dyn}
    return {key: value}, {}


def upgrade_rule(content):
    """
    prints the first top-level feature node of a parsed rule, followed by its dynamic variation.

    args:
      content: parsed rule document; content["rule"]["features"][0] is the node that gets upgraded.

    raises:
      ValueError: if the top-level feature node is empty.
    """
    features = content["rule"]["features"]
    root = features[0]
    print(f"original: {root}\n")

    if not root:
        # previously this surfaced as a NameError on the leaked loop variables.
        raise ValueError(f"empty top-level feature node: {root}")

    # pick the statement/feature key explicitly, skipping a sibling `description` entry,
    # instead of relying on the variables left behind by an empty for-loop.
    key = next((k for k in root if k != "description"), "description")
    value = root[key]

    if key in static_scopes:
        print(f"modified: {rec_scope(key, value)[1]}")
    elif key in engine_words:
        print(f"modified: {rec_bool(key, value)[1]}")
    else:
        print(f"modified: {rec_features_list([{key: value}])[1]}")

    print("\n\n")


def main(argv: Optional[List[str]] = None):
    """
    command-line entry point: collects the rule files under --old-rules-path, parses each one
    as YAML and passes it to upgrade_rule.

    args:
      argv: command-line arguments without the program name; None lets argparse use sys.argv.

    exits with status 0 when the user declines to overwrite the old rules.
    --new-rules-save-path is only used for the overwrite check in this function.
    """
    desc = (
        "Upgrade legacy-format rulesets into the new rules format which supports static and dynamic analysis flavors."
    )
    parser = argparse.ArgumentParser(description=desc, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--old-rules-path", default="../rules", help="path to the legacy ruleset")
    parser.add_argument("--new-rules-save-path", default="../upgraded-rules/", help="where to save the upgraded rules")
    args = parser.parse_args(args=argv)

    # check args
    old_rules_path = Path(args.old_rules_path)
    new_rules_save_path = Path(args.new_rules_save_path)
    # compare resolved paths so that the same directory spelled differently
    # (relative vs. absolute, via "..", via a symlink) still triggers the warning.
    if old_rules_path.resolve() == new_rules_save_path.resolve():
        print(
            textwrap.dedent(
                """
                WARNING: you've specified the same directory as the old-rules' path and the new rules' save path,
                which will cause this script to overwrite your old rules with the new upgraded ones.
                Are you sure you want proceed with overwritting the old rules [O]verwrite/[E]xit:
                """
            )
        )
        response = ""
        while response not in ("o", "e"):
            # tolerate surrounding whitespace in the typed answer
            response = input().strip().lower()
            if response == "o":
                print("Old rules' folder will be overwritten.")
            elif response == "e":
                print("The ruleset will not been upgraded.")
                sys.exit(0)
            else:
                print("Please provide a valid answer [O]verwrite/[E]xit: ")

    # Get rules
    rule_file_paths: List[Path] = collect_rule_file_paths([old_rules_path])
    for rule_path in rule_file_paths:
        text = rule_path.read_bytes().decode("utf-8")
        # NOTE(security): yaml.Loader can construct arbitrary Python objects; consider
        # yaml.safe_load / SafeLoader if rule files may come from an untrusted source.
        content = yaml.load(text, Loader=yaml.Loader)
        upgrade_rule(content)


if __name__ == "__main__":
    # run only when executed as a script; main's return value becomes the exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
Loading