Skip to content

Commit

Permalink
Merge pull request #175 from epi052/174-add-similar-page-filter
Browse files Browse the repository at this point in the history
add fuzzy page filter
  • Loading branch information
epi052 authored Dec 27, 2020
2 parents bfb228e + 8835707 commit 0c29f3d
Show file tree
Hide file tree
Showing 17 changed files with 487 additions and 127 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "feroxbuster"
version = "1.10.3"
version = "1.11.0"
authors = ["Ben 'epi' Risher <[email protected]>"]
license = "MIT"
edition = "2018"
Expand Down Expand Up @@ -41,6 +41,7 @@ regex = "1"
crossterm = "0.18"
rlimit = "0.5"
ctrlc = "3.1"
fuzzyhash = "0.2"

[dev-dependencies]
tempfile = "3.1"
Expand Down
330 changes: 216 additions & 114 deletions README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions ferox-config.toml.example
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
# depth = 1
# filter_size = [5174]
# filter_regex = ["^ignore me$"]
# filter_similar = ["https://somesite.com/soft404"]
# filter_word_count = [993]
# filter_line_count = [35, 36]
# queries = [["name","value"], ["rick", "astley"]]
Expand Down
1 change: 1 addition & 0 deletions shell_completions/_feroxbuster
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ _feroxbuster() {
'*--filter-lines=[Filter out messages of a particular line count (ex: -N 20 -N 31,30)]' \
'*-C+[Filter out status codes (deny list) (ex: -C 200 -C 401)]' \
'*--filter-status=[Filter out status codes (deny list) (ex: -C 200 -C 401)]' \
'*--filter-similar-to=[Filter out pages that are similar to the given page (ex. --filter-similar-to http://site.xyz/soft404)]' \
'-L+[Limit total number of concurrent scans (default: 0, i.e. no limit)]' \
'--scan-limit=[Limit total number of concurrent scans (default: 0, i.e. no limit)]' \
'--time-limit=[Limit total run time of all scans (ex: --time-limit 10m)]' \
Expand Down
1 change: 1 addition & 0 deletions shell_completions/_feroxbuster.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ Register-ArgumentCompleter -Native -CommandName 'feroxbuster' -ScriptBlock {
[CompletionResult]::new('--filter-lines', 'filter-lines', [CompletionResultType]::ParameterName, 'Filter out messages of a particular line count (ex: -N 20 -N 31,30)')
[CompletionResult]::new('-C', 'C', [CompletionResultType]::ParameterName, 'Filter out status codes (deny list) (ex: -C 200 -C 401)')
[CompletionResult]::new('--filter-status', 'filter-status', [CompletionResultType]::ParameterName, 'Filter out status codes (deny list) (ex: -C 200 -C 401)')
[CompletionResult]::new('--filter-similar-to', 'filter-similar-to', [CompletionResultType]::ParameterName, 'Filter out pages that are similar to the given page (ex. --filter-similar-to http://site.xyz/soft404)')
[CompletionResult]::new('-L', 'L', [CompletionResultType]::ParameterName, 'Limit total number of concurrent scans (default: 0, i.e. no limit)')
[CompletionResult]::new('--scan-limit', 'scan-limit', [CompletionResultType]::ParameterName, 'Limit total number of concurrent scans (default: 0, i.e. no limit)')
[CompletionResult]::new('--time-limit', 'time-limit', [CompletionResultType]::ParameterName, 'Limit total run time of all scans (ex: --time-limit 10m)')
Expand Down
6 changes: 5 additions & 1 deletion shell_completions/feroxbuster.bash
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ _feroxbuster() {

case "${cmd}" in
feroxbuster)
opts=" -v -q -D -r -k -n -f -e -h -V -w -u -t -d -T -p -P -R -s -o -a -x -H -Q -S -X -W -N -C -L --verbosity --quiet --json --dont-filter --redirects --insecure --no-recursion --add-slash --stdin --extract-links --help --version --wordlist --url --threads --depth --timeout --proxy --replay-proxy --replay-codes --status-codes --output --resume-from --debug-log --user-agent --extensions --headers --query --filter-size --filter-regex --filter-words --filter-lines --filter-status --scan-limit --time-limit "
opts=" -v -q -D -r -k -n -f -e -h -V -w -u -t -d -T -p -P -R -s -o -a -x -H -Q -S -X -W -N -C -L --verbosity --quiet --json --dont-filter --redirects --insecure --no-recursion --add-slash --stdin --extract-links --help --version --wordlist --url --threads --depth --timeout --proxy --replay-proxy --replay-codes --status-codes --output --resume-from --debug-log --user-agent --extensions --headers --query --filter-size --filter-regex --filter-words --filter-lines --filter-status --filter-similar-to --scan-limit --time-limit "
if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
return 0
Expand Down Expand Up @@ -187,6 +187,10 @@ _feroxbuster() {
COMPREPLY=($(compgen -f "${cur}"))
return 0
;;
--filter-similar-to)
COMPREPLY=($(compgen -f "${cur}"))
return 0
;;
--scan-limit)
COMPREPLY=($(compgen -f "${cur}"))
return 0
Expand Down
1 change: 1 addition & 0 deletions shell_completions/feroxbuster.fish
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ complete -c feroxbuster -n "__fish_use_subcommand" -s X -l filter-regex -d 'Filt
complete -c feroxbuster -n "__fish_use_subcommand" -s W -l filter-words -d 'Filter out messages of a particular word count (ex: -W 312 -W 91,82)'
complete -c feroxbuster -n "__fish_use_subcommand" -s N -l filter-lines -d 'Filter out messages of a particular line count (ex: -N 20 -N 31,30)'
complete -c feroxbuster -n "__fish_use_subcommand" -s C -l filter-status -d 'Filter out status codes (deny list) (ex: -C 200 -C 401)'
complete -c feroxbuster -n "__fish_use_subcommand" -l filter-similar-to -d 'Filter out pages that are similar to the given page (ex. --filter-similar-to http://site.xyz/soft404)'
complete -c feroxbuster -n "__fish_use_subcommand" -s L -l scan-limit -d 'Limit total number of concurrent scans (default: 0, i.e. no limit)'
complete -c feroxbuster -n "__fish_use_subcommand" -l time-limit -d 'Limit total run time of all scans (ex: --time-limit 10m)'
complete -c feroxbuster -n "__fish_use_subcommand" -s v -l verbosity -d 'Increase verbosity level (use -vv or more for greater effect. [CAUTION] 4 -v\'s is probably too much)'
Expand Down
11 changes: 11 additions & 0 deletions src/banner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,17 @@ by Ben "epi" Risher {} ver: {}"#,
}
}

if !config.filter_similar.is_empty() {
for filter in &config.filter_similar {
writeln!(
&mut writer,
"{}",
format_banner_entry!(format_emoji("💢"), "Similarity Filter", filter)
)
.unwrap_or_default(); // 💢
}
}

for filter in &config.filter_word_count {
writeln!(
&mut writer,
Expand Down
24 changes: 24 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,10 @@ pub struct Configuration {
/// non-negative integer and the next character is either s, m, h, or d (case insensitive)
#[serde(default)]
pub time_limit: String,

/// Filter out response bodies that meet a certain threshold of similarity
#[serde(default)]
pub filter_similar: Vec<String>,
}

// functions timeout, threads, status_codes, user_agent, wordlist, save_state, and depth are used to provide
Expand Down Expand Up @@ -328,6 +332,7 @@ impl Default for Configuration {
filter_line_count: Vec::new(),
filter_word_count: Vec::new(),
filter_status: Vec::new(),
filter_similar: Vec::new(),
headers: HashMap::new(),
depth: depth(),
threads: threads(),
Expand Down Expand Up @@ -359,6 +364,7 @@ impl Configuration {
/// - **insecure**: `false` (don't be insecure, i.e. don't allow invalid certs)
/// - **extensions**: `None`
/// - **filter_size**: `None`
/// - **filter_similar**: `None`
/// - **filter_regex**: `None`
/// - **filter_word_count**: `None`
/// - **filter_line_count**: `None`
Expand Down Expand Up @@ -552,6 +558,10 @@ impl Configuration {
config.filter_regex = arg.map(|val| val.to_string()).collect();
}

if let Some(arg) = args.values_of("filter_similar") {
config.filter_similar = arg.map(|val| val.to_string()).collect();
}

if let Some(arg) = args.values_of("filter_size") {
config.filter_size = arg
.map(|size| {
Expand Down Expand Up @@ -760,6 +770,11 @@ impl Configuration {
new.filter_regex,
Vec::<String>::new()
);
update_if_not_default!(
&mut conf.filter_similar,
new.filter_similar,
Vec::<String>::new()
);
update_if_not_default!(
&mut conf.filter_word_count,
new.filter_word_count,
Expand Down Expand Up @@ -893,6 +908,7 @@ mod tests {
depth = 1
filter_size = [4120]
filter_regex = ["^ignore me$"]
filter_similar = ["https://somesite.com/soft404"]
filter_word_count = [994, 992]
filter_line_count = [34]
filter_status = [201]
Expand Down Expand Up @@ -936,6 +952,7 @@ mod tests {
assert_eq!(config.extensions, Vec::<String>::new());
assert_eq!(config.filter_size, Vec::<u64>::new());
assert_eq!(config.filter_regex, Vec::<String>::new());
assert_eq!(config.filter_similar, Vec::<String>::new());
assert_eq!(config.filter_word_count, Vec::<usize>::new());
assert_eq!(config.filter_line_count, Vec::<usize>::new());
assert_eq!(config.filter_status, Vec::<u16>::new());
Expand Down Expand Up @@ -1103,6 +1120,13 @@ mod tests {
assert_eq!(config.filter_regex, vec!["^ignore me$"]);
}

#[test]
/// load the test config and confirm filter_similar holds the single url from the toml
fn config_reads_filter_similar() {
    let parsed = setup_config_test();
    let expected = vec![String::from("https://somesite.com/soft404")];

    assert_eq!(parsed.filter_similar, expected);
}

#[test]
/// parse the test config and see that the value parsed is correct
fn config_reads_filter_size() {
Expand Down
93 changes: 92 additions & 1 deletion src/filters.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::config::CONFIGURATION;
use crate::utils::get_url_path_length;
use crate::FeroxResponse;
use crate::{FeroxResponse, FeroxSerialize};
use fuzzyhash::FuzzyHash;
use regex::Regex;
use std::any::Any;
use std::fmt::Debug;
Expand Down Expand Up @@ -282,6 +283,44 @@ impl PartialEq for RegexFilter {
}
}

/// Simple implementor of FeroxFilter; used to filter out responses based on the similarity of a
/// Response body with a known response; specified using --filter-similar-to
///
/// A response is filtered when the score returned by `FuzzyHash::compare` for `text` and the
/// fuzzy hash of the response's body is greater than or equal to `threshold`
#[derive(Default, Debug, PartialEq)]
pub struct SimilarityFilter {
/// Value compared against the fuzzy hash of each response body; it is handed directly to
/// `FuzzyHash::compare`, so in the usage visible here it holds the fuzzy hash of the known
/// page (i.e. `FuzzyHash::new(body).to_string()`) rather than the raw body itself
pub text: String,

/// Percentage of similarity at which a page is determined to be a near-duplicate of another;
/// scores greater than or equal to this value cause the response to be filtered
pub threshold: u32,
}

/// implementation of FeroxFilter for SimilarityFilter
impl FeroxFilter for SimilarityFilter {
    /// Fuzzy-hash the body held in `FeroxResponse::text` and score it against the value stored
    /// in this filter (derived from the page passed via --filter-similar-to); a score at or
    /// above `threshold` means the response should be filtered
    fn should_filter_response(&self, response: &FeroxResponse) -> bool {
        let candidate = FuzzyHash::new(&response.text).to_string();

        match FuzzyHash::compare(&self.text, &candidate) {
            Ok(score) => score >= self.threshold,
            Err(_) => {
                // couldn't hash the response, don't filter
                log::warn!("Could not hash body from {}", response.as_str());
                false
            }
        }
    }

    /// Equality against a type-erased value; true only when `other` is a SimilarityFilter that
    /// compares equal to this one
    fn box_eq(&self, other: &dyn Any) -> bool {
        match other.downcast_ref::<Self>() {
            Some(rhs) => self == rhs,
            None => false,
        }
    }

    /// Expose self as Any so the concrete filter type can be recovered via downcast
    fn as_any(&self) -> &dyn Any {
        self
    }
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -419,4 +458,56 @@ mod tests {

assert!(filter.should_filter_response(&resp));
}

#[test]
/// walk the similarity filter through three cases: a score under the threshold, input that
/// can't be compared, and a score over the threshold
fn similarity_filter_is_accurate() {
    let mut response = FeroxResponse {
        text: "sitting".to_string(),
        wildcard: false,
        url: Url::parse("http://localhost/stuff").unwrap(),
        content_length: 100,
        word_count: 50,
        line_count: 25,
        headers: reqwest::header::HeaderMap::new(),
        status: reqwest::StatusCode::OK,
    };

    let mut similarity = SimilarityFilter {
        text: FuzzyHash::new("kitten").to_string(),
        threshold: 95,
    };

    // kitten/sitting is 57% similar, so a threshold of 95 should not be filtered
    let below_threshold = similarity.should_filter_response(&response);
    assert!(!below_threshold);

    response.text = String::new();
    similarity = SimilarityFilter {
        text: String::new(),
        threshold: 100,
    };

    // two empty strings are the same, however ssdeep doesn't accept empty strings, expect false
    let unhashable = similarity.should_filter_response(&response);
    assert!(!unhashable);

    response.text = "some data to hash for the purposes of running a test".to_string();
    similarity = SimilarityFilter {
        text: FuzzyHash::new("some data to hash for the purposes of running a te").to_string(),
        threshold: 17,
    };

    let above_threshold = similarity.should_filter_response(&response);
    assert!(above_threshold);
}

#[test]
/// coverage-oriented test: reads the inner value and round-trips the filter through as_any
fn similarity_filter_as_any() {
    let original = SimilarityFilter {
        text: "stuff".to_string(),
        threshold: 95,
    };

    assert_eq!(original.text, "stuff");

    let recovered = original
        .as_any()
        .downcast_ref::<SimilarityFilter>()
        .unwrap();

    assert_eq!(*recovered, original);
}
}
3 changes: 3 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ pub const VERSION: &str = env!("CARGO_PKG_VERSION");
/// Maximum number of file descriptors that can be opened during a scan
pub const DEFAULT_OPEN_FILE_LIMIT: usize = 8192;

/// Default value used to determine near-duplicate web pages (equivalent to 95%)
pub const SIMILARITY_THRESHOLD: u32 = 95;

/// Default wordlist to use when `-w|--wordlist` isn't specified and `wordlist` isn't set
/// in a [ferox-config.toml](constant.DEFAULT_CONFIG_NAME.html) config file.
///
Expand Down
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ async fn scan(
return Err(Box::new(err));
}

scanner::initialize(words.len(), &CONFIGURATION);
scanner::initialize(words.len(), &CONFIGURATION).await;

if CONFIGURATION.resumed {
if let Ok(scans) = SCANNED_URLS.scans.lock() {
Expand Down
11 changes: 11 additions & 0 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,17 @@ pub fn initialize() -> App<'static, 'static> {
"Filter out status codes (deny list) (ex: -C 200 -C 401)",
),
)
.arg(
Arg::with_name("filter_similar")
.long("filter-similar-to")
.value_name("UNWANTED_PAGE")
.takes_value(true)
.multiple(true)
.use_delimiter(true)
.help(
"Filter out pages that are similar to the given page (ex. --filter-similar-to http://site.xyz/soft404)",
),
)
.arg(
Arg::with_name("extract_links")
.short("e")
Expand Down
4 changes: 2 additions & 2 deletions src/scan_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1035,10 +1035,10 @@ mod tests {

let json_state = ferox_state.as_json();
let expected = format!(
r#"{{"scans":[{{"id":"{}","url":"https://spiritanimal.com","scan_type":"Directory","complete":false}}],"config":{{"type":"configuration","wordlist":"/usr/share/seclists/Discovery/Web-Content/raft-medium-directories.txt","config":"","proxy":"","replay_proxy":"","target_url":"","status_codes":[200,204,301,302,307,308,401,403,405],"replay_codes":[200,204,301,302,307,308,401,403,405],"filter_status":[],"threads":50,"timeout":7,"verbosity":0,"quiet":false,"json":false,"output":"","debug_log":"","user_agent":"feroxbuster/{}","redirects":false,"insecure":false,"extensions":[],"headers":{{}},"queries":[],"no_recursion":false,"extract_links":false,"add_slash":false,"stdin":false,"depth":4,"scan_limit":0,"filter_size":[],"filter_line_count":[],"filter_word_count":[],"filter_regex":[],"dont_filter":false,"resumed":false,"save_state":false,"time_limit":""}},"responses":[{{"type":"response","url":"https://nerdcore.com/css","path":"/css","wildcard":true,"status":301,"content_length":173,"line_count":10,"word_count":16,"headers":{{"server":"nginx/1.16.1"}}}}]}}"#,
r#"{{"scans":[{{"id":"{}","url":"https://spiritanimal.com","scan_type":"Directory","complete":false}}],"config":{{"type":"configuration","wordlist":"/usr/share/seclists/Discovery/Web-Content/raft-medium-directories.txt","config":"","proxy":"","replay_proxy":"","target_url":"","status_codes":[200,204,301,302,307,308,401,403,405],"replay_codes":[200,204,301,302,307,308,401,403,405],"filter_status":[],"threads":50,"timeout":7,"verbosity":0,"quiet":false,"json":false,"output":"","debug_log":"","user_agent":"feroxbuster/{}","redirects":false,"insecure":false,"extensions":[],"headers":{{}},"queries":[],"no_recursion":false,"extract_links":false,"add_slash":false,"stdin":false,"depth":4,"scan_limit":0,"filter_size":[],"filter_line_count":[],"filter_word_count":[],"filter_regex":[],"dont_filter":false,"resumed":false,"save_state":false,"time_limit":"","filter_similar":[]}},"responses":[{{"type":"response","url":"https://nerdcore.com/css","path":"/css","wildcard":true,"status":301,"content_length":173,"line_count":10,"word_count":16,"headers":{{"server":"nginx/1.16.1"}}}}]}}"#,
saved_id, VERSION
);

println!("{}\n{}", expected, json_state);
assert!(predicates::str::similar(expected).eval(&json_state));
}

Expand Down
Loading

0 comments on commit 0c29f3d

Please sign in to comment.