Skip to content

Commit

Permalink
feat: add gaussian mutator (#474)
Browse files Browse the repository at this point in the history
* feat: add gaussian mutator

* chore: fmt

* chore: format

---------

Co-authored-by: Chaofan Shou <[email protected]>
  • Loading branch information
plotchy and shouc authored May 25, 2024
1 parent 9fc7ae2 commit b7c3a23
Showing 1 changed file with 151 additions and 0 deletions.
151 changes: 151 additions & 0 deletions src/mutation_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,155 @@ impl ConstantPoolMetadata {
}

impl_serdeany!(ConstantPoolMetadata);
/// [`GaussianNoiseMutator`] is a mutator that rescales the whole input value by
/// a randomly drawn, roughly bell-shaped factor.
///
/// The input bytes are treated as one unsigned integer of arbitrary width,
/// with index 0 as the most significant byte (overflow is detected at index 0
/// and carries propagate towards lower indices). That integer is multiplied by
/// `1 + noise`, where `noise` is centered on zero and its spread is set by a
/// randomly picked percentage. The goal is to mutate the input within a
/// general range of itself, independent of its potential size.
///
/// The available percentages are 10%, 25%, 50%, 100%, 200%, 500% and 1000%.
/// For example, a uint256 of 10,000 will typically land somewhere between
/// 7,500 and 12,500 if the 25% multiplier is chosen; the hard limit of the
/// noise for a given percentage is +-4/3 of that percentage, because the noise
/// is built from the sum of 8 uniform samples. These percentages were chosen
/// to be able to both focus close to the input value but also be able to
/// explore the space around it aggressively.
///
/// This probably isn't useful for signed integers, since the bytes
/// representation is treated as a uint and negative values will always be
/// scaled according to the max size.
///
/// The mutated value is clamped between 0 (every byte set to `0`) and the
/// maximum value for the size of the input (every byte set to `255`).
#[derive(Default)]
pub struct GaussianNoiseMutator;

impl Named for GaussianNoiseMutator {
    /// Returns the fixed, human-readable identifier of this mutator.
    fn name(&self) -> &str {
        const MUTATOR_NAME: &str = "GaussianNoiseMutator";
        MUTATOR_NAME
    }
}

impl GaussianNoiseMutator {
pub fn new() -> Self {
Self
}
}

impl<I, S> Mutator<I, S> for GaussianNoiseMutator
where
S: State + HasRand + HasMetadata,
I: Input + HasBytesVec,
{
/// Mutate the input by adding Gaussian noise to the entire input value.
///
/// The mutation process involves:
/// 1. Selecting a multiplier from a predefined set of percentages that act
/// as the standard deviation to the distribution.
/// 2. Generating a scaling factor based on the chosen multiplier and a
/// Gaussian distribution.
/// 3. Scaling the input bytes by the calculated factor, with special
/// handling for overflow and underflow.
///
/// # Parameters
/// - `state`: The current state, which provides randomness.
/// - `input`: The input to be mutated.
/// - `_stage_idx`: The stage index (unused in this implementation).
///
/// # Returns
/// - `Ok(MutationResult::Mutated)` if the input was successfully mutated.
/// - `Ok(MutationResult::Skipped)` if the mutation was skipped.
/// - `Err(Error)` if an error occurred during mutation.
fn mutate(&mut self, state: &mut S, input: &mut I, _stage_idx: i32) -> Result<MutationResult, Error> {
// A gaussian distribution takes a mean and a standard deviation to define a
// curve. A value chosen within +-3 standard deviations is ~99.7% likely
// We are going to define a curve where the values at the +-3std mark are chosen
// according to the input scaled by a multiplier.
let three_sigma_multipliers = [0.1, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0]; // These 3rd_sigma values are +10%, 25%, 50%, etc of the original value
let sigma_index = state.rand_mut().below(three_sigma_multipliers.len() as u64) as usize;
let chosen_3rd_sigma = three_sigma_multipliers[sigma_index];
let mut scale_factor = {
let num_samples = 8; // 8 is chosen to be performant and still provide a reasonable distribution
let mut sum = 0.0;

// Generate uniformly distributed random variables and sum them up
for _ in 0..num_samples {
sum += state.rand_mut().next() as f64 / u64::MAX as f64;
}

// Normalize the sum to approximate a standard normal distribution
let standard_normal = (sum - (num_samples as f64 / 2.0));
chosen_3rd_sigma / 3.0 * standard_normal // Adjust 3rd sigma to std,
// then mul by normal. this
// is expected to be in
// range of -num_samples*
// 3sigma/2 to
// +num_samples*3sigma/2,
// centered at 0.
};
scale_factor += 1.0; // we are scaling our input by scale_factor, so re-centering to 1.0 means we
// multiply by 1.0 in most common case

if scale_factor < 0.0 {
// anything lower than 0.0 makes all bytes 0. do so and return Mutated
// This is a common result, since the range is centered around 1.0 and is often
// able to reach abs values of ~3-4.
let input_bytes = input.bytes_mut();
input_bytes.iter_mut().for_each(|byte| *byte = 0);
return Ok(MutationResult::Mutated);
}

if (scale_factor - 1.0).abs() < f64::EPSILON {
// The scale factor is within f64 err range of 1.0
// Skip mutation
return Ok(MutationResult::Skipped);
} else {
// iterate in normal order byte by byte, if underflow, set all to 0.
let input_bytes = input.bytes_mut();
let mut carry_down = 0.0;
let mut carry_up = 0.0;

// The loop is complicated because the input can be arbitrarily sized.
// This handles scaling a Vec<u8> that represents a number of arbitrary size.
'arbitrary_sized_scaling_loop: for i in 0..input_bytes.len() {
// convert u8 to f64, add any carry scaled by 256, and scale by scale_factor
let scaled_value = (input_bytes[i] as f64 + carry_down * 256.0) * scale_factor;

// find divided value and carry
input_bytes[i] = (scaled_value % 256.0).floor() as u8;
// special condition: if i is 0, and scaled_value is >=256, we overflowed our
// input. set all bytes to 255 and break all loops. this gets max_clamped
if i == 0 && scaled_value >= 256.0 {
input_bytes.iter_mut().for_each(|byte| *byte = 255);
break 'arbitrary_sized_scaling_loop;
}
// for example: if a byte gets mutated from 200 to 260.8, we need to carry up
// the overflow to the prior byte and carry down the decimal to the next byte
carry_up = (scaled_value / 256.0).floor();
carry_down = (scaled_value % 1.0) / scale_factor;

// Propagate carry up if necessary
let mut j = i;
while carry_up > 0.0 && j > 0 {
j -= 1;
let new_value = input_bytes[j] as f64 + carry_up;
// special condition: if j is 0, and new_value is >=256, set all bytes to 255
// and break all loops. this value overflowed and gets max_clamped
if j == 0 && new_value >= 256.0 {
input_bytes.iter_mut().for_each(|byte| *byte = 255);
break 'arbitrary_sized_scaling_loop;
}

input_bytes[j] = (new_value % 256.0).floor() as u8;
carry_up = (new_value / 256.0).floor();
}
}
}

Ok(MutationResult::Mutated)
}
}

/// [`IncDecValue`] is a mutator that mutates the input by overflowing_add 1 or
/// overflowing_sub 1
Expand Down Expand Up @@ -245,6 +394,7 @@ where
WordInterestingMutator::new(),
DwordInterestingMutator::new(),
ConstantHintedMutator::new(),
GaussianNoiseMutator::new(),
IncDecValue::new(),
);

Expand Down Expand Up @@ -281,6 +431,7 @@ where
BytesInsertMutator::new(),
BytesRandInsertMutator::new(),
ConstantHintedMutator::new(),
GaussianNoiseMutator::new(),
IncDecValue::new(),
);

Expand Down

0 comments on commit b7c3a23

Please sign in to comment.