diff --git a/src/FSharp.Stats/Seq.fs b/src/FSharp.Stats/Seq.fs index 90509a56..d7fc792b 100644 --- a/src/FSharp.Stats/Seq.fs +++ b/src/FSharp.Stats/Seq.fs @@ -1,24 +1,48 @@ namespace FSharp.Stats -/// Module to compute common statistical measure +/// +/// Module to compute common statistical measures. +/// [] module Seq = module OpsS = SpecializedGenericImpl + /// + /// Computes the range of the input sequence. + /// + /// The input sequence. + /// The range of the input sequence as an Interval. + /// + /// + /// let values = [1; 2; 3; 4; 5] + /// let r = Seq.range values // returns Interval.Closed(1, 5) + /// + /// let inline range (items:seq<_>) = use e = items.GetEnumerator() let rec loop (minimum) (maximum) = match e.MoveNext() with | true -> loop (min e.Current minimum) (max e.Current maximum) | false -> Interval.CreateClosed<'a> (minimum,maximum) - //Init by fist value + //Init by first value match e.MoveNext() with | true -> loop e.Current e.Current | false -> Interval.Empty - + /// + /// Computes the range of the input sequence by applying a function to each element. + /// + /// A function applied to transform each element of the sequence. + /// The input sequence. + /// The range of the transformed input sequence as an Interval. + /// + /// + /// let values = [1; 2; 3; 4; 5] + /// let r = Seq.rangeBy (fun x -> x * 2) values // returns Interval.Closed(1, 5) + /// + /// let inline rangeBy f (items:seq<_>) = use e = items.GetEnumerator() let rec loop minimum maximum minimumV maximumV = @@ -29,7 +53,7 @@ module Seq = let mmax,mmaxV = if current > maximum then current,e.Current else maximum,maximumV loop mmin mmax mminV mmaxV | false -> Interval.CreateClosed<'a> (minimumV,maximumV) - //Init by fist value + //Init by first value match e.MoveNext() with | true -> let current = f e.Current @@ -40,12 +64,18 @@ module Seq = // #region means /// - /// Computes the population mean (Normalized by N) + /// Computes the population mean (Normalized by N). 
/// - /// /// The input sequence. - /// Returns default value if data is empty or if any entry is NaN. - /// population mean (Normalized by N) + /// The population mean (Normalized by N). + /// Thrown if the sequence is empty and type cannot divide by zero. + /// Returns NaN if data is empty or if any entry is NaN. + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let m = Seq.mean values // returns 3.0 + /// + /// let inline mean (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -57,13 +87,19 @@ module Seq = /// - /// Computes the population mean (Normalized by N)s + /// Computes the population mean (Normalized by N) by applying a function to each element. /// - /// /// A function applied to transform each element of the sequence. /// The input sequence. + /// The population mean (Normalized by N) of the transformed sequence. + /// Thrown if the sequence is empty and type cannot divide by zero. /// Returns NaN if data is empty or if any entry is NaN. - /// population mean (Normalized by N) + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let m = Seq.meanBy (fun x -> x * 2.0) values // returns 6.0 + /// + /// let inline meanBy (f : 'T -> ^U) (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -72,10 +108,22 @@ module Seq = | true -> loop (n + 1 ) (acc + f e.Current) | false -> if (n > 0) then LanguagePrimitives.DivideByInt< 'U > acc n else (zero / zero) loop 0 zero - + /// - /// Computes the Weighted Mean of the given values. + /// Computes the Weighted Mean of the given values. /// + /// The sequence of weights. + /// The input sequence. + /// The weighted mean of the input sequence. + /// Thrown when the items and weights sequences have different lengths. + /// Thrown if the sequence is empty and type cannot divide by zero. 
+ /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let weights = [0.1; 0.2; 0.3; 0.2; 0.2] + /// let m = Seq.weightedMean weights values // returns 3.2 + /// + /// let inline weightedMean (weights:seq<'T>) (items:seq<'T>) = use e = items.GetEnumerator() use w = weights.GetEnumerator() @@ -84,35 +132,46 @@ module Seq = match e.MoveNext(),w.MoveNext() with | true,true -> loop (n + 1 ) (eAcc + e.Current * w.Current) (wAcc + w.Current) | false,false -> if (n > 0) then eAcc / wAcc else (zero / zero) - | _ -> failwithf "The items and weights must have the same length" - loop 0 LanguagePrimitives.GenericZero< 'U > + | _ -> invalidOp "The items and weights must have the same length" + loop 0 zero zero /// - /// Computes harmonic mean + /// Computes the harmonic mean. /// - /// /// The input sequence. + /// The harmonic mean of the input sequence. + /// Thrown if the sequence is empty and type cannot divide by zero. /// Returns NaN if data is empty or if any entry is NaN. - /// harmonic mean - let inline meanHarmonic (items:seq<'T>) : 'U = + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let m = Seq.meanHarmonic values // returns approximately 2.18978 + /// + /// + let inline meanHarmonic (items:seq<'T>) : 'T = use e = items.GetEnumerator() - let zero = LanguagePrimitives.GenericZero< 'U > - let one = LanguagePrimitives.GenericOne< 'U > + let zero = LanguagePrimitives.GenericZero< 'T > + let one = LanguagePrimitives.GenericOne< 'T > let rec loop n (acc) = match e.MoveNext() with | true -> loop (n + one ) (acc + (one / e.Current)) | false -> if (LanguagePrimitives.GenericGreaterThan n zero) then (n / acc) else (zero / zero) loop zero zero - /// - /// Computes harmonic mean + /// Computes the harmonic mean by applying a function to each element. /// - /// /// A function applied to transform each element of the sequence. /// The input sequence. + /// The harmonic mean of the transformed input sequence. 
+ /// Thrown if the sequence is empty and type cannot divide by zero. /// Returns NaN if data is empty or if any entry is NaN. - /// harmonic mean + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let m = Seq.meanHarmonicBy (fun x -> x * 2.0) values // returns approximately 4.37956 + /// + /// let inline meanHarmonicBy (f : 'T -> ^U) (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -123,14 +182,19 @@ module Seq = | false -> if (LanguagePrimitives.GenericGreaterThan n zero) then (n / acc) else (zero / zero) loop zero zero - /// - /// Computes gemetric mean + /// Computes the geometric mean. /// - /// /// The input sequence. + /// The geometric mean of the input sequence. + /// Thrown if the sequence is empty and type cannot divide by zero. /// Returns NaN if data is empty or if any entry is NaN. - /// gemetric mean + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let m = Seq.meanGeometric values // returns approximately 2.60517 + /// + /// let inline meanGeometric (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -143,13 +207,19 @@ module Seq = /// - /// Computes gemetric mean + /// Computes the geometric mean by applying a function to each element. /// - /// /// A function applied to transform each element of the sequence. /// The input sequence. + /// The geometric mean of the transformed input sequence. + /// Thrown if the sequence is empty and type cannot divide by zero. /// Returns NaN if data is empty or if any entry is NaN. 
- /// gemetric mean + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let m = Seq.meanGeometricBy (fun x -> x * 2.0) values // returns approximately 5.21034 + /// + /// let inline meanGeometricBy f (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -160,14 +230,19 @@ module Seq = if (n > 0) then exp (LanguagePrimitives.DivideByInt< 'U > acc n) else (zero / zero) loop 0 zero - /// - /// Computes quadratic mean + /// Computes the quadratic mean. /// - /// /// The input sequence. + /// The quadratic mean of the input sequence. + /// Thrown if the sequence is empty and type cannot divide by zero. /// Returns NaN if data is empty or if any entry is NaN. - /// quadratic mean + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let m = Seq.meanQuadratic values // returns approximately 3.31662 + /// + /// let inline meanQuadratic (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -180,13 +255,19 @@ module Seq = /// - /// Computes quadratic mean + /// Computes the quadratic mean by applying a function to each element. /// - /// /// A function applied to transform each element of the sequence. /// The input sequence. + /// The quadratic mean of the transformed input sequence. + /// Thrown if the sequence is empty and type cannot divide by zero. /// Returns NaN if data is empty or if any entry is NaN. 
- /// quadratic mean + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let m = Seq.meanQuadraticBy (fun x -> x * 2.0) values // returns approximately 4.69041576 + /// + /// let inline meanQuadraticBy f (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -204,19 +285,25 @@ module Seq = // // Computes the mean of the means of several subsample - /// - /// Computes the truncated (trimmed) mean where x percent of the highest, and x percent of the lowest values are discarded (total 2x) + /// Computes the truncated (trimmed) mean where x*count of the highest, and x*count of the lowest values are discarded (total 2x). /// - /// - /// The input sequence. + /// The proportion of values to discard from each end. + /// The input sequence. + /// The truncated (trimmed) mean of the input sequence. + /// Thrown if the sequence is empty and type cannot divide by zero. /// Returns NaN if data is empty or if any entry is NaN. - /// truncated (trimmed) mean - let inline meanTruncated (percent:float) (data:seq<'T>) : 'U = - let zero = LanguagePrimitives.GenericZero< 'U > + /// + /// + /// let values = {1.0 .. 10.0} + /// let m = Seq.meanTruncated 0.2 values // returns mean of {3.0 .. 8.0} or 5.5 + /// + /// + let inline meanTruncated (proportion:float) (data:seq<'T>) : 'T = + let zero = LanguagePrimitives.GenericZero< 'T > let n = Seq.length(data) if (n > 0) then - let k = int ((float n * percent)) + let k = int ((float n * proportion)) data |> Seq.sort |> Seq.skip k @@ -226,20 +313,26 @@ module Seq = else (zero / zero) - /// - /// Computes the truncated (trimmed) mean + /// Computes the truncated (trimmed) mean by applying a function to each element. /// - /// - /// The input sequence. - /// A function applied to transform each element of the sequence. + /// A function applied to transform each element of the sequence. + /// The proportion of values to discard from each end. + /// The input sequence. 
+ /// The truncated (trimmed) mean of the transformed input sequence. + /// Thrown if the sequence is empty and type cannot divide by zero. /// Returns NaN if data is empty or if any entry is NaN. - /// truncated (trimmed) mean - let inline meanTruncatedBy (f : 'T -> ^U) (percent:float) (data:seq<'T>) : 'U = + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let m = Seq.meanTruncatedBy (fun x -> x * 2.0) 0.2 values // returns 7.0 + /// + /// + let inline meanTruncatedBy (f : 'T -> ^U) (proportion:float) (data:seq<'T>) : 'U = let zero = LanguagePrimitives.GenericZero< 'U > let n = Seq.length(data) if (n > 0) then - let k = int (floor (float n * percent)) + let k = int (floor (float n * proportion)) data |> Seq.sort |> Seq.skip k @@ -255,12 +348,15 @@ module Seq = // ##### ##### ##### ##### ##### // Median - /// Sample Median - /// - /// - /// + /// + /// Computes the sample median. + /// + /// The input sequence. + /// The sample median of the input sequence. /// /// + /// let values = [1; 2; 3; 4; 5] + /// let m = Seq.median values // returns 3 /// /// let inline median (items:seq<'T>) = @@ -363,13 +459,19 @@ module Seq = // #region standard deviation, variance and coefficient of variation + /// - /// Computes the sample variance (Bessel's correction by N-1) + /// Computes the sample variance (Bessel's correction by N-1). /// - /// /// The input sequence. + /// The sample variance (Bessel's correction by N-1) of the input sequence. /// Returns NaN if data is empty or if any entry is NaN. 
- /// variance of a sample (Bessel's correction by N-1) + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let v = Seq.var values // returns 2.5 + /// + /// let inline var (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -388,15 +490,19 @@ module Seq = else (zero / zero) loop 0 zero zero - /// - /// Computes the sample variance (Bessel's correction by N-1) + /// Computes the sample variance (Bessel's correction by N-1) by applying a function to each element. /// - /// /// A function applied to transform each element of the sequence. /// The input sequence. + /// The sample variance (Bessel's correction by N-1) of the transformed input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// variance of a sample (Bessel's correction by N-1) + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let v = Seq.varBy (fun x -> x * 2.0) values // returns 10.0 + /// + /// let inline varBy f (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -418,12 +524,17 @@ module Seq = /// - /// Computes variance of the given values (denominator N) + /// Computes the population variance estimator (denominator N). /// - /// /// The input sequence. + /// The population variance estimator (denominator N) of the input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// population variance estimator (denominator N) + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let v = Seq.varPopulation values // returns 2.0 + /// + /// let inline varPopulation (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -442,15 +553,19 @@ module Seq = else (zero / zero) loop 0 zero zero - /// - /// Computes variance of the given values (denominator N) + /// Computes the population variance estimator (denominator N) by applying a function to each element. 
/// - /// /// A function applied to transform each element of the sequence. /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// population variance estimator (denominator N) + /// The population variance estimator (denominator N) of the transformed input sequence. + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let v = Seq.varPopulationBy (fun x -> x * 2.0) values // returns 8.0 + /// + /// let inline varPopulationBy f (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -471,83 +586,99 @@ module Seq = loop 0 zero zero - /// - /// Computes the sample standard deviation + /// Computes the sample standard deviation. /// - /// /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// standard deviation of a sample (Bessel's correction by N-1) + /// The sample standard deviation (Bessel's correction by N-1) of the input sequence. + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let sd = Seq.stDev values // returns approximately 1.58114 + /// + /// let inline stDev (items:seq<'T>) : 'U = sqrt ( var items ) /// - /// Computes the sample standard deviation + /// Computes the sample standard deviation by applying a function to each element. /// - /// /// A function applied to transform each element of the sequence. /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// standard deviation of a sample (Bessel's correction by N-1) + /// The sample standard deviation (Bessel's correction by N-1) of the transformed input sequence. + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let sd = Seq.stDevBy (fun x -> x * 2.0) values // returns approximately 3.16228 + /// + /// let inline stDevBy f (items:seq<'T>) : 'U = sqrt ( varBy f items ) /// - /// Computes the population standard deviation (denominator = N) + /// Computes the population standard deviation (denominator = N). 
/// - /// /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// population standard deviation (denominator = N) + /// The population standard deviation (denominator = N) of the input sequence. + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let sd = Seq.stDevPopulation values // returns approximately 1.41421 + /// + /// let inline stDevPopulation (items:seq<'T>) : 'U = sqrt (varPopulation items) /// - /// Computes the population standard deviation (denominator = N) + /// Computes the population standard deviation (denominator = N) by applying a function to each element. /// - /// /// A function applied to transform each element of the sequence. /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// population standard deviation (denominator = N) + /// The population standard deviation (denominator = N) of the transformed input sequence. + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let sd = Seq.stDevPopulationBy (fun x -> x * 2.0) values // returns approximately 2.82843 + /// + /// let inline stDevPopulationBy f (items:seq<'T>) : 'U = sqrt (varPopulationBy f items) - - /// Computes the standard error of the mean (SEM) with bessel corrected sample standard deviation - /// - /// - /// + /// + /// Computes the standard error of the mean (SEM) with Bessel corrected sample standard deviation. + /// + /// The input sequence. + /// The standard error of the mean (SEM) of the input sequence. /// /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let sem = Seq.sem values // returns approximately 0.70711 /// /// let inline sem (items:seq<'T>) = stDev items / sqrt (float (Seq.length items)) - - - - - - - - - - - + /// - /// Computes the Coefficient of Variation of a sample (Bessel's correction by N-1) + /// Computes the Coefficient of Variation of a sample (Bessel's correction by N-1). /// - /// /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. 
- /// Coefficient of Variation of a sample (Bessel's correction by N-1) + /// The Coefficient of Variation of a sample (Bessel's correction by N-1) of the input sequence. + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let cv = Seq.cv values // returns approximately 0.52705 + /// + /// let inline cv (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -569,13 +700,18 @@ module Seq = /// - /// Computes the Coefficient of Variation of a sample (Bessel's correction by N-1) + /// Computes the Coefficient of Variation of a sample (Bessel's correction by N-1) by applying a function to each element. /// - /// /// A function applied to transform each element of the sequence. /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// Coefficient of Variation of a sample (Bessel's correction by N-1) + /// The Coefficient of Variation of a sample (Bessel's correction by N-1) of the transformed input sequence. + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let cv = Seq.cvBy (fun x -> x * 2.0) values // returns approximately 0.52705 + /// + /// let inline cvBy f (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -595,16 +731,19 @@ module Seq = sd / m1 else (zero / zero) loop 0 zero zero - - - + /// - /// Computes the Coefficient of Variation of the population (population standard deviation) + /// Computes the Coefficient of Variation of the population (population standard deviation). /// - /// /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// Coefficient of Variation of the population + /// The Coefficient of Variation of the population of the input sequence. 
+ /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let cv = Seq.cvPopulation values // returns approximately 0.47140 + /// + /// let inline cvPopulation (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -626,13 +765,18 @@ module Seq = /// - /// Computes the Coefficient of Variation of the population (population standard deviation) + /// Computes the Coefficient of Variation of the population (population standard deviation) by applying a function to each element. /// - /// /// A function applied to transform each element of the sequence. /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// Coefficient of Variation of the population + /// The Coefficient of Variation of the population of the transformed input sequence. + /// + /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let cv = Seq.cvPopulationBy (fun x -> x * 2.0) values // returns approximately 0.47140 + /// + /// let inline cvPopulationBy f (items:seq<'T>) : 'U = use e = items.GetEnumerator() let zero = LanguagePrimitives.GenericZero< 'U > @@ -653,15 +797,22 @@ module Seq = else (zero / zero) loop 0 zero zero - + /// - /// Computes the population covariance of two random variables + /// Computes the population covariance of two random variables. /// - /// /// The first input sequence. /// The second input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// population covariance estimator (denominator N) + /// The population covariance estimator (denominator N) of the two input sequences. + /// Thrown when the input sequences have different lengths. 
+ /// + /// + /// let x = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let y = [2.0; 4.0; 6.0; 8.0; 10.0] + /// let cov = Seq.covPopulation x y // returns 4.0 + /// + /// let inline covPopulation (seq1:seq<'T>) (seq2:seq<'T>) : 'U = let v1 = seq1 |> OpsS.seqV let v2 = seq2 |> OpsS.seqV @@ -677,21 +828,16 @@ module Seq = div (mul - (div (sumX * sumY) v1.Length)) v1.Length /// - /// Computes the population covariance of two random variables. - /// The covariance will be calculated between the paired observations. + /// Computes the population covariance of two random variables. + /// The covariance will be calculated between the paired observations. /// /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// population covariance estimator (denominator N) - /// - /// - /// // Consider a sequence of paired x and y values: - /// // [(x1, y1); (x2, y2); (x3, y3); (x4, y4); ... ] - /// let xy = [(5., 2.); (12., 8.); (18., 18.); (-23., -20.); (45., 28.)] - /// - /// // To get the population covariance between x and y: - /// xy |> Seq.covPopulationOfPairs // evaluates to 347.92 - /// + /// The population covariance estimator (denominator N) of the paired observations. + /// + /// + /// let xy = [(1.0, 2.0); (2.0, 4.0); (3.0, 6.0); (4.0, 8.0); (5.0, 10.0)] + /// let cov = Seq.covPopulationOfPairs xy // returns 4.0 /// /// let inline covPopulationOfPairs (seq:seq<'T * 'T>) : 'U = @@ -700,24 +846,17 @@ module Seq = |> Array.unzip ||> covPopulation - /// - /// Computes the population covariance of two random variables generated by applying a function to the input sequence. + /// Computes the population covariance of two random variables generated by applying a function to the input sequence. /// /// A function applied to transform each element of the input sequence into a tuple of paired observations. /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. 
- /// population covariance estimator (denominator N) - /// - /// - /// // To get the population covariance between x and y observations: - /// let xy = [ {| x = 5.; y = 2. |} - /// {| x = 12.; y = 8. |} - /// {| x = 18.; y = 18. |} - /// {| x = -23.; y = -20. |} - /// {| x = 45.; y = 28. |} ] - /// - /// xy |> Seq.covPopulationBy (fun x -> x.x, x.y) // evaluates to 347.92 + /// The population covariance estimator (denominator N) of the transformed input sequence. + /// + /// + /// let data = [{| X = 1.0; Y = 2.0 |}; {| X = 2.0; Y = 4.0 |}; {| X = 3.0; Y = 6.0 |}; {| X = 4.0; Y = 8.0 |}; {| X = 5.0; Y = 10.0 |}] + /// let cov = data |> Seq.covPopulationBy (fun d -> d.X, d.Y) // returns 4.0 /// /// let inline covPopulationBy f (seq: 'T seq) : 'U = @@ -726,13 +865,20 @@ module Seq = |> covPopulationOfPairs /// - /// Computes the sample covariance of two random variables + /// Computes the sample covariance of two random variables. /// - /// /// The first input sequence. /// The second input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// sample covariance estimator (Bessel's correction by N-1) + /// The sample covariance estimator (Bessel's correction by N-1) of the two input sequences. + /// Thrown when the input sequences have different lengths. + /// + /// + /// let x = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let y = [2.0; 4.0; 6.0; 8.0; 10.0] + /// let cov = Seq.cov x y // returns 5.0 + /// + /// let inline cov (seq1:seq<'T>) (seq2:seq<'T>) : 'U = let v1 = seq1 |> OpsS.seqV let v2 = seq2 |> OpsS.seqV @@ -748,21 +894,20 @@ module Seq = div (mul - (div (sumX * sumY) v1.Length)) (v1.Length - 1) /// - /// Computes the sample covariance of two random variables. - /// The covariance will be calculated between the paired observations. + /// Computes the sample covariance of two random variables. + /// The covariance will be calculated between the paired observations. /// /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. 
- /// sample covariance estimator (Bessel's correction by N-1) - /// - /// + /// The sample covariance estimator (Bessel's correction by N-1) of the paired observations. + /// + /// /// // Consider a sequence of paired x and y values: /// // [(x1, y1); (x2, y2); (x3, y3); (x4, y4); ... ] /// let xy = [(5., 2.); (12., 8.); (18., 18.); (-23., -20.); (45., 28.)] /// /// // To get the sample covariance between x and y: - /// xy |> Seq.covOfPairs // evaluates to 434.90 - /// + /// xy |> Seq.covOfPairs // evaluates to 434.9 /// /// let inline covOfPairs (seq:seq<'T * 'T>) : 'U = @@ -772,14 +917,14 @@ module Seq = ||> cov /// - /// Computes the sample covariance of two random variables generated by applying a function to the input sequence. + /// Computes the sample covariance of two random variables generated by applying a function to the input sequence. /// /// A function applied to transform each element of the input sequence into a tuple of paired observations. /// The input sequence. /// Returns NaN if data is empty or if any entry is NaN. - /// sample covariance estimator (Bessel's correction by N-1) - /// - /// + /// The sample covariance estimator (Bessel's correction by N-1) of the transformed input sequence. + /// + /// /// // To get the sample covariance between x and y observations: /// let xy = [ {| x = 5.; y = 2. |} /// {| x = 12.; y = 8. |} @@ -795,7 +940,7 @@ module Seq = |> Seq.map f |> covOfPairs -// // #endregion standard deviation, variance and coefficient of variation + // // #endregion standard deviation, variance and coefficient of variation // // // /// @@ -987,15 +1132,15 @@ module Seq = // // m4 / (m2 * m2) - 3. - - - - /// Median absolute deviation (MAD) - /// - /// - /// + /// + /// Computes the median absolute deviation (MAD). + /// + /// The input sequence. + /// The median absolute deviation (MAD) of the input sequence. 
/// /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let mad = Seq.medianAbsoluteDev values // returns 1.0 /// /// let medianAbsoluteDev (data:seq) = @@ -1005,9 +1150,7 @@ module Seq = |> Array.map (fun x -> abs ( x - m' )) |> median - - -// /// Average absolute deviation (Normalized by N) + // /// Average absolute deviation (Normalized by N) // let populationAverageDev (data) = // let filterSeq = // data |> Seq.filter (fun x -> not (System.Double.IsNaN x)) @@ -1029,13 +1172,21 @@ module Seq = // else nan // // - - /// Returns SummeryStats of deq with N, mean, sum-of-squares, minimum and maximum - /// - /// - /// + /// + /// Returns SummaryStats of the input sequence with N, mean, sum-of-squares, minimum and maximum. + /// + /// The input sequence. + /// The SummaryStats of the input sequence. /// /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let stats = Seq.stats values + /// // returns SummaryStats with: + /// // N = 5 + /// // Mean = 3.5 + /// // SumOfSquares = 5.0 + /// // Minimum = 1.0 + /// // Maximum = 5.0 /// /// let inline stats (items:seq<'T>) = @@ -1046,36 +1197,35 @@ module Seq = let rec loop n (minimum) (maximum) m1 m2 = match e.MoveNext() with | true -> - let current = e.Current - let delta = current - m1 - let deltaN = (delta / n) + let current = e.Current + let delta = current - m1 + let deltaN = (delta / n) //let delta_n2 = deltaN * deltaN - let m1' = m1 + deltaN + let m1' = m1 + deltaN let m2' = m2 + delta * deltaN * (n-one) loop (n + one) (min current minimum) (max current maximum) m1' m2' + | false -> SummaryStats.createSummaryStats n m1 m2 minimum maximum - | false -> SummaryStats.createSummaryStats (n-one) m1 m2 minimum maximum - - //Init by fist value + //Init by first value match e.MoveNext() with | true -> loop one e.Current e.Current zero zero | false -> let uNan = zero / zero SummaryStats.createSummaryStats zero uNan uNan uNan uNan - - - - - /// calculates the sample means with a given number of replicates present in the 
sequence - /// - /// - /// - /// + /// + /// Calculates the sample means with a given number of replicates present in the sequence. + /// + /// The number of replicates. + /// The input sequence. + /// A sequence of sample means for each replicate group. + /// Thrown when the sequence length is not a multiple of the replicate number. /// /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0] + /// let means = Seq.getMeanOfReplicates 2 values // returns seq [1.5; 3.5; 5.5] /// - /// + /// let inline getMeanOfReplicates rep (data:seq<'a>) = if ( Seq.length data ) % rep = 0 then data @@ -1083,13 +1233,17 @@ module Seq = |> Seq.map mean else failwithf "sequence length is no multiple of replicate number" - /// calculates the sample standard deviations with a given number of replicates present in the sequence - /// - /// - /// - /// + /// + /// Calculates the sample standard deviations with a given number of replicates present in the sequence. + /// + /// The number of replicates. + /// The input sequence. + /// A sequence of sample standard deviations for each replicate group. + /// Thrown when the sequence length is not a multiple of the replicate number. /// /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0] + /// let stDevs = Seq.getStDevOfReplicates 2 values // returns seq [0.7071067812; 0.7071067812; 0.7071067812] /// /// let inline getStDevOfReplicates rep (data:seq<'a>) = @@ -1098,14 +1252,18 @@ module Seq = |> Seq.chunkBySize rep |> Seq.map stDev else failwithf "sequence length is no multiple of replicate number" - - /// calculates the coefficient of variation based on the sample standard deviations with a given number of replicates present in the sequence - /// - /// - /// - /// + + /// + /// Calculates the coefficient of variation based on the sample standard deviations with a given number of replicates present in the sequence. + /// + /// The number of replicates. + /// The input sequence. 
+ /// A sequence of coefficients of variation for each replicate group. + /// Thrown when the sequence length is not a multiple of the replicate number. /// /// + /// let values = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0] + /// let cvs = Seq.getCvOfReplicates 2 values // returns seq [0.4714045208; 0.2020305089; 0.1285648693] /// /// let inline getCvOfReplicates rep (data:seq<'a>) = @@ -1117,49 +1275,41 @@ module Seq = - - - - - - - - - - - - - - - - - - - - - // ######################################################################## /// A module which implements helper functions to provide special statistical measures module UtilityFunctions = /// - /// Computes sum of squares + /// Computes the sum of squares. /// - /// - /// seq of float + /// The observed values. + /// The expected values. + /// The sum of squares. /// Returns NaN if data is empty or if any entry is NaN. - /// sum of squares + /// + /// + /// let observed = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let expected = [2.0; 3.0; 4.0; 5.0; 6.0] + /// let ss = Seq.UtilityFunctions.sumOfSquares observed expected // returns 5.0 + /// + /// let sumOfSquares (xData:seq) (exData:seq) = let xX = Seq.zip xData exData Seq.fold (fun acc (x,ex) -> acc + square (x - ex)) 0. xX - /// - /// Computes the pooled variance of the given values + /// Computes the pooled variance of the given values. /// - /// - /// The number of samples - /// The population variances for each samples. + /// The number of samples for each group. + /// The population variances for each group. + /// The pooled variance. + /// + /// + /// let sizes = [10; 20; 15] + /// let variances = [2.5; 3.2; 1.8] + /// let pooledVar = Seq.UtilityFunctions.pooledVarOf sizes variances // returns 2.411111 + /// + /// > let pooledVarOf (sizes:seq) (variances:seq) = let var,n = @@ -1169,10 +1319,19 @@ module Seq = (varAcc + var,nAcc + n)) (0., 0.) var / n - /// - /// Computes the pooled variance of the given values + /// Computes the pooled variance of the given values. 
/// + /// A sequence of sequences representing the data groups. + /// The pooled variance. + /// + /// + /// let group1 = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let group2 = [2.0; 4.0; 6.0; 8.0; 10.0] + /// let group3 = [3.0; 6.0; 9.0; 12.0; 15.0] + /// let pooledVar = Seq.UtilityFunctions.pooledVar [group1; group2; group3] // returns 7.466667 + /// + /// let pooledVar (data:seq<#seq>) = let sizes = data |> Seq.map Seq.length @@ -1184,11 +1343,18 @@ module Seq = var / n /// - /// Computes the pooled population variance of the given values (Bessel's correction by N-1) + /// Computes the pooled population variance of the given values (Bessel's correction by N-1). /// - /// - /// The number of samples - /// The population variances for each samples. + /// The number of samples for each group. + /// The population variances for each group. + /// The pooled population variance. + /// + /// + /// let sizes = [10; 20; 15] + /// let variances = [2.5; 3.2; 1.8] + /// let pooledVarPop = Seq.UtilityFunctions.pooledVarPopulationOf sizes variances // returns 2.583333 + /// + /// > let pooledVarPopulationOf (sizes:seq) (variances:seq) = let var,n = @@ -1200,8 +1366,18 @@ module Seq = /// - /// Computes the pooled population variance of the given values (Bessel's correction by N-1) + /// Computes the pooled population variance of the given values (Bessel's correction by N-1). /// + /// A sequence of sequences representing the data groups. + /// The pooled population variance. 
+ /// + /// + /// let group1 = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let group2 = [2.0; 4.0; 6.0; 8.0; 10.0] + /// let group3 = [3.0; 6.0; 9.0; 12.0; 15.0] + /// let pooledVarPop = Seq.UtilityFunctions.pooledVarPopulation [group1; group2; group3] // returns 9.333333 + /// + /// let pooledVarPopulation (data:seq<#seq>) = let sizes = data |> Seq.map Seq.length @@ -1214,61 +1390,77 @@ module Seq = /// - /// Computes the pooled standard deviation of the given values + /// Computes the pooled standard deviation of the given values. /// - /// - /// The number of samples - /// The population variances for each samples. + /// The number of samples for each group. + /// The population variances for each group. + /// The pooled standard deviation. + /// + /// + /// let sizes = [10; 20; 15] + /// let variances = [2.5; 3.2; 1.8] + /// let pooledStDev = Seq.UtilityFunctions.pooledStDevOf sizes variances // returns 1.552775 + /// + /// let pooledStDevOf (sizes:seq) (variances:seq) = sqrt (pooledVarOf sizes variances) /// - /// Computes the pooled standard deviation of the given values. - /// + /// Computes the pooled standard deviation of the given values. + /// + /// A sequence of sequences representing the data groups. + /// The pooled standard deviation. + /// + /// + /// let group1 = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let group2 = [2.0; 4.0; 6.0; 8.0; 10.0] + /// let group3 = [3.0; 6.0; 9.0; 12.0; 15.0] + /// let pooledStDev = Seq.UtilityFunctions.pooledStDev [group1; group2; group3] // returns 2.732520 + /// + /// let pooledStDev (data:seq<#seq>) = sqrt (pooledVar data) - /// - /// Computes the pooled population standard deviation of the given values (Bessel's correction by N-1) + /// Computes the pooled population standard deviation of the given values (Bessel's correction by N-1). /// - /// - /// The number of samples - /// The population variances for each samples. + /// The number of samples for each group. + /// The population variances for each group.
+ /// The pooled population standard deviation. + /// + /// + /// let sizes = [10; 20; 15] + /// let variances = [2.5; 3.2; 1.8] + /// let pooledStDevPop = Seq.UtilityFunctions.pooledStDevPopulationOf sizes variances // returns 1.607275 + /// + /// let pooledStDevPopulationOf (sizes:seq) (variances:seq) = sqrt (pooledVarPopulationOf sizes variances) - /// - /// Computes the pooled population standard deviation of the given values (Bessel's correction by N-1) - /// - let pooledStDevPopulation (data:seq<#seq>) = - sqrt (pooledVarPopulation data) - - /// Converts the input sequence to an array if it not already is an array. - /// - /// - /// - /// + /// Computes the pooled population standard deviation of the given values (Bessel's correction by N-1). + /// + /// A sequence of sequences representing the data groups. + /// The pooled population standard deviation. /// /// + /// let group1 = [1.0; 2.0; 3.0; 4.0; 5.0] + /// let group2 = [2.0; 4.0; 6.0; 8.0; 10.0] + /// let group3 = [3.0; 6.0; 9.0; 12.0; 15.0] + /// let pooledStDevPop = Seq.UtilityFunctions.pooledStDevPopulation [group1; group2; group3] // returns 3.055050 /// - /// + /// + let pooledStDevPopulation (data:seq<#seq>) = + sqrt (pooledVarPopulation data) + + /// Converts the input sequence to an array if it is not already an array. let inline internal toArrayQuick (xs: seq<'T>) = match xs with | :? ('T[]) as arr -> arr | _ -> Seq.toArray xs - /// Like toArrayQuick but if the input sequence is an array already, it is copied to a new one to not interfere with inplace operations - /// - /// - /// - /// - /// - /// - /// - /// + /// Converts the input sequence to an array if it is not already an array. If the input sequence is already an array, it is copied to a new array. let inline internal toArrayCopyQuick (xs: seq<'T>) = match xs with | :? 
('T[]) as arr -> Array.copy arr @@ -1277,14 +1469,20 @@ module Seq = [] module SeqExtension = type Seq() = - + /// - /// Creates an seq float with values between a given interval + /// Creates a sequence of floats with values between a given interval. /// - /// start value (is included) - /// end value (by default is included ) - /// sets the number of elements in the seq. If not set, stepsize = 1. - /// If false, the seq does not contain the stop value + /// The start value (inclusive). + /// The end value (by default inclusive). + /// The number of elements in the sequence. If not set, stepsize = 1. + /// If false, the sequence does not contain the stop value. + /// A sequence of floats between the specified interval. + /// + /// + /// let values = Seq.linspace(0.0, 1.0, 5) // returns seq [0.0; 0.25; 0.5; 0.75; 1.0] + /// + /// > static member inline linspace(start:float,stop:float,num:int,?IncludeEndpoint:bool) : seq = let includeEndpoint = defaultArg IncludeEndpoint true @@ -1298,12 +1496,19 @@ module SeqExtension = /// - /// Creates a geometric seq float with values between a given interval + /// Creates a geometric sequence of floats with values between a given interval. /// - /// start value (is included) - /// end value (by default is included) - /// sets the number of elements in the seq. Defaults to 50. - /// If false, the seq does not contain the stop value. Defaults to true. + /// The start value (inclusive). + /// The end value (by default inclusive). + /// The number of elements in the sequence. Defaults to 50. + /// If false, the sequence does not contain the stop value. Defaults to true. + /// A geometric sequence of floats between the specified interval. + /// Thrown when start or stop is less than or equal to zero. 
+ /// + /// + /// let values = Seq.geomspace(1.0, 100.0, 5) // returns seq [1.0; 3.16227766; 10.0; 31.6227766; 100.0] + /// + /// static member inline geomspace (start:float, stop:float, num:int, ?IncludeEndpoint:bool) : seq = if start <= 0. || stop <= 0. then failwith "Geometric space can only take positive values." diff --git a/tests/FSharp.Stats.Tests/Seq.fs b/tests/FSharp.Stats.Tests/Seq.fs index 345dc0c9..8cdb188a 100644 --- a/tests/FSharp.Stats.Tests/Seq.fs +++ b/tests/FSharp.Stats.Tests/Seq.fs @@ -5,18 +5,34 @@ open System open FSharp.Stats open TestExtensions +/// Linear congruential generator +/// Used for generating longer consistent sequences +let lcg (seed: int) = + let m = 2147483647I + let a = 16807I + let c = 0I + (bigint seed) + |> Seq.unfold + (fun state -> + let next = (a * state + c) % m + Some (next, next) + ) + +let seqGenDbl mul len = lcg len |> Seq.map (fun i -> float i / float System.Int32.MaxValue) |> Seq.map (fun x -> x*mul - mul/2.0) |> Seq.take len +let seqGenInts len = lcg len |> Seq.map (fun i -> i) |> Seq.take len +let seqGen len = seqGenDbl 1000.0 len + let testSeqEvenCounts = seq [10000.;-0.1;14.;-10.] let testSeqOddCounts = seq [10000.;-0.1;14.;-10.;5.] 
let testSeqNan = seq [10000.;-0.1;14.;-10.;5.;Double.NaN] let testSeqInfinity = seq [10000.;-0.1;14.;-10.;Double.PositiveInfinity] let testSeqNegInfinity = seq [10000.;-0.1;14.;-10.;5.;Double.NegativeInfinity] - let testSeqEvenCountsInt = seq [10000;-50;14;-9] let testSeqOddCountsInt = seq [10000;-50;14;-10;5] [] let medianTests = - testList "Seq" [ + testList "Seq.median" [ testCase "medianEvenCounts" <| fun () -> let median = Seq.median testSeqEvenCounts Expect.floatClose Accuracy.high median 6.95 "Median should be 6.95" @@ -32,7 +48,6 @@ let medianTests = testCase "medianNegInf" <| fun () -> let median = Seq.median testSeqNegInfinity Expect.floatClose Accuracy.high median 2.45 "Median should be 2.45" - testCase "testListEvenCountsInt" <| fun () -> let median = Seq.median testSeqEvenCountsInt Expect.equal median 2 "Median should be 2" @@ -41,9 +56,60 @@ let medianTests = Expect.equal median 5 "Median should be 5" ] + +[] +let rangeTests = + testList "Seq.range" [ + testCase "Empty sequence" <| fun () -> + Expect.equal (Seq.range Seq.empty) Interval.Empty "Range of empty sequence should be Empty" + + testCase "One element sequence" <| fun () -> + Expect.equal (Seq.range [42]) (Interval.Closed(42, 42)) "Range of one element sequence should be Closed(x, x)" + + testCase "Two element sequence" <| fun () -> + Expect.equal (Seq.range [1; 2]) (Interval.Closed(1, 2)) "Range of two element sequence should be Closed(min, max)" + + testCase "All same element sequence" <| fun () -> + Expect.equal (Seq.range [5; 5; 5; 5]) (Interval.Closed(5, 5)) "Range of all same element sequence should be Closed(x, x)" + + testCase "All different element sequence" <| fun () -> + Expect.equal (Seq.range [1; 4; 2; 8; 3]) (Interval.Closed(1, 8)) "Range of all different element sequence should be Closed(min, max)" + + // Currently this is undefined and will depend on the order of the sequence + //testCase "Sequence with NaN" <| fun () -> + // Expect.equal (Seq.range [1.0; 2.0; nan; 3.0]) 
(Interval.Closed(nan, nan)) "Range of sequence with NaN should be Closed(nan, nan)" + + testCase "Sequence with Infinity" <| fun () -> + Expect.equal (Seq.range [1.0; 2.0; infinity]) (Interval.Closed(1.0, infinity)) "Range of sequence with Infinity should be Closed(min, infinity)" + + testCase "Sequence with Negative Infinity" <| fun () -> + Expect.equal (Seq.range [1.0; 2.0; -infinity]) (Interval.Closed(-infinity, 2.0)) "Range of sequence with Negative Infinity should be Closed(-infinity, max)" + + testCase "Sequence with negative values" <| fun () -> + Expect.equal (Seq.range [-1; -4; -2]) (Interval.Closed(-4, -1)) "Range of sequence with negative values should be Closed(min, max)" + + testCase "Sequence with positive values" <| fun () -> + Expect.equal (Seq.range [1; 4; 2]) (Interval.Closed(1, 4)) "Range of sequence with positive values should be Closed(min, max)" + + testCase "Sequence with mixed values" <| fun () -> + Expect.equal (Seq.range [-1; 4; -2; 8]) (Interval.Closed(-2, 8)) "Range of sequence with mixed values should be Closed(min, max)" + + testCase "Sequence with Int32 values" <| fun () -> + Expect.equal (Seq.range [1; 4; 2]) (Interval.Closed(1, 4)) "Range of sequence with Int32 values should be Closed(min, max)" + + testCase "Sequence with Int64 values" <| fun () -> + Expect.equal (Seq.range [1L; 4L; 2L]) (Interval.Closed(1L, 4L)) "Range of sequence with Int64 values should be Closed(min, max)" + + testCase "Sequence with string values" <| fun () -> + Expect.equal (Seq.range ["a"; "c"; "b"]) (Interval.Closed("a", "c")) "Range of sequence with string values should be Closed(min, max)" + + testCase "Sequence with null string values" <| fun () -> + Expect.equal (Seq.range ["a"; null; "b"]) (Interval.Closed(null, "b")) "Range of sequence with null string value should be Closed(null, max)" + ] + [] let meanTests = - testList "Seq" [ + testList "Seq.mean" [ testCase "mean" <| fun () -> let mean = Seq.mean testSeqEvenCounts Expect.floatClose Accuracy.high 
mean 2500.975 "Mean should be 2500.975" @@ -58,6 +124,135 @@ let meanTests = Expect.isTrue (Double.IsNegativeInfinity mean) "Mean should be nan" ] + +[] +let meanByTests = + testList "Seq.meanBy" [ + testCase "Empty seq" <| fun () -> + Expect.isTrue (Seq.meanBy sin Seq.empty |> Double.IsNaN) "Expected NaN" + + testCase "One element seq" <| fun () -> + Expect.floatClose Accuracy.medium (Seq.meanBy sin (seq [42.0])) -0.916522 "Expected -0.916522" + + testCase "Two element seq" <| fun () -> + Expect.floatClose Accuracy.medium (Seq.meanBy sin (seq [1.0; 2.0])) 0.875384 "Expected 0.875384" + + testCase "All same seq" <| fun () -> + Expect.floatClose Accuracy.medium (Seq.meanBy sin (seq [5.0; 5.0; 5.0])) -0.958924 "Expected -0.958924" + + testCase "All different seq" <| fun () -> + Expect.floatClose Accuracy.medium (Seq.meanBy sin (seq [1.0; 2.0; 3.0])) 0.630629 "Expected 0.630629" + + testCase "Seq with NaN" <| fun () -> + Expect.isTrue (Seq.meanBy sin (seq [1.0; nan; 2.0]) |> Double.IsNaN) "Expected NaN" + + testCase "Seq with Infinity" <| fun () -> + Expect.isTrue (Seq.meanBy sin (seq [1.0; infinity; 2.0]) |> Double.IsNaN) "Expected NaN" + + testCase "Seq with -Infinity" <| fun () -> + Expect.isTrue (Seq.meanBy sin (seq [1.0; -infinity; 2.0]) |> Double.IsNaN) "Expected NaN" + + testCase "Negative seq" <| fun () -> + Expect.floatClose Accuracy.medium (Seq.meanBy sin (seq [-1.0; -2.0; -3.0])) -0.630629 "Expected -0.630629" + + testCase "Positive seq" <| fun () -> + Expect.floatClose Accuracy.medium (Seq.meanBy sin (seq [1.0; 2.0; 3.0])) 0.630629 "Expected 0.630629" + + testCase "Mixed seq" <| fun () -> + Expect.floatClose Accuracy.medium (Seq.meanBy sin (seq [-1.0; 2.0; -3.0])) -0.024431 "Expected -0.024431" + + testCase "Int32 seq" <| fun () -> + Expect.floatClose Accuracy.medium (Seq.meanBy (float >> sin) (seq [1; 2; 3])) 0.630629 "Expected 0.630629" + + testCase "Int64 seq" <| fun () -> + Expect.floatClose Accuracy.medium (Seq.meanBy (float >> sin) (seq [1L; 2L; 
3L])) 0.630629 "Expected 0.630629" + + testCase "String seq" <| fun () -> + Expect.floatClose Accuracy.medium (Seq.meanBy (float << String.length) (seq ["hello"; "world"; "!"])) 3.666667 "Expected 3.666667" + ] + +[] +let weightedMeanTests = + testList "Seq.weightedMean" [ + testCase "basic" <| fun () -> + let weights = [0.1; 0.2; 0.3; 0.2; 0.2] + let values = [1.0; 2.0; 3.0; 4.0; 5.0] + let wMean = Seq.weightedMean weights values + Expect.floatClose Accuracy.high wMean 3.2 "Weighted mean should be 3.2" + + testCase "emptySeq" <| fun () -> + let emptyWeights = Seq.empty + let emptyValues = Seq.empty + let wMean = Seq.weightedMean emptyWeights emptyValues + Expect.isTrue (Double.IsNaN(wMean)) "Weighted mean of empty seq should be NaN" + + testCase "oneElement" <| fun () -> + let oneElemWeights = seq [1.0] + let oneElemValues = seq [5.0] + let wMean = Seq.weightedMean oneElemWeights oneElemValues + Expect.floatClose Accuracy.high wMean 5.0 "Weighted mean of one element seq should be the element value" + + testCase "twoElements" <| fun () -> + let twoElemWeights = seq [0.4;0.6] + let twoElemValues = seq [2.0;4.0] + let wMean = Seq.weightedMean twoElemWeights twoElemValues + Expect.floatClose Accuracy.high wMean 3.2 "Weighted mean of [2.0,4.0] with weights [0.4,0.6] should be 3.2" + + testCase "allSameElements" <| fun () -> + let allSameWeights = seq [0.2;0.2;0.2;0.2;0.2] + let allSameValues = seq [3.0;3.0;3.0;3.0;3.0] + let wMean = Seq.weightedMean allSameWeights allSameValues + Expect.floatClose Accuracy.high wMean 3.0 "Weighted mean of all same elements should be the element value" + + testCase "nanValue" <| fun () -> + let nanWeights = seq [0.5;0.5] + let nanValues = seq [1.0;nan] + let wMean = Seq.weightedMean nanWeights nanValues + Expect.isTrue (Double.IsNaN(wMean)) "Weighted mean of seq containing NaN should be NaN" + + testCase "infValue" <| fun () -> + let infWeights = seq [0.5;0.5] + let infValues = seq [1.0;infinity] + let wMean = Seq.weightedMean 
infWeights infValues + Expect.equal wMean infinity "Weighted mean of seq containing infinity should be infinity" + + testCase "negInfValue" <| fun () -> + let negInfWeights = seq [0.5;0.5] + let negInfValues = seq [1.0;-infinity] + let wMean = Seq.weightedMean negInfWeights negInfValues + Expect.equal wMean -infinity "Weighted mean of seq containing -infinity should be -infinity" + + testCase "negativeValues" <| fun () -> + let negativeWeights = seq [-0.1;-0.2;-0.3;-0.2;-0.2] + let negativeValues = seq [-1.0;-2.0;-3.0;-4.0;-5.0] + let wMean = Seq.weightedMean negativeWeights negativeValues + Expect.floatClose Accuracy.high wMean -3.2 "Weighted mean of negative values should be -3.2" + + testCase "positiveValues" <| fun () -> + let positiveWeights = seq [0.1;0.2;0.3;0.2;0.2] + let positiveValues = seq [1.0;2.0;3.0;4.0;5.0] + let wMean = Seq.weightedMean positiveWeights positiveValues + Expect.floatClose Accuracy.high wMean 3.2 "Weighted mean of positive values should be 3.2" + + testCase "mixedValues" <| fun () -> + let mixedWeights = seq [-0.1;0.2;-0.3;0.2;-0.2] + let mixedValues = seq [-1.0;2.0;-3.0;4.0;-5.0] + let wMean = Seq.weightedMean mixedWeights mixedValues + Expect.floatClose Accuracy.high wMean -16.0 "Weighted mean of mixed values should be -16.0" + + testCase "int32Values" <| fun () -> + let int32Weights = seq [1;2;3;2;2] + let int32Values = seq [1;2;3;4;5] + let wMean = Seq.weightedMean int32Weights int32Values + Expect.equal wMean 3 "Weighted mean of int32 values should be 3" + + testCase "int64Values" <| fun () -> + let int64Weights = seq [1L;2L;3L;2L;2L] + let int64Values = seq [1L;2L;3L;4L;5L] + let wMean = Seq.weightedMean int64Weights int64Values + Expect.equal wMean 3L "Weighted mean of int64 values should be 3L" + ] + [] let meanQuadraticTests = testList "Seq" [ @@ -121,3 +316,667 @@ let geomspaceTests = Expect.throws expected "geomspace cannot be initialized with negative values." 
] + +[] +let meanHarmonicTests = + testList "Seq.meanHarmonic" [ + testCase "Empty sequence" <| fun () -> + Expect.isTrue (Seq.meanHarmonic Seq.empty |> System.Double.IsNaN) "Expected NaN for empty sequence" + + testCase "One element sequence" <| fun () -> + Expect.equal (Seq.meanHarmonic [42.0]) 42.0 "Expected 42.0 for one element sequence" + + testCase "Two element sequence" <| fun () -> + Expect.floatClose Accuracy.high (Seq.meanHarmonic [3.0; 6.0]) 4.0 "Expected 4.0 for two element sequence" + + testCase "All same elements sequence" <| fun () -> + Expect.floatClose Accuracy.high (Seq.meanHarmonic [2.5; 2.5; 2.5]) 2.5 "Expected 2.5 for all same elements sequence" + + testCase "All different elements sequence" <| fun () -> + Expect.floatClose Accuracy.high (Seq.meanHarmonic [1.0; 2.0; 3.0; 4.0; 5.0]) 2.18978102189781 "Expected approximately 2.18978 for all different elements sequence" + + testCase "Sequence with NaN" <| fun () -> + Expect.isTrue (Seq.meanHarmonic [1.0; 2.0; nan] |> System.Double.IsNaN) "Expected NaN for sequence with NaN" + + testCase "Sequence with Infinity" <| fun () -> + Expect.equal (Seq.meanHarmonic [1.0; 2.0; infinity]) 2.0 "Expected 2.0 for sequence with Infinity" + + testCase "Sequence with -Infinity" <| fun () -> + Expect.equal (Seq.meanHarmonic [1.0; 2.0; -infinity]) 2.0 "Expected 2.0 for sequence with -Infinity" + + testCase "Sequence with negative values" <| fun () -> + Expect.floatClose Accuracy.high (Seq.meanHarmonic [-1.0; -2.0; -3.0]) -1.6363636363636365 "Expected approximately -1.63636 for sequence with negative values" + + testCase "Sequence with positive values" <| fun () -> + Expect.floatClose Accuracy.high (Seq.meanHarmonic [1.0; 2.0; 3.0]) 1.6363636363636365 "Expected approximately 1.63636 for sequence with positive values" + + testCase "Sequence with mixed values" <| fun () -> + Expect.floatClose Accuracy.high (Seq.meanHarmonic [-1.0; 2.0; -3.0; 4.0]) -6.857142857142857 "Expected approximately -6.85714 for sequence with 
mixed values" + + testCase "Sequence with Int32 values" <| fun () -> + Expect.equal (Seq.meanHarmonic [1; 2; 3; 4; 5]) 5 "Expected 5 for sequence with Int32 values" + + testCase "Sequence with Int64 values" <| fun () -> + Expect.equal (Seq.meanHarmonic [1L; 2L; 3L; 4L; 5L]) 5L "Expected 5L for sequence with Int64 values" + ] + + +[] +let seqGenTests = + testList "Seq.meanTruncated" [ + testCase "Empty sequence" <| fun () -> + let xs = Seq.empty + let result = Seq.meanTruncated 0.1 xs + Expect.isTrue (Double.IsNaN result) "Expected NaN for empty sequence" + + testCase "Single element" <| fun () -> + let xs = seqGen 1 + Expect.floatClose Accuracy.high (Seq.meanTruncated 0.1 xs) (Seq.head xs) "Expected mean to equal single element" + + testCase "All same value" <| fun () -> + let xs = Seq.replicate 100 5.0 + Expect.floatClose Accuracy.high 5.0 (Seq.meanTruncated 0.1 xs) "Expected 5.0 for all same value" + + testCase "Random floats length 10 trunc 0.1" <| fun () -> + let xs = seqGen 10 + Expect.floatClose Accuracy.high (Seq.meanTruncated 0.1 xs) -52.347631218073715331 "Expected mean of -52.347631218073715331" + + testCase "Random floats length 100 trunc 0.2" <| fun () -> + let xs = seqGen 100 + Expect.floatClose Accuracy.high (Seq.meanTruncated 0.2 xs) 9.5124633561411808813 "Expected mean of 9.5124633561411808813" + + testCase "Random floats length 1000 trunc 0.05" <| fun () -> + let xs = seqGen 1000 + Expect.floatClose Accuracy.high (Seq.meanTruncated 0.05 xs) -6.0286203235934587852 "Expected mean of -6.0286203235934587852" + + testCase "Sequence with NaN" <| fun () -> + let xs = seq [1.0; 2.0; Double.NaN; 3.0; 4.0] + let result = Seq.meanTruncated 0.1 xs + Expect.isTrue (Double.IsNaN result) "Expected NaN when sequence contains NaN" + ] + + +[] +let varTests = + testList "Seq.var" [ + testCase "varEmpty" <| fun () -> + let variance = Seq.var Seq.empty + Expect.isTrue (nan.Equals(variance)) "Variance of empty seq should be NaN" + + testCase "varSingleValue" <| fun () 
-> + let variance = Seq.var [5.] + Expect.isTrue (nan.Equals(variance)) "Variance of single value should be NaN" + + testCase "varSameValues" <| fun () -> + let variance = Seq.var [2.;2.;2.;2.] + Expect.floatClose Accuracy.high variance 0. "Variance of same values should be 0.0" + + testCase "varShortSeq" <| fun () -> + let variance = Seq.var [1.;2.;3.;4.;5.] + Expect.floatClose Accuracy.high variance 2.5 "Variance of short seq [1.;2.;3.;4.;5.] should be 2.5" + + testCase "varNaN" <| fun () -> + let variance = Seq.var [1.;2.;3.;nan] + Expect.isTrue (nan.Equals(variance)) "Variance of seq containing NaN should be NaN" + + testCase "varInfinity" <| fun () -> + let variance = Seq.var [1.;2.;infinity] + Expect.isTrue (nan.Equals(variance)) "Variance of seq containing infinity should be NaN" + + testCase "varNegInfinity" <| fun () -> + let variance = Seq.var [1.;2.;-infinity] + Expect.isTrue (nan.Equals(variance)) "Variance of seq containing -infinity should be NaN" + + testCase "varSeqGen10" <| fun () -> + let variance = Seq.var (seqGen 10) + Expect.floatClose Accuracy.high variance 63886.22 "Variance of seqGen 10 should be around 63886.22" + + testCase "varSeqGen100" <| fun () -> + let variance = Seq.var (seqGen 100) + Expect.floatClose Accuracy.high variance 84091.74 "Variance of seqGen 100 should be around 84091.74" + + testCase "varSeqGen1000" <| fun () -> + let variance = Seq.var (seqGen 1000) + Expect.floatClose Accuracy.high variance 82020.82 "Variance of seqGen 1000 should be around 82020.82" + ] + + + +[] +let varPopulationTests = + testList "Seq.varPopulation" [ + testCase "varPopulationEmpty" <| fun () -> + let variance = Seq.varPopulation Seq.empty + Expect.isTrue (Double.IsNaN variance) "Variance of empty sequence should be NaN" + + testCase "varPopulationAllSame" <| fun () -> + let variance = Seq.varPopulation (List.replicate 100 5.0) + Expect.floatClose Accuracy.high variance 0.0 "Variance of sequence with all same values should be 0.0" + + testCase 
"varPopulationWithNaN" <| fun () -> + let variance = Seq.varPopulation [1.0; 2.0; 3.0; nan; 4.0; 5.0] + Expect.isTrue (Double.IsNaN variance) "Variance of sequence containing NaN should be NaN" + + testCase "varPopulationWithInfinity" <| fun () -> + let variance = Seq.varPopulation [1.0; 2.0; 3.0; infinity; 4.0; 5.0] + Expect.isTrue (Double.IsNaN variance) "Variance of sequence containing infinity should be NaN" + + testCase "varPopulationWithNegInfinity" <| fun () -> + let variance = Seq.varPopulation [1.0; 2.0; 3.0; -infinity; 4.0; 5.0] + Expect.isTrue (Double.IsNaN variance) "Variance of sequence containing negative infinity should be NaN" + + testCase "varPopulationSeq5" <| fun () -> + let variance = Seq.varPopulation (seqGen 5) + Expect.floatClose Accuracy.high variance 83883.29 "Variance of seqGen 5" + + testCase "varPopulationSeq10" <| fun () -> + let variance = Seq.varPopulation (seqGen 10) + Expect.floatClose Accuracy.high variance 57497.59 "Variance of seqGen 10" + + testCase "varPopulationSeq100" <| fun () -> + let variance = Seq.varPopulation (seqGen 100) + Expect.floatClose Accuracy.high variance 83250.82 "Variance of seqGen 100" + ] + +[] +let stDevTests = + testList "Seq.stDev" [ + testCase "stDevEmpty" <| fun () -> + let stDev = Seq.stDev (Seq.empty:seq) + Expect.isTrue (Double.IsNaN(stDev)) "stDev of empty seq should be NaN" + + testCase "stDevSingleValue" <| fun () -> + let stDev = Seq.stDev ([5.0]:seq) + Expect.isTrue (Double.IsNaN(stDev)) "stDev of single value should be NaN" + + testCase "stDevAllSameValue" <| fun () -> + let stDev = Seq.stDev ([10.0; 10.0; 10.0; 10.0; 10.0]:seq) + Expect.floatClose Accuracy.high stDev 0.0 "stDev of all same values should be 0.0" + + testCase "stDevShortSeq" <| fun () -> + let stDev = Seq.stDev ([1.0; 2.0; 3.0; 4.0; 5.0]:seq) + Expect.floatClose Accuracy.high stDev 1.58113883 "stDev of [1.0; 2.0; 3.0; 4.0; 5.0] should be about 1.58113883" + + testCase "stDevWithNegatives" <| fun () -> + let stDev = Seq.stDev 
([1.0; -2.0; 3.0; -4.0; 5.0]:seq) + Expect.floatClose Accuracy.high stDev 3.64691651 "stDev of [1.0; -2.0; 3.0; -4.0; 5.0] should be about 3.64691651" + + testCase "stDevLargeSeq" <| fun () -> + let stDev = Seq.stDev (seqGen 1000) + Expect.floatClose Accuracy.veryHigh stDev 286.39276524 "stDev of seqGen 1000 should be about 286.39276524" + ] + + +[] +let stDevPopulationTests = + testList "Seq.stDevPopulation" [ + testCase "stDevPopulationEmpty" <| fun () -> + let stdev = Seq.stDevPopulation Seq.empty + Expect.isTrue (Double.IsNaN(stdev)) "stdev of empty seq should be NaN" + + testCase "stDevPopulationAllSame" <| fun () -> + let stdev = Seq.stDevPopulation (Seq.replicate 100 42.0) + Expect.floatClose Accuracy.high stdev 0.0 "stdev of all same values should be 0.0" + + testCase "stDevPopulationSeqGen5" <| fun () -> + let stdev = Seq.stDevPopulation (seqGen 5) + Expect.floatClose Accuracy.medium stdev 289.62612676671483314 "stdev of seqGen 5 should be around 289.62612676671483314" + + testCase "stDevPopulationWithNaN" <| fun () -> + let stdev = Seq.stDevPopulation [1.0; 2.0; 3.0; nan] + Expect.isTrue (Double.IsNaN(stdev)) "stdev of seq with NaN should be NaN" + + testCase "stDevPopulationWithInfinity" <| fun () -> + let stdev = Seq.stDevPopulation [1.0; 2.0; 3.0; infinity] + Expect.isTrue (Double.IsNaN(stdev)) "stdev of seq with infinity should be NaN" + + testCase "stDevPopulationWithNegativeInfinity" <| fun () -> + let stdev = Seq.stDevPopulation [1.0; 2.0; 3.0; -infinity] + Expect.isTrue (Double.IsNaN(stdev)) "stdev of seq with negative infinity should be NaN" + ] + + +[] +let semTests = + testList "Seq.sem" [ + testCase "semEmpty" <| fun () -> + let sem = Seq.sem ([]:double list) + Expect.isTrue (nan.Equals(sem)) "SEM of empty sequence should be NaN" + testCase "semSingleValue" <| fun () -> + let sem = Seq.sem [42.0] + Expect.isTrue (nan.Equals(sem)) "SEM of single value should be NaN" + testCase "semAllSameValue" <| fun () -> + let sem = Seq.sem [42.0; 42.0; 
42.0; 42.0; 42.0] + Expect.floatClose Accuracy.high sem 0.0 "SEM of all same values should be 0.0" + testCase "semShortSeq" <| fun () -> + let sem = Seq.sem [1.0; 2.0; 3.0; 4.0; 5.0] + Expect.floatClose Accuracy.high sem 0.70710678118654757274 "SEM of short sequence" + testCase "semLongSeq" <| fun () -> + let sem = Seq.sem (seqGen 1000) + Expect.floatClose Accuracy.high sem 9.0565344355779000551 "SEM of long sequence" + testCase "semNaN" <| fun () -> + let sem = Seq.sem [1.0; 2.0; 3.0; nan; 5.0] + Expect.isTrue (nan.Equals(sem)) "SEM of sequence with NaN should be NaN" + testCase "semInfinity" <| fun () -> + let sem = Seq.sem [1.0; 2.0; 3.0; infinity; 5.0] + Expect.isTrue (nan.Equals(sem)) "SEM of sequence with infinity should be NaN" + testCase "semNegInfinity" <| fun () -> + let sem = Seq.sem [1.0; 2.0; 3.0; -infinity; 5.0] + Expect.isTrue (nan.Equals(sem)) "SEM of sequence with negative infinity should be NaN" + ] + + +[] +let cvTests = + testList "Seq.cv" [ + testCase "cvEmpty" <| fun () -> + let cv = Seq.cv Seq.empty + Expect.isTrue (Double.IsNaN cv) "CV of empty sequence should be NaN" + + testCase "cvAllSame" <| fun () -> + let cv = Seq.cv (List.replicate 100 5.0) + Expect.floatClose Accuracy.high cv 0.0 "CV of all same values should be 0.0" + + testCase "cvShortSeq" <| fun () -> + let cv = Seq.cv [1.0; 2.0; 3.0; 4.0; 5.0] + Expect.floatClose Accuracy.medium cv 0.52705 "CV of short sequence" + + testCase "cvLongSeq" <| fun () -> + let cv = Seq.cv (seqGen 1000) + Expect.floatClose Accuracy.medium cv -50.953708636964790912 "CV of long sequence" + + testCase "cvNaN" <| fun () -> + let cv = Seq.cv [1.0; 2.0; 3.0; nan; 5.0] + Expect.isTrue (Double.IsNaN cv) "CV of sequence with NaN should be NaN" + + testCase "cvInf" <| fun () -> + let cv = Seq.cv [1.0; 2.0; 3.0; infinity; 5.0] + Expect.isTrue (Double.IsNaN cv) "CV of sequence with Infinity should be NaN" + + testCase "cvNegInf" <| fun () -> + let cv = Seq.cv [1.0; 2.0; 3.0; -infinity; 5.0] + Expect.isTrue 
(Double.IsNaN cv) "CV of sequence with -Infinity should be NaN"
+    ]
+
+
+/// Tests for Seq.cvPopulation: typical data, empty input, constant input,
+/// and inputs containing NaN or +/- infinity (all expected to yield NaN).
+/// `seqGen` is a helper defined outside this chunk; the pinned expected values
+/// presumably rely on it being deterministic - TODO confirm.
+[<Tests>]
+let cvPopulationTests =
+    testList "Seq.cvPopulation" [
+        testCase "cvPopulationTypical" <| fun () ->
+            let cv = Seq.cvPopulation (seqGen 100)
+            Expect.floatClose Accuracy.medium cv 116.8527 "CV should be approximately 116.8527"
+
+        testCase "cvPopulationEmpty" <| fun () ->
+            let cv = Seq.cvPopulation Seq.empty
+            Expect.isTrue (Double.IsNaN cv) "CV of empty sequence should be NaN"
+
+        testCase "cvPopulationSingleValue" <| fun () ->
+            let cv = Seq.cvPopulation (Seq.replicate 10 5.0)
+            Expect.floatClose Accuracy.high cv 0.0 "CV of sequence with all same values should be 0.0"
+
+        testCase "cvPopulationWithNaN" <| fun () ->
+            let cv = Seq.cvPopulation [1.0; 2.0; 3.0; nan]
+            Expect.isTrue (Double.IsNaN cv) "CV of sequence containing NaN should be NaN"
+
+        testCase "cvPopulationWithInfinity" <| fun () ->
+            let cv = Seq.cvPopulation [1.0; 2.0; 3.0; infinity]
+            Expect.isTrue (Double.IsNaN cv) "CV of sequence containing infinity should be NaN"
+
+        testCase "cvPopulationWithNegativeInfinity" <| fun () ->
+            let cv = Seq.cvPopulation [1.0; 2.0; 3.0; -infinity]
+            Expect.isTrue (Double.IsNaN cv) "CV of sequence containing negative infinity should be NaN"
+    ]
+
+
+
+/// Tests for Seq.covPopulation on two separate sequences: a pinned value for
+/// generated data, empty input, NaN / +/- infinity propagation, constant input,
+/// and the requirement that inputs of different lengths throw.
+[<Tests>]
+let covPopulationTests =
+    testList "Seq.covPopulation" [
+        testCase "covPopulationBasic" <| fun () ->
+            let x = seqGen 5 |> Seq.take 5
+            let y = seqGen 10 |> Seq.take 5
+            let cov = Seq.covPopulation x y
+            Expect.floatClose Accuracy.high cov 34997.222487256090972 "Covariance should be 34997.222487256090972"
+
+        testCase "covPopulationEmpty" <| fun () ->
+            let x = Seq.empty
+            let y = Seq.empty
+            let cov = Seq.covPopulation x y
+            Expect.isTrue (Double.IsNaN(cov)) "Covariance of empty sequences should be NaN"
+
+        testCase "covPopulationNaN" <| fun () ->
+            let x = [1.0; 2.0; Double.NaN]
+            let y = [4.0; 5.0; 6.0]
+            let cov = Seq.covPopulation x y
+            Expect.isTrue (Double.IsNaN(cov)) "Covariance should be NaN if any element is NaN"
+
+        testCase "covPopulationSameValue" <| fun () ->
+            let x = [2.5; 2.5; 2.5]
+            let y = [8.0; 8.0; 8.0]
+            let cov = Seq.covPopulation x y
+            Expect.floatClose Accuracy.high cov 0.0 "Covariance of sequences with same values should be 0"
+
+        testCase "covPopulationInfinity" <| fun () ->
+            let x = [1.0; 2.0; Double.PositiveInfinity]
+            let y = [4.0; 5.0; 6.0]
+            let cov = Seq.covPopulation x y
+            Expect.isTrue (Double.IsNaN(cov)) "Covariance should be NaN if any element is infinity"
+
+        testCase "covPopulationNegativeInfinity" <| fun () ->
+            let x = [1.0; 2.0; Double.NegativeInfinity]
+            let y = [4.0; 5.0; 6.0]
+            let cov = Seq.covPopulation x y
+            Expect.isTrue (Double.IsNaN(cov)) "Covariance should be NaN if any element is negative infinity"
+
+        testCase "covPopulationDifferentLengths" <| fun () ->
+            let x = [1.0; 2.0; 3.0]
+            let y = [4.0; 5.0]
+            Expect.throws (fun () -> Seq.covPopulation x y |> ignore) "Sequences of different lengths should throw an exception"
+    ]
+
+
+
+
+/// Tests for Seq.covPopulationOfPairs (input given as a sequence of tuples):
+/// empty input, NaN / +/- infinity propagation, and pinned values for
+/// generated data.
+[<Tests>]
+let covPopulationOfPairsTests =
+    testList "Seq.covPopulationOfPairs" [
+        testCase "covPopulationOfPairsEmpty" <| fun () ->
+            let cov = Seq.covPopulationOfPairs Seq.empty
+            Expect.isTrue (Double.IsNaN(cov)) "Covariance of empty sequence should be NaN"
+
+        testCase "covPopulationOfPairsNaN" <| fun () ->
+            let cov = Seq.covPopulationOfPairs [(1.0, 2.0); (2.0, nan); (3.0, 6.0)]
+            Expect.isTrue (Double.IsNaN(cov)) "Covariance of sequence with NaN should be NaN"
+
+        testCase "covPopulationOfPairsAllSame" <| fun () ->
+            // Each pair is (x, x), i.e. both components are identical.
+            let cov = Seq.covPopulationOfPairs (seqGen 100 |> Seq.map (fun x -> (x, x)))
+            Expect.floatClose Accuracy.high cov 83250.82204 "Covariance of sequence with all same values should be close to 83250.82204"
+
+        testCase "covPopulationOfPairsInfinity" <| fun () ->
+            let cov = Seq.covPopulationOfPairs [(1.0, 2.0); (infinity, 4.0); (3.0, 6.0)]
+            Expect.isTrue (Double.IsNaN(cov)) "Covariance of sequence with infinity should be NaN"
+
+        testCase "covPopulationOfPairsNegInfinity" <| fun () ->
+            let cov = Seq.covPopulationOfPairs [(1.0, 2.0); (-infinity, 4.0); (3.0, 6.0)]
+            Expect.isTrue (Double.IsNaN(cov)) "Covariance of sequence with negative infinity should be NaN"
+
+        testCase "covPopulationOfPairsLargeSeq" <| fun () ->
+            let cov = Seq.covPopulationOfPairs (seqGen 100000 |> Seq.map (fun x -> (x, x+1.0)))
+            Expect.floatClose Accuracy.high cov 83366.21512 "Covariance of large sequence should be close to 83366.21512"
+
+        testCase "covPopulationOfPairsSeq" <| fun () ->
+            let s1 = seqGen 100
+            let s2 = seqGen 200 |> Seq.take 100
+            let cov = Seq.covPopulationOfPairs (Seq.zip s1 s2)
+            // Message fixed: this case zips two 100-element sequences; it is not the "large sequence" case.
+            Expect.floatClose Accuracy.high cov 40559.822281678054424 "Covariance of zipped sequences should be close to 40559.822281678054424"
+    ]
+
+
+
+/// Tests for Seq.cov on two separate sequences: pinned values for y = 2x and
+/// y = -2x, empty input, and NaN / +/- infinity propagation.
+[<Tests>]
+let covTests =
+    testList "Seq.cov" [
+        testCase "covPositiveCorrelation" <| fun () ->
+            let x = seqGen 100 |> Seq.take 10
+            let y = x |> Seq.map (fun x -> 2.0 * x)
+            let cov = Seq.cov x y
+            Expect.floatClose Accuracy.high cov 229809.2 "Covariance should be around 229809.2"
+
+        testCase "covNegativeCorrelation" <| fun () ->
+            let x = seqGen 100 |> Seq.take 10
+            let y = x |> Seq.map (fun x -> -2.0 * x)
+            let cov = Seq.cov x y
+            Expect.floatClose Accuracy.high cov -229809.2 "Covariance should be around -229809.2"
+
+        testCase "covEmpty" <| fun () ->
+            let x = Seq.empty
+            let y = Seq.empty
+            let cov = Seq.cov x y
+            // Double.Equals treats NaN as equal to NaN, unlike the (=) operator.
+            Expect.isTrue (nan.Equals(cov)) "Covariance of empty sequences should be NaN"
+
+        testCase "covNaN" <| fun () ->
+            let x = [1.0; 2.0; 3.0; nan]
+            let y = [2.0; 4.0; 6.0; 8.0]
+            let cov = Seq.cov x y
+            Expect.isTrue (nan.Equals(cov)) "Covariance should be NaN if any element is NaN"
+
+        testCase "covInfinity" <| fun () ->
+            let x = [1.0; 2.0; 3.0; infinity]
+            let y = [2.0; 4.0; 6.0; 8.0]
+            let cov = Seq.cov x y
+            Expect.isTrue (nan.Equals(cov)) "Covariance should be NaN if any element is infinity"
+
+        testCase "covNegInfinity" <| fun () ->
+            let x = [1.0; 2.0; 3.0; -infinity]
+            let y = [2.0; 4.0; 6.0; 8.0]
+            let cov = Seq.cov x y
+            Expect.isTrue (nan.Equals(cov)) "Covariance should be NaN if any element is -infinity"
+    ]
+
+
+
+/// Tests for Seq.covOfPairs (input given as a sequence of tuples): empty input,
+/// NaN / infinity propagation, constant pairs, and pinned values for generated data.
+[<Tests>]
+let covOfPairsTests =
+    testList "Seq.covOfPairs" [
+        testCase "covOfPairsEmpty" <| fun () ->
+            let cov = Seq.covOfPairs Seq.empty
+            Expect.isTrue (nan.Equals(cov)) "Covariance of empty sequence should be NaN"
+
+        testCase "covOfPairsNaN" <| fun () ->
+            let cov = Seq.covOfPairs [(1.0,1.0); (2.0,nan); (3.0,3.0)]
+            Expect.isTrue (nan.Equals(cov)) "Covariance of sequence containing NaN should be NaN"
+
+        testCase "covOfPairsAllSame" <| fun () ->
+            let cov = Seq.covOfPairs (Seq.init 100 (fun _ -> (5.0, 5.0)))
+            Expect.equal cov 0.0 "Covariance of sequence with all same values should be 0.0"
+
+        testCase "covOfPairsSeqGen" <| fun () ->
+            let cov = Seq.covOfPairs (Seq.zip (seqGen 100) (seqGen 100))
+            Expect.floatClose Accuracy.high cov 84091.74 "Covariance of seqGen 100 with itself"
+
+        testCase "covOfPairsSeqGenOffset" <| fun () ->
+            let cov = Seq.covOfPairs (Seq.zip (seqGen 100) (seqGen 100 |> Seq.skip 50))
+            Expect.floatClose Accuracy.medium cov 5709.76 "Covariance of offset seqGen sequences"
+
+        testCase "covOfPairsInfinity" <| fun () ->
+            let cov = Seq.covOfPairs [(1.0,1.0); (2.0,infinity); (3.0,Double.NegativeInfinity)]
+            Expect.isTrue (nan.Equals(cov)) "Covariance of sequence with infinities should be NaN"
+    ]
+
+
+/// Tests for Seq.medianAbsoluteDev: empty input, single / constant values,
+/// NaN and +/- infinity entries, mixed-sign data, and a larger generated sequence.
+[<Tests>]
+let medianAbsoluteDevTests =
+    testList "Seq.medianAbsoluteDev" [
+        testCase "emptySeq" <| fun () ->
+            let mad = Seq.medianAbsoluteDev Seq.empty
+            Expect.isTrue (Double.IsNaN mad) "MAD of empty sequence should be NaN"
+
+        testCase "singleValue" <| fun () ->
+            let mad = Seq.medianAbsoluteDev [42.0]
+            Expect.equal mad 0.0 "MAD of single value should be 0.0"
+
+        testCase "allSameValue" <| fun () ->
+            let mad = Seq.medianAbsoluteDev (List.replicate 100 42.0)
+            Expect.equal mad 0.0 "MAD of all same values should be 0.0"
+
+        testCase "seqWithNaN" <| fun () ->
+            let mad = Seq.medianAbsoluteDev (seqGen 100 |> Seq.map (fun x -> if x < 0.5 then nan else x))
+            Expect.isTrue (Double.IsNaN mad) "MAD of sequence containing NaN should be NaN"
+
+        testCase "seqWithInfinity" <| fun () ->
+            let mad = Seq.medianAbsoluteDev (seqGen 100 |> Seq.map (fun x -> if x < 0.5 then infinity else x))
+            Expect.floatClose Accuracy.high mad 424.181 "MAD of sequence containing infinity"
+
+        testCase "seqWithNegInfinity" <| fun () ->
+            let mad = Seq.medianAbsoluteDev (seqGen 100 |> Seq.map (fun x -> if x < 0.5 then -infinity else x))
+            Expect.floatClose Accuracy.high mad 424.181 "MAD of sequence containing negative infinity"
+
+        testCase "seqWithPosAndNeg" <| fun () ->
+            let mad = Seq.medianAbsoluteDev (seqGen 100 |> Seq.map (fun x -> if x < 0.5 then -x else x))
+            Expect.floatClose Accuracy.medium mad 125.358 "MAD of sequence with pos and neg values"
+
+        testCase "largeSeq" <| fun () ->
+            let mad = Seq.medianAbsoluteDev (seqGen 10000)
+            Expect.floatClose Accuracy.medium mad 246.563 "MAD of large sequence"
+    ]
+
+
+
+/// Tests for Seq.stats: checks the N, Mean, SumOfSquares, Min and Max fields
+/// for empty, generated, constant, NaN-containing and infinity-containing input.
+[<Tests>]
+let statsTests =
+    testList "Seq.stats" [
+        testCase "statsEmpty" <| fun () ->
+            let stats = Seq.stats (Seq.empty)
+            Expect.equal stats.N 0 "N should be 0"
+            Expect.isTrue (Double.IsNaN stats.Mean) "Mean should be NaN"
+            Expect.isTrue (Double.IsNaN stats.SumOfSquares) "SumOfSquares should be NaN"
+            Expect.isTrue (Double.IsNaN stats.Min) "Min should be NaN"
+            Expect.isTrue (Double.IsNaN stats.Max) "Max should be NaN"
+
+        testCase "statsSeqGen10" <| fun () ->
+            let stats = seqGen 10 |> Seq.stats
+            // Failure messages below were corrected to state the values actually asserted.
+            Expect.equal stats.N 10 "N should be 10"
+            Expect.floatClose Accuracy.high stats.Mean -13.979665708718687 "Mean should be -13.979665708718687"
+            Expect.floatClose Accuracy.high stats.SumOfSquares 362450.2113702808 "SumOfSquares should be 362450.2113702808"
+            Expect.floatClose Accuracy.high stats.Min -499.92173630740575163 "Min should be -499.92173630740575163"
+            Expect.floatClose Accuracy.high stats.Max 292.9640583661216624 "Max should be 292.9640583661216624"
+
+        testCase "statsSeqGen1000" <| fun () ->
+            let stats = seqGen 1000 |> Seq.stats
+            // Failure messages below were corrected to state the values actually asserted.
+            Expect.equal stats.N 1000 "N should be 1000"
+            Expect.floatClose Accuracy.medium stats.Mean -5.133606105015737 "Mean should be -5.133606105015737"
+            Expect.floatClose Accuracy.medium stats.SumOfSquares 81701824.38921407 "SumOfSquares should be 81701824.38921407"
+            Expect.floatClose Accuracy.medium stats.Min -498.70270583718212265 "Min should be -498.70270583718212265"
+            Expect.floatClose Accuracy.medium stats.Max 499.80056798076293489 "Max should be 499.80056798076293489"
+
+        testCase "statsAllSame" <| fun () ->
+            let stats = Seq.init 100 (fun _ -> 42.0) |> Seq.stats
+            Expect.equal stats.N 100 "N should be 100"
+            Expect.equal stats.Mean 42.0 "Mean should be 42.0"
+            Expect.equal stats.SumOfSquares 0.0 "SumOfSquares should be 0.0"
+            Expect.equal stats.Min 42.0 "Min should be 42.0"
+            Expect.equal stats.Max 42.0 "Max should be 42.0"
+
+        testCase "statsNaN" <| fun () ->
+            let stats = Seq.stats [1.0; 2.0; Double.NaN; 3.0]
+            Expect.equal stats.N 4 "N should be 4"
+            Expect.isTrue (Double.IsNaN stats.Mean) "Mean should be NaN"
+            Expect.isTrue (Double.IsNaN stats.SumOfSquares) "SumOfSquares should be NaN"
+
+        testCase "statsInfinity" <| fun () ->
+            let stats = Seq.stats [1.0; Double.PositiveInfinity; 2.0; Double.NegativeInfinity]
+            Expect.equal stats.N 4 "N should be 4"
+            Expect.isTrue (Double.IsNaN stats.Mean) "Mean should be NaN"
+            Expect.isTrue (Double.IsNaN stats.SumOfSquares) "SumOfSquares should be NaN"
+            Expect.equal stats.Min Double.NegativeInfinity "Min should be negative infinity"
+            Expect.equal stats.Max Double.PositiveInfinity "Max should be positive infinity"
+    ]
+
+
+/// Tests for Seq.getMeanOfReplicates, called as `getMeanOfReplicates rep data`:
+/// empty input, constant input, NaN / infinity entries, and pinned means for
+/// generated data.
+[<Tests>]
+let getMeanOfReplicatesTests =
+    testList "Seq.getMeanOfReplicates" [
+        testCase "emptySeq" <| fun () ->
+            let means = Seq.getMeanOfReplicates 2 Seq.empty
+            Expect.isEmpty means "Means of empty seq should be empty"
+
+        testCase "singleValue" <| fun () ->
+            let means = Seq.getMeanOfReplicates 2 (Seq.replicate 6 42.0)
+            Expect.sequenceEqual means [42.0; 42.0; 42.0] "Means should all be 42.0"
+
+        testCase "seqWithNaN" <| fun () ->
+            let values = [1.0; 2.0; nan; 4.0; 5.0; nan]
+            let means = Seq.getMeanOfReplicates 2 values |> Seq.toList
+            Expect.floatClose Accuracy.high means.[0] 1.5 "First mean should be 1.5"
+            Expect.isTrue (means.[1] |> System.Double.IsNaN) "Second mean should be NaN"
+            Expect.isTrue (means.[2] |> System.Double.IsNaN) "Third mean should be NaN"
+
+        testCase "seqWithInfinity" <| fun () ->
+            let values = [1.0; 2.0; infinity; 4.0; -infinity; 6.0]
+            let means = Seq.getMeanOfReplicates 2 values |> Seq.toList
+            Expect.floatClose Accuracy.high means.[0] 1.5 "First mean should be 1.5"
+            Expect.isTrue (means.[1] |> System.Double.IsInfinity) "Second mean should be Infinity"
+            Expect.isTrue (means.[2] |> System.Double.IsNegativeInfinity) "Third mean should be -Infinity"
+
+        testCase "generatedSeq" <| fun () ->
+            let values = seqGen 100 |> Seq.truncate 60
+            let means = Seq.getMeanOfReplicates 3 values |> Seq.toList
+            Expect.equal means.Length 20 "Should have 20 means"
+            Expect.floatClose Accuracy.high means.[0] -261.63544 "First mean"
+            Expect.floatClose Accuracy.high means.[9] -129.70288 "10th mean"
+            Expect.floatClose Accuracy.high means.[19] 80.95719 "Last mean"
+    ]
+
+
+
+/// Tests for Seq.getStDevOfReplicates, called as `getStDevOfReplicates rep data`:
+/// empty input, all-NaN and constant input, infinity entries, a length that is
+/// not a multiple of `rep`, hand-checked values, and a larger generated sequence.
+[<Tests>]
+let getStDevOfReplicatesTests =
+    testList "Seq.getStDevOfReplicates" [
+        testCase "emptySeq" <| fun () ->
+            let stdevs = Seq.getStDevOfReplicates 2 (Seq.empty : float seq)
+            Expect.isEmpty stdevs "Empty sequence should return empty"
+
+        testCase "nanSeq" <| fun () ->
+            let data = Seq.init 10 (fun _ -> nan)
+            let stdevs = Seq.getStDevOfReplicates 2 data
+            stdevs |> Seq.iter (fun sd -> Expect.isTrue (nan.Equals(sd)) "Stdev should be NaN")
+
+        testCase "allSameValue" <| fun () ->
+            let data = Seq.init 10 (fun _ -> 42.0)
+            let stdevs = Seq.getStDevOfReplicates 2 data
+            stdevs |> Seq.iter (fun sd -> Expect.floatClose Accuracy.high sd 0.0 "Stdev should be 0.0")
+
+        testCase "seqWithInfinity" <| fun () ->
+            let data = seqGen 10 |> Seq.map (fun i -> if int i % 2 = 0 then infinity else 1.0)
+            let stdevs = Seq.getStDevOfReplicates 2 data
+            stdevs |> Seq.iter (fun sd -> Expect.isTrue (nan.Equals(sd)) "Stdev should be NaN")
+
+        testCase "seqWithNegInfinity" <| fun () ->
+            let data = seqGen 10 |> Seq.map (fun i -> if int i % 2 = 0 then -infinity else 1.0)
+            let stdevs = Seq.getStDevOfReplicates 2 data
+            stdevs |> Seq.iter (fun sd -> Expect.isTrue (nan.Equals(sd)) "Stdev should be NaN")
+
+        testCase "seqLengthNotMultipleOfRep" <| fun () ->
+            let data = seqGen 10
+            // NOTE(review): the result is only piped to `ignore`, never enumerated. If
+            // getStDevOfReplicates evaluates lazily, the exception would not surface here - confirm.
+            Expect.throws (fun () -> Seq.getStDevOfReplicates 3 data |> ignore) "Should throw for length not multiple of rep"
+
+        testCase "typicalValues" <| fun () ->
+            let data = [1.0; 2.0; 3.0; 4.0; 5.0; 6.0]
+            let stdevs = Seq.getStDevOfReplicates 2 data
+            let expected = [0.7071067811865476; 0.7071067811865476; 0.7071067811865476]
+            Expect.sequenceEqual stdevs expected "Stdevs should match expected"
+
+        testCase "largeSequence" <| fun () ->
+            let data = seqGen 10000
+            let stdevs = Seq.getStDevOfReplicates 100 data
+            Expect.equal (Seq.length stdevs) 100 "Should have 100 stdev values"
+    ]
+
+
+
+/// Tests for Seq.getCvOfReplicates, called as `getCvOfReplicates rep data`:
+/// empty input, all-NaN input, constant input, and pinned values for generated data.
+// NOTE(review): "cvOrReplicatesTests" looks like a typo for "cvOfReplicatesTests";
+// the binding name is kept unchanged in case it is referenced elsewhere.
+[<Tests>]
+let cvOrReplicatesTests =
+    testList "Seq.getCvOfReplicates" [
+        testCase "emptySeq" <| fun () ->
+            let cvs = Seq.getCvOfReplicates 2 Seq.empty
+            Expect.isEmpty cvs "CV of empty seq should be empty"
+
+        testCase "nanSeq" <| fun () ->
+            let data = [nan; nan; nan; nan]
+            let cvs = Seq.getCvOfReplicates 2 data |> Seq.take 2 |> List.ofSeq
+            Expect.isTrue (cvs |> List.forall (fun cv -> Double.IsNaN cv)) "All CVs should be NaN"
+
+        testCase "sameValueSeq" <| fun () ->
+            let data = [42.0; 42.0; 42.0; 42.0; 42.0; 42.0]
+            let cvs = Seq.getCvOfReplicates 3 data |> List.ofSeq
+            Expect.equal cvs [0.0; 0.0] "CVs of same values should be 0.0"
+
+        testCase "randomSeq" <| fun () ->
+            let data = seqGen 100
+            let cvs = Seq.getCvOfReplicates 5 data |> Seq.take 20 |> List.ofSeq
+            // Expect.equal reports the actual length on failure, unlike Expect.isTrue on a comparison.
+            Expect.equal cvs.Length 20 "Should return 20 CVs"
+            Expect.floatClose Accuracy.high cvs.[0] -2.6735807467216741173 "CV at index 0"
+            Expect.floatClose Accuracy.high cvs.[4] 3.3738276249721761424 "CV at index 4"
+            Expect.floatClose Accuracy.high cvs.[11] 3.0084709580126212103 "CV at index 11"
+
+    ]
\ No newline at end of file