Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Generalised Linear Models to FSharpStats #334

Merged
merged 54 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
6288d55
Implement IRLS solver for GLMs
LibraChris Feb 8, 2024
425d12b
Rename variables
LibraChris May 8, 2024
d1c9c41
add qr based GLM
LibraChris May 10, 2024
d637be7
add inital tests for the glm
LibraChris May 10, 2024
d0fc5ee
Update glm QR Solver
LibraChris May 14, 2024
d24955a
Add new Test for GLMs using Gamma Distribution
LibraChris May 14, 2024
ddcf09c
Add tests for the Poisson linker functions
LibraChris May 14, 2024
60c3ec1
Add tests for the Gamma linker functions
LibraChris May 14, 2024
0f0661c
Rename testcases to Reflect their log function
LibraChris May 14, 2024
fe83ba6
Add tests for the LogitLinkFunction
LibraChris May 14, 2024
ab44068
Add tests for the InverseSquaredLinkFunction
LibraChris May 15, 2024
c00980e
Add tests by example for glm IrLS solver
LibraChris May 15, 2024
1e6a524
Add tests for the IdentityLinkFunction
LibraChris May 15, 2024
ac416bf
Add tests groudwork for the BinomialLinkFunction
LibraChris May 15, 2024
8f88c1e
Add tests for the variance of Binominal Family
LibraChris May 16, 2024
148a933
Add tests for the variance of Poisson Family
LibraChris May 16, 2024
a8b5f00
Add tests for the variance of Gaussian/Normal Family
LibraChris May 16, 2024
2cbef3c
Fix test implemetation for familyVarianceFunctions
LibraChris May 16, 2024
a73a07e
Add tests for the variance of Gamma Family
LibraChris May 16, 2024
4d03d46
Add tests for the variance of Inv.Gaussian Family
LibraChris May 16, 2024
4465115
Rename test Cases based on their DistributionFamily
LibraChris May 16, 2024
a6e6568
Fix LogitLinkFunction
LibraChris May 16, 2024
de1fcd7
remove redundant BinomialLinkFunction
LibraChris May 16, 2024
3554a02
Remove redundant LinkFunction
LibraChris May 16, 2024
c1f38f1
Fix InverseSquaredLinkFunction
LibraChris May 16, 2024
2a3b096
Updated Gamma Distribution Variance function
LibraChris May 16, 2024
3f5a349
add Deriv Functions
LibraChris May 18, 2024
2787fbd
add Tests for Link and deriv
LibraChris May 18, 2024
3ee33e3
fix various Linkfunctions
LibraChris May 18, 2024
43cea23
Rework GLM QR Solver
LibraChris May 22, 2024
3e83833
Modify tests
LibraChris May 22, 2024
2816155
Add tests prototype for QR-Stepwise iteration
LibraChris May 22, 2024
c5ced84
Fix QR based solver for GLMs
LibraChris May 22, 2024
5029c3a
Modify Variance tests
LibraChris May 22, 2024
c3dddcb
Update statistics
LibraChris May 28, 2024
a7c5c1b
Update GeneralisedLinearModel.fs
LibraChris May 28, 2024
d8877b7
Update GeneralisedLinearModel.fs
LibraChris May 29, 2024
3cd68a8
Update GeneralisedLinearModel.fs
LibraChris May 30, 2024
253ac91
Rework GLMStatistics
LibraChris May 31, 2024
19cad0f
Remove deprecated GLM.Irls
LibraChris May 31, 2024
1b3336f
Fix minor testing issue
LibraChris May 31, 2024
a1d0ee4
add getFamilyReisualDeviance for more families
LibraChris Jun 2, 2024
37d03e0
Write code comments and documentation
LibraChris Jun 5, 2024
5e9a1b6
add Documentation for GLM Usage
LibraChris Jun 7, 2024
72bfb83
Update formating for documentation
LibraChris Jun 10, 2024
a8a0004
added data for Documentation
LibraChris Jun 10, 2024
f694340
remote tests for binominal family variance
LibraChris Jun 10, 2024
8dcd8ab
Adress changes requested in #344
LibraChris Jun 12, 2024
170519e
Adress changes requested in #334
LibraChris Jun 18, 2024
ba5ae9c
Update xml comments
LibraChris Jun 19, 2024
6c3a235
fix building error
LibraChris Jul 3, 2024
2e80081
Fix Typo
LibraChris Jul 3, 2024
13b3de9
Fix indentations
LibraChris Aug 26, 2024
df24c3f
Updated XML documentation
LibraChris Oct 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
312 changes: 312 additions & 0 deletions docs/GeneralisedLinearModels.fsx

Large diffs are not rendered by default.

31 changes: 31 additions & 0 deletions docs/data/cheese.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"","Taste","Acetic","H2S","Lactic"
"1",12.3,94,23,0.86
"2",20.9,174,155,1.53
"3",39,214,230,1.57
"4",47.9,317,1801,1.81
"5",5.6,106,45,0.99
"6",25.9,298,2000,1.09
"7",37.3,362,6161,1.29
"8",21.9,436,2881,1.78
"9",18.1,134,47,1.29
"10",21,189,65,1.58
"11",34.9,311,465,1.68
"12",57.2,630,2719,1.9
"13",0.7,88,20,1.06
"14",25.9,188,140,1.3
"15",54.9,469,856,1.52
"16",40.9,581,14589,1.74
"17",15.9,120,50,1.16
"18",6.4,224,110,1.49
"19",18,190,480,1.63
"20",38.9,230,8639,1.99
"21",14,96,141,1.15
"22",15.2,200,185,1.33
"23",32,234,10322,1.44
"24",56.7,349,26876,2.01
"25",16.8,214,39,1.31
"26",11.6,421,25,1.46
"27",26.5,638,1056,1.72
"28",0.7,206,50,1.25
"29",13.4,331,800,1.08
"30",5.5,481,120,1.25
105 changes: 105 additions & 0 deletions src/FSharp.Stats/Algebra/LinearAlgebra.fs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,111 @@ module LinearAlgebra =
// else LinearAlgebraManaged.QR a
LinearAlgebraManaged.QR a

/// <summary>
/// Performs a reduced QR decomposition using Gram-Schmidt orthogonalisation.
/// QR decomposition is a method to decompose a matrix A into two components:
/// Q (a matrix with mutually orthogonal, unit-length columns) and R (an upper triangular matrix),
/// such that A = Q * R. It is commonly used in solving linear systems,
/// least squares fitting, and eigenvalue problems.
/// For an input with m rows and n columns, Q is allocated as m x n and R as n x n.
/// </summary>
/// <param name="A">The matrix to decompose.</param>
/// <returns>
/// A tuple containing:
/// <list type="bullet">
/// <item><description>Q: The m x n matrix of orthogonalised, normalised columns obtained from the decomposition.</description></item>
/// <item><description>R: The n x n upper triangular matrix obtained from the decomposition.</description></item>
/// </list>
/// </returns>
let qrAlternative (A: Matrix<float>) =
let m: int = A.NumRows
let n: int = A.NumCols

// q and r start zero-filled and are populated in place below.
let q: Matrix<float> = Matrix.zero m n
let r: Matrix<float> = Matrix.zero n n
// qLengths[i] caches the SQUARED length of the (not yet normalised) i-th column of q.
let qLengths: Vector<float> = Vector.zeroCreate n

// Despite its name this returns the sum of squares of v, i.e. the squared length; no square root is taken here.
let getVectorLength (v: Vector<float>) = Vector.fold (fun folder i -> folder+(i*i)) 0. v

// Computes and stores column n of q (the parameter n shadows the column count n of the enclosing function).
// Column 0 is copied from A unchanged; every later column is A's column minus its projections onto
// the previously stored columns of q. All projections are taken against the original column aN.
let setqOfA (n: int) =
let aN: Vector<float> = Matrix.getCol A n
let qN =
if n = 0 then
aN
else
// Only indices i < n are visited, so qLengths[i] has already been filled in by an earlier
// iteration of the loop below and is not the initial zero.
// NOTE(review): no guard is visible for a zero-length earlier column; qLengths[i] = 0 would make
// this division produce non-finite values. Confirm callers guarantee full column rank.
Array.init (n) (fun i ->
let denominator = qLengths[i]
let forNominator: Vector<float> = Matrix.getCol q i
let nominator: float = Vector.dot aN forNominator
// Stored against the unnormalised q column here; rescaled by r[i, i] in the second pass.
r.[i, n] <- nominator
(nominator/denominator) * forNominator
)
|> Array.fold (fun folder e -> folder-e ) aN
Matrix.setCol q n qN
qN

// First pass: build the orthogonal (still unnormalised) columns of q left to right, recording for each
// column its squared length in qLengths and its length on the diagonal of r.
for i=0 to n-1 do
let qN = setqOfA i
let qLength = getVectorLength qN
let rValue = sqrt(qLength)
r[i,i] <- rValue
qLengths[i] <- qLength
Comment on lines +264 to +268
Copy link
Member

@bvenn bvenn Jun 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if I get whats happening here:

  1. qLengths is a zero-vector that gets filled iteratively for each i in line 262
  2. in line 256 setqOfA is called with the current i. However in l 247 qLength is called at position i which always should be 0, because it was initialized with 0. Just after the determination of qLength the value at this specific index is replaced in line 262.

Obviously I miss a step, but is there a possibility to create the qLength by Vector.init (...) rather than keeping it mutable and accessing/mutating it at multiple positions? If not, I'm happy to merge it, but I got confused during my attempt to understand whats happening here..

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you additionally set into -brackets a reference to this implementation and a short description what the benefit, drawback is when using this implementation over others.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not have a direct reference, since I looked at the mathematical explanation of QR via Gram-Schmidt and tried implementing it myself. The only real upside of using this method instead of the one already established in FSharp.Stats is the difference in the output dimensions of R. An actual reference would be the numpy implementation of the reduced QR: https://numpy.org/doc/stable/reference/generated/numpy.linalg.qr.html .

I also think that this implementation needs to be optimised in the future to perfect the GLM.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if I get whats happening here:

  1. qLengths is a zero-vector that gets filled iteratively for each i in line 262
  2. in line 256 setqOfA is called with the current i. However in l 247 qLength is called at position i which always should be 0, because it was initialized with 0. Just after the determination of qLength the value at this specific index is replaced in line 262.

Obviously I miss a step, but is there a possibility to create the qLength by Vector.init (...) rather than keeping it mutable and accessing/mutating it at multiple positions? If not, I'm happy to merge it, but I got confused during my attempt to understand whats happening here..

The mutation was done to simplify the function to match the mathematical explanation better. This is one of the things that could be updated when I get to it.


// Second pass: turn the orthogonal columns of q into unit-length columns and bring r to the matching scale.
for i=0 to n-1 do
let qN: Vector<float> = Matrix.getCol q i
// qLengths[i] holds the squared length of column i, so 1/sqrt(...) scales the column to length 1.
let updateQ = (1./sqrt( qLengths[i] )) * qN
Matrix.setCol q i updateQ
// The off-diagonal entries of row i were computed against the unnormalised column i of q;
// dividing by r[i, i] (that column's length) makes them the dot products with the normalised column.
for j=i+1 to n-1 do
let denominator = r[i, i]
let nominator = r[i, j]
r[i, j] <- (nominator/denominator)

// Entries of r below the diagonal are never written and keep their initial value of zero.
q, r

/// <summary>
/// Solves the linear system A * x = t by decomposing A with <c>qrAlternative</c>
/// and then back-substituting through the resulting upper triangular matrix.
/// </summary>
/// <remarks>
/// A is expected to have at least as many rows as columns; this is only checked
/// through a debug assertion.
/// </remarks>
/// <param name="A">The coefficient matrix of the linear system.</param>
/// <param name="t">The target vector of the linear system.</param>
/// <returns>
/// A tuple containing:
/// <list type="bullet">
/// <item><description>mX: The solution vector of the linear system.</description></item>
/// <item><description>r: The upper triangular matrix obtained from QR decomposition.</description></item>
/// </list>
/// </returns>
let solveLinearQR (A: Matrix<float>) (t: Vector<float>) =
    let rowCount = A.NumRows
    let colCount = A.NumCols

    System.Diagnostics.Debug.Assert(rowCount >= colCount)

    let q, r = qrAlternative A

    let mX: Vector<float> = Vector.zeroCreate colCount

    // Right-hand side of the triangular system r * mX = c.
    let c: Vector<float> = q.Transpose * t

    // Back substitution: walk the rows bottom-up so that every mX[j] with j > i
    // is already known when row i is processed.
    for i = colCount - 1 downto 0 do
        // Contribution of the already solved unknowns to row i, summed in ascending j.
        let mutable crossProd = 0.
        for j = i + 1 to colCount - 1 do
            crossProd <- crossProd + (r[i, j] * mX[j])
        mX[i] <- (c[i] - crossProd) / r[i, i]

    mX, r


///Returns the full Singular Value Decomposition of the input MxN matrix
///
///A : A = U * SIGMA * V**T in the tuple (S, U, V**T),
Expand Down
9 changes: 7 additions & 2 deletions src/FSharp.Stats/FSharp.Stats.fsproj
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@
<Compile Include="Fitting\LogisticRegression.fs" />
<Compile Include="Fitting\QuantileNormalization.fs" />
<Compile Include="Fitting\Spline.fs" />
<Compile Include="Fitting\GeneralisedLinearModel.fs" />
<!-- ML -->
<Compile Include="ML\SurprisalAnalysis.fs" />
<Compile Include="ML\SimilarityMetrics.fs" />
Expand All @@ -170,7 +171,8 @@
<None Include="Playground.fsx" />
</ItemGroup>
<ItemGroup>
<Content Include="../../lib/*.dll" PackagePath="netlib_LAPACK"></Content>
<Content Include="../../lib/*.dll" PackagePath="netlib_LAPACK">
</Content>
</ItemGroup>
<ItemGroup>
<PackageReference Include="FSharpAux.Core" Version="2.0.0" />
Expand All @@ -182,4 +184,7 @@
<ItemGroup>
<None Include="../../README.md" Pack="true" PackagePath="\" />
</ItemGroup>
</Project>
<ItemGroup>
<InternalsVisibleTo Include="FSharp.Stats.Tests" />
</ItemGroup>
</Project>
Loading
Loading