From 7c27e4e88532168194321263d6b224201f47d460 Mon Sep 17 00:00:00 2001 From: Eric Weine Date: Wed, 24 Jul 2024 14:09:47 -0400 Subject: [PATCH 1/7] added initial commit of projection --- R/project_onto_U.R | 79 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 R/project_onto_U.R diff --git a/R/project_onto_U.R b/R/project_onto_U.R new file mode 100644 index 0000000..b218e69 --- /dev/null +++ b/R/project_onto_U.R @@ -0,0 +1,79 @@ +project_onto_U <- function( + fit0, + new_Y, + new_Z, + new_fixed_B +) { + + n <- nrow(new_Y) + m <- ncol(new_Y) + K <- ncol(fit0$U) + + new_V <- matrix( + data = rnorm(m * K, sd = 1e-3), + nrow = m, + ncol = K + ) + + # here I need to integrate the new fixed b with the old fit + # add new size factors + new_B <- matrix(log(Matrix::colMeans(new_Y))) + colnames(new_B) <- "col_size_factor" + n_b <- 1 + + LL <- t(cbind(fit0$U %*% diag(sqrt(fit0$d)),fit0$X,fit0$W)) + FF <- t(cbind(new_V %*% diag(sqrt(fit0$d)),new_B,new_Z)) + + update_indices_f <- 1:K + + FFnew <- matrix(FF,nrow(FF),m) + # Now, project onto U... + update_factors_faster_parallel( + L_T = t(LL), + FF = FFnew, + M = as.matrix(LL[update_indices_f,,drop = FALSE] %*% new_Y), + update_indices = update_indices_f - 1, + num_iter = 1000, + line_search = TRUE, + alpha = .01, + beta = .25 + ) + + FF <- FFnew + + # now, I need to orthonormalize the entire fit + fit <- list() + + fit$U <- t(LL[1:K,, drop = FALSE]) + fit$V <- t(FF[1:K,, drop = FALSE]) + + # I need to add back the dimnames here + fit$V <- rbind(fit$V, fit0$V %*% diag(sqrt(fit0$d))) + + fit <- orthonormalize_fit(fit) + fit$W <- fit0$W + fit$X <- fit0$X + fit$Z <- rbind(new_Z, fit0$Z) + fit$B <- rbind(new_B, fit0$B) + + rownames(fit$U) <- rownames(fit0$U) + colnames(fit$U) <- colnames(fit0$U) + + rownames(fit$V) <- c(colnames(new_Y), rownames(fit0$V)) + colnames(fit$V) <- colnames(fit0$V) + + rownames(fit$W) <- rownames(fit0$W) + colnames(fit$W) <- colnames(fit0$W) + + rownames(fit$Z) <- c(colnames(new_Y), rownames(fit0$Z)) + colnames(fit$Z) <- colnames(fit0$Z) + + rownames(fit$B) <- c(colnames(new_Y), rownames(fit0$B)) + colnames(fit$B) <- colnames(fit0$B) + + rownames(fit$X) <- rownames(fit0$X) + colnames(fit$X) <- colnames(fit0$X) + + return(fit) + +} \ No newline at end of file From e611ccc507676dc5c81e860113809713b31435f6 Mon Sep 17 00:00:00 2001 From: Eric Weine Date: Mon, 29 Jul 2024 12:12:20 -0400 Subject: [PATCH 2/7] added test code --- R/fit.R | 105 ++++++++++++++++++++++++-- inst/scratch/test_projection_method.R | 39 ++++++++++ man/fit_glmpca_pois.Rd | 9 +++ 3 files changed, 148 insertions(+), 5 deletions(-) create mode 100644 inst/scratch/test_projection_method.R diff --git a/R/fit.R b/R/fit.R index 5747945..a6cbedc 100644 --- a/R/fit.R +++ b/R/fit.R @@ -71,6 +71,15 @@ #' control argument for \code{\link[daarem]{daarem}}. This setting #' determines to what extent the monotonicity condition can be #' violated.} +#' +#' \item{\code{training_frac}}{Fraction of the columns of input data \code{Y} +#' to fit initial model on. If set to \code{1} (default), the model is fit +#' by optimizing the parameters on the entire dataset. If set between \code{0} +#' and \code{1}, the model is optimized by first fitting a model on a randomly +#' selected fraction of the columns of \code{Y}, and then projecting the +#' remaining columns of \code{Y} onto the solution. Setting this to a smaller +#' value will increase speed but decrease accuracy. 
+#' } #' #' \item{\code{num_ccd_iter}}{Number of co-ordinate descent updates to #' be made to parameters at each iteration of the algorithm.} @@ -196,7 +205,7 @@ fit_glmpca_pois <- function( # Check and process input argument "control". control <- modifyList(fit_glmpca_pois_control_default(), control,keep.null = TRUE) - + # Set up the internal fit. D <- sqrt(fit0$d) if (K == 1) @@ -205,7 +214,7 @@ fit_glmpca_pois <- function( D <- diag(D) LL <- t(cbind(fit0$U %*% D,fit0$X,fit0$W)) FF <- t(cbind(fit0$V %*% D,fit0$B,fit0$Z)) - + # Determine which rows of LL and FF are "clamped". fixed_l <- numeric(0) fixed_f <- numeric(0) @@ -217,9 +226,91 @@ fit_glmpca_pois <- function( fixed_f <- c(fixed_f,K + fit0$fixed_b_cols) if (nz > 0) fixed_f <- c(fixed_f,K + nx + seq(1,nz)) - - # Perform the updates. - res <- fit_glmpca_pois_main_loop(LL,FF,Y,fixed_l,fixed_f,verbose,control) + + if (control$training_frac == 1) { + + # Perform the updates. + res <- fit_glmpca_pois_main_loop(LL,FF,Y,fixed_l,fixed_f,verbose,control) + + } else { + + if (control$training_frac <= 0 || control$training_frac > 1) + stop("control argument \"training_frac\" should be between 0 and 1") + + train_idx <- sample( + 1:ncol(Y), + size = ceiling(ncol(Y) * control$training_frac) + ) + + Y_train <- Y[, train_idx] + FF_train <- FF[, train_idx] + FF_test <- FF[, -train_idx] + Y_test <- Y[, -train_idx] + + test_idx <- 1:ncol(Y) + test_idx <- test_idx[-train_idx] + + # Perform the updates. + res <- fit_glmpca_pois_main_loop( + LL, + FF_train, + Y_train, + fixed_l, + fixed_f, + verbose, + control + ) + + update_indices_f <- sort(setdiff(1:K,fixed_f)) + + FF_to_update <- FF_test + #browser() + + # now, I just need to project the results back + update_factors_faster_parallel( + L_T = t(res$fit$LL), + FF = FF_to_update, + M = as.matrix(res$fit$LL[update_indices_f,,drop = FALSE] %*% Y_test), + update_indices = update_indices_f - 1, + num_iter = 1000, + line_search = TRUE, + alpha = .01, + beta = .25 + ) + + # now, I need to reconstruct FF, and hopefully compute the log-likelihood + FF[, train_idx] <- FF_train + FF[, test_idx] <- FF_to_update + res$fit$FF <- FF + + print(glue::glue("train loglik = {res$loglik}")) + + if (inherits(Y,"sparseMatrix")) { + test_loglik_const <- sum(mapSparse(Y_test,lfactorial)) + loglik_func <- lik_glmpca_pois_log_sp + } else { + test_loglik_const <- sum(lfactorial(Y_test)) + loglik_func <- lik_glmpca_pois_log + } + + test_loglik <- loglik_func(Y_test,res$fit$LL,FF_to_update,test_loglik_const) + print(glue::glue("test loglik = {test_loglik}")) + + print(glue::glue("Expected total loglik = {res$loglik + test_loglik}")) + + if (inherits(Y,"sparseMatrix")) { + loglik_const <- sum(mapSparse(Y,lfactorial)) + loglik_func <- lik_glmpca_pois_log_sp + } else { + loglik_const <- sum(lfactorial(Y)) + loglik_func <- lik_glmpca_pois_log + } + + res$loglik <- loglik_func(Y,res$fit$LL,res$fit$FF,loglik_const) + + print(glue::glue("Calculated loglik = {res$loglik}")) + + } # Prepare the final output. res$progress$iter <- max(fit0$progress$iter) + res$progress$iter @@ -258,9 +349,12 @@ fit_glmpca_pois <- function( dimnames(fit$W) <- dimnames(fit0$W) } class(fit) <- c("glmpca_pois_fit","list") + return(fit) + } + # This implements the core part of fit_glmpca_pois. 
# #' @importFrom Matrix t @@ -358,6 +452,7 @@ fit_glmpca_pois_control_default <- function() list(use_daarem = FALSE, maxiter = 100, tol = 1e-4, + training_frac = 1, mon.tol = 0.05, convtype = "objfn", line_search = TRUE, diff --git a/inst/scratch/test_projection_method.R b/inst/scratch/test_projection_method.R new file mode 100644 index 0000000..e575650 --- /dev/null +++ b/inst/scratch/test_projection_method.R @@ -0,0 +1,39 @@ +library(fastglmpca) + +set.seed(1) +fit1 <- fit_glmpca_pois( + Y = pbmc_facs$counts, + K = 2, + control = list(training_frac = 1, maxiter = 10) +) + +# for some reason the calculated log-likelihood and the expected +# are not matching up +set.seed(1) +fit2 <- fit_glmpca_pois( + Y = pbmc_facs$counts, + K = 2, + control = list(training_frac = 0.99, maxiter = 10) +) +# +# df1 <- data.frame( +# celltype = pbmc_facs$samples$celltype, +# PC1 = fit1$V[,1], +# PC2 = fit1$V[,2] +# ) +# +# library(ggplot2) +# +# ggplot(data = df1) + +# geom_point(aes(x = PC1, y = PC2, color = celltype)) +# +# df2 <- data.frame( +# celltype = pbmc_facs$samples$celltype, +# PC1 = fit2$V[,1], +# PC2 = fit2$V[,2] +# ) +# +# library(ggplot2) +# +# ggplot(data = df2) + +# geom_point(aes(x = PC1, y = PC2, color = celltype)) diff --git a/man/fit_glmpca_pois.Rd b/man/fit_glmpca_pois.Rd index 12a512f..f745a6c 100644 --- a/man/fit_glmpca_pois.Rd +++ b/man/fit_glmpca_pois.Rd @@ -185,6 +185,15 @@ the help accompanying these functions for details. control argument for \code{\link[daarem]{daarem}}. This setting determines to what extent the monotonicity condition can be violated.} + +\item{\code{training_frac}}{Fraction of the columns of input data \code{Y} + to fit initial model on. If set to \code{1} (default), the model is fit + by optimizing the parameters on the entire dataset. If set between \code{0} + and \code{1}, the model is optimized by first fitting a model on a randomly + selected fraction of the columns of \code{Y}, and then projecting the + remaining columns of \code{Y} onto the solution. Setting this to a smaller + value will increase speed but decrease accuracy. +} \item{\code{num_ccd_iter}}{Number of co-ordinate descent updates to be made to parameters at each iteration of the algorithm.} From 5c425758ed1f83ac1db1fde9d7f4bce29b201af4 Mon Sep 17 00:00:00 2001 From: Eric Weine Date: Mon, 29 Jul 2024 12:12:58 -0400 Subject: [PATCH 3/7] removed projection script --- R/project_onto_U.R | 79 ---------------------------------------------- 1 file changed, 79 deletions(-) delete mode 100644 R/project_onto_U.R diff --git a/R/project_onto_U.R b/R/project_onto_U.R deleted file mode 100644 index b218e69..0000000 --- a/R/project_onto_U.R +++ /dev/null @@ -1,79 +0,0 @@ -project_onto_U <- function( - fit0, - new_Y, - new_Z, - new_fixed_B -) { - - n <- nrow(new_Y) - m <- ncol(new_Y) - K <- ncol(fit0$U) - - new_V <- matrix( - data = rnorm(m * K, sd = 1e-3), - nrow = m, - ncol = K - ) - - # here I need to integrate the new fixed b with the old fit - # add new size factors - new_B <- matrix(log(Matrix::colMeans(new_Y))) - colnames(new_B) <- "col_size_factor" - n_b <- 1 - - LL <- t(cbind(fit0$U %*% diag(sqrt(fit0$d)),fit0$X,fit0$W)) - FF <- t(cbind(new_V %*% diag(sqrt(fit0$d)),new_B,new_Z)) - - update_indices_f <- 1:K - - FFnew <- matrix(FF,nrow(FF),m) - # Now, project onto U... 
- update_factors_faster_parallel( - L_T = t(LL), - FF = FFnew, - M = as.matrix(LL[update_indices_f,,drop = FALSE] %*% new_Y), - update_indices = update_indices_f - 1, - num_iter = 1000, - line_search = TRUE, - alpha = .01, - beta = .25 - ) - - FF <- FFnew - - # now, I need to orthonormalize the entire fit - fit <- list() - - fit$U <- t(LL[1:K,, drop = FALSE]) - fit$V <- t(FF[1:K,, drop = FALSE]) - - # I need to add back the dimnames here - fit$V <- rbind(fit$V, fit0$V %*% diag(sqrt(fit0$d))) - - fit <- orthonormalize_fit(fit) - fit$W <- fit0$W - fit$X <- fit0$X - fit$Z <- rbind(new_Z, fit0$Z) - fit$B <- rbind(new_B, fit0$B) - - rownames(fit$U) <- rownames(fit0$U) - colnames(fit$U) <- colnames(fit0$U) - - rownames(fit$V) <- c(colnames(new_Y), rownames(fit0$V)) - colnames(fit$V) <- colnames(fit0$V) - - rownames(fit$W) <- rownames(fit0$W) - colnames(fit$W) <- colnames(fit0$W) - - rownames(fit$Z) <- c(colnames(new_Y), rownames(fit0$Z)) - colnames(fit$Z) <- colnames(fit0$Z) - - rownames(fit$B) <- c(colnames(new_Y), rownames(fit0$B)) - colnames(fit$B) <- colnames(fit0$B) - - rownames(fit$X) <- rownames(fit0$X) - colnames(fit$X) <- colnames(fit0$X) - - return(fit) - -} \ No newline at end of file From 7222d45abf914367ca37ea539f1792862e6367a2 Mon Sep 17 00:00:00 2001 From: Eric Weine Date: Thu, 1 Aug 2024 11:54:19 -0500 Subject: [PATCH 4/7] fixed bug in projection method --- R/fit.R | 20 ++------------------ inst/scratch/test_projection_method.R | 2 +- 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/R/fit.R b/R/fit.R index a6cbedc..5664ebc 100644 --- a/R/fit.R +++ b/R/fit.R @@ -279,12 +279,10 @@ fit_glmpca_pois <- function( ) # now, I need to reconstruct FF, and hopefully compute the log-likelihood - FF[, train_idx] <- FF_train + FF[, train_idx] <- res$fit$FF FF[, test_idx] <- FF_to_update res$fit$FF <- FF - print(glue::glue("train loglik = {res$loglik}")) - if (inherits(Y,"sparseMatrix")) { test_loglik_const <- sum(mapSparse(Y_test,lfactorial)) loglik_func <- lik_glmpca_pois_log_sp @@ -294,21 +292,7 @@ fit_glmpca_pois <- function( } test_loglik <- loglik_func(Y_test,res$fit$LL,FF_to_update,test_loglik_const) - print(glue::glue("test loglik = {test_loglik}")) - - print(glue::glue("Expected total loglik = {res$loglik + test_loglik}")) - - if (inherits(Y,"sparseMatrix")) { - loglik_const <- sum(mapSparse(Y,lfactorial)) - loglik_func <- lik_glmpca_pois_log_sp - } else { - loglik_const <- sum(lfactorial(Y)) - loglik_func <- lik_glmpca_pois_log - } - - res$loglik <- loglik_func(Y,res$fit$LL,res$fit$FF,loglik_const) - - print(glue::glue("Calculated loglik = {res$loglik}")) + res$loglik <- res$loglik + test_loglik } diff --git a/inst/scratch/test_projection_method.R b/inst/scratch/test_projection_method.R index e575650..18e213b 100644 --- a/inst/scratch/test_projection_method.R +++ b/inst/scratch/test_projection_method.R @@ -13,7 +13,7 @@ set.seed(1) fit2 <- fit_glmpca_pois( Y = pbmc_facs$counts, K = 2, - control = list(training_frac = 0.99, maxiter = 10) + control = list(training_frac = 0.99, maxiter = 5) ) # # df1 <- data.frame( From 681e5c1a6a260656e5446afd39778fc21746019c Mon Sep 17 00:00:00 2001 From: Eric Weine Date: Thu, 1 Aug 2024 11:57:20 -0500 Subject: [PATCH 5/7] made code more memory efficient --- R/fit.R | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/R/fit.R b/R/fit.R index 5664ebc..66102a3 100644 --- a/R/fit.R +++ b/R/fit.R @@ -263,13 +263,10 @@ fit_glmpca_pois <- function( update_indices_f <- sort(setdiff(1:K,fixed_f)) - FF_to_update <- FF_test - 
#browser() - # now, I just need to project the results back update_factors_faster_parallel( L_T = t(res$fit$LL), - FF = FF_to_update, + FF = FF_test, M = as.matrix(res$fit$LL[update_indices_f,,drop = FALSE] %*% Y_test), update_indices = update_indices_f - 1, num_iter = 1000, @@ -280,7 +277,7 @@ fit_glmpca_pois <- function( # now, I need to reconstruct FF, and hopefully compute the log-likelihood FF[, train_idx] <- res$fit$FF - FF[, test_idx] <- FF_to_update + FF[, test_idx] <- FF_test res$fit$FF <- FF if (inherits(Y,"sparseMatrix")) { @@ -291,7 +288,7 @@ fit_glmpca_pois <- function( loglik_func <- lik_glmpca_pois_log } - test_loglik <- loglik_func(Y_test,res$fit$LL,FF_to_update,test_loglik_const) + test_loglik <- loglik_func(Y_test,res$fit$LL,FF_test,test_loglik_const) res$loglik <- res$loglik + test_loglik } From c6f9b4fce46398ce1f843cc620a5ed8c3c47b864 Mon Sep 17 00:00:00 2001 From: Eric Weine Date: Sun, 4 Aug 2024 14:38:06 -0500 Subject: [PATCH 6/7] added option for number of projection iterations --- R/fit.R | 15 +++++++++++---- inst/scratch/test_projection_method.R | 9 ++++++++- man/fit_glmpca_pois.Rd | 6 ++++++ 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/R/fit.R b/R/fit.R index 66102a3..eefae6a 100644 --- a/R/fit.R +++ b/R/fit.R @@ -80,6 +80,12 @@ #' remaining columns of \code{Y} onto the solution. Setting this to a smaller #' value will increase speed but decrease accuracy. #' } +#' +#' \item{\code{num_projection_ccd_iter}}{Number of co-ordinate descent updates +#' be made to elements of \code{V} if and when a subset of \code{Y} is +#' projected onto \code{U}. Only used if \code{training_frac} is less than +#' \code{1}. +#' } #' #' \item{\code{num_ccd_iter}}{Number of co-ordinate descent updates to #' be made to parameters at each iteration of the algorithm.} @@ -269,10 +275,10 @@ fit_glmpca_pois <- function( FF = FF_test, M = as.matrix(res$fit$LL[update_indices_f,,drop = FALSE] %*% Y_test), update_indices = update_indices_f - 1, - num_iter = 1000, - line_search = TRUE, - alpha = .01, - beta = .25 + num_iter = control$num_projection_ccd_iter, + line_search = control$line_search, + alpha = control$ls_alpha, + beta = control$ls_beta ) # now, I need to reconstruct FF, and hopefully compute the log-likelihood @@ -434,6 +440,7 @@ fit_glmpca_pois_control_default <- function() maxiter = 100, tol = 1e-4, training_frac = 1, + num_projection_ccd_iter = 10, mon.tol = 0.05, convtype = "objfn", line_search = TRUE, diff --git a/inst/scratch/test_projection_method.R b/inst/scratch/test_projection_method.R index 18e213b..fc8a80a 100644 --- a/inst/scratch/test_projection_method.R +++ b/inst/scratch/test_projection_method.R @@ -13,7 +13,14 @@ set.seed(1) fit2 <- fit_glmpca_pois( Y = pbmc_facs$counts, K = 2, - control = list(training_frac = 0.99, maxiter = 5) + control = list(training_frac = 0.25, maxiter = 10, num_projection_ccd_iter = 25) +) + +set.seed(1) +fit3 <- fit_glmpca_pois( + Y = pbmc_facs$counts, + K = 2, + control = list(training_frac = 0.25, maxiter = 10, num_projection_ccd_iter = 5) ) # # df1 <- data.frame( diff --git a/man/fit_glmpca_pois.Rd b/man/fit_glmpca_pois.Rd index f745a6c..e72c68a 100644 --- a/man/fit_glmpca_pois.Rd +++ b/man/fit_glmpca_pois.Rd @@ -195,6 +195,12 @@ the help accompanying these functions for details. value will increase speed but decrease accuracy. } +\item{\code{num_projection_ccd_iter}}{Number of co-ordinate descent updates + be made to elements of \code{V} if and when a subset of \code{Y} is + projected onto \code{U}. 
Only used if \code{training_frac} is less than + \code{1}. +} + \item{\code{num_ccd_iter}}{Number of co-ordinate descent updates to be made to parameters at each iteration of the algorithm.} From 49ebe8685a0cea7b9ce8bb87010099cae5e48233 Mon Sep 17 00:00:00 2001 From: Eric Weine Date: Thu, 22 Aug 2024 18:26:11 -0400 Subject: [PATCH 7/7] added additional check in subset method --- R/fit.R | 14 ++++++++++++++ inst/.DS_Store | Bin 0 -> 10244 bytes inst/scratch/test_projection_method.R | 6 ++++-- 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 inst/.DS_Store diff --git a/R/fit.R b/R/fit.R index eefae6a..31a2023 100644 --- a/R/fit.R +++ b/R/fit.R @@ -248,7 +248,21 @@ fit_glmpca_pois <- function( size = ceiling(ncol(Y) * control$training_frac) ) + browser() Y_train <- Y[, train_idx] + + if (any(Matrix::rowSums(Y_train) == 0) || any(Matrix::colSums(Y_train) == 0)) { + + stop( + "After subsetting, the remaining values of \"Y\" ", + "contain a row or a column where all counts are 0. This can cause ", + "problems with optimization. Please either remove rows / columns ", + "with few non-zero counts from \"Y\", or set \"training_frac\" to ", + "a larger value." + ) + + } + FF_train <- FF[, train_idx] FF_test <- FF[, -train_idx] Y_test <- Y[, -train_idx] diff --git a/inst/.DS_Store b/inst/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..363cf839b0b87f3f296528ea15e8600ba44f820f GIT binary patch literal 10244 zcmeHMUu+ab7@zMbbaxAMTUy*+i=Lbalmdm;@~0rzf7r^O)?Q2hP&oE>x9!rs-E(*E zN+HBXW1?vM`>0|xYGRBM6EOyS&<7(gK4^%-gBmgMMO0o4pbz-X&R%J+VB&*F;!ZOA z&3yBHGdsWeeX}=ngb--XsOt!c5JEW2REqQQaf3wLX`YorzNQEzpg$oK#34FyIt)vh zWgRF21OfyC1OfyC1OfzZ2L$k&O%q4I-$4ox2oMMmn2rG5A0n8kjQMhsNpI`Ghj#=( zS%7LUVHwW>o=?P=F<(wH>A7N`?DPP>EBHqYVD8u+q2?%KzMN!|xf5XS1pJY~KcRp( zI{HOCa{@6YgJ*z1fWT}7=(nFEGBGh5)6efo!%AWs4W}l|Yp!_-OElOOViA>S^qfD< z?nIs)hc+^CF*`&<>D>!lw}&Np_C;uQh@|mp<1Io8rFjFdr}DfCv3CVM=tv3~o%v31 z_3y2d<4)RtqmPnlLwDTT+OHurr(o{9LavZ2;`YTyt?{@kyBVihw)fInk7XIjTv<-; z(Y5h}Sk_{huB@Ay?K$YGh9*OOx~W>@%~r-#9nUw;8aSTg6QXqF$ohubnn+!J-O-xJ zkxlEdTwm94^eE4jN7puXC&rAFX&u8EU;=;RBMVr4>Phj)!$5TuBZ+TGUGkcACF@TZ zzQ|4Zdr{0ynHP%YFIZSCmXt0kDJv;2UwnsDUJ(vgCB#KZSs5F#=~P-Q!?K%=jJ9{g z*421gR&;Zu-_;Jeas9BC5KHq+yES$Hbyd$q8g0t1E4=O=urW-c?v>sDVX(dp@^PU&-RqD?q z^@lUMJ0V6n&eK*_Ny3ndWACz5+M6n#gollFU$BR?(mfxO9iQ4u~I3VisS3HY?Y+_bl9D; z<94Ps6JP0!KXwzg4LYN;t#yv$cH(Q>Q0oyD_0*|N)maC-0IE z$*1IV@+CP>z9T=73*<*~k^D|BlPk#B5EMfRl)@sYf)%h5s-X@x!e(fMEzkxJ!cN!) zyP*&EK>`kd36M? zDY+fD)ehJJJ`t2QR=&@G=~SSK&1z z>pSp49tGQTj$lh}3!mu})B*$o1OfyC1OfzZ5dw4AI1zdW(023x z|8LRT1j7Uf1PJ^W2tYw=ytNrezS%F+5Vl1yAHYl#wVPzpbHN8Sp?dE+9xL=Z-n%J5 ooqxpi+GD=mB$J*y`g!gs*=_#6{%1gN{twRo({954kLUlt0HZ2OVgLXD literal 0 HcmV?d00001 diff --git a/inst/scratch/test_projection_method.R b/inst/scratch/test_projection_method.R index fc8a80a..9a72c24 100644 --- a/inst/scratch/test_projection_method.R +++ b/inst/scratch/test_projection_method.R @@ -1,10 +1,12 @@ library(fastglmpca) set.seed(1) +cc <- pbmc_facs$counts[Matrix::rowSums(pbmc_facs$counts) > 10, ] + fit1 <- fit_glmpca_pois( - Y = pbmc_facs$counts, + Y = cc, K = 2, - control = list(training_frac = 1, maxiter = 10) + control = list(training_frac = 0.99, maxiter = 10) ) # for some reason the calculated log-likelihood and the expected
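
A conceptual sketch of the projection step introduced across these patches (illustrative only: the package itself calls the internal C++ routine update_factors_faster_parallel for control$num_projection_ccd_iter coordinate-descent updates, and the helper name and arguments below are hypothetical). When control$training_frac is less than 1, fit_glmpca_pois optimizes the model on a random subset of the columns of Y and then recovers factors for the held-out columns against the fixed loadings under the Poisson log-link model, which at convergence coincides with a per-column Poisson GLM fit:

# Conceptual sketch of the projection step (illustrative only; not the
# package's internal routine). Each held-out column of Y is fit by a
# Poisson GLM with log link, treating the loadings as a fixed design
# matrix; factor rows that stay fixed (e.g., size factors, covariates)
# enter through an offset.
project_columns_sketch <- function(L_update, L_fixed, F_fixed, Y_test) {
  # L_update: n x K loadings whose factors are re-estimated per column
  # L_fixed:  n x q loadings paired with factor rows held fixed (q may be 0)
  # F_fixed:  q x m fixed factor values for the m held-out columns
  # Y_test:   n x m matrix of held-out counts
  K <- ncol(L_update)
  vapply(seq_len(ncol(Y_test)), function(j) {
    y   <- as.numeric(Y_test[, j])
    off <- rep(0, length(y))
    if (ncol(L_fixed) > 0)
      off <- as.numeric(L_fixed %*% F_fixed[, j])
    # Poisson GLM, log link, no intercept, design fixed at L_update
    coef(glm(y ~ L_update - 1, family = poisson(link = "log"), offset = off))
  }, numeric(K))
}

In the patches themselves the analogous computation updates only the first K rows of FF (the factors paired with U) while the size-factor row(s) B and the covariate rows Z are held fixed, and the held-out log-likelihood is then added to the training log-likelihood to give the reported res$loglik.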