better get_anova(), added add_group_column()

SchmidtPaul · Sep 29, 2023 · af99149 · af99149
1 parent a259ba8
commit af99149
Show file tree

Hide file tree

Showing 50 changed files with 508 additions and 135 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: BioMathR
 Title: Utility functions used at BioMath
-Version: 0.6.0
+Version: 0.7.0
 Authors@R: c(
  person("Paul", "Schmidt", email = "[email protected]", role = c("aut", "cre")),
  person("BioMath", role = "fnd")
@@ -46,6 +46,7 @@ Suggests:
  lme4,
  lmerTest,
  MetBrewer,
+ nlme,
  openair,
  pdftools,
  performance,
@@ -59,3 +60,4 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.2.3
+Roxygen: list(markdown = TRUE)
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,13 +1,11 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(get_anova,glmmTMB)
-S3method(get_anova,lm)
-S3method(get_anova,lmerMod)
 S3method(get_varcomp,glmmTMB)
 S3method(get_varcomp,merMod)
 S3method(tidy_reg,lm)
 S3method(tidy_reg,openair)
 export("%not_in%")
+export(add_group_column)
 export(add_sheet)
 export(cond_format)
 export(create_wb)

diff --git a/R/add_group_column.R b/R/add_group_column.R
@@ -0,0 +1,31 @@
+#' Add Group Column
+#'
+#' This function adds a new column to a data frame with a specified name. The new column indicates groups based on the grouping variables provided. When no grouping variable is provided, a default value is assigned to the new column.
+#'
+#' @param data A data frame.
+#' @param name The name of the new column to be added.
+#' @param group_by A vector of strings specifying the column names used for grouping. Default is NULL.
+#'
+#' @return A data frame with the new column added.
+#' @examples
+#' \dontrun{
+#' data <- data.frame(x = c(1,2,3,1,2,3), y = c("a","b","c","a","b","c"))
+#' data %>% add_group_column(name = "group_id", group_by = c("x", "y"))
+#' }
+#'
+#' @export
+#'
+add_group_column <- function(data, name, group_by = NULL) {
+ if (is.null(group_by)) {
+ data[[name]] <- as.factor("-")
+ return(data)
+ } else {
+ data <- data %>%
+ group_by(across(all_of(group_by))) %>%
+ mutate(temp____col = paste0(name, str_pad(cur_group_id(), width = 2, pad = "0")) %>% as.factor()) %>%
+ ungroup()
+
+ names(data)[which(names(data) == "temp____col")] <- name
+ return(data)
+ }
+}
diff --git a/R/describe.R b/R/describe.R
@@ -79,14 +79,14 @@ describe <-
  if (is.numeric(digits)) {
  out <- out %>%
  mutate(across(
- cols_to_round,
+ all_of(cols_to_round),
  ~ round(., digits = digits)
  ))
  }
  if (digits == "round_smart") {
  out <- out %>%
  mutate(across(
- cols_to_round,
+ all_of(cols_to_round),
  ~ BioMathR::round_smart(., ...)
  ))
  }

diff --git a/R/get_anova.R b/R/get_anova.R
@@ -2,69 +2,126 @@
 #'
 #' @description This function obtains the ANOVA table for a model.
 #'
-#' @param model a fitted model object. Supported classes: \code{"lm"}, \code{"lmerMod"} & \code{"glmmTMB"}.
+#' @param model a fitted model object. Supported classes: \code{"lm"}, \code{"lmerModLmerTest"}, \code{"glmmTMB"}, \code{"gls"}, \code{"lme"}.
 #' @param type Type of ANOVA test: \code{"I"}, \code{"II"} or \code{"III"} (default).
 #' @param ... additional arguments to be passed to the specific anova function.
+#' @param info Logical, if \code{TRUE}, information about the type of ANOVA and
+#' test statistic used is printed to the console. Default is \code{FALSE}.
 #'
-#' @details The function utilizes \code{car::Anova()} for all supported model classes. For models of class 'lm' and 'lmerMod', the F-test is employed. In contrast, for models of the 'glmmTMB' class, the Chi-Square test is used since the F-test is \href{https://github.com/glmmTMB/glmmTMB/blob/bd1932addbfb4edf2ce933675f5a8bf72abc0e7c/glmmTMB/R/Anova.R#L73C8-L73C8}{currently unavailable}. It's worth noting that only the 'lmerMod' models support (and default to) the Kenward-Roger method as a degrees of freedom method.
+#' @details The function utilizes \code{car::Anova()} for all supported model
+#' classes and defaults to type III of sum of squares. For models of class
+#' 'lm' and 'lmerModLmerTest', the F-test is employed. In contrast, for models of the
+#' 'glmmTMB', 'gls', and 'lme' class, the Chi-Square test is used since the
+#' F-test is
+#' \href{https://github.com/glmmTMB/glmmTMB/blob/bd1932addbfb4edf2ce933675f5a8bf72abc0e7c/glmmTMB/R/Anova.R#L73C8-L73C8}{currently
+#' unavailable}. It's worth noting that only the 'lmerModLmerTest' models support (and
+#' default to) the Kenward-Roger method as a degrees of freedom method.
+#'
+#' \itemize{
+#' \item{\code{lm} (Package: stats):} {Types I/II/III; F-Test; Standard df.}
+#' \item{\code{lmer/lmerTest} (Packages: lme4/lmerTest):} {Types I/II/III; F-Test; Kenward-Roger (KR) df.}
+#' \item{\code{lme} (Package: nlme):} {Types I/II/III; Chi-Square Test (Chisq*); Standard df.}
+#' \item{\code{gls} (Package: nlme):} {Types I/II/III; Chi-Square Test (Chisq*); Standard df.}
+#' \item{\code{glmmTMB} (Package: glmmTMB):} {Types I/II/III; Chi-Square Test; Standard df.}
+#' }
+#'
+#' * You can obtain the F-Test only for type I/II via \code{stats::anova()}
+#'
+#' The \code{type} argument specifies the type of sum of squares to be used in the analysis:
+#' \itemize{
+#' \item{Type I (Sequential) sum of squares:}{
+#' The order in which factors are entered into the model does matter. Each factor is adjusted for the factors listed before it.
+#' }
+#' \item{Type II (Marginal) sum of squares:}{
+#' The order in which factors are entered into the model does not matter. Each factor is adjusted for all of the other factors in the model.
+#' }
+#' \item{Type III sum of squares:}{
+#' The order in which factors are entered into the model does not matter, similar to Type II. However, each factor is adjusted for all
+#' of the other factors as well as for itself, which allows for the testing of each factor in the presence of interactions.
+#' }
+#' }
+#'
+#' Kenward-Roger Degrees of Freedom: The Kenward-Roger (KR) method is
+#' a sophisticated approach to approximating the degrees of freedom in mixed
+#' models, particularly in the presence of small sample sizes or unbalanced
+#' data. It is not applicable to non-mixed models. Unlike the classical
+#' degrees of freedom methods which may overestimate the significance of
+#' effects, the KR approximation tends to provide a more conservative and
+#' accurate estimation. This method adjusts the degrees of freedom to account
+#' for the complexity of the mixed model structure, thereby enhancing the
+#' robustness of the resulting inference. The Kenward-Roger method is
+#' especially beneficial when working with complex models that include
+#' multiple random effects and/or nested structures, as it helps to mitigate
+#' the risk of Type I errors, offering a more reliable foundation for
+#' hypothesis testing. Importantly, employing the KR method is never
+#' disadvantageous when compared to using the default method; it provides a
+#' more accurate reflection of the model's complexity and the data structure,
+#' thus leading to more reliable statistical inferences.
 #'
 #' @seealso
 #' [car::Anova()],
 #' [lmerTest::lmer()],
-#' [glmmTMB::glmmTMB()]
+#' [glmmTMB::glmmTMB()],
+#' [nlme::gls()],
+#' [nlme::lme()]
 #'
 #' @export
-get_anova <- function(model, type, ...) {
- assertthat::assert_that(inherits(model, c("lm", "lmerMod", "glmmTMB")),
+get_anova <- function(model, type = "III", ..., info = FALSE) {
+ assertthat::assert_that(inherits(model, c("lm", "lmerMod", "lmerModLmerTest", "glmmTMB", "gls", "lme")),
  msg = "This is not a supported model class.")
 
- if (!requireNamespace("car", quietly = TRUE)) {
- stop("Package 'car' must be installed.")
- }
+ model_class <- class(model)[1] # Extract the primary class name of the model
 
- UseMethod("get_anova")
-}
-
-
-#' @export
-#' @rdname get_anova
-get_anova.lm <- function(model,
- type = c("I", "II", "III")[3],
- ...) {
+ if (model_class == "lmerMod" && !requireNamespace("lmerTest", quietly = TRUE)) {
+ stop("Please fit your model via lmerTest::lmer() instead of lme4::lmer().")
+ }
 
- anova_table <- car::Anova(model, type, ...)
- return(anova_table)
-}
+ test_statistic <- switch(
+ model_class,
+ lm = "F",
+ lmerModLmerTest = "F",
+ glmmTMB = "Chisq",
+ gls = "Chisq",
+ lme = "Chisq",
+ stop("Unsupported model class")
+ )
 
+ ddf <- if (model_class == "lmerModLmerTest") "Kenward-Roger" else NULL
 
-#' @export
-#' @rdname get_anova
-get_anova.lmerMod <- function(model,
- type = c("I", "II", "III")[3],
- ...) {
+ necessary_package <- switch(
+ model_class,
+ lm = "stats",
+ lmerModLmerTest = c("lmerTest", "lme4"),
+ glmmTMB = "glmmTMB",
+ gls = "nlme",
+ lme = "nlme",
+ NULL
+ )
 
- if (!requireNamespace("lme4", quietly = TRUE)) {
- stop("When model object is 'lmerMod', package 'lme4' must be installed.")
+ if (!is.null(necessary_package)) {
+ for (pkg in necessary_package) {
+ assertthat::assert_that(requireNamespace(pkg, quietly = TRUE),
+ msg = sprintf("When model object is '%s', package '%s' must be installed.", model_class, pkg))
+ }
  }
 
- if (!requireNamespace("lmerTest", quietly = TRUE)) {
- stop("When model object is 'lmerMod', package 'lmerTest' must be installed.")
- }
+ anova_table <- car::Anova(model, type = type, test.statistic = test_statistic, ddf = ddf, ...)
 
- anova_table <- car::Anova(model, type = type, test.statistic = "F", ...)
- return(anova_table)
-}
+ # Get an info summary above the table
+ anova_summary <- sprintf(
+ "The ANOVA performed is of Type %s, utilizing a %s test statistic.",
+ type,
+ test_statistic
+ )
 
-#' @export
-#' @rdname get_anova
-get_anova.glmmTMB <- function(model,
- type = c("I", "II", "III")[3],
- ...) {
+ if (!is.null(ddf)) {
+ anova_summary <- paste(anova_summary, sprintf("Degrees of freedom were adjusted using the %s method.", ddf))
+ }
 
- if (!requireNamespace("glmmTMB", quietly = TRUE)) {
- stop("When model object is 'glmmTMB', package 'glmmTMB' must be installed.")
+ # Output the summary only if info = TRUE
+ if (info) {
+ cat(paste0(anova_summary, "\n"))
  }
 
- anova_table <- car::Anova(model, type = type, test.statistic = "Chisq", ...)
  return(anova_table)
 }
diff --git a/R/get_varcomp.R b/R/get_varcomp.R
@@ -12,7 +12,7 @@
 #' @seealso
 #' [lme4::VarCorr()],
 #' [glmmTMB::VarCorr()],
-#' [mixedup::extract_vc()]
+#' [mixedup::extract_vc()](https://github.com/m-clark/mixedup)
 #'
 #' @export
 get_varcomp <- function(model,
@@ -73,7 +73,7 @@ get_varcomp.merMod <- function(model,
 get_varcomp.glmmTMB <- function(model,
  digits = 3) {
 
- if (!requireNamespace("glmmTMB", quietly = TRUE)) {
+ if (!suppressWarnings(requireNamespace("glmmTMB", quietly = TRUE))) {
  stop("When model object is 'glmmTMB', package 'glmmTMB' must be installed.")
  }
 

diff --git a/R/project_setup.R b/R/project_setup.R
@@ -40,9 +40,9 @@ project_setup <- function() {
  # Create setup file
  setup_file <- file.path(root_dir, "code", "00 setup.R")
  setup_content <- 'library(BioMathR) # remotes::install_github("SchmidtPaul/BioMathR")
-library(conflicted) # install.packages("conflicted")
-library(here) # install.packages("here")
-library(tidyverse) # install.packages("tidyverse")
+library(conflicted)
+library(here)
+library(tidyverse)
 
 # conflicts
 conflicts_prefer(dplyr::filter, .quiet = TRUE)
@@ -57,6 +57,11 @@ here_out <- function(...) {here("10 RCode", "out", ...)}
 BMcols <- BioMathR::palette_getset("BioMath")'
  writeLines(setup_content, setup_file)
 
+ # Create import file
+ import_file <- file.path(root_dir, "code", "01 import.R")
+ import_content <- 'source(here::here("10 RCode", "code", "00 setup.R"), encoding = "UTF-8")'
+ writeLines(import_content, import_file)
+
  # send message
- message("Directories and 00 setup.R created successfully!")
+ message("Directories, '00 setup.R' and '01 import.R' created successfully!")
 }
diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -74,6 +74,7 @@ reference:
 
 - title: utility
  contents:
+ - add_group_column
  - project_setup
  - round_smart
  - write_ascii

diff --git a/docs/404.html b/docs/404.html
diff --git a/docs/authors.html b/docs/authors.html
diff --git a/docs/index.html b/docs/index.html
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
@@ -2,7 +2,7 @@ pandoc: 3.1.1
 pkgdown: 2.0.7
 pkgdown_sha: ~
 articles: {}
-last_built: 2023-09-15T10:25Z
+last_built: 2023-09-29T13:40Z
 urls:
  reference: schmidtpaul.github.io/BioMathR/reference
  article: schmidtpaul.github.io/BioMathR/articles

diff --git a/docs/reference/BioMathR-package.html b/docs/reference/BioMathR-package.html