sweep out (some) cobwebs

KaiAragaki · Sep 3, 2024 · 04f4287 · 04f4287
1 parent be84fe6
commit 04f4287
Show file tree

Hide file tree

Showing 7 changed files with 67 additions and 61 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -8,6 +8,7 @@ S3method(pcr_plot,data.frame)
 S3method(pcr_plot,pcr)
 S3method(pcr_rq,data.frame)
 S3method(pcr_rq,pcr)
+S3method(well_data,pcr)
 export(as_pcr)
 export(pcr_calc_slope)
 export(pcr_control)
@@ -27,7 +28,6 @@ export(read_pcr)
 export(scrub)
 export(tidy_lab)
 export(well_data)
-export(well_data.pcr)
 importFrom(ggplot2,aes)
 importFrom(ggplot2,element_blank)
 importFrom(gplate,well_data)

diff --git a/R/pcr_rq.R b/R/pcr_rq.R
@@ -18,11 +18,11 @@ pcr_rq <- function(x, relative_sample, control_probe = NULL, ...) {
 #' @examples
 #' dat_path <- system.file("extdata", "untidy-pcr-example.xls", package = "amplify")
 #'
-#' pcr_tidy(dat_path) |>
+#' read_pcr(dat_path) |>
 #'   pcr_rq("U6D1")
 #'
 #' # Can also be run after using pcr_control:
-#' pcr_tidy(dat_path) |>
+#' read_pcr(dat_path) |>
 #'   pcr_control("GAPDH") |>
 #'   pcr_rq("U6D1")
 pcr_rq.pcr <- function(x, relative_sample, control_probe = NULL, ...) {
@@ -61,40 +61,45 @@ pcr_rq.data.frame <- function(x, relative_sample, control_probe = NULL, ...) {
   used_wells <- x |>
     dplyr::filter(!is.na(.data$target_name), !is.na(.data$sample_name))
 
-  with_ct_summaries <- used_wells |>
-    dplyr::mutate(
+  ct_summaries <- used_wells |>
+    dplyr::summarize(
       ct_mean = mean(.data$ct, na.rm = TRUE),
       ct_sd = stats::sd(.data$ct, na.rm = TRUE),
       rep = dplyr::n(),
       .by = c("target_name", "sample_name")
     )
 
-  minimal_data <- with_ct_summaries |>
-    dplyr::select(
-      ".row", ".col", "ct_mean", "ct_sd", "target_name", "sample_name", "rep"
-    )
+  minimal_data <- ct_summaries |>
+    dplyr::select("ct_mean", "ct_sd", "target_name", "sample_name", "rep")
 
-  minimal_data_with_deltas <- minimal_data |>
-    dplyr::mutate(
-      delta_ct = .data$ct_mean - .data$ct_mean[.data$target_name == control_probe],
-      delta_ct_sd  = sqrt(.data$ct_sd^2 + .data$ct_sd[.data$target_name == control_probe]^2),
-      delta_ct_se  = .data$delta_ct_sd / sqrt(.data$rep),
-      df = max(1, .data$rep + .data$rep[.data$target_name == control_probe] - 2),
-      t = stats::qt(.05 / 2, .data$df, lower.tail = FALSE),
-      .by = "sample_name"
-    ) |>
-    dplyr::mutate(
-      delta_delta_ct = .data$delta_ct - .data$delta_ct[.data$sample_name == relative_sample],
-      rq = 2^-(.data$delta_delta_ct),
-      rq_min = 2^-(.data$delta_delta_ct + .data$t * .data$delta_ct_se),
-      rq_max = 2^-(.data$delta_delta_ct - .data$t * .data$delta_ct_se),
-      .by = "target_name"
-    )
+  calc_dct <- function(x) {
+    control <- dplyr::filter(x, .data$target_name == control_probe)
+    x$delta_ct <- x$ct_mean - control$ct_mean
+    x$delta_ct_sd <- sqrt(x$ct_sd^2 + control$ct_sd^2)
+    x$delta_ct_se <- x$delta_ct_sd / sqrt(x$rep)
+    x$df <- max(1, x$rep + control$rep - 2)
+    x$t <- stats::qt(0.05 / 2, x$df, lower.tail = FALSE)
+    x
+  }
 
-  left_join(
-    with_ct_summaries, minimal_data_with_deltas,
-    by = dplyr::all_of(colnames(minimal_data))
-  )
+  calc_rq <- function(x) {
+    relative <- dplyr::filter(x, .data$sample_name == relative_sample)
+    x$delta_delta_ct <- x$delta_ct - relative$delta_ct
+    x$rq <- 2^-(x$delta_delta_ct)
+    x$rq_min <- 2^-(x$delta_delta_ct + x$t * x$delta_ct_se)
+    x$rq_max <- 2^-(x$delta_delta_ct - x$t * x$delta_ct_se)
+    x
+  }
+
+  w_rq <- minimal_data |>
+    tapply(~sample_name, calc_dct) |>
+    array2DF() |>
+    dplyr::select(-1) |>
+    tapply(~target_name, calc_rq) |>
+    array2DF() |>
+    dplyr::select(-1)
+
+  dplyr::left_join(used_wells, w_rq, by = c("sample_name", "target_name"))
 }
 
 get_control_probe <- function(df, control_probe) {

diff --git a/R/utils.R b/R/utils.R
@@ -6,12 +6,13 @@ well_data.pcr <- function(x, ...) {
 
 #' Recalcuate standard slope of quantity vs Ct
 #'
-#' @param tidy_pcr a object that has been tidied by `tidy_pcr`
+#' @param tidy_pcr or data.frame object that has been tidied by `tidy_pcr`
 #'
 #' @return a tibble with an updated `slope` column
 #' @export
 pcr_calc_slope <- function(tidy_pcr) {
-  tidy_pcr$data$well_data$slope <- stats::lm(ct~log10(quantity), data = tidy_pcr$data$well_data)$coefficients[2] |> unname()
+  if (inherits(tidy_pcr, "pcr")) tidy_pcr <- tidy_pcr$data$well_data
+  tidy_pcr$slope <- stats::lm(ct ~ log10(quantity), data = tidy_pcr)$coefficients[2] |> unname()
   tidy_pcr
 }
 

diff --git a/man/pcr_lib_calc.Rd b/man/pcr_lib_calc.Rd
diff --git a/man/pcr_plate_view.Rd b/man/pcr_plate_view.Rd
diff --git a/man/pcr_rq.Rd b/man/pcr_rq.Rd
diff --git a/vignettes/analyzing-ddctqrtpcr.Rmd b/vignettes/analyzing-ddctqrtpcr.Rmd
@@ -34,24 +34,24 @@ library(ggplot2)
 library(stringr)
 ```
 
-We have a particular advantage because although the data are untidy, they are untidy in a very specific way. Reading in data is as simple as running `pcr_tidy`, which is given a path to the pcr .xls(x) file. If no path is given, an interactive file explorer window will appear for the user to choose the file.
+We have a particular advantage because although the data are untidy, they are untidy in a very specific way. Reading in data is as simple as running `read_pcr` and `scrub` (re-exports from the [`mop`](https://github.com/KaiAragaki/mop) package), which is given a path to the pcr .xls(x) file. If no path is given, an interactive file explorer window will appear for the user to choose the file.
 
 ```{r read_in_data}
-dat <- 
-  system.file("extdata", "untidy-pcr-example-2.xlsx", package = "amplify") |> 
-  pcr_tidy()
+dat <- system.file("extdata", "untidy-pcr-example-2.xlsx", package = "amplify") |>
+  read_pcr() |>
+  scrub()
 dat
 ```
 
-The typical format of PCR data is woefully non-rectangular. `pcr_tidy` fixes this by skipping to the good bits, but also pulling out useful metadata supplied in the header and footer of the dataset - like the last few columns:
+The typical format of PCR data is woefully non-rectangular. `amplify` fixes this by skipping to the good bits, but also pulling out useful metadata supplied in the header and footer of the dataset - like the last few columns:
 
 ```{r}
-select(dat, plate_type:ref_samp)
+select(dat, analysis_type:reference_sample)
 ```
 
 # Plate Plotting with `pcr_plate_view`
 
-`pcr_tidy` also adds two features derived from the `well_position` feature into two separate features - `well_row` and `well_col`. This makes downstream plotting particularly easy. A built in function can do this automatically for us:
+`pcr_plate_view` makes it very easy to get a bird's-eye view of the plate:
 
 ```{r plot_plate_target}
 pcr_plate_view(dat)
@@ -68,15 +68,16 @@ pcr_plate_view(dat, "sample_name")
 We can also look at CT:
 
 ```{r plot_plate_ct}
-pcr_plate_view(dat, fill = "ct") + 
-  scale_fill_viridis_c()
+pcr_plate_view(dat, fill = ct) +
+  scale_color_viridis_c(end = 0.9)
 ```
 
 Looks like some didn't amplify at all. 
 You might consider looking at it with `rq` but...
 
 ```{r plot_plate_rq}
-pcr_plate_view(dat, fill = "rq") + scale_fill_viridis_c()
+pcr_plate_view(dat, fill = rq) +
+  scale_color_viridis_c()
 ```
 
 Immediately we see a loss of information: Not only are the scales between
@@ -99,16 +100,16 @@ We notice the differences between cell lines, but the large dynamic range makes
 First, we need to extract the cell line from the sample name. Additionally, I don't like how it says "Zeb1" and "Zeb2" instead of "ZEB1" and "ZEB2" (the capitalization may mislead people into thinking this is murine, when it is in fact human). I'll change that here.
 
 ```{r}
-dat <- dat |> 
-  mutate(cell_line = str_extract(sample_name, "^.*(?=[:space:])")) |> 
+dat <- dat |>
+  mutate(cell_line = str_extract(sample_name, "^.*(?=[:space:])")) |>
   mutate(target_name = str_replace(target_name, "Zeb", "ZEB"))
 ```
 
 As an example, let's look at UC14:
 
 ```{r}
-uc14 <- dat |> 
-  filter(cell_line == "UC14") |> 
+uc14 <- dat |>
+  filter(cell_line == "UC14") |>
   pcr_rq("UC14 PBS")
 
 pcr_plot(uc14)
@@ -117,7 +118,7 @@ pcr_plot(uc14)
 I personally prefer it when the control is on the left and the experimental conditions are on the right - let's flip them. While we're at it, let's get rid of the legend - it doesn't tell us any extra information and is just taking up space.
 
 ```{r}
-uc14 <- uc14 |> 
+uc14 <- uc14 |>
   mutate(sample_name = factor(sample_name, levels = c("UC14 PBS", "UC14 Drug")))
 
 pcr_plot(uc14) +
@@ -141,10 +142,10 @@ Umm...it doesn't look any different. And that's what we should expect: the plott
 
 ```{r}
 wise <- uc14 |> 
-  filter(well_position != "C24") %>%
+  filter(well_position != "C24") |>
   pcr_rq("UC14 PBS")
 
 pcr_plot(wise)
 ```
 
-Now, we see that the ZEB1 values have updated to reflect the ommision we made.
+Now, we see that the ZEB1 values have updated to reflect the omission we made.