1.0.1

nuno-agostinho · Nov 4, 2018 · ecb24eb · ecb24eb
2 parents 2611ec8 + 74e338b
commit ecb24eb
Show file tree

Hide file tree

Showing 36 changed files with 179 additions and 90 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,10 +1,12 @@
 Package: cTRAP
-Title: Identification of potential transcriptome perturbations
-Version: 0.99.12
-Authors@R: c(person("Nuno", "Agostinho", 
-        email="[email protected]", role=c("aut", "cre")),
-        person("Bernardo", "de Almeida", role="aut"),
-        person(c("Nuno", "Luís"), "Barbosa-Morais", role=c("aut", "led")))
+Title: Identification of candidate causal perturbations from differential gene 
+    expression data
+Version: 1.0.1
+Authors@R: c(
+        person(c("Bernardo", "P."), "de Almeida", role="aut"),
+        person("Nuno", "Saraiva-Agostinho",
+            email="[email protected]", role=c("aut", "cre")),
+        person(c("Nuno", "L."), "Barbosa-Morais", role=c("aut", "led")))
 Description: Compare differential gene expression results with those from known
     cellular perturbations (such as gene knock-down, overexpression or small 
     molecules) derived from the Connectivity Map. Such analyses allow not only 
@@ -21,15 +23,15 @@ URL: https://github.com/nuno-agostinho/cTRAP
 BugReports: https://github.com/nuno-agostinho/cTRAP/issues
 Suggests: testthat,
     knitr,
-    covr
+    covr,
+    biomaRt
 RoxygenNote: 6.1.0
 Imports: data.table,
     limma,
     stats,
     fgsea,
     pbapply,
     plyr,
-    biomaRt,
     cowplot,
     ggplot2,
     rhdf5,

diff --git a/NAMESPACE b/NAMESPACE
@@ -12,9 +12,6 @@ export(performDifferentialExpression)
 export(plotL1000comparison)
 export(prepareENCODEgeneExpression)
 importFrom(R.utils,gunzip)
-importFrom(biomaRt,getBM)
-importFrom(biomaRt,useDataset)
-importFrom(biomaRt,useMart)
 importFrom(cowplot,plot_grid)
 importFrom(data.table,data.table)
 importFrom(data.table,fread)

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,14 @@
-# 0.0.0.9000
-
-* Added a `NEWS.md` file to track changes to the package.
-
-
-
+# 1.0.1 (2 November, 2018)
+
+* Update title, author names, version and README
+* Remove biomaRt dependency
+* By default, `getL1000conditions` now shows CMap perturbation types except for
+controls
+* Compare against CMap perturbations (`compareAgainstL1000` function):
+    - Remove "_t" from resulting column names (as the t-statistic may or may not
+    be used)
+    - Select p-value adjustment method when performing correlation analyses
+    (Benjamini-Hochberg is set by default)
+* Documentation:
+    - Fix obsolete function calls in function documentation
+    - Hide non-exported functions from reference PDF manual
diff --git a/R/ENCODE.R b/R/ENCODE.R
@@ -4,6 +4,7 @@
 #' @param table Data frame
 #'
 #' @return Character vector with respective experiment identifiers
+#' @keywords internal
 getENCODEcontrols <- function(control, table) {
     sub <- table[table$`Experiment accession` == control, ]
     exp <- sub$`File accession`
@@ -90,6 +91,7 @@ downloadENCODEknockdownMetadata <- function(cellLine=NULL, gene=NULL) {
 #'
 #' @importFrom data.table fread
 #' @return Data table with ENCODE sample data
+#' @keywords internal
 loadENCODEsample <- function (metadata, replicate, control=FALSE) {
     metadata <- metadata[metadata$`Biological replicate(s)` == replicate, ]
 

diff --git a/R/L1000.R b/R/L1000.R
@@ -62,19 +62,25 @@ downloadL1000data <- function(file, type=c("metadata", "geneInfo", "zscores"),
 #'
 #' Downloads metadata if not available
 #'
-#' @param metadata frame: L1000 metadata
+#' @param metadata Data table: L1000 metadata
+#' @param control Boolean: include control perturbation types in the output?
 #'
 #' @return List of conditions in L1000 datasets
 #' @export
 #'
 #' @examples
 #' data("l1000metadata")
-#' # l1000metadata <- downloadL1000metadata("l1000metadata.txt")
+#' # l1000metadata <- downloadL1000data("l1000metadata.txt", "metadata")
 #' getL1000conditions(l1000metadata)
-getL1000conditions <- function(metadata) {
+getL1000conditions <- function(metadata, control=FALSE) {
     pertTypes <- getL1000perturbationTypes()
     pertTypes <- names(pertTypes)[pertTypes %in% unique(metadata$pert_type)]
 
+    if (!control) {
+        pertTypes <- grep("Control", pertTypes, value=TRUE, invert=TRUE,
+                          fixed=TRUE)
+    }
+
     list("Perturbation type"=pertTypes,
          "Cell line"=unique(metadata$cell_id),
          "Dosage"=unique(metadata$pert_idose),
@@ -91,8 +97,9 @@ getL1000conditions <- function(metadata) {
 #' @importFrom stats cor.test p.adjust
 #'
 #' @return Data frame with correlation statistics, p-value and q-value
+#' @keywords internal
 correlatePerCellLine <- function(cellLine, diffExprGenes, perturbations,
-                                 method) {
+                                 method, pAdjustMethod="BH") {
     cat(paste("Comparing with cell line", cellLine), fill=TRUE)
     perturbation <- perturbations[
         , tolower(attr(perturbations, "cellLines")) == tolower(cellLine)]
@@ -113,11 +120,11 @@ correlatePerCellLine <- function(cellLine, diffExprGenes, perturbations,
 
     cor <- sapply(cors, "[[", "estimate")
     pval <- sapply(cors, "[[", "p.value")
-    qval <- p.adjust(pval)
+    qval <- p.adjust(pval, pAdjustMethod)
     names(cor) <- names(pval) <- names(qval) <- colnames(perturbation)
 
     res <- data.table(names(cor), cor, pval, qval)
-    names(res) <- c("genes", sprintf("%s_t_%s_%s", cellLine, method,
+    names(res) <- c("genes", sprintf("%s_%s_%s", cellLine, method,
                                      c("coef", "pvalue", "qvalue")))
     attr(res, "perturbation") <- ref
     return(res)
@@ -135,6 +142,7 @@ correlatePerCellLine <- function(cellLine, diffExprGenes, perturbations,
 #'
 #' @return Data frame containing gene set enrichment analysis (GSEA) results per
 #' cell line
+#' @keywords internal
 performGSAperCellLine <- function(cellLine, perturbations, pathways) {
     perturbation <- perturbations[
         , tolower(attr(perturbations, "cellLines")) == tolower(cellLine)]
@@ -181,14 +189,16 @@ performGSAperCellLine <- function(cellLine, perturbations, pathways) {
 #'   where the names of the vector are gene names and the values are a statistic
 #'   that represents significance and magnitude of differentially expressed
 #'   genes (e.g. t-statistics)
-#' @param geneSize Number: top and bottom differentially expressed genes to use
-#'   for gene set enrichment (GSE); if \code{method} is not \code{gsea}, this
-#'   argument does nothing
 #' @param perturbations \code{l1000perturbations} object: file with L1000 loaded
 #'   perturbations (check \code{\link{loadL1000perturbations}})
 #' @param cellLine Character: cell line(s)
 #' @param method Character: comparison method (\code{spearman}, \code{pearson}
 #'   or \code{gsea})
+#' @param geneSize Number: top and bottom differentially expressed genes to use
+#'   for gene set enrichment (GSE) (only used if \code{method} is \code{gsea})
+#' @param pAdjustMethod Character: method for p-value adjustment (for more
+#'   details, see \code{\link{p.adjust.methods}}; only used if \code{method} is
+#'   \code{spearman} or \code{pearson})
 #'
 #' @importFrom data.table setkeyv
 #' @importFrom piano loadGSC
@@ -216,12 +226,12 @@ performGSAperCellLine <- function(cellLine, perturbations, pathways) {
 #' compareAgainstL1000(diffExprStat, perturbations, cellLine, method="gsea")
 compareAgainstL1000 <- function(diffExprGenes, perturbations, cellLine,
                                 method=c("spearman", "pearson", "gsea"),
-                                geneSize=150) {
+                                geneSize=150, pAdjustMethod="BH") {
     method <- match.arg(method)
     if (method %in% c("spearman", "pearson")) {
-        cellLineRes <- lapply(cellLine, correlatePerCellLine,
-                              diffExprGenes, perturbations, method)
-        colnameSuffix <- sprintf("_t_%s_coef", method)
+        cellLineRes <- lapply(cellLine, correlatePerCellLine, diffExprGenes,
+                              perturbations, method, pAdjustMethod)
+        colnameSuffix <- sprintf("_%s_coef", method)
     } else if (method == "gsea") {
         ordered     <- order(diffExprGenes)
         topGenes    <- names(diffExprGenes)[head(ordered, geneSize)]
@@ -296,7 +306,6 @@ filterL1000metadata <- function(metadata, cellLine=NULL, timepoint=NULL,
 
 #' Load L1000 perturbation data
 #'
-#' @inheritParams downloadL1000metadata
 #' @param metadata Data frame: L1000 Metadata
 #' @param zscores Data frame: GCTX z-scores
 #' @param geneInfo Data frame: L1000 gene info
@@ -309,10 +318,11 @@ filterL1000metadata <- function(metadata, cellLine=NULL, timepoint=NULL,
 #' @export
 #' @examples
 #' if (interactive()) {
-#'   metadata <- downloadL1000metadata("l1000metadata.txt")
+#'   metadata <- downloadL1000data("l1000metadata.txt", "metadata")
 #'   metadata <- filterL1000metadata(metadata, cellLine="HepG2")
-#'   zscores  <- downloadL1000zscores("l1000zscores.gctx", metadata$sig_id)
-#'   geneInfo <- downloadL1000geneInfo("l1000geneInfo.txt")
+#'   zscores  <- downloadL1000data("l1000zscores.gctx", "zscores",
+#'       metadata$sig_id)
+#'   geneInfo <- downloadL1000data("l1000geneInfo.txt", "geneInfo")
 #'   loadL1000perturbations(metadata, zscores, geneInfo)
 #' }
 loadL1000perturbations <- function(metadata, zscores, geneInfo,

diff --git a/R/cTRAP-package.r b/R/cTRAP-package.r
@@ -65,6 +65,7 @@ NULL
 #'
 #' @name counts
 #' @docType data
+#' @keywords internal
 NULL
 
 #' Differential expression's t-statistics sample
@@ -100,6 +101,7 @@ NULL
 #'
 #' @name diffExprStat
 #' @docType data
+#' @keywords internal
 NULL
 
 #' ENCODE metadata sample
@@ -115,6 +117,7 @@ NULL
 #'
 #' @name ENCODEmetadata
 #' @docType data
+#' @keywords internal
 NULL
 
 #' L1000 metadata
@@ -130,6 +133,7 @@ NULL
 #'
 #' @name l1000metadata
 #' @docType data
+#' @keywords internal
 NULL
 
 #' L1000 perturbations sample for knockdown experiments
@@ -170,6 +174,7 @@ NULL
 #'
 #' @name l1000perturbationsKnockdown
 #' @docType data
+#' @keywords internal
 NULL
 
 #' L1000 perturbations sample for small molecules
@@ -213,6 +218,7 @@ NULL
 #'
 #' @name l1000perturbationsSmallMolecules
 #' @docType data
+#' @keywords internal
 NULL
 
 #' Sample of ENCODE samples
@@ -240,4 +246,5 @@ NULL
 #'
 #' @name ENCODEsamples
 #' @docType data
+#' @keywords internal
 NULL
diff --git a/R/cmapR_subset.R b/R/cmapR_subset.R
@@ -20,6 +20,7 @@
 #' @seealso \url{http://clue.io/help} for more information on the GCT format
 #'
 #' @source https://github.com/cmap/cmapR
+#' @keywords internal
 setClass("GCT", representation(
     mat = "matrix", rid = "character", cid = "character", rdesc = "data.frame",
     cdesc = "data.frame", version = "character", src = "character"))
@@ -37,11 +38,10 @@ setClass("GCT", representation(
 #' @details This is a low-level helper function
 #'   which most users will not need to access directly
 #'
-#' @return meta the same data frame with (potentially) adjusted
-#'   column types.
+#' @return meta the same data frame with (potentially) adjusted column types
+#' @keywords internal
 #'
 #' @family GCTX parsing functions
-#' @keywords internal
 #'
 #' @source https://github.com/cmap/cmapR
 fix.datatypes <- function(meta) {
@@ -92,6 +92,7 @@ fix.datatypes <- function(meta) {
 #' @importFrom rhdf5 h5read
 #'
 #' @return a \code{data.frame} of metadata
+#' @keywords internal
 #'
 #' @source https://github.com/cmap/cmapR
 #'
@@ -143,6 +144,7 @@ readGctxMeta <- function(gctx_path, dimension="row", ids=NULL,
 #' @param dimension which ids to read (row or column)
 #'
 #' @return a character vector of row or column ids from the provided file
+#' @keywords internal
 #'
 #' @source https://github.com/cmap/cmapR
 #'
@@ -410,6 +412,7 @@ setMethod("initialize", signature = "GCT", definition = function(
 #' @importFrom utils packageVersion
 #'
 #' @return Closes all open identifiers
+#' @keywords internal
 closeOpenHandles <- function() {
     if(packageVersion('rhdf5') < "2.23.0")
         rhdf5::H5close()
@@ -428,6 +431,7 @@ closeOpenHandles <- function() {
 #'   columns of \code{df}
 #'
 #' @source https://github.com/cmap/cmapR
+#' @keywords internal
 checkColnames <- function(test_names, df, throw_error=TRUE) {
     # check whether test_names are valid names in df
     # throw error if specified
@@ -451,7 +455,6 @@ checkColnames <- function(test_names, df, throw_error=TRUE) {
 #' @return a subset version of \code{df}
 #'
 #' @source https://github.com/cmap/cmapR
-#'
 #' @keywords internal
 subsetToIds <- function(df, ids) {
     # helper function to do a robust df subset

diff --git a/R/plots.R b/R/plots.R
@@ -12,6 +12,7 @@
 #' @importFrom stats na.omit
 #'
 #' @return Grid of plots illustrating a GSEA plot
+#' @keywords internal
 plotGSEA <- function(pathways, stats, title="GSEA plot") {
     # Custom
     axis_title_size <- 12