Merge pull request #20 from nuno-agostinho/dev

Dev
nuno-agostinho · May 20, 2021 · e8fe1b6 · e8fe1b6
2 parents 9f8a667 + c46c25c
commit e8fe1b6
Show file tree

Hide file tree

Showing 55 changed files with 2,292 additions and 952 deletions.
diff --git a/.gitignore b/.gitignore
@@ -21,3 +21,7 @@ docs/
 expressionDrugSensitivity*.rds
 molecular_descriptors*.rds
 ENCODEmetadata.RDS
+*.txt
+*.xls
+*.h5
+.DS_Store
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: cTRAP
 Title: Identification of candidate causal perturbations from differential gene 
     expression data
-Version: 1.8
+Version: 1.10.0
 Authors@R: c(
         person(c("Bernardo", "P."), "de Almeida", role="aut"),
         person("Nuno", "Saraiva-Agostinho",
@@ -13,7 +13,7 @@ Description: Compare differential gene expression results with those from known
     to infer the molecular causes of the observed difference in gene expression 
     but also to identify small molecules that could drive or revert specific 
     transcriptomic alterations.
-Depends: R (>= 3.6.0)
+Depends: R (>= 4.0)
 License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true
@@ -35,6 +35,7 @@ Imports: biomaRt,
     data.table,
     dplyr,
     DT,
+    fastmatch,
     fgsea,
     ggplot2,
     ggrepel,
@@ -43,6 +44,7 @@ Imports: biomaRt,
     httr,
     limma,
     methods,
+    parallel,
     pbapply,
     R.utils,
     readxl,
@@ -51,6 +53,7 @@ Imports: biomaRt,
     scales,
     shiny,
     stats,
+    tibble,
     tools,
     utils
 VignetteBuilder: knitr

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,8 +1,11 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method("[",expressionDrugSensitivityAssociation)
 S3method("[",perturbationChanges)
 S3method(as.table,referenceComparison)
+S3method(dim,expressionDrugSensitivityAssociation)
 S3method(dim,perturbationChanges)
+S3method(dimnames,expressionDrugSensitivityAssociation)
 S3method(dimnames,perturbationChanges)
 S3method(plot,perturbationChanges)
 S3method(plot,referenceComparison)
@@ -15,6 +18,7 @@ export(getCMapConditions)
 export(getCMapPerturbationTypes)
 export(launchCMapDataLoader)
 export(launchDiffExprLoader)
+export(launchDrugSetEnrichmentAnalyser)
 export(launchMetadataViewer)
 export(launchResultPlotter)
 export(listExpressionDrugSensitivityAssociation)
@@ -33,9 +37,11 @@ export(prepareDrugSets)
 export(prepareENCODEgeneExpression)
 export(rankSimilarPerturbations)
 importFrom(DT,DTOutput)
+importFrom(DT,dataTableProxy)
 importFrom(DT,datatable)
 importFrom(DT,formatSignif)
 importFrom(DT,renderDT)
+importFrom(DT,replaceData)
 importFrom(R.utils,capitalize)
 importFrom(R.utils,gunzip)
 importFrom(R.utils,isGzipped)
@@ -54,6 +60,7 @@ importFrom(data.table,fread)
 importFrom(data.table,setkeyv)
 importFrom(data.table,transpose)
 importFrom(dplyr,bind_rows)
+importFrom(fastmatch,fmatch)
 importFrom(fgsea,calcGseaStat)
 importFrom(fgsea,fgsea)
 importFrom(ggplot2,aes)
@@ -107,22 +114,32 @@ importFrom(limma,topTable)
 importFrom(limma,voom)
 importFrom(methods,is)
 importFrom(methods,new)
+importFrom(parallel,mclapply)
 importFrom(pbapply,closepb)
 importFrom(pbapply,getpb)
-importFrom(pbapply,pbapply)
 importFrom(pbapply,pblapply)
 importFrom(pbapply,setpb)
 importFrom(pbapply,startpb)
 importFrom(readxl,read_excel)
 importFrom(reshape2,dcast)
+importFrom(rhdf5,h5closeAll)
+importFrom(rhdf5,h5createDataset)
+importFrom(rhdf5,h5createFile)
+importFrom(rhdf5,h5ls)
 importFrom(rhdf5,h5read)
+importFrom(rhdf5,h5readAttributes)
+importFrom(rhdf5,h5write)
 importFrom(scales,extended_breaks)
+importFrom(shiny,HTML)
 importFrom(shiny,NS)
 importFrom(shiny,actionButton)
 importFrom(shiny,brushedPoints)
 importFrom(shiny,checkboxGroupInput)
 importFrom(shiny,column)
+importFrom(shiny,div)
 importFrom(shiny,fluidRow)
+importFrom(shiny,h3)
+importFrom(shiny,helpText)
 importFrom(shiny,hr)
 importFrom(shiny,isolate)
 importFrom(shiny,mainPanel)
@@ -133,6 +150,7 @@ importFrom(shiny,observeEvent)
 importFrom(shiny,plotOutput)
 importFrom(shiny,reactive)
 importFrom(shiny,renderPlot)
+importFrom(shiny,renderUI)
 importFrom(shiny,runApp)
 importFrom(shiny,selectizeInput)
 importFrom(shiny,shinyApp)
@@ -142,6 +160,7 @@ importFrom(shiny,stopApp)
 importFrom(shiny,tabPanel)
 importFrom(shiny,tagList)
 importFrom(shiny,tags)
+importFrom(shiny,uiOutput)
 importFrom(shiny,updateSelectizeInput)
 importFrom(stats,aggregate)
 importFrom(stats,cor)
@@ -151,6 +170,7 @@ importFrom(stats,na.omit)
 importFrom(stats,p.adjust)
 importFrom(stats,quantile)
 importFrom(stats,setNames)
+importFrom(tibble,tibble)
 importFrom(tools,file_ext)
 importFrom(tools,file_path_sans_ext)
 importFrom(utils,askYesNo)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,62 @@
+# cTRAP 1.10.0 (18 March, 2021)
+
+## Improvements to graphical interface functions:
+
+* New `launchDrugSetEnrichmentAnalyser()` function to analyse drug set
+enrichment and visualize respective results
+* `launchCMapDataLoader()`:
+    - Now allows loading multiple CMap perturbation types simultaneously
+    - Keep selected timepoint, dosage and cell line options when selecting
+    another perturbation type
+    - Add bubble plot of CMap perturbation types
+* `launchResultPlotter()`:
+    - Now allows viewing tables below specific plots and using drag-and-select
+    on those plots to filter data in those same tables
+    - When plotting targeting drugs and similar perturbations, update available
+    columns and correctly use user-selected column to plot
+* `launchMetadataViewer()` now correctly parses values from `Input`
+attributes as numeric
+
+## Major changes
+
+* `prepareCMapPerturbations()`: directly set perturbation type, cell line,
+timepoint and dosage conditions as arguments
+* `rankSimilarPerturbations()` and `predictTargetingDrugs()`:
+    - Avoid redundant loading of data chunks, slightly decreasing run time
+    - Lower memory footprint when using NCI60's gene expression and drug
+    sensitivity association (now available in HDF5 files) by loading and
+    processing data in chunks
+    - Faster GSEA-based score calculation (up to 4-7 times faster)
+    - New `threads` argument allows setting the number of parallel threads
+    (not supported on Windows)
+    - New `chunkGiB` argument allows setting the size of data chunks when
+    reading from supported HDF5 files (decreases peak RAM usage)
+    - New `verbose` argument allows printing additional details in the console
+* `prepareDrugSets()`: allow greater control on the creation of bins based on
+numeric columns, including the setting of maximum number of bins per column and
+minimum bin size
+* `analyseDrugSetEnrichment()` and `plotDrugSetEnrichment()`: allow selecting
+the columns to use when comparing compound identifiers between datasets
+
+## Bug fixes and minor changes
+
+* `filterCMapMetadata()`: allow filtering CMap metadata based on multiple
+perturbation types
+* `prepareDrugSets()`: fix issues with 3D descriptors containing missing values
+* `plot()`:
+    - Fix wrong labels when plotting `targetingDrugs` objects
+    - Avoid printing "NA" in labels identifying metadata for perturbations
+* `plotTargetingDrugsVSsimilarPerturbations()`:
+    - Fix highlighting of plot points depending on whether drug activity is
+    directly proportional to drug sensitivity
+    - Include rug plot
+* When subsetting a `perturbationChanges` or an
+`expressionDrugSensitivityAssociation` object, passing only one argument
+extracts its columns as in previous versions of cTRAP (similarly to when
+subsetting a `data.frame`)
+* `analyseDrugSetEnrichment()`: in the resulting table, the first column was
+renamed from `pathway` to `descriptor`
+
 # cTRAP 1.8 (23 October, 2020)
 
 ## Interactive functions for loading data and analysing results

diff --git a/R/CMap.R b/R/CMap.R
@@ -103,7 +103,7 @@ prepareCMapZscores <- function(file, zscoresID=NULL) {
 #' @param data \code{perturbationChanges} object
 #' @param inheritAttrs Boolean: convert to \code{perturbationChanges} object and
 #'   inherit attributes from \code{data}?
-#' @param verbose Boolean: print messages?
+#' @param verbose Boolean: print additional details?
 #'
 #' @family functions related with the ranking of CMap perturbations
 #' @return Matrix containing CMap perturbation z-scores (genes as rows,
@@ -339,8 +339,8 @@ filterCMapMetadata <- function(metadata, cellLine=NULL, timepoint=NULL,
 
     if (!is.null(perturbationType)) {
         filter$perturbationType <- perturbationType
-        tmp <- getCMapPerturbationTypes()[perturbationType]
-        if (!is.na(tmp)) perturbationType <- tmp
+        tmp <- getCMapPerturbationTypes(control=TRUE)[perturbationType]
+        if (!all(is.na(tmp))) perturbationType <- tmp
         metadata <- metadata[metadata$pert_type %in% perturbationType, ]
     }
     if (length(filter) > 0) attr(metadata, "filter") <- filter
@@ -358,10 +358,11 @@ filterCMapMetadata <- function(metadata, cellLine=NULL, timepoint=NULL,
 #'   filepath to load data from file)
 #' @param compoundInfo Data frame (CMap compound info) or character (respective
 #'   filepath to load data from file)
+#' @inheritDotParams filterCMapMetadata
 #' @param loadZscores Boolean: load matrix of perturbation z-scores? Not
 #'   recommended in systems with less than 30GB of RAM; if \code{FALSE},
-#'   downstream functions will read the file chunk by chunk (this strategy
-#'   impacts performance at the expense of a much lower memory footprint)
+#'   downstream functions will load and process the file directly chunk by
+#'   chunk, resulting in a lower memory footprint
 #'
 #' @importFrom R.utils gunzip
 #' @importFrom methods new
@@ -377,8 +378,11 @@ filterCMapMetadata <- function(metadata, cellLine=NULL, timepoint=NULL,
 #' prepareCMapPerturbations(metadata, "cmapZscores.gctx", "cmapGeneInfo.txt")
 #' }
 prepareCMapPerturbations <- function(metadata, zscores, geneInfo,
-                                     compoundInfo=NULL, loadZscores=FALSE) {
+                                     compoundInfo=NULL, ...,
+                                     loadZscores=FALSE) {
     if (is.character(metadata)) metadata <- loadCMapData(metadata, "metadata")
+    if (!is.null(list(...))) metadata <- filterCMapMetadata(metadata, ...)
+
     if (is.character(geneInfo)) geneInfo <- loadCMapData(geneInfo, "geneInfo")
     if (is.character(zscores)) {
         zscores <- loadCMapData(zscores, "zscores", metadata$sig_id)
@@ -406,7 +410,7 @@ prepareCMapPerturbations <- function(metadata, zscores, geneInfo,
     # Display summary message of loaded perturbations
     filters <- attr(metadata, "filter")
     summaryMsg <- sprintf(
-        "\nSummary: %s CMap perturbations measured across %s genes",
+        "\nSummary: %s CMap perturbations and %s genes",
         ncol(zscores), nrow(zscores))
     if (!is.null(filters)) {
         filterNames <- c("cellLine"="Cell lines",
@@ -434,6 +438,8 @@ prepareCMapPerturbations <- function(metadata, zscores, geneInfo,
 #' @param metadata Data table: \code{data} metadata
 #' @inheritParams rankSimilarPerturbations
 #'
+#' @importFrom dplyr bind_rows
+#'
 #' @return A list with two items:
 #' \describe{
 #' \item{\code{data}}{input \code{data} with extra rows containing cell line
@@ -492,24 +498,16 @@ calculateCellLineMean <- function(data, cellLine, metadata, rankPerCellLine) {
     return(res)
 }
 
-#' Rank CMap perturbations' similarity to a differential expression profile
+#' Rank differential expression profile against CMap perturbations by similarity
 #'
 #' Compare differential expression results against CMap perturbations.
 #'
-#' @param method Character: comparison method (\code{spearman}, \code{pearson}
-#'   or \code{gsea}; multiple methods may be selected at once)
+#' @inherit rankAgainstReference
 #' @param perturbations \code{perturbationChanges} object: CMap perturbations
-#'   (check \code{\link{prepareCMapPerturbations}})
-#' @inheritParams compareAgainstReference
-#'
-#' @section GSEA score:
-#' Weighted connectivity scores (WTCS) are calculated when \code{method
-#'   = "gsea"} (\url{https://clue.io/connectopedia/cmap_algorithms}).
+#'   (check \code{\link{prepareCMapPerturbations}()})
 #'
 #' @aliases compareAgainstCMap
 #' @family functions related with the ranking of CMap perturbations
-#' @return Data table with correlation or GSEA results comparing differential
-#'   expression values with those associated with CMap perturbations
 #' @export
 #'
 #' @examples
@@ -539,13 +537,15 @@ calculateCellLineMean <- function(data, cellLine, metadata, rankPerCellLine) {
 rankSimilarPerturbations <- function(input, perturbations,
                                      method=c("spearman", "pearson", "gsea"),
                                      geneSize=150, cellLineMean="auto",
-                                     rankPerCellLine=FALSE) {
+                                     rankPerCellLine=FALSE, threads=1,
+                                     chunkGiB=1, verbose=FALSE) {
     metadata  <- attr(perturbations, "metadata")
     cellLines <- length(unique(metadata$cell_id))
-    rankedPerts <- compareAgainstReference(
+    rankedPerts <- rankAgainstReference(
         input, perturbations, method=method, geneSize=geneSize,
         cellLines=cellLines, cellLineMean=cellLineMean, rankByAscending=TRUE,
-        rankPerCellLine=rankPerCellLine)
+        rankPerCellLine=rankPerCellLine, threads=threads, chunkGiB=chunkGiB,
+        verbose=verbose)
 
     # Relabel the "identifier" column name to be more descriptive
     pertType <- unique(metadata$pert_type)
@@ -575,7 +575,7 @@ rankSimilarPerturbations <- function(input, perturbations,
 #' @param perturbation Character (perturbation identifier) or a
 #'   \code{similarPerturbations} table (from which the respective perturbation
 #'   identifiers are retrieved)
-#' @inheritParams compareAgainstReferencePerMethod
+#' @inheritParams compareWithAllMethods
 #' @inheritParams plot.referenceComparison
 #' @param title Character: plot title (if \code{NULL}, the default title depends
 #'   on the context; ignored when plotting multiple perturbations)
@@ -644,7 +644,7 @@ plotPerturbationChanges <- function(x, perturbation, input,
     if (!isSummaryPert) cellLinePerts <- perturbation
     names(cellLinePerts) <- cellLinePerts
     if (is.character(x)) {
-        zscores <- loadCMapZscores(x[cellLinePerts], verbose=FALSE)
+        zscores <- loadCMapZscores(x[ , cellLinePerts], verbose=FALSE)
     } else {
         zscores <- unclass(x)
     }
@@ -685,26 +685,7 @@ plotPerturbationChanges <- function(x, perturbation, input,
 #' @export
 `[.perturbationChanges` <- function(x, i, j, drop=FALSE, ...) {
     if (is.character(x)) {
-        out <- x
-        nargs <- nargs() - length(list(...)) - 1
-
-        hasI <- !missing(i)
-        hasJ <- !missing(j)
-        genes <- attr(out, "genes")
-        perts <- attr(out, "perturbations")
-        # Allow to search based on characters
-        names(genes) <- genes
-        names(perts) <- perts
-
-        if (nargs == 2) {
-            if (hasI) genes <- genes[i]
-            if (hasJ) perts <- perts[j]
-        } else if (hasI && nargs == 1) {
-            perts <- perts[i]
-        }
-        if (anyNA(perts) || anyNA(genes)) stop("subscript out of bounds")
-        attr(out, "genes") <- unname(genes)
-        attr(out, "perturbations") <- unname(perts)
+        out <- subsetData(x, i, j, "genes", "perturbations", nargs(), ...)
     } else {
         out <- NextMethod("[", drop=drop)
     }

diff --git a/R/cmapR_subset.R b/R/cmapR_subset.R
@@ -103,8 +103,7 @@ readGctxMeta <- function(gctx_path, dimension="row", ids=NULL,
     for (i in seq(length(fields))) {
         field <- fields[i]
         # Remove any trailing spaces and cast as vector
-        annots[,i] <- as.vector(gsub("\\s*$", "", raw_annots[[field]],
-                                     perl=TRUE))
+        annots[,i] <- as.vector(trimws(raw_annots[[field]], which="right"))
     }
     annots <- fix.datatypes(annots)
     # Subset to the provided set of identifiers, if any
@@ -142,7 +141,7 @@ readGctxIds <- function(gctx_path, dimension="row") {
     } else {
         name <- "0/META/COL/id"
     }
-    ids <- gsub("\\s*$", "", h5read(gctx_path, name=name), perl=TRUE)
+    ids <- trimws(h5read(gctx_path, name=name), which="right")
     ids <- as.character(ids)
     return(ids)
 }