Skip to content

Commit

Permalink
Merge pull request #20 from nuno-agostinho/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
nuno-agostinho authored May 20, 2021
2 parents 9f8a667 + c46c25c commit e8fe1b6
Show file tree
Hide file tree
Showing 55 changed files with 2,292 additions and 952 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,7 @@ docs/
expressionDrugSensitivity*.rds
molecular_descriptors*.rds
ENCODEmetadata.RDS
*.txt
*.xls
*.h5
.DS_Store
7 changes: 5 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: cTRAP
Title: Identification of candidate causal perturbations from differential gene
expression data
Version: 1.8
Version: 1.10.0
Authors@R: c(
person(c("Bernardo", "P."), "de Almeida", role="aut"),
person("Nuno", "Saraiva-Agostinho",
Expand All @@ -13,7 +13,7 @@ Description: Compare differential gene expression results with those from known
to infer the molecular causes of the observed difference in gene expression
but also to identify small molecules that could drive or revert specific
transcriptomic alterations.
Depends: R (>= 3.6.0)
Depends: R (>= 4.0)
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Expand All @@ -35,6 +35,7 @@ Imports: biomaRt,
data.table,
dplyr,
DT,
fastmatch,
fgsea,
ggplot2,
ggrepel,
Expand All @@ -43,6 +44,7 @@ Imports: biomaRt,
httr,
limma,
methods,
parallel,
pbapply,
R.utils,
readxl,
Expand All @@ -51,6 +53,7 @@ Imports: biomaRt,
scales,
shiny,
stats,
tibble,
tools,
utils
VignetteBuilder: knitr
Expand Down
22 changes: 21 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# Generated by roxygen2: do not edit by hand

S3method("[",expressionDrugSensitivityAssociation)
S3method("[",perturbationChanges)
S3method(as.table,referenceComparison)
S3method(dim,expressionDrugSensitivityAssociation)
S3method(dim,perturbationChanges)
S3method(dimnames,expressionDrugSensitivityAssociation)
S3method(dimnames,perturbationChanges)
S3method(plot,perturbationChanges)
S3method(plot,referenceComparison)
Expand All @@ -15,6 +18,7 @@ export(getCMapConditions)
export(getCMapPerturbationTypes)
export(launchCMapDataLoader)
export(launchDiffExprLoader)
export(launchDrugSetEnrichmentAnalyser)
export(launchMetadataViewer)
export(launchResultPlotter)
export(listExpressionDrugSensitivityAssociation)
Expand All @@ -33,9 +37,11 @@ export(prepareDrugSets)
export(prepareENCODEgeneExpression)
export(rankSimilarPerturbations)
importFrom(DT,DTOutput)
importFrom(DT,dataTableProxy)
importFrom(DT,datatable)
importFrom(DT,formatSignif)
importFrom(DT,renderDT)
importFrom(DT,replaceData)
importFrom(R.utils,capitalize)
importFrom(R.utils,gunzip)
importFrom(R.utils,isGzipped)
Expand All @@ -54,6 +60,7 @@ importFrom(data.table,fread)
importFrom(data.table,setkeyv)
importFrom(data.table,transpose)
importFrom(dplyr,bind_rows)
importFrom(fastmatch,fmatch)
importFrom(fgsea,calcGseaStat)
importFrom(fgsea,fgsea)
importFrom(ggplot2,aes)
Expand Down Expand Up @@ -107,22 +114,32 @@ importFrom(limma,topTable)
importFrom(limma,voom)
importFrom(methods,is)
importFrom(methods,new)
importFrom(parallel,mclapply)
importFrom(pbapply,closepb)
importFrom(pbapply,getpb)
importFrom(pbapply,pbapply)
importFrom(pbapply,pblapply)
importFrom(pbapply,setpb)
importFrom(pbapply,startpb)
importFrom(readxl,read_excel)
importFrom(reshape2,dcast)
importFrom(rhdf5,h5closeAll)
importFrom(rhdf5,h5createDataset)
importFrom(rhdf5,h5createFile)
importFrom(rhdf5,h5ls)
importFrom(rhdf5,h5read)
importFrom(rhdf5,h5readAttributes)
importFrom(rhdf5,h5write)
importFrom(scales,extended_breaks)
importFrom(shiny,HTML)
importFrom(shiny,NS)
importFrom(shiny,actionButton)
importFrom(shiny,brushedPoints)
importFrom(shiny,checkboxGroupInput)
importFrom(shiny,column)
importFrom(shiny,div)
importFrom(shiny,fluidRow)
importFrom(shiny,h3)
importFrom(shiny,helpText)
importFrom(shiny,hr)
importFrom(shiny,isolate)
importFrom(shiny,mainPanel)
Expand All @@ -133,6 +150,7 @@ importFrom(shiny,observeEvent)
importFrom(shiny,plotOutput)
importFrom(shiny,reactive)
importFrom(shiny,renderPlot)
importFrom(shiny,renderUI)
importFrom(shiny,runApp)
importFrom(shiny,selectizeInput)
importFrom(shiny,shinyApp)
Expand All @@ -142,6 +160,7 @@ importFrom(shiny,stopApp)
importFrom(shiny,tabPanel)
importFrom(shiny,tagList)
importFrom(shiny,tags)
importFrom(shiny,uiOutput)
importFrom(shiny,updateSelectizeInput)
importFrom(stats,aggregate)
importFrom(stats,cor)
Expand All @@ -151,6 +170,7 @@ importFrom(stats,na.omit)
importFrom(stats,p.adjust)
importFrom(stats,quantile)
importFrom(stats,setNames)
importFrom(tibble,tibble)
importFrom(tools,file_ext)
importFrom(tools,file_path_sans_ext)
importFrom(utils,askYesNo)
Expand Down
59 changes: 59 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,62 @@
# cTRAP 1.10.0 (18 March, 2021)

## Improvements to graphical interface functions:

* New `launchDrugSetEnrichmentAnalyser()` function to analyse drug set
enrichment and visualize the respective results
* `launchCMapDataLoader()`:
- Now allows loading multiple CMap perturbation types simultaneously
- Keep selected timepoint, dosage and cell line options when selecting
another perturbation type
- Add bubble plot of CMap perturbation types
* `launchResultPlotter()`:
- Now allows viewing tables below specific plots and using drag-and-select on
those plots to filter data in those same tables
- When plotting targeting drugs and similar perturbations, update available
columns and correctly use user-selected column to plot
* `launchMetadataViewer()` now correctly parses values from `Input`
attributes as numeric

## Major changes

* `prepareCMapPerturbations()`: directly set perturbation type, cell line,
timepoint and dosage conditions as arguments
* `rankSimilarPerturbations()` and `predictTargetingDrugs()`:
- Avoid redundant loading of data chunks, slightly decreasing run time
- Lower memory footprint when using NCI60's gene expression and drug
sensitivity association (now available in HDF5 files) by loading and
processing data in chunks
- Faster GSEA-based score calculation (up to 4-7 times faster)
- New `threads` argument allows setting the number of parallel threads (not
supported on Windows)
- New `chunkGiB` argument allows setting the size of data chunks when reading
from supported HDF5 files (decreases peak RAM usage)
- New `verbose` argument allows printing additional details in the console
* `prepareDrugSets()`: allow greater control over the creation of bins based on
numeric columns, including setting the maximum number of bins per column and
the minimum bin size
* `analyseDrugSetEnrichment()` and `plotDrugSetEnrichment()`: allow selecting
the columns to use when comparing compound identifiers between datasets

## Bug fixes and minor changes

* `filterCMapMetadata()`: allow filtering CMap metadata based on multiple
perturbation types
* `prepareDrugSets()`: fix issues with 3D descriptors containing missing values
* `plot()`:
- Fix wrong labels when plotting `targetingDrugs` objects
- Avoid printing "NA" in labels identifying metadata for perturbations
* `plotTargetingDrugsVSsimilarPerturbations()`:
- Fix highlighting of plot points depending on whether drug activity is
directly proportional to drug sensitivity
- Include rug plot
* When subsetting a `perturbationChanges` or an
`expressionDrugSensitivityAssociation` object, passing only one argument
extracts its columns as in previous versions of cTRAP (similarly to when
subsetting a `data.frame`)
* `analyseDrugSetEnrichment()`: in the resulting table, the first column was
renamed from `pathway` to `descriptor`

# cTRAP 1.8 (23 October, 2020)

## Interactive functions for loading data and analysing results
Expand Down
67 changes: 24 additions & 43 deletions R/CMap.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ prepareCMapZscores <- function(file, zscoresID=NULL) {
#' @param data \code{perturbationChanges} object
#' @param inheritAttrs Boolean: convert to \code{perturbationChanges} object and
#' inherit attributes from \code{data}?
#' @param verbose Boolean: print messages?
#' @param verbose Boolean: print additional details?
#'
#' @family functions related with the ranking of CMap perturbations
#' @return Matrix containing CMap perturbation z-scores (genes as rows,
Expand Down Expand Up @@ -339,8 +339,8 @@ filterCMapMetadata <- function(metadata, cellLine=NULL, timepoint=NULL,

if (!is.null(perturbationType)) {
filter$perturbationType <- perturbationType
tmp <- getCMapPerturbationTypes()[perturbationType]
if (!is.na(tmp)) perturbationType <- tmp
tmp <- getCMapPerturbationTypes(control=TRUE)[perturbationType]
if (!all(is.na(tmp))) perturbationType <- tmp
metadata <- metadata[metadata$pert_type %in% perturbationType, ]
}
if (length(filter) > 0) attr(metadata, "filter") <- filter
Expand All @@ -358,10 +358,11 @@ filterCMapMetadata <- function(metadata, cellLine=NULL, timepoint=NULL,
#' filepath to load data from file)
#' @param compoundInfo Data frame (CMap compound info) or character (respective
#' filepath to load data from file)
#' @inheritDotParams filterCMapMetadata
#' @param loadZscores Boolean: load matrix of perturbation z-scores? Not
#' recommended in systems with less than 30GB of RAM; if \code{FALSE},
#' downstream functions will read the file chunk by chunk (this strategy
#' impacts performance at the expense of a much lower memory footprint)
#' downstream functions will load and process the file directly chunk by
#' chunk, resulting in a lower memory footprint
#'
#' @importFrom R.utils gunzip
#' @importFrom methods new
Expand All @@ -377,8 +378,11 @@ filterCMapMetadata <- function(metadata, cellLine=NULL, timepoint=NULL,
#' prepareCMapPerturbations(metadata, "cmapZscores.gctx", "cmapGeneInfo.txt")
#' }
prepareCMapPerturbations <- function(metadata, zscores, geneInfo,
compoundInfo=NULL, loadZscores=FALSE) {
compoundInfo=NULL, ...,
loadZscores=FALSE) {
if (is.character(metadata)) metadata <- loadCMapData(metadata, "metadata")
if (!is.null(list(...))) metadata <- filterCMapMetadata(metadata, ...)

if (is.character(geneInfo)) geneInfo <- loadCMapData(geneInfo, "geneInfo")
if (is.character(zscores)) {
zscores <- loadCMapData(zscores, "zscores", metadata$sig_id)
Expand Down Expand Up @@ -406,7 +410,7 @@ prepareCMapPerturbations <- function(metadata, zscores, geneInfo,
# Display summary message of loaded perturbations
filters <- attr(metadata, "filter")
summaryMsg <- sprintf(
"\nSummary: %s CMap perturbations measured across %s genes",
"\nSummary: %s CMap perturbations and %s genes",
ncol(zscores), nrow(zscores))
if (!is.null(filters)) {
filterNames <- c("cellLine"="Cell lines",
Expand Down Expand Up @@ -434,6 +438,8 @@ prepareCMapPerturbations <- function(metadata, zscores, geneInfo,
#' @param metadata Data table: \code{data} metadata
#' @inheritParams rankSimilarPerturbations
#'
#' @importFrom dplyr bind_rows
#'
#' @return A list with two items:
#' \describe{
#' \item{\code{data}}{input \code{data} with extra rows containing cell line
Expand Down Expand Up @@ -492,24 +498,16 @@ calculateCellLineMean <- function(data, cellLine, metadata, rankPerCellLine) {
return(res)
}

#' Rank CMap perturbations' similarity to a differential expression profile
#' Rank differential expression profile against CMap perturbations by similarity
#'
#' Compare differential expression results against CMap perturbations.
#'
#' @param method Character: comparison method (\code{spearman}, \code{pearson}
#' or \code{gsea}; multiple methods may be selected at once)
#' @inherit rankAgainstReference
#' @param perturbations \code{perturbationChanges} object: CMap perturbations
#' (check \code{\link{prepareCMapPerturbations}})
#' @inheritParams compareAgainstReference
#'
#' @section GSEA score:
#' Weighted connectivity scores (WTCS) are calculated when \code{method
#' = "gsea"} (\url{https://clue.io/connectopedia/cmap_algorithms}).
#' (check \code{\link{prepareCMapPerturbations}()})
#'
#' @aliases compareAgainstCMap
#' @family functions related with the ranking of CMap perturbations
#' @return Data table with correlation or GSEA results comparing differential
#' expression values with those associated with CMap perturbations
#' @export
#'
#' @examples
Expand Down Expand Up @@ -539,13 +537,15 @@ calculateCellLineMean <- function(data, cellLine, metadata, rankPerCellLine) {
rankSimilarPerturbations <- function(input, perturbations,
method=c("spearman", "pearson", "gsea"),
geneSize=150, cellLineMean="auto",
rankPerCellLine=FALSE) {
rankPerCellLine=FALSE, threads=1,
chunkGiB=1, verbose=FALSE) {
metadata <- attr(perturbations, "metadata")
cellLines <- length(unique(metadata$cell_id))
rankedPerts <- compareAgainstReference(
rankedPerts <- rankAgainstReference(
input, perturbations, method=method, geneSize=geneSize,
cellLines=cellLines, cellLineMean=cellLineMean, rankByAscending=TRUE,
rankPerCellLine=rankPerCellLine)
rankPerCellLine=rankPerCellLine, threads=threads, chunkGiB=chunkGiB,
verbose=verbose)

# Relabel the "identifier" column name to be more descriptive
pertType <- unique(metadata$pert_type)
Expand Down Expand Up @@ -575,7 +575,7 @@ rankSimilarPerturbations <- function(input, perturbations,
#' @param perturbation Character (perturbation identifier) or a
#' \code{similarPerturbations} table (from which the respective perturbation
#' identifiers are retrieved)
#' @inheritParams compareAgainstReferencePerMethod
#' @inheritParams compareWithAllMethods
#' @inheritParams plot.referenceComparison
#' @param title Character: plot title (if \code{NULL}, the default title depends
#' on the context; ignored when plotting multiple perturbations)
Expand Down Expand Up @@ -644,7 +644,7 @@ plotPerturbationChanges <- function(x, perturbation, input,
if (!isSummaryPert) cellLinePerts <- perturbation
names(cellLinePerts) <- cellLinePerts
if (is.character(x)) {
zscores <- loadCMapZscores(x[cellLinePerts], verbose=FALSE)
zscores <- loadCMapZscores(x[ , cellLinePerts], verbose=FALSE)
} else {
zscores <- unclass(x)
}
Expand Down Expand Up @@ -685,26 +685,7 @@ plotPerturbationChanges <- function(x, perturbation, input,
#' @export
`[.perturbationChanges` <- function(x, i, j, drop=FALSE, ...) {
if (is.character(x)) {
out <- x
nargs <- nargs() - length(list(...)) - 1

hasI <- !missing(i)
hasJ <- !missing(j)
genes <- attr(out, "genes")
perts <- attr(out, "perturbations")
# Allow to search based on characters
names(genes) <- genes
names(perts) <- perts

if (nargs == 2) {
if (hasI) genes <- genes[i]
if (hasJ) perts <- perts[j]
} else if (hasI && nargs == 1) {
perts <- perts[i]
}
if (anyNA(perts) || anyNA(genes)) stop("subscript out of bounds")
attr(out, "genes") <- unname(genes)
attr(out, "perturbations") <- unname(perts)
out <- subsetData(x, i, j, "genes", "perturbations", nargs(), ...)
} else {
out <- NextMethod("[", drop=drop)
}
Expand Down
5 changes: 2 additions & 3 deletions R/cmapR_subset.R
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,7 @@ readGctxMeta <- function(gctx_path, dimension="row", ids=NULL,
for (i in seq(length(fields))) {
field <- fields[i]
# Remove any trailing spaces and cast as vector
annots[,i] <- as.vector(gsub("\\s*$", "", raw_annots[[field]],
perl=TRUE))
annots[,i] <- as.vector(trimws(raw_annots[[field]], which="right"))
}
annots <- fix.datatypes(annots)
# Subset to the provided set of identifiers, if any
Expand Down Expand Up @@ -142,7 +141,7 @@ readGctxIds <- function(gctx_path, dimension="row") {
} else {
name <- "0/META/COL/id"
}
ids <- gsub("\\s*$", "", h5read(gctx_path, name=name), perl=TRUE)
ids <- trimws(h5read(gctx_path, name=name), which="right")
ids <- as.character(ids)
return(ids)
}
Expand Down
Loading

0 comments on commit e8fe1b6

Please sign in to comment.