diff --git a/DESCRIPTION b/DESCRIPTION index 8eccbc2a..abfb91d1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,12 @@ Package: cTRAP -Title: Identification of potential transcriptome perturbations -Version: 0.99.12 -Authors@R: c(person("Nuno", "Agostinho", - email="nunodanielagostinho@gmail.com", role=c("aut", "cre")), - person("Bernardo", "de Almeida", role="aut"), - person(c("Nuno", "Luís"), "Barbosa-Morais", role=c("aut", "led"))) +Title: Identification of candidate causal perturbations from differential gene + expression data +Version: 1.0.1 +Authors@R: c( + person(c("Bernardo", "P."), "de Almeida", role="aut"), + person("Nuno", "Saraiva-Agostinho", + email="nunodanielagostinho@gmail.com", role=c("aut", "cre")), + person(c("Nuno", "L."), "Barbosa-Morais", role=c("aut", "led"))) Description: Compare differential gene expression results with those from known cellular perturbations (such as gene knock-down, overexpression or small molecules) derived from the Connectivity Map. Such analyses allow not only @@ -21,7 +23,8 @@ URL: https://github.com/nuno-agostinho/cTRAP BugReports: https://github.com/nuno-agostinho/cTRAP/issues Suggests: testthat, knitr, - covr + covr, + biomaRt RoxygenNote: 6.1.0 Imports: data.table, limma, @@ -29,7 +32,6 @@ Imports: data.table, fgsea, pbapply, plyr, - biomaRt, cowplot, ggplot2, rhdf5, diff --git a/NAMESPACE b/NAMESPACE index a885fb1e..d93ab781 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,9 +12,6 @@ export(performDifferentialExpression) export(plotL1000comparison) export(prepareENCODEgeneExpression) importFrom(R.utils,gunzip) -importFrom(biomaRt,getBM) -importFrom(biomaRt,useDataset) -importFrom(biomaRt,useMart) importFrom(cowplot,plot_grid) importFrom(data.table,data.table) importFrom(data.table,fread) diff --git a/NEWS.md b/NEWS.md index b6e80b41..210e2a9e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,14 @@ -# 0.0.0.9000 - -* Added a `NEWS.md` file to track changes to the package. - - - +# 1.0.1 (2 November, 2018) + +* Update title, author names, version and README +* Remove biomaRt dependency +* By default, `getL1000conditions` now shows CMap perturbation types except for +controls +* Compare against CMap perturbations (`compareAgainstL1000` function): + - Remove "_t" from resulting column names (as the t-statistic may or may not + be used) + - Select p-value adjustment method when performing correlation analyses + (Benjamini-Hochberg is set by default) +* Documentation: + - Fix obsolete function calls in function documentation + - Hide non-exported functions from reference PDF manual diff --git a/R/ENCODE.R b/R/ENCODE.R index af28c56b..23993253 100644 --- a/R/ENCODE.R +++ b/R/ENCODE.R @@ -4,6 +4,7 @@ #' @param table Data frame #' #' @return Character vector with respective experiment identifiers +#' @keywords internal getENCODEcontrols <- function(control, table) { sub <- table[table$`Experiment accession` == control, ] exp <- sub$`File accession` @@ -90,6 +91,7 @@ downloadENCODEknockdownMetadata <- function(cellLine=NULL, gene=NULL) { #' #' @importFrom data.table fread #' @return Data table with ENCODE sample data +#' @keywords internal loadENCODEsample <- function (metadata, replicate, control=FALSE) { metadata <- metadata[metadata$`Biological replicate(s)` == replicate, ] diff --git a/R/L1000.R b/R/L1000.R index cb98686f..d869cc9d 100644 --- a/R/L1000.R +++ b/R/L1000.R @@ -62,19 +62,25 @@ downloadL1000data <- function(file, type=c("metadata", "geneInfo", "zscores"), #' #' Downloads metadata if not available #' -#' @param metadata frame: L1000 metadata +#' @param metadata Data table: L1000 metadata +#' @param control Boolean: show controls for perturbation types? #' #' @return List of conditions in L1000 datasets #' @export #' #' @examples #' data("l1000metadata") -#' # l1000metadata <- downloadL1000metadata("l1000metadata.txt") +#' # l1000metadata <- downloadL1000data("l1000metadata.txt", "metadata") #' getL1000conditions(l1000metadata) -getL1000conditions <- function(metadata) { +getL1000conditions <- function(metadata, control=FALSE) { pertTypes <- getL1000perturbationTypes() pertTypes <- names(pertTypes)[pertTypes %in% unique(metadata$pert_type)] + if (!control) { + pertTypes <- grep("Control", pertTypes, value=TRUE, invert=TRUE, + fixed=TRUE) + } + list("Perturbation type"=pertTypes, "Cell line"=unique(metadata$cell_id), "Dosage"=unique(metadata$pert_idose), @@ -91,8 +97,9 @@ getL1000conditions <- function(metadata) { #' @importFrom stats cor.test p.adjust #' #' @return Data frame with correlations statistics, p-value and q-value +#' @keywords internal correlatePerCellLine <- function(cellLine, diffExprGenes, perturbations, - method) { + method, pAdjustMethod="BH") { cat(paste("Comparing with cell line", cellLine), fill=TRUE) perturbation <- perturbations[ , tolower(attr(perturbations, "cellLines")) == tolower(cellLine)] @@ -113,11 +120,11 @@ correlatePerCellLine <- function(cellLine, diffExprGenes, perturbations, cor <- sapply(cors, "[[", "estimate") pval <- sapply(cors, "[[", "p.value") - qval <- p.adjust(pval) + qval <- p.adjust(pval, pAdjustMethod) names(cor) <- names(pval) <- names(qval) <- colnames(perturbation) res <- data.table(names(cor), cor, pval, qval) - names(res) <- c("genes", sprintf("%s_t_%s_%s", cellLine, method, + names(res) <- c("genes", sprintf("%s_%s_%s", cellLine, method, c("coef", "pvalue", "qvalue"))) attr(res, "perturbation") <- ref return(res) @@ -135,6 +142,7 @@ correlatePerCellLine <- function(cellLine, diffExprGenes, perturbations, #' #' @return Data frame containing gene set enrichment analysis (GSEA) results per #' cell line +#' @keywords internal performGSAperCellLine <- function(cellLine, perturbations, pathways) { perturbation <- perturbations[ , tolower(attr(perturbations, "cellLines")) == tolower(cellLine)] @@ -181,14 +189,16 @@ performGSAperCellLine <- function(cellLine, perturbations, pathways) { #' where the name of the vector are gene names and the values are a statistic #' that represents significance and magnitude of differentially expressed #' genes (e.g. t-statistics) -#' @param geneSize Number: top and bottom differentially expressed genes to use -#' for gene set enrichment (GSE); if \code{method} is not \code{gsea}, this -#' argument does nothing #' @param perturbations \code{l1000perturbations} object: file with L1000 loaded #' perturbations (check \code{\link{loadL1000perturbations}}) #' @param cellLine Character: cell line(s) #' @param method Character: comparison method (\code{spearman}, \code{pearson} #' or \code{gsea}) +#' @param geneSize Number: top and bottom differentially expressed genes to use +#' for gene set enrichment (GSE) (only used if \code{method} is \code{gsea}) +#' @param pAdjustMethod Character: method for p-value adjustment (for more +#' details, see \code{\link{p.adjust.methods}}; only used if \code{method} is +#' \code{spearman} or \code{pearson}) #' #' @importFrom data.table setkeyv #' @importFrom piano loadGSC @@ -216,12 +226,12 @@ performGSAperCellLine <- function(cellLine, perturbations, pathways) { #' compareAgainstL1000(diffExprStat, perturbations, cellLine, method="gsea") compareAgainstL1000 <- function(diffExprGenes, perturbations, cellLine, method=c("spearman", "pearson", "gsea"), - geneSize=150) { + geneSize=150, pAdjustMethod="BH") { method <- match.arg(method) if (method %in% c("spearman", "pearson")) { - cellLineRes <- lapply(cellLine, correlatePerCellLine, - diffExprGenes, perturbations, method) - colnameSuffix <- sprintf("_t_%s_coef", method) + cellLineRes <- lapply(cellLine, correlatePerCellLine, diffExprGenes, + perturbations, method, pAdjustMethod) + colnameSuffix <- sprintf("_%s_coef", method) } else if (method == "gsea") { ordered <- order(diffExprGenes) topGenes <- names(diffExprGenes)[head(ordered, geneSize)] @@ -296,7 +306,6 @@ filterL1000metadata <- function(metadata, cellLine=NULL, timepoint=NULL, #' Load L1000 perturbation data #' -#' @inheritParams downloadL1000metadata #' @param metadata Data frame: L1000 Metadata #' @param zscores Data frame: GCTX z-scores #' @param geneInfo Data frame: L1000 gene info @@ -309,10 +318,11 @@ filterL1000metadata <- function(metadata, cellLine=NULL, timepoint=NULL, #' @export #' @examples #' if (interactive()) { -#' metadata <- downloadL1000metadata("l1000metadata.txt") +#' metadata <- downloadL1000data("l1000metadata.txt", "metadata") #' metadata <- filterL1000metadata(metadata, cellLine="HepG2") -#' zscores <- downloadL1000zscores("l1000zscores.gctx", metadata$sig_id) -#' geneInfo <- downloadL1000geneInfo("l1000geneInfo.txt") +#' zscores <- downloadL1000data("l1000zscores.gctx", "zscores", +#' metadata$sig_id) +#' geneInfo <- downloadL1000data("l1000geneInfo.txt", "geneInfo") #' loadL1000perturbations(metadata, zscores, geneInfo) #' } loadL1000perturbations <- function(metadata, zscores, geneInfo, diff --git a/R/cTRAP-package.r b/R/cTRAP-package.r index 3ddbeea6..6a2476ce 100644 --- a/R/cTRAP-package.r +++ b/R/cTRAP-package.r @@ -65,6 +65,7 @@ NULL #' #' @name counts #' @docType data +#' @keywords internal NULL #' Differential expression's t-statistics sample @@ -100,6 +101,7 @@ NULL #' #' @name diffExprStat #' @docType data +#' @keywords internal NULL #' ENCODE metadata sample @@ -115,6 +117,7 @@ NULL #' #' @name ENCODEmetadata #' @docType data +#' @keywords internal NULL #' L1000 metadata @@ -130,6 +133,7 @@ NULL #' #' @name l1000metadata #' @docType data +#' @keywords internal NULL #' L1000 perturbations sample for knockdown experiments @@ -170,6 +174,7 @@ NULL #' #' @name l1000perturbationsKnockdown #' @docType data +#' @keywords internal NULL #' L1000 perturbations sample for small molecules @@ -213,6 +218,7 @@ NULL #' #' @name l1000perturbationsSmallMolecules #' @docType data +#' @keywords internal NULL #' Sample of ENCODE samples @@ -240,4 +246,5 @@ NULL #' #' @name ENCODEsamples #' @docType data +#' @keywords internal NULL diff --git a/R/cmapR_subset.R b/R/cmapR_subset.R index 1e80a956..52588cbd 100644 --- a/R/cmapR_subset.R +++ b/R/cmapR_subset.R @@ -20,6 +20,7 @@ #' @seealso \url{http://clue.io/help} for more information on the GCT format #' #' @source https://github.com/cmap/cmapR +#' @keywords internal setClass("GCT", representation( mat = "matrix", rid = "character", cid = "character", rdesc = "data.frame", cdesc = "data.frame", version = "character", src = "character")) @@ -37,11 +38,10 @@ setClass("GCT", representation( #' @details This is a low-level helper function #' which most users will not need to access directly #' -#' @return meta the same data frame with (potentially) adjusted -#' column types. +#' @return meta the same data frame with (potentially) adjusted column types +#' @keywords internal #' #' @family GCTX parsing functions -#' @keywords internal #' #' @source https://github.com/cmap/cmapR fix.datatypes <- function(meta) { @@ -92,6 +92,7 @@ fix.datatypes <- function(meta) { #' @importFrom rhdf5 h5read #' #' @return a \code{data.frame} of metadata +#' @keywords internal #' #' @source https://github.com/cmap/cmapR #' @@ -143,6 +144,7 @@ readGctxMeta <- function(gctx_path, dimension="row", ids=NULL, #' @param dimension which ids to read (row or column) #' #' @return a character vector of row or column ids from the provided file +#' @keywords internal #' #' @source https://github.com/cmap/cmapR #' @@ -410,6 +412,7 @@ setMethod("initialize", signature = "GCT", definition = function( #' @importFrom utils packageVersion #' #' @return Closes all open identifiers +#' @keywords internal closeOpenHandles <- function() { if(packageVersion('rhdf5') < "2.23.0") rhdf5::H5close() @@ -428,6 +431,7 @@ closeOpenHandles <- function() { #' columns of \code{df} #' #' @source https://github.com/cmap/cmapR +#' @keywords internal checkColnames <- function(test_names, df, throw_error=TRUE) { # check whether test_names are valid names in df # throw error if specified @@ -451,7 +455,6 @@ checkColnames <- function(test_names, df, throw_error=TRUE) { #' @return a subset version of \code{df} #' #' @source https://github.com/cmap/cmapR -#' #' @keywords internal subsetToIds <- function(df, ids) { # helper function to do a robust df subset diff --git a/R/plots.R b/R/plots.R index e77d0ed8..99d95770 100644 --- a/R/plots.R +++ b/R/plots.R @@ -12,6 +12,7 @@ #' @importFrom stats na.omit #' #' @return Grid of plots illustrating a GSEA plot +#' @keywords internal plotGSEA <- function(pathways, stats, title="GSEA plot") { # Custom axis_title_size <- 12 diff --git a/R/utils.R b/R/utils.R index 54ab05be..e6af7f1f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -4,13 +4,14 @@ #' #' @importFrom stats model.matrix aggregate #' @importFrom limma voom lmFit eBayes topTable -#' @importFrom biomaRt useDataset getBM useMart #' #' @return Data frame with differential gene expression results between #' knockdown and control #' @export #' #' @examples +#' data("ENCODEsamples") +#' #' ## Download ENCODE metadata for a specific cell line and gene #' # cellLine <- "HepG2" #' # gene <- "EIF4G1" @@ -19,7 +20,6 @@ #' ## Download samples based on filtered ENCODE metadata #' # ENCODEsamples <- downloadENCODEsamples(ENCODEmetadata) #' -#' data("ENCODEsamples") #' counts <- prepareENCODEgeneExpression(ENCODEsamples) #' #' # Remove low coverage (at least 10 counts shared across two samples) @@ -28,11 +28,19 @@ #' filter <- rowSums(counts[ , -c(1, 2)] >= minReads) >= minSamples #' counts <- counts[filter, ] #' -#' # Perform differential gene expression analysis -#' diffExpr <- performDifferentialExpression(counts) +#' ## Convert ENSEMBL identifier to gene symbol +#' # library(biomaRt) +#' # mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl")) +#' # genes <- sapply(strsplit(counts$gene_id, "\\."), `[`, 1) +#' # geneConversion <- getBM(filters="ensembl_gene_id", values=genes, mart=mart, +#' # attributes=c("ensembl_gene_id", "hgnc_symbol")) +#' # counts$gene_id <- geneConversion$hgnc_symbol[ +#' # match(genes, geneConversion$ensembl_gene_id)] +#' +#' ## Perform differential gene expression analysis +#' # diffExpr <- performDifferentialExpression(counts) performDifferentialExpression <- function(counts) { counts <- data.frame(counts) - rownames(counts) <- counts$gene_id # Design matrix Sample_info <- data.frame( @@ -46,27 +54,19 @@ performDifferentialExpression <- function(counts) { normalize.method="quantile") # Fit linear model - fit <- lmFit(voom[ , colnames(voom$E) %in% rownames(design)], - design=design) - ebayes <- eBayes(fit) - results <- topTable(ebayes, coef = 2, number = nrow(ebayes), - sort.by = "logFC", resort.by = "p") - - # Convert to gene symbol - mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl")) - genes <- sapply(strsplit(rownames(results), "\\."), `[`, 1) - geneConversion <- getBM(filters="ensembl_gene_id", values=genes, mart=mart, - attributes=c("ensembl_gene_id", "hgnc_symbol")) - results$Gene_symbol <- geneConversion$hgnc_symbol[ - match(genes, geneConversion$ensembl_gene_id)] + fit <- lmFit(voom[ , colnames(voom$E) %in% rownames(design)], design=design) + ebayes <- eBayes(fit) + results <- topTable(ebayes, coef=2, number=nrow(ebayes), + genelist=counts$gene_id) # Mean-aggregation per gene symbol to compare unique gene knockdowns - results2 <- aggregate(results[ , 1:6], data=results, FUN=mean, - by=list(Gene_symbol=results$Gene_symbol)) + meanAggr <- aggregate(results[ , -1], data=results, FUN=mean, + by=list(Gene_symbol=results$ID)) - # Remove non-matching genes - results2 <- results2[results2$Gene_symbol != "", ] - return(results2) + # Remove non-matching genes (if any) + meanAggr <- meanAggr[meanAggr$Gene_symbol != "", ] + rownames(meanAggr) <- meanAggr$Gene_symbol + return(meanAggr) } #' Download data if a file does not exist @@ -78,6 +78,7 @@ performDifferentialExpression <- function(counts) { #' @importFrom utils download.file #' #' @return Download file if a file does not exist +#' @keywords internal downloadIfNeeded <- function(file, link, gz=TRUE) { if (!file.exists(file)) { if (gz) { diff --git a/README.md b/README.md index 0c894605..248da814 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,17 @@ Such analyses allow not only to infer the molecular causes of the observed difference in gene expression but also to identify small molecules that could drive or revert specific transcriptomic alterations. -## Installation +## Installing -You can install cTRAP from GitHub with: +[cTRAP is available in Bioconductor][bioconductor] and can be installed with: + +``` r +if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager") +BiocManager::install("cTRAP") +``` + +If you prefer, you can install cTRAP from GitHub instead: ``` r install.packages("devtools") @@ -24,4 +32,5 @@ devtools::install_github("nuno-agostinho/cTRAP") [codecov]: https://codecov.io/github/nuno-agostinho/cTRAP?branch=master [codecovBadge]: https://img.shields.io/codecov/c/github/nuno-agostinho/cTRAP/master.svg [appveyor]: https://ci.appveyor.com/project/nuno-agostinho/cTRAP -[appveyorBadge]: https://ci.appveyor.com/api/projects/status/github/nuno-agostinho/cTRAP?branch=master&svg=true \ No newline at end of file +[appveyorBadge]: https://ci.appveyor.com/api/projects/status/github/nuno-agostinho/cTRAP?branch=master&svg=true +[bioconductor]: http://bioconductor.org/packages/cTRAP diff --git a/data/ENCODEsamples.rda b/data/ENCODEsamples.rda index 541ede22..894dfbdc 100644 Binary files a/data/ENCODEsamples.rda and b/data/ENCODEsamples.rda differ diff --git a/man/ENCODEmetadata.Rd b/man/ENCODEmetadata.Rd index 1070a824..f6b92766 100644 --- a/man/ENCODEmetadata.Rd +++ b/man/ENCODEmetadata.Rd @@ -13,3 +13,4 @@ cellLine <- "HepG2" ENCODEmetadata <- downloadENCODEknockdownMetadata(cellLine, gene) } } +\keyword{internal} diff --git a/man/ENCODEsamples.Rd b/man/ENCODEsamples.Rd index e2f39eb3..271beee5 100644 --- a/man/ENCODEsamples.Rd +++ b/man/ENCODEsamples.Rd @@ -25,3 +25,4 @@ for (k in seq(ENCODEsamples)) { } } } +\keyword{internal} diff --git a/man/GCT-class.Rd b/man/GCT-class.Rd index 7bb85c0b..6fd0d5bf 100644 --- a/man/GCT-class.Rd +++ b/man/GCT-class.Rd @@ -32,3 +32,4 @@ The GCT class serves to represent annotated \seealso{ \url{http://clue.io/help} for more information on the GCT format } +\keyword{internal} diff --git a/man/checkColnames.Rd b/man/checkColnames.Rd index 401d7f0b..bd6cb9c8 100644 --- a/man/checkColnames.Rd +++ b/man/checkColnames.Rd @@ -24,3 +24,4 @@ boolean indicating whether or not all \code{test_names} are \description{ Check whether \code{test_names} are columns in the \code{\link{data.frame}} } +\keyword{internal} diff --git a/man/closeOpenHandles.Rd b/man/closeOpenHandles.Rd index 3a541638..ad098f1f 100644 --- a/man/closeOpenHandles.Rd +++ b/man/closeOpenHandles.Rd @@ -12,3 +12,4 @@ Closes all open identifiers \description{ Close open handles } +\keyword{internal} diff --git a/man/compareAgainstL1000.Rd b/man/compareAgainstL1000.Rd index 91b11596..7868b000 100644 --- a/man/compareAgainstL1000.Rd +++ b/man/compareAgainstL1000.Rd @@ -5,7 +5,8 @@ \title{Compare against L1000 datasets} \usage{ compareAgainstL1000(diffExprGenes, perturbations, cellLine, - method = c("spearman", "pearson", "gsea"), geneSize = 150) + method = c("spearman", "pearson", "gsea"), geneSize = 150, + pAdjustMethod = "BH") } \arguments{ \item{diffExprGenes}{Numeric: named vector of differentially expressed genes @@ -22,8 +23,11 @@ perturbations (check \code{\link{loadL1000perturbations}})} or \code{gsea})} \item{geneSize}{Number: top and bottom differentially expressed genes to use -for gene set enrichment (GSE); if \code{method} is not \code{gsea}, this -argument does nothing} +for gene set enrichment (GSE) (only used if \code{method} is \code{gsea})} + +\item{pAdjustMethod}{Character: method for p-value adjustment (for more +details, see \code{\link{p.adjust.methods}}; only used if \code{method} is +\code{spearman} or \code{pearson})} } \value{ Data table with correlation or GSEA results comparing differential diff --git a/man/correlatePerCellLine.Rd b/man/correlatePerCellLine.Rd index 56bf1838..8d0ec757 100644 --- a/man/correlatePerCellLine.Rd +++ b/man/correlatePerCellLine.Rd @@ -4,7 +4,8 @@ \alias{correlatePerCellLine} \title{Correlate differential expression scores per cell line} \usage{ -correlatePerCellLine(cellLine, diffExprGenes, perturbations, method) +correlatePerCellLine(cellLine, diffExprGenes, perturbations, method, + pAdjustMethod = "BH") } \arguments{ \item{cellLine}{Character: cell line(s)} @@ -18,6 +19,10 @@ genes (e.g. t-statistics)} perturbations (check \code{\link{loadL1000perturbations}})} \item{method}{Character: correlation method} + +\item{pAdjustMethod}{Character: method for p-value adjustment (for more +details, see \code{\link{p.adjust.methods}}; only used if \code{method} is +\code{spearman} or \code{pearson})} } \value{ Data frame with correlations statistics, p-value and q-value @@ -25,3 +30,4 @@ Data frame with correlations statistics, p-value and q-value \description{ Correlate differential expression scores per cell line } +\keyword{internal} diff --git a/man/counts.Rd b/man/counts.Rd index 643cb421..5f688c63 100644 --- a/man/counts.Rd +++ b/man/counts.Rd @@ -25,3 +25,4 @@ filter <- rowSums(counts[ , -c(1, 2)] >= minReads) >= minSamples counts <- counts[filter, ] } } +\keyword{internal} diff --git a/man/diffExprStat.Rd b/man/diffExprStat.Rd index 19545c00..7637b6fd 100644 --- a/man/diffExprStat.Rd +++ b/man/diffExprStat.Rd @@ -33,3 +33,4 @@ diffExprStat <- diffExpr$t names(diffExprStat) <- diffExpr$Gene_symbol } } +\keyword{internal} diff --git a/man/downloadIfNeeded.Rd b/man/downloadIfNeeded.Rd index 98c3ce12..cb8eef08 100644 --- a/man/downloadIfNeeded.Rd +++ b/man/downloadIfNeeded.Rd @@ -19,3 +19,4 @@ Download file if a file does not exist \description{ Download data if a file does not exist } +\keyword{internal} diff --git a/man/fix.datatypes.Rd b/man/fix.datatypes.Rd index 145f589c..a4ce30a6 100644 --- a/man/fix.datatypes.Rd +++ b/man/fix.datatypes.Rd @@ -13,8 +13,7 @@ fix.datatypes(meta) \item{meta}{a data.frame} } \value{ -meta the same data frame with (potentially) adjusted - column types. +meta the same data frame with (potentially) adjusted column types } \description{ GCT(X) parsing initially returns data frames diff --git a/man/getENCODEcontrols.Rd b/man/getENCODEcontrols.Rd index 76974bbb..0347350e 100644 --- a/man/getENCODEcontrols.Rd +++ b/man/getENCODEcontrols.Rd @@ -17,3 +17,4 @@ Character vector with respective experiment identifiers \description{ Get experiments files for a given control } +\keyword{internal} diff --git a/man/getL1000Conditions.Rd b/man/getL1000Conditions.Rd index d60880f8..59333c2c 100644 --- a/man/getL1000Conditions.Rd +++ b/man/getL1000Conditions.Rd @@ -4,10 +4,12 @@ \alias{getL1000conditions} \title{List available conditions in L1000 datasets} \usage{ -getL1000conditions(metadata) +getL1000conditions(metadata, control = FALSE) } \arguments{ -\item{metadata}{frame: L1000 metadata} +\item{metadata}{Data table: L1000 metadata} + +\item{control}{Boolean: show controls for perturbation types?} } \value{ List of conditions in L1000 datasets @@ -17,6 +19,6 @@ Downloads metadata if not available } \examples{ data("l1000metadata") -# l1000metadata <- downloadL1000metadata("l1000metadata.txt") +# l1000metadata <- downloadL1000data("l1000metadata.txt", "metadata") getL1000conditions(l1000metadata) } diff --git a/man/l1000metadata.Rd b/man/l1000metadata.Rd index 177cc304..60d26579 100644 --- a/man/l1000metadata.Rd +++ b/man/l1000metadata.Rd @@ -13,3 +13,4 @@ l1000metadata <- filterL1000metadata(l1000metadata, cellLine = "HEPG2", timepoint = "2 h") } } +\keyword{internal} diff --git a/man/l1000perturbationsKnockdown.Rd b/man/l1000perturbationsKnockdown.Rd index ab07ea39..7d5192cd 100644 --- a/man/l1000perturbationsKnockdown.Rd +++ b/man/l1000perturbationsKnockdown.Rd @@ -38,3 +38,4 @@ filter <- c(unlist(lapply(genes, head)), unlist(lapply(genes, tail))) l1000perturbationsKnockdown <- l1000perturbationsKnockdown[ , filter] } } +\keyword{internal} diff --git a/man/l1000perturbationsSmallMolecules.Rd b/man/l1000perturbationsSmallMolecules.Rd index be387703..9741144b 100644 --- a/man/l1000perturbationsSmallMolecules.Rd +++ b/man/l1000perturbationsSmallMolecules.Rd @@ -41,3 +41,4 @@ l1000perturbationsSmallMolecules <- l1000perturbationsSmallMolecules[ , filter] } } +\keyword{internal} diff --git a/man/loadENCODEsample.Rd b/man/loadENCODEsample.Rd index 68f5662e..e6ce2313 100644 --- a/man/loadENCODEsample.Rd +++ b/man/loadENCODEsample.Rd @@ -19,3 +19,4 @@ Data table with ENCODE sample data \description{ Load ENCODE sample } +\keyword{internal} diff --git a/man/loadL1000perturbations.Rd b/man/loadL1000perturbations.Rd index 1a754ec3..f67b5f97 100644 --- a/man/loadL1000perturbations.Rd +++ b/man/loadL1000perturbations.Rd @@ -24,10 +24,11 @@ Load L1000 perturbation data } \examples{ if (interactive()) { - metadata <- downloadL1000metadata("l1000metadata.txt") + metadata <- downloadL1000data("l1000metadata.txt", "metadata") metadata <- filterL1000metadata(metadata, cellLine="HepG2") - zscores <- downloadL1000zscores("l1000zscores.gctx", metadata$sig_id) - geneInfo <- downloadL1000geneInfo("l1000geneInfo.txt") + zscores <- downloadL1000data("l1000zscores.gctx", "zscores", + metadata$sig_id) + geneInfo <- downloadL1000data("l1000geneInfo.txt", "geneInfo") loadL1000perturbations(metadata, zscores, geneInfo) } } diff --git a/man/performDifferentialExpression.Rd b/man/performDifferentialExpression.Rd index 16d933c0..c6207642 100644 --- a/man/performDifferentialExpression.Rd +++ b/man/performDifferentialExpression.Rd @@ -17,6 +17,8 @@ knockdown and control Perform differential gene expression based on ENCODE data } \examples{ +data("ENCODEsamples") + ## Download ENCODE metadata for a specific cell line and gene # cellLine <- "HepG2" # gene <- "EIF4G1" @@ -25,7 +27,6 @@ Perform differential gene expression based on ENCODE data ## Download samples based on filtered ENCODE metadata # ENCODEsamples <- downloadENCODEsamples(ENCODEmetadata) -data("ENCODEsamples") counts <- prepareENCODEgeneExpression(ENCODEsamples) # Remove low coverage (at least 10 counts shared across two samples) @@ -34,6 +35,15 @@ minSamples <- 2 filter <- rowSums(counts[ , -c(1, 2)] >= minReads) >= minSamples counts <- counts[filter, ] -# Perform differential gene expression analysis -diffExpr <- performDifferentialExpression(counts) +## Convert ENSEMBL identifier to gene symbol +# library(biomaRt) +# mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl")) +# genes <- sapply(strsplit(counts$gene_id, "\\\\."), `[`, 1) +# geneConversion <- getBM(filters="ensembl_gene_id", values=genes, mart=mart, +# attributes=c("ensembl_gene_id", "hgnc_symbol")) +# counts$gene_id <- geneConversion$hgnc_symbol[ +# match(genes, geneConversion$ensembl_gene_id)] + +## Perform differential gene expression analysis +# diffExpr <- performDifferentialExpression(counts) } diff --git a/man/performGSAperCellLine.Rd b/man/performGSAperCellLine.Rd index 581dd212..597fd8d6 100644 --- a/man/performGSAperCellLine.Rd +++ b/man/performGSAperCellLine.Rd @@ -21,3 +21,4 @@ cell line \description{ Perform gene set enrichment (GSA) per cell line } +\keyword{internal} diff --git a/man/plotGSEA.Rd b/man/plotGSEA.Rd index eccc4359..fd805f93 100644 --- a/man/plotGSEA.Rd +++ b/man/plotGSEA.Rd @@ -19,3 +19,4 @@ Grid of plots illustrating a GSEA plot \description{ Plot gene set enrichment analysis (GSEA) } +\keyword{internal} diff --git a/man/readGctxIds.Rd b/man/readGctxIds.Rd index d009539b..d0cb8cae 100644 --- a/man/readGctxIds.Rd +++ b/man/readGctxIds.Rd @@ -25,3 +25,4 @@ Other GCTX parsing functions: \code{\link{fix.datatypes}}, \code{\link{processIds}}, \code{\link{readGctxMeta}} } \concept{GCTX parsing functions} +\keyword{internal} diff --git a/man/readGctxMeta.Rd b/man/readGctxMeta.Rd index 1fb69e40..47a87c72 100644 --- a/man/readGctxMeta.Rd +++ b/man/readGctxMeta.Rd @@ -33,3 +33,4 @@ Other GCTX parsing functions: \code{\link{fix.datatypes}}, \code{\link{processIds}}, \code{\link{readGctxIds}} } \concept{GCTX parsing functions} +\keyword{internal} diff --git a/tests/testthat/test_L1000.R b/tests/testthat/test_L1000.R index ae0ffd11..c724499e 100644 --- a/tests/testthat/test_L1000.R +++ b/tests/testthat/test_L1000.R @@ -32,8 +32,8 @@ test_that("Compare using Spearman correlation", { method="spearman") expect_is(data, "l1000comparison") expect_identical(colnames(data), c( - "genes", "HepG2_t_spearman_coef", "HepG2_t_spearman_pvalue", - "HepG2_t_spearman_qvalue", "Average_t_spearman_coef")) + "genes", "HepG2_spearman_coef", "HepG2_spearman_pvalue", + "HepG2_spearman_qvalue", "Average_spearman_coef")) }) test_that("Compare using Pearson correlation", { @@ -41,8 +41,8 @@ test_that("Compare using Pearson correlation", { method="pearson") expect_is(data, "l1000comparison") expect_identical(colnames(data), c( - "genes", "HepG2_t_pearson_coef", "HepG2_t_pearson_pvalue", - "HepG2_t_pearson_qvalue", "Average_t_pearson_coef")) + "genes", "HepG2_pearson_coef", "HepG2_pearson_pvalue", + "HepG2_pearson_qvalue", "Average_pearson_coef")) }) test_that("Compare using GSEA", { diff --git a/vignettes/comparing_DGE_with_perturbations.Rmd b/vignettes/comparing_DGE_with_perturbations.Rmd index cd157865..32cf58b3 100644 --- a/vignettes/comparing_DGE_with_perturbations.Rmd +++ b/vignettes/comparing_DGE_with_perturbations.Rmd @@ -1,5 +1,5 @@ --- -title: 'cTRAP: differential expression comparison with cellular perturbations' +title: 'cTRAP: identifying candidate causal perturbations from differential gene expression data' author: Bernardo P. de Almeida & Nuno Saraiva-Agostinho date: "`r Sys.Date()`" output: @@ -83,6 +83,15 @@ minSamples <- 2 filter <- rowSums(counts[ , -c(1, 2)] >= minReads) >= minSamples counts <- counts[filter, ] +# Convert ENSEMBL identifier to gene symbol +library(biomaRt) +mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl")) +genes <- sapply(strsplit(counts$gene_id, "\\."), `[`, 1) +geneConversion <- getBM(filters="ensembl_gene_id", values=genes, mart=mart, + attributes=c("ensembl_gene_id", "hgnc_symbol")) +counts$gene_id <- geneConversion$hgnc_symbol[ + match(genes, geneConversion$ensembl_gene_id)] + # Perform differential gene expression analysis diffExpr <- performDifferentialExpression(counts) ``` @@ -91,7 +100,8 @@ As our metric of differential expression after EIF4G1 shRNA knock-down, we will use the respective t-statistic. ```{r t-statistics} -# Get t-statistics of differential expression with respective gene names +# Get t-statistics of differential expression with respective gene names +# (expected input for downstream analyses) diffExprStat <- diffExpr$t names(diffExprStat) <- diffExpr$Gene_symbol ``` @@ -205,9 +215,9 @@ the Benjamini-Hochberg-adjusted p-value. ```{r order knockdown perturbation comparison} # Order knockdown perturbations according to similarity compareKnockdown$spearman_ordered <- compareKnockdown$spearman[ - order(compareKnockdown$spearman$HepG2_t_spearman_coef, decreasing=TRUE)] + order(compareKnockdown$spearman$HepG2_spearman_coef, decreasing=TRUE)] compareKnockdown$pearson_ordered <- compareKnockdown$pearson[ - order(compareKnockdown$pearson$HepG2_t_pearson_coef, decreasing=TRUE)] + order(compareKnockdown$pearson$HepG2_pearson_coef, decreasing=TRUE)] compareKnockdown$gsea_ordered <- compareKnockdown$gsea[ order(compareKnockdown$gsea$HepG2_WTCS, decreasing=FALSE)] @@ -229,9 +239,9 @@ For small molecules: ```{r order small molecule perturbation comparison} # Order small molecule perturbations according to similarity compareSmallMolecule$spearman_ordered <- compareSmallMolecule$spearman[ - order(compareSmallMolecule$spearman$HepG2_t_spearman_coef, decreasing=TRUE)] + order(compareSmallMolecule$spearman$HepG2_spearman_coef, decreasing=TRUE)] compareSmallMolecule$pearson_ordered <- compareSmallMolecule$pearson[ - order(compareSmallMolecule$pearson$HepG2_t_pearson_coef, decreasing=TRUE)] + order(compareSmallMolecule$pearson$HepG2_pearson_coef, decreasing=TRUE)] compareSmallMolecule$gsea_ordered <- compareSmallMolecule$gsea[ order(compareSmallMolecule$gsea$HepG2_WTCS, decreasing=FALSE)] @@ -304,7 +314,7 @@ this tutorial) is welcome. Please send any suggestions and comments to: > Nuno Saraiva-Agostinho (nunoagostinho@medicina.ulisboa.pt) > -> Bernardo P. de Almeida (bernardo.almeida@imp.ac.at) +> Bernardo P. de Almeida (bernardo.almeida94@gmail.com) > > [Disease Transcriptomics Lab, Instituto de Medicina Molecular (Portugal)][iMM]