Skip to content

Commit

Permalink
1.0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
nuno-agostinho authored Nov 4, 2018
2 parents 2611ec8 + 74e338b commit ecb24eb
Show file tree
Hide file tree
Showing 36 changed files with 179 additions and 90 deletions.
18 changes: 10 additions & 8 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
Package: cTRAP
Title: Identification of potential transcriptome perturbations
Version: 0.99.12
Authors@R: c(person("Nuno", "Agostinho",
email="[email protected]", role=c("aut", "cre")),
person("Bernardo", "de Almeida", role="aut"),
person(c("Nuno", "Luís"), "Barbosa-Morais", role=c("aut", "led")))
Title: Identification of candidate causal perturbations from differential gene
expression data
Version: 1.0.1
Authors@R: c(
person(c("Bernardo", "P."), "de Almeida", role="aut"),
person("Nuno", "Saraiva-Agostinho",
email="[email protected]", role=c("aut", "cre")),
person(c("Nuno", "L."), "Barbosa-Morais", role=c("aut", "led")))
Description: Compare differential gene expression results with those from known
cellular perturbations (such as gene knock-down, overexpression or small
molecules) derived from the Connectivity Map. Such analyses allow not only
Expand All @@ -21,15 +23,15 @@ URL: https://github.com/nuno-agostinho/cTRAP
BugReports: https://github.com/nuno-agostinho/cTRAP/issues
Suggests: testthat,
knitr,
covr
covr,
biomaRt
RoxygenNote: 6.1.0
Imports: data.table,
limma,
stats,
fgsea,
pbapply,
plyr,
biomaRt,
cowplot,
ggplot2,
rhdf5,
Expand Down
3 changes: 0 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,6 @@ export(performDifferentialExpression)
export(plotL1000comparison)
export(prepareENCODEgeneExpression)
importFrom(R.utils,gunzip)
importFrom(biomaRt,getBM)
importFrom(biomaRt,useDataset)
importFrom(biomaRt,useMart)
importFrom(cowplot,plot_grid)
importFrom(data.table,data.table)
importFrom(data.table,fread)
Expand Down
20 changes: 14 additions & 6 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
# 0.0.0.9000

* Added a `NEWS.md` file to track changes to the package.



# 1.0.1 (2 November, 2018)

* Update title, author names, version and README
* Remove biomaRt dependency
* By default, `getL1000conditions` now shows CMap perturbation types except for
controls
* Compare against CMap perturbations (`compareAgainstL1000` function):
- Remove "_t" from resulting column names (as the t-statistic may or may not
be used)
- Select p-value adjustment method when performing correlation analyses
(Benjamini-Hochberg is set by default)
* Documentation:
- Fix obsolete function calls in function documentation
- Hide non-exported functions from reference PDF manual
2 changes: 2 additions & 0 deletions R/ENCODE.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#' @param table Data frame
#'
#' @return Character vector with respective experiment identifiers
#' @keywords internal
getENCODEcontrols <- function(control, table) {
sub <- table[table$`Experiment accession` == control, ]
exp <- sub$`File accession`
Expand Down Expand Up @@ -90,6 +91,7 @@ downloadENCODEknockdownMetadata <- function(cellLine=NULL, gene=NULL) {
#'
#' @importFrom data.table fread
#' @return Data table with ENCODE sample data
#' @keywords internal
loadENCODEsample <- function (metadata, replicate, control=FALSE) {
metadata <- metadata[metadata$`Biological replicate(s)` == replicate, ]

Expand Down
44 changes: 27 additions & 17 deletions R/L1000.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,25 @@ downloadL1000data <- function(file, type=c("metadata", "geneInfo", "zscores"),
#'
#' Downloads metadata if not available
#'
#' @param metadata frame: L1000 metadata
#' @param metadata Data table: L1000 metadata
#' @param control Boolean: show controls for perturbation types?
#'
#' @return List of conditions in L1000 datasets
#' @export
#'
#' @examples
#' data("l1000metadata")
#' # l1000metadata <- downloadL1000metadata("l1000metadata.txt")
#' # l1000metadata <- downloadL1000data("l1000metadata.txt", "metadata")
#' getL1000conditions(l1000metadata)
getL1000conditions <- function(metadata) {
getL1000conditions <- function(metadata, control=FALSE) {
pertTypes <- getL1000perturbationTypes()
pertTypes <- names(pertTypes)[pertTypes %in% unique(metadata$pert_type)]

if (!control) {
pertTypes <- grep("Control", pertTypes, value=TRUE, invert=TRUE,
fixed=TRUE)
}

list("Perturbation type"=pertTypes,
"Cell line"=unique(metadata$cell_id),
"Dosage"=unique(metadata$pert_idose),
Expand All @@ -91,8 +97,9 @@ getL1000conditions <- function(metadata) {
#' @importFrom stats cor.test p.adjust
#'
#' @return Data frame with correlation statistics, p-value and q-value
#' @keywords internal
correlatePerCellLine <- function(cellLine, diffExprGenes, perturbations,
method) {
method, pAdjustMethod="BH") {
cat(paste("Comparing with cell line", cellLine), fill=TRUE)
perturbation <- perturbations[
, tolower(attr(perturbations, "cellLines")) == tolower(cellLine)]
Expand All @@ -113,11 +120,11 @@ correlatePerCellLine <- function(cellLine, diffExprGenes, perturbations,

cor <- sapply(cors, "[[", "estimate")
pval <- sapply(cors, "[[", "p.value")
qval <- p.adjust(pval)
qval <- p.adjust(pval, pAdjustMethod)
names(cor) <- names(pval) <- names(qval) <- colnames(perturbation)

res <- data.table(names(cor), cor, pval, qval)
names(res) <- c("genes", sprintf("%s_t_%s_%s", cellLine, method,
names(res) <- c("genes", sprintf("%s_%s_%s", cellLine, method,
c("coef", "pvalue", "qvalue")))
attr(res, "perturbation") <- ref
return(res)
Expand All @@ -135,6 +142,7 @@ correlatePerCellLine <- function(cellLine, diffExprGenes, perturbations,
#'
#' @return Data frame containing gene set enrichment analysis (GSEA) results per
#' cell line
#' @keywords internal
performGSAperCellLine <- function(cellLine, perturbations, pathways) {
perturbation <- perturbations[
, tolower(attr(perturbations, "cellLines")) == tolower(cellLine)]
Expand Down Expand Up @@ -181,14 +189,16 @@ performGSAperCellLine <- function(cellLine, perturbations, pathways) {
#' where the names of the vector are gene names and the values are a statistic
#' that represents significance and magnitude of differentially expressed
#' genes (e.g. t-statistics)
#' @param geneSize Number: top and bottom differentially expressed genes to use
#' for gene set enrichment (GSE); if \code{method} is not \code{gsea}, this
#' argument does nothing
#' @param perturbations \code{l1000perturbations} object: file with loaded L1000
#' perturbations (check \code{\link{loadL1000perturbations}})
#' @param cellLine Character: cell line(s)
#' @param method Character: comparison method (\code{spearman}, \code{pearson}
#' or \code{gsea})
#' @param geneSize Number: top and bottom differentially expressed genes to use
#' for gene set enrichment (GSE) (only used if \code{method} is \code{gsea})
#' @param pAdjustMethod Character: method for p-value adjustment (for more
#' details, see \code{\link{p.adjust.methods}}; only used if \code{method} is
#' \code{spearman} or \code{pearson})
#'
#' @importFrom data.table setkeyv
#' @importFrom piano loadGSC
Expand Down Expand Up @@ -216,12 +226,12 @@ performGSAperCellLine <- function(cellLine, perturbations, pathways) {
#' compareAgainstL1000(diffExprStat, perturbations, cellLine, method="gsea")
compareAgainstL1000 <- function(diffExprGenes, perturbations, cellLine,
method=c("spearman", "pearson", "gsea"),
geneSize=150) {
geneSize=150, pAdjustMethod="BH") {
method <- match.arg(method)
if (method %in% c("spearman", "pearson")) {
cellLineRes <- lapply(cellLine, correlatePerCellLine,
diffExprGenes, perturbations, method)
colnameSuffix <- sprintf("_t_%s_coef", method)
cellLineRes <- lapply(cellLine, correlatePerCellLine, diffExprGenes,
perturbations, method, pAdjustMethod)
colnameSuffix <- sprintf("_%s_coef", method)
} else if (method == "gsea") {
ordered <- order(diffExprGenes)
topGenes <- names(diffExprGenes)[head(ordered, geneSize)]
Expand Down Expand Up @@ -296,7 +306,6 @@ filterL1000metadata <- function(metadata, cellLine=NULL, timepoint=NULL,

#' Load L1000 perturbation data
#'
#' @inheritParams downloadL1000metadata
#' @param metadata Data frame: L1000 Metadata
#' @param zscores Data frame: GCTX z-scores
#' @param geneInfo Data frame: L1000 gene info
Expand All @@ -309,10 +318,11 @@ filterL1000metadata <- function(metadata, cellLine=NULL, timepoint=NULL,
#' @export
#' @examples
#' if (interactive()) {
#' metadata <- downloadL1000metadata("l1000metadata.txt")
#' metadata <- downloadL1000data("l1000metadata.txt", "metadata")
#' metadata <- filterL1000metadata(metadata, cellLine="HepG2")
#' zscores <- downloadL1000zscores("l1000zscores.gctx", metadata$sig_id)
#' geneInfo <- downloadL1000geneInfo("l1000geneInfo.txt")
#' zscores <- downloadL1000data("l1000zscores.gctx", "zscores",
#' metadata$sig_id)
#' geneInfo <- downloadL1000data("l1000geneInfo.txt", "geneInfo")
#' loadL1000perturbations(metadata, zscores, geneInfo)
#' }
loadL1000perturbations <- function(metadata, zscores, geneInfo,
Expand Down
7 changes: 7 additions & 0 deletions R/cTRAP-package.r
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ NULL
#'
#' @name counts
#' @docType data
#' @keywords internal
NULL

#' Differential expression's t-statistics sample
Expand Down Expand Up @@ -100,6 +101,7 @@ NULL
#'
#' @name diffExprStat
#' @docType data
#' @keywords internal
NULL

#' ENCODE metadata sample
Expand All @@ -115,6 +117,7 @@ NULL
#'
#' @name ENCODEmetadata
#' @docType data
#' @keywords internal
NULL

#' L1000 metadata
Expand All @@ -130,6 +133,7 @@ NULL
#'
#' @name l1000metadata
#' @docType data
#' @keywords internal
NULL

#' L1000 perturbations sample for knockdown experiments
Expand Down Expand Up @@ -170,6 +174,7 @@ NULL
#'
#' @name l1000perturbationsKnockdown
#' @docType data
#' @keywords internal
NULL

#' L1000 perturbations sample for small molecules
Expand Down Expand Up @@ -213,6 +218,7 @@ NULL
#'
#' @name l1000perturbationsSmallMolecules
#' @docType data
#' @keywords internal
NULL

#' Sample of ENCODE samples
Expand Down Expand Up @@ -240,4 +246,5 @@ NULL
#'
#' @name ENCODEsamples
#' @docType data
#' @keywords internal
NULL
11 changes: 7 additions & 4 deletions R/cmapR_subset.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#' @seealso \url{http://clue.io/help} for more information on the GCT format
#'
#' @source https://github.com/cmap/cmapR
#' @keywords internal
setClass("GCT", representation(
mat = "matrix", rid = "character", cid = "character", rdesc = "data.frame",
cdesc = "data.frame", version = "character", src = "character"))
Expand All @@ -37,11 +38,10 @@ setClass("GCT", representation(
#' @details This is a low-level helper function
#' which most users will not need to access directly
#'
#' @return meta the same data frame with (potentially) adjusted
#' column types.
#' @return meta the same data frame with (potentially) adjusted column types
#' @keywords internal
#'
#' @family GCTX parsing functions
#' @keywords internal
#'
#' @source https://github.com/cmap/cmapR
fix.datatypes <- function(meta) {
Expand Down Expand Up @@ -92,6 +92,7 @@ fix.datatypes <- function(meta) {
#' @importFrom rhdf5 h5read
#'
#' @return a \code{data.frame} of metadata
#' @keywords internal
#'
#' @source https://github.com/cmap/cmapR
#'
Expand Down Expand Up @@ -143,6 +144,7 @@ readGctxMeta <- function(gctx_path, dimension="row", ids=NULL,
#' @param dimension which ids to read (row or column)
#'
#' @return a character vector of row or column ids from the provided file
#' @keywords internal
#'
#' @source https://github.com/cmap/cmapR
#'
Expand Down Expand Up @@ -410,6 +412,7 @@ setMethod("initialize", signature = "GCT", definition = function(
#' @importFrom utils packageVersion
#'
#' @return Closes all open identifiers
#' @keywords internal
closeOpenHandles <- function() {
if(packageVersion('rhdf5') < "2.23.0")
rhdf5::H5close()
Expand All @@ -428,6 +431,7 @@ closeOpenHandles <- function() {
#' columns of \code{df}
#'
#' @source https://github.com/cmap/cmapR
#' @keywords internal
checkColnames <- function(test_names, df, throw_error=TRUE) {
# check whether test_names are valid names in df
# throw error if specified
Expand All @@ -451,7 +455,6 @@ checkColnames <- function(test_names, df, throw_error=TRUE) {
#' @return a subset version of \code{df}
#'
#' @source https://github.com/cmap/cmapR
#'
#' @keywords internal
subsetToIds <- function(df, ids) {
# helper function to do a robust df subset
Expand Down
1 change: 1 addition & 0 deletions R/plots.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#' @importFrom stats na.omit
#'
#' @return Grid of plots illustrating a GSEA plot
#' @keywords internal
plotGSEA <- function(pathways, stats, title="GSEA plot") {
# Custom
axis_title_size <- 12
Expand Down
Loading

0 comments on commit ecb24eb

Please sign in to comment.