forked from Katsevich-Lab/sceptre-manuscript
-
Notifications
You must be signed in to change notification settings - Fork 1
/
load_data_for_plotting.R
85 lines (71 loc) · 6.63 KB
/
load_data_for_plotting.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# offsite_dir <- if (is.na(args[2])) "/Volumes/tims_new_drive/research/sceptre_files/" else args[2]
offsite_dir <- .get_config_path("LOCAL_SCEPTRE_DATA_DIR")
manuscript_figure_dir <- paste0(code_dir, "/sceptre_paper/manuscript/figures")
# Gasperini results
original_results_gasp = paste0(offsite_dir, "/data/gasperini/processed/original_results.fst") %>% read.fst()
resampling_results_gasp = read.fst(sprintf("%s/results/gasperini/sceptre/resampling_results.fst", offsite_dir)) %>% as_tibble()
likelihood_results_gasp = read.fst(sprintf("%s/results/gasperini/negative_binomial/likelihood_results.fst", offsite_dir)) %>% as_tibble()
covariates_gasp = read.fst(sprintf("%s/data/gasperini/processed/cell_covariate_model_matrix.fst", offsite_dir)) %>% as_tibble()
# Xie results: 4 methods: original, resampling_results (sceptre), likelihood results (iNB), and monocle_nb
gRNA.gene.pair = read.fst(sprintf("%s/data/xie/processed/gRNA_gene_pairs.fst", offsite_dir)) %>% as_tibble()
original_results_xie = readRDS(sprintf("%s/data/xie/processed/raw_pval_xie.rds", offsite_dir)) %>% as_tibble()
resampling_results_xie_with_names = read.fst(sprintf("%s/results/xie/sceptre/all_results_annotated.fst", offsite_dir)) %>% as_tibble()
likelihood_results_xie = read.fst(sprintf("%s/results/xie/negative_binomial/all_results.fst", offsite_dir)) %>% as_tibble()
p_vals_bulk <- paste0(offsite_dir, "/results/xie/bulk_rna_seq/pvals_arl15_enh.rds") %>% readRDS() %>% as_tibble() %>% rename(gene_names = gene_id)
p_vals_bulk_myb3_enh3 <- paste0(offsite_dir, "/results/xie/bulk_rna_seq/pvals_myb_enh3.rds") %>% readRDS() %>% as_tibble() %>% rename(gene_names = gene_id)
covariates_xie = read.fst(sprintf("%s/data/xie/analysis_ready/covariate_model_matrix.fst", offsite_dir)) %>% as_tibble()
grna_indicator_matrix_xie = read.fst(sprintf("%s/data/xie/analysis_ready/gRNA_indicator_matrix.fst", offsite_dir)) %>% as_tibble()
ss_xie_cis = readRDS(sprintf("%s/data/xie/processed/ss_xie_cis.rds", offsite_dir)) %>% as_tibble()
resampling_results_xie_cis = resampling_results_xie_with_names %>% filter(type == 'cis')
gene.mart = readRDS(sprintf("%s/data/xie/processed/gene_mart.rds", offsite_dir))
gRNA.mart = readRDS(sprintf("%s/data/xie/processed/gRNA_mart.rds", offsite_dir))
gene.ensembl.id <- lapply(strsplit(as.character(resampling_results_xie_cis$gene_id), '[.]'), function(x){x[1]}) %>% unlist
resampling_results_xie_cis = cbind(resampling_results_xie_cis,
gene.mart[match(gene.ensembl.id, gene.mart$ensembl_gene_id),
c('hgnc_symbol', 'chr', 'strand', 'start_position', 'end_position')])
resampling_results_xie_cis <- resampling_results_xie_cis %>% dplyr::rename(gene_short_name = hgnc_symbol,
target_gene.start = start_position,
target_gene.stop = end_position)
resampling_results_xie_cis <- resampling_results_xie_cis %>% mutate(TSS = ifelse(strand > 0, target_gene.start, target_gene.stop))
resampling_results_xie_cis = cbind(resampling_results_xie_cis, gRNA.mart[match(resampling_results_xie_cis$gRNA_id, gRNA.mart$gRNA_id),
c('start_position', 'end_position', 'mid_position')])
resampling_results_xie_cis <- resampling_results_xie_cis %>% dplyr::rename(target_site.start = start_position,
target_site.stop = end_position,
target_site.mid = mid_position)
# need to change
monocle_results_xie = read.fst(sprintf("%s/results/xie/negative_binomial/monocle_results_annotated.fst", offsite_dir))
nb_results_xie = read.fst(sprintf("%s/results/xie/negative_binomial/all_results_annotated.fst", offsite_dir))
monocle_results_xie_cis = monocle_results_xie %>% filter(type == 'cis')
nb_results_xie_cis = nb_results_xie %>% filter(type == 'cis')
monocle_results_xie_cis <- left_join(monocle_results_xie_cis, resampling_results_xie_cis[, c('gene_id', 'gRNA_id', 'gene_short_name', 'chr', 'strand', 'target_gene.start',
'target_gene.stop', 'TSS', 'target_site.start', 'target_site.stop',
'target_site.mid')],
by = c("gene_id", "gRNA_id"))
nb_results_xie_cis <- left_join(nb_results_xie_cis, resampling_results_xie_cis[, c('gene_id', 'gRNA_id', 'gene_short_name', 'chr', 'strand', 'target_gene.start',
'target_gene.stop', 'TSS', 'target_site.start', 'target_site.stop',
'target_site.mid')],
by = c("gene_id", "gRNA_id"))
# simulation results
simulation_results = read.fst(sprintf("%s/results/simulations/all_results.fst", offsite_dir)) %>% as_tibble()
# enrichment results Gasperini
rejected_pairs_HIC_gasp = read_tsv(sprintf("%s/results/gasperini/enrichment/rejected_pairs_HIC.tsv", offsite_dir),
col_types = "cciiiciillliid")
TF_enrichments_gasp = read_tsv(sprintf("%s/results/gasperini/enrichment/TF_enrichments.tsv", offsite_dir),
col_types = "ccd")
paired_fractions_gasp = read_tsv(sprintf("%s/results/gasperini/enrichment/TF_paired_enhancer_fractions.tsv", offsite_dir),
col_types = "ciddddddd")
# enrichment results Xie
rejected_pairs_HIC_xie <- read_tsv(sprintf("%s/results/xie/enrichment/rejected_pairs_HIC.tsv", offsite_dir),
col_types = "cciiiciilllliid") %>% as_tibble()
TF_enrichments_xie = read_tsv(sprintf("%s/results/xie/enrichment/TF_enrichments.tsv", offsite_dir),
col_types = "ccddd") %>% as_tibble()
paired_fractions_xie = read_tsv(sprintf("%s/results/xie/enrichment/TF_paired_enhancer_fractions.tsv", offsite_dir),
col_types = "cidddddddddd") %>% as_tibble()
# dispersion coefficients alpha_0 and alpha_1 from equation (6) in DESeq2 paper
disp_coeffs = as.numeric(readRDS(sprintf("%s/data/gasperini/processed/disp_coefficients.rds", offsite_dir)))
# dispersion table (mean expressions and dispersion estimates for each gene)
disp_table = read.fst(sprintf("%s/data/gasperini/processed/disp_table.fst", offsite_dir)) %>% as_tibble()
# additional items
ci <- 0.95
plot_colors <- c(gasperini_nb = "firebrick3", hf_nb = "hotpink2", fixed_dispersion_nb = "darkslategray4", sceptre = "royalblue4", hypergeometric = "darkslategray4", scMAGeCK = "magenta4")
subsampling_factor <- 250