fig_5B.Rmd

---
title: "Fig_5B"
author: "Max Schubert"
date: "6/18/2019"
output: html_document
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE}
# Default chunk option for the rest of the document: echo the chunk source
# alongside its output in the knitted report.
knitr::opts_chunk$set(echo = TRUE)

library(data.table)
library(plyr)
library(gridExtra)
require(ggplot2)
require(data.table)
require(scales)
#library(ggpubr)
#library(plotly)
library(ggrepel)
library(R.utils)
library(RColorBrewer) #install.packages("RColorBrewer")

# Input files for this figure are resolved relative to the project root
# (via here::here); constants.R at the project root is sourced for shared
# settings.
parent_folder <- here::here("fig_5B")
source(here::here("constants.R"))

# Global variables for shifting plotting.
ori_pos <- 3900000
ter_pos <- 1700000
shift_over_value <- 1e6
genome_size <- 4641652

```

# Data for enrichment of alleles
```{r get_enriched_SNP_data}
# Read the SNP table that sits in the figure's data folder.
SNP_table <- "a3a4.csv"
SNP_path <- file.path(parent_folder, SNP_table)
SNP_file_dt <- as.data.table(fread(SNP_path))

# Drop every column that holds nothing but NA.
has_values <- function(column) !all(is.na(column))
SNP_file_dt <- Filter(has_values, SNP_file_dt)

# Keep only the columns listed here.
kept_columns <- c(
  "UID", "POSITION",
  "REF", "ALT",
  "EXPERIMENT_SAMPLE_LABEL", "DP",
  "INFO_EFF_CONTEXT", "INFO_EFF_EFFECT",
  "INFO_EFF_AA", "INFO_EFF_GT",
  "INFO_EFF_CLASS", "INFO_EFF_GENE"
)
SNP_file_trimmed_dt <- SNP_file_dt[, kept_columns, with = FALSE]

```

# Unzip coverage files
```{r unzip coverage files}
# Decompress the gzipped depth file for one sample.
#
# x: sample number; the archive is expected at
#    <parent_folder>/s<x>_depths.gz
#
# The archive is kept (remove = FALSE) and the decompressed copy is written
# next to it without the ".gz" suffix. overwrite = TRUE lets the chunk be
# re-run when a decompressed copy from an earlier run is still on disk;
# without it gunzip() stops because the destination already exists.
gunzip_dp <- function(x) {
  dp_name <- paste0(parent_folder, "/", "s", as.character(x), "_depths.gz")
  gunzip(dp_name, remove = FALSE, overwrite = TRUE)
}

gunzip_dp(4)
```


```{r before_coverage_data}
# Load the per-position depth table for the "before" library and summarise it
# as mean depth in fixed-width windows.
#note, could add more samples later
samplenum <- 's4'
#depthspath <- "/Users/Max_Schubert/dropbox_hms/active_seq_reads/NovNextseq_again/FC_04398/Unaligned_1234_PF_mm1/Data/bwa_sam_bam_depths/"
path_cov <- paste0(parent_folder, "/", samplenum, "_depths")
cov <- fread(path_cov)
# NOTE(review): assumes the depth file has exactly three columns in this
# order (presumably chromosome, position, depth) -- confirm against the file.
setnames(cov, c('chr', 'pos', 'count'))

# Slice positions into windows; each window is labelled with its middle base.
# The final, shorter window is labelled max(pos) - window_size/2.
# NOTE(review): if max(pos) is an exact multiple of window_size the break
# vector contains a duplicate and cut() will stop.
window_size <- 1000
max_pos <- cov[, max(pos)]
window_breaks <- c(seq(0, max_pos, window_size), max_pos)
window_labels <- c(seq(window_size / 2, max_pos - window_size / 2, window_size),
                   max_pos - window_size / 2)
cov[, window := cut(pos, breaks = window_breaks, labels = window_labels)]

# Average depth per window.
#wcov <- cov[, window_dp_mean := mean(count), by=window]
wcov_new <- cov[, list(mean_window_count = mean(count)), by = window]

# cut() returns a factor. Convert through character so the numeric value of
# the label (the window midpoint) is recovered; as.numeric() on the factor
# itself would return the integer level code instead of the label.
wcov_new[, window := as.numeric(as.character(window))]
#wcov_new[, position_modulus := (window+shift_over_value)%%(genome_size - shift_over_value)]

# Coverage depth as a factor relative to the mean / median window depth.
wcov_new[, dp_rel_mean := mean_window_count / mean(mean_window_count)]
wcov_new[, dp_rel_median := mean_window_count / median(mean_window_count)]
# plot coverage at windows
#wcov <- wcov[count > 1000]

# SNP depth relative to the per-sample mean depth.
# NOTE(review): despite the "_log" suffix this column holds the linear ratio.
# A log10 version used to be assigned immediately before and was overwritten
# by this line, so only this assignment ever took effect -- confirm which
# scale is intended.
SNP_file_trimmed_dt[, dp_rel_mean_log := DP / mean(DP), by = 'EXPERIMENT_SAMPLE_LABEL']
```

```{r calc_relative_depth_of_snps}
# SNP depth as a fraction of the deepest SNP within each sample.
SNP_file_trimmed_dt[,
  dp_rel_max := DP / max(DP),
  by = "EXPERIMENT_SAMPLE_LABEL"
]

# Mean of that fraction over all rows sharing a UID (averages replicates).
SNP_file_trimmed_dt[,
  mean_dp_rel_max := mean(dp_rel_max),
  by = UID
]

# Window coverage as a fraction of the deepest window.
wcov_new[, dp_rel_max := mean_window_count / max(mean_window_count)]

```

# figure 5B
### 5B setup
```{r 5B_setup}
# Per-sample SNP depth relative to the sample mean, then rescaled so the
# deepest SNP in each sample equals 1.
SNP_file_trimmed_dt[,
  dp_rel := DP / mean(DP),
  by = "EXPERIMENT_SAMPLE_LABEL"
]
SNP_file_trimmed_dt[,
  dp_rel_max_mean := dp_rel / max(dp_rel),
  by = "EXPERIMENT_SAMPLE_LABEL"
]

# Average dp_rel_max across rows with the same UID (replicates).
SNP_file_trimmed_dt[,
  mean_dp_rel_max := mean(dp_rel_max),
  by = UID
]

# Two shades from the 9-class YlGnBu palette, one per replicate.
replicate_cols <- brewer.pal(n = 9, name = "YlGnBu")[c(5, 8)]
colors <- replicate_cols

# Layout values used when drawing the plot.
y_offset <- 0.02
x_offset <- 7000
genome_x <- 4640000
total_x <- genome_x + 170000

# Positions and labels of the genome-coordinate ticks.
genome_breaks <- c(0.01E6, 1E6, 2E6, 3E6, 4E6, genome_x)
genome_labels <- c("", "1 Mb", "2 Mb", "3 Mb", "4 Mb", "4.6 Mb / 0")
tick_dt <- data.table(x_vals = genome_breaks, lab = genome_labels)
```

```{r execute_fig}
# Build the figure. coord_polar() at the end of the chain maps x (genome
# position) to the angle and y to the radius, so "y = 1" is a circle and the
# band between y = 1 and y = 2 is the ring where the data are drawn.
all_polar_plot <- ggplot() +
  
  # background: grey-filled inner disc (y from 0 up to just below 1), then an
  # unfilled, grey-outlined frame around the data ring (up to y = 2)
  annotate("rect", ymin=0, ymax=1-y_offset*2, xmin=1, xmax=total_x-1, fill='grey90', size=0.25) +
  annotate("rect", ymin=1-y_offset*3, ymax=2, xmin=1, xmax=genome_x, fill=NA, color='grey50', size=0.25) +
  
  #data for "before", offset by one to create a circle
  # NOTE(review): color is both mapped inside aes() and set to 'black' as a
  # constant; the constant is expected to win, so the mapped label should have
  # no visible effect -- confirm.
  geom_freqpoly(data = wcov_new, stat = "identity",
              aes(y= dp_rel_max + 1 , x=window, color = "Library coverage, before treatment"), color='black', alpha = 0.4, size=0.25) +

  #data for "after", offset by one to create a circle
  # two baseline circles, one per entry of `colors`, at y = 1 and y = 1 + y_offset
  geom_segment(aes(y=1, yend=1, x=1, xend=genome_x), color=colors[1], size=1, alpha=0.8) +
  geom_segment(aes(y=1+y_offset, yend=1+y_offset, x=1, xend=genome_x), color=colors[2], size=1, alpha=0.8) +
  # one radial line per SNP row, reaching y = 1 + dp_rel_max_mean; rows of
  # sample '291_303' are shifted by x_offset along x and start y_offset higher
  geom_linerange(data = SNP_file_trimmed_dt,
    aes(
        x=POSITION+x_offset*(EXPERIMENT_SAMPLE_LABEL == '291_303'), 
        ymin=1+y_offset*(EXPERIMENT_SAMPLE_LABEL == '291_303'), 
        ymax = 1+dp_rel_max_mean, color=EXPERIMENT_SAMPLE_LABEL),
        stat = "identity", size = 0.25) +
    # NOTE(review): theme_minimal() is a complete theme and is added after
    # theme(text = ...), so the text size set here is presumably overridden --
    # confirm whether size 18 was meant to apply.
    theme(text = element_text(size=18)) +
    theme_minimal() +
    labs( y = "",
          x = "") +
  scale_x_continuous(limits=c(0,total_x),
                    breaks = genome_breaks,
                     labels= genome_labels) +
  scale_y_continuous(limits=c(0,2+y_offset),
                     expand=c(0,0),
                     breaks = c(1, 2),
                     labels = c('', '')) +
  # NOTE(review): `colors` has two entries, so this presumably expects exactly
  # two values of EXPERIMENT_SAMPLE_LABEL -- confirm against the data.
  scale_color_manual(values=colors) +
  
  # grey ticks and axis labels for y axis inside circle
  # (placed at x = total_x, every 0.2 from y = 1 to y = 2, labelled 0%..80%, 'Max')
  geom_segment(data=data.table(y_vals = 1+seq(0,1,0.2), x_vals=total_x), 
    aes(y=y_vals, yend=y_vals, x=x_vals-25000, xend=x_vals-1), color='grey50') +
  geom_text(data=data.table(y_vals = 1+seq(0,1,0.2), x_vals=total_x, lab=c(paste0(seq(0,80,20),'%'),'Max')), 
    aes(y=y_vals, x=x_vals-20000, label=lab), color='black', size=1.7, hjust=1.1) +

  # label for genome positions (text at y = 0.8, tick marks from y = 0.88 to 0.95)
  geom_text(data=tick_dt, 
    aes(y=0.8, x=x_vals, label=lab), color='black', size=1.7) +
  geom_linerange(data=tick_dt, aes(ymin=0.88, ymax=.95, x=x_vals), color='grey50') + 
    
  # strip the default axis text, ticks, lines, most grid lines and the legend;
  # the tick/label layers above take their place
  theme(
    #strip.background=element_blank(), strip.placement='outside',
    strip.text=element_text(color='black'), 
    # fig_font_size is not defined in this file -- presumably provided by the
    # sourced constants.R; verify
    axis.title.x=element_text(margin = margin(1.2*fig_font_size,0,0,0, "pt")), 
    panel.border=element_blank(),
    axis.text.x=element_blank(),
    axis.ticks=element_blank(),
    panel.grid.minor.x=element_blank(),
    panel.grid.minor.y=element_blank(),
    panel.grid.major.y=element_blank(),
    axis.line=element_blank(), #+#+#,
    legend.position = 'none') + 
  coord_polar()

# print the plot
all_polar_plot
```
```{r save_fig}
# Save the plot as a 5 x 5.5 inch PDF; the file name is a relative path.
# dpi and useDingbats are supplied exactly as before.
ggsave(
  filename = 'fig_5B_out.pdf',
  plot = all_polar_plot,
  width = 5,
  height = 5.5,
  units = 'in',
  dpi = 300,
  useDingbats = FALSE
)

```


# Optional: remove the unzipped depth files
```{r delete_unzipped_tlen_files}
# Delete the decompressed depth file of one sample.
#
# x: sample number; the file removed is <parent_folder>/s<x>_depths
# Returns the value of file.remove().
remove_dp <- function(x) {
  sample_tag <- as.character(x)
  unzipped_path <- paste0(parent_folder, '/', 's', sample_tag, '_depths')
  file.remove(unzipped_path)
}

remove_dp(4)
```