This repository has been archived by the owner on Sep 4, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
5_edge_overlap.R
150 lines (112 loc) · 5.2 KB
/
5_edge_overlap.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# This script calculates the edge overlap between the
# four tissue GRNs and PlantRegMap. These numbers are used for the Venn diagram.
# It saves the top 1 million edge names from each of the four tissues,
# and all edge names from PlantRegMap.
# In Part II, it also compares the tissue GRNs with the Briggs atlas RNA/protein GRNs.
# File in: 1. Four top 1 million edges:
# (ll_leaf, ll_root, ll_sam, ll_seed)
# 2. Network file from PlantRegMap. plantReg
# 3. Three Briggs GRNs (part II)
# File out: 1. Top 1 million edges, each line as string.
# edge_top1M_five_ntwk.RData: edge_leaf, edge_root, edge_sam,
# edge_seed, edge_plantReg.
# 2. A table for edges shared by four tissue GRNs:
# overlap_four_2679.txt
# 3. Other numbers were recorded after running the script.
################################################################################
# Load packages and data.
# Every path below is relative, so switch to the project directory first.
setwd(dir = "~/projects/NTWK/tissue_ntwk/")
# Restore the saved link lists; ll_leaf, ll_root, ll_sam and ll_seed are used
# further down and are expected to come from this file.
load(file = "data/right_seed/link_list_four_tissue_1million.RData")
library(tidyverse)
library(stringr)
# Import the PlantRegMap regulation table. The five column names are supplied
# explicitly rather than taken from the file.
plantReg <- read_tsv(
  file = "regulation_merged_Zma.txt",
  col_names = c("regulator", "direction", "target", "species", "type")
)
# Quick look at the first rows to check the parse.
head(plantReg)
################################################################################
# PART 1. CALCULATE EDGES OVERLAPS
################################################################################
# Numbers were used to draw Venn diagram.
# Build the edge identifiers used by every overlap below: one string per edge,
# "<regulator>+<target>", sorted, kept as a one-column table `name`.
# The recipe is identical for all five networks, so it lives in one helper
# instead of five copies of the same pipeline.

# Turn a link table into a sorted one-column table of edge names.
#   links:         data frame / tibble with one row per edge.
#   regulator_col: name (string) of the column holding the regulator.
#   target_col:    name (string) of the column holding the target.
# Returns `links` reduced to the single column `name`, ordered by `name`.
make_edge_names <- function(links, regulator_col, target_col) {
  links %>%
    # .data[[...]] looks the column up by its string name inside the data.
    mutate(name = paste(.data[[regulator_col]], .data[[target_col]],
                        sep = "+")) %>%
    arrange(name) %>%
    select(name)
}

# PlantRegMap: columns are the names assigned when the file was read.
edge_plantReg <- make_edge_names(plantReg, "regulator", "target")

# Tissue GRNs: all four link lists use regulatory.gene / target.gene.
edge_leaf <- make_edge_names(ll_leaf, "regulatory.gene", "target.gene")
edge_root <- make_edge_names(ll_root, "regulatory.gene", "target.gene")
edge_seed <- make_edge_names(ll_seed, "regulatory.gene", "target.gene")
edge_sam <- make_edge_names(ll_sam, "regulatory.gene", "target.gene")
## Persist the five edge tables together in a single RData file.
save(
  list = c("edge_plantReg", "edge_leaf", "edge_root", "edge_seed", "edge_sam"),
  file = "data/right_seed/edge_top1M_five_ntwk.RData"
)
# Venn diagram counts start here: reload the saved edge tables so this part
# can also be run on its own from the RData file.
load(file = "data/right_seed/edge_top1M_five_ntwk.RData")
# Pairwise overlaps: edges shared by each pair of tissue GRNs.
leaf_root <- intersect(edge_leaf[["name"]], edge_root[["name"]])
leaf_sam  <- intersect(edge_leaf[["name"]], edge_sam[["name"]])
leaf_seed <- intersect(edge_leaf[["name"]], edge_seed[["name"]])
root_seed <- intersect(edge_root[["name"]], edge_seed[["name"]])
root_sam  <- intersect(edge_root[["name"]], edge_sam[["name"]])
sam_seed  <- intersect(edge_sam[["name"]], edge_seed[["name"]])
# Three-way overlaps: edges shared by each triple of tissue GRNs. The sets are
# intersected left to right, two at a time.
leaf_root_sam <- intersect(
  intersect(edge_leaf[["name"]], edge_root[["name"]]),
  edge_sam[["name"]]
)
leaf_root_seed <- intersect(
  intersect(edge_leaf[["name"]], edge_root[["name"]]),
  edge_seed[["name"]]
)
leaf_seed_sam <- intersect(
  intersect(edge_leaf[["name"]], edge_seed[["name"]]),
  edge_sam[["name"]]
)
root_seed_sam <- intersect(
  intersect(edge_root[["name"]], edge_seed[["name"]]),
  edge_sam[["name"]]
)
# Four overlap: edges present in all four tissue GRNs.
four_overlap <- Reduce(intersect, list(edge_leaf$name, edge_root$name,
                                       edge_seed$name, edge_sam$name))
# Write the shared edges as unquoted, tab-separated text without row names.
# col.names keeps its default, so a header line is still written.
# TRUE/FALSE are spelled out: `T`/`F` are ordinary variables that can be
# reassigned, which would silently change these options.
write.table(four_overlap, file = "results/overlap_four_2679.txt",
            quote = FALSE, sep = "\t", row.names = FALSE)
###############################################################################
# PART 2. COMPARE WITH BRIGGS TOP 1MILLION NETWORKS
###############################################################################
# Source of the Briggs networks: https://goo.gl/9YYgBX.
# 1. Read the three Briggs top-1-million networks: protein only, RNA only,
#    and protein + RNA + phospho.
brig_protein <- read_tsv("data/briggs/briggs_protein_only.txt")
brig_rna     <- read_tsv("data/briggs/briggs_rna_only.txt")
brig_three   <- read_tsv("data/briggs/briggs_protein+rna+phospho.txt")
# 2. Get all edges
# Each Briggs table is reduced to a sorted one-column table of edge names,
# "<regulator>+<target>". The three networks go through one helper rather than
# three copies of the same pipeline.

# Turn a Briggs network table into a sorted one-column table of edge names.
#   ntwk: table with `regulator` and `target` columns, one row per edge.
# Returns `ntwk` reduced to the single column `name`, ordered by `name`.
brig_edge_names <- function(ntwk) {
  ntwk %>%
    mutate(name = paste(regulator, target, sep = "+")) %>%
    arrange(name) %>%
    select(name)
}

edge_brig_protein <- brig_edge_names(brig_protein)
edge_brig_rna <- brig_edge_names(brig_rna)
edge_brig_three <- brig_edge_names(brig_three)
# Number of edges shared by the Briggs protein-only and RNA-only networks.
# Recorded result: 48574
length(intersect(edge_brig_protein[["name"]], edge_brig_rna[["name"]]))
# Number of leaf GRN edges also found in each Briggs network.
# Recorded results: protein 18314, RNA 29040, protein+RNA+phospho 19004
length(intersect(edge_leaf[["name"]], edge_brig_protein[["name"]]))
length(intersect(edge_leaf[["name"]], edge_brig_rna[["name"]]))
length(intersect(edge_leaf[["name"]], edge_brig_three[["name"]]))
# Regulators (TFs) of the leaf GRN that also appear as regulators in each of
# the Briggs networks.
brig_protein_leaf <- intersect(
  ll_leaf[["regulatory.gene"]],
  brig_protein[["regulator"]]
)
brig_rna_leaf <- intersect(
  ll_leaf[["regulatory.gene"]],
  brig_rna[["regulator"]]
)
brig_three_leaf <- intersect(
  ll_leaf[["regulatory.gene"]],
  brig_three[["regulator"]]
)
# Clean everything
# rm(list = ls()) removes every object that ls() lists in the current
# environment (names starting with a dot are not listed by default); gc() then
# asks R to run a garbage collection.
# NOTE(review): this also wipes objects that existed before the script was run
# in the same environment, not only the ones created here — confirm that is
# intended before sourcing this file from another session or script.
rm(list = ls())
gc()