Skip to content

Commit

Permalink
fix issue #26
Browse files Browse the repository at this point in the history
  • Loading branch information
kepbod committed Dec 31, 2018
1 parent de3102d commit 0aa723f
Show file tree
Hide file tree
Showing 5 changed files with 182 additions and 172 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 2.3.5 (2018-12-30)

Improvements:
Using the temporary file to allow to run CIRCexplorer2 in the same directory simultaneously (issue #26). Thanks @egaffo. Happy New Year!

## 2.3.4 (2018-10-26)

Bugfixes:
Expand Down
169 changes: 86 additions & 83 deletions circ2/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .parser import parse_ref, parse_bed, check_fasta
from .helper import logger, map_fusion_to_iso, fix_bed, generate_bed
from collections import defaultdict
import tempfile

__author__ = [
'Xiao-Ou Zhang ([email protected])',
Expand All @@ -30,15 +31,17 @@

@logger
def annotate(options):
# create temporary annotated fusion file
fusion_tmp = tempfile.TemporaryFile(mode='w+')
# annotate fusion junctions
annotate_fusion(options['--ref'], options['--bed'],
annotate_fusion(options['--ref'], options['--bed'], fusion_tmp,
secondary_flag=options['--low-confidence'])
# fix fusion juncrions
fix_fusion(options['--ref'], options['--genome'], options['--output'],
fix_fusion(options['--ref'], options['--genome'], fusion_tmp, options['--output'],
options['--no-fix'], secondary_flag=options['--low-confidence'])


def annotate_fusion(ref_f, junc_bed, secondary_flag=0, denovo_flag=0):
def annotate_fusion(ref_f, junc_bed, fusion_tmp, secondary_flag=0, denovo_flag=0):
"""
Align fusion juncrions to gene annotations
"""
Expand All @@ -48,96 +51,96 @@ def annotate_fusion(ref_f, junc_bed, secondary_flag=0, denovo_flag=0):
fusion_bed = junc_bed
fusions, fusion_index = parse_bed(fusion_bed) # fusion junctions
total = set()
annotated_fusion_f = 'annotated_fusion.txt.tmp'
with open(annotated_fusion_f, 'w') as outf:
for chrom in chrom_info:
# overlap gene annotations with fusion juncrions
result = []
# overlap genes
if chrom in genes:
result += Interval.overlapwith(genes[chrom].interval,
fusions[chrom])
# overlap novel genes in denovo mode
if denovo_flag and chrom in novel_genes:
result += Interval.overlapwith(novel_genes[chrom].interval,
fusions[chrom])
for itl in result:
# extract gene annotations
iso = list([x for x in itl[2:] if x.startswith('iso')])
# for each overlapped fusion junction
for fus in itl[(2 + len(iso)):]:
reads = fus.split()[1]
fus_start, fus_end = fusion_index[fus]
fus_loc = '%s\t%d\t%d\tFUSIONJUNC/%s' % (chrom, fus_start,
fus_end, reads)
edge_annotations = [] # first or last exon flag
secondary_exon = defaultdict(dict) # secondary exons
annotate_flag = 0
for iso_id in iso:
g, i, c, s = iso_id.split()[1:]
start = gene_info[iso_id][0][0]
end = gene_info[iso_id][-1][-1]
# fusion junction excesses boundaries of gene
# annotation
if fus_start < start - 10 or fus_end > end + 10:
if not secondary_flag:
continue
(fusion_info,
index,
edge,
secondary) = map_fusion_to_iso(fus_start,
fus_end, s,
gene_info[iso_id])
if fusion_info:
annotate_flag += 1
bed_info = '\t'.join([fus_loc, '0', s,
str(fus_start),
str(fus_start), '0,0,0'])
bed = '\t'.join([bed_info, fusion_info, g, i,
index])
if not edge: # not first or last exon
outf.write(bed + '\n')
total.add(fus)
else: # first or last exon
edge_annotations.append(bed)
elif secondary_flag and secondary is not None:
li, ri = secondary
gene = ':'.join([g, s])
if li is not None:
li = str(li)
secondary_exon['left'][gene] = ':'.join([i,
li])
if ri is not None:
ri = str(ri)
secondary_exon['right'][gene] = ':'.join([i,
ri])
if edge_annotations:
for bed in edge_annotations:
outf = fusion_tmp
for chrom in chrom_info:
# overlap gene annotations with fusion juncrions
result = []
# overlap genes
if chrom in genes:
result += Interval.overlapwith(genes[chrom].interval,
fusions[chrom])
# overlap novel genes in denovo mode
if denovo_flag and chrom in novel_genes:
result += Interval.overlapwith(novel_genes[chrom].interval,
fusions[chrom])
for itl in result:
# extract gene annotations
iso = list([x for x in itl[2:] if x.startswith('iso')])
# for each overlapped fusion junction
for fus in itl[(2 + len(iso)):]:
reads = fus.split()[1]
fus_start, fus_end = fusion_index[fus]
fus_loc = '%s\t%d\t%d\tFUSIONJUNC/%s' % (chrom, fus_start,
fus_end, reads)
edge_annotations = [] # first or last exon flag
secondary_exon = defaultdict(dict) # secondary exons
annotate_flag = 0
for iso_id in iso:
g, i, c, s = iso_id.split()[1:]
start = gene_info[iso_id][0][0]
end = gene_info[iso_id][-1][-1]
# fusion junction excesses boundaries of gene
# annotation
if fus_start < start - 10 or fus_end > end + 10:
if not secondary_flag:
continue
(fusion_info,
index,
edge,
secondary) = map_fusion_to_iso(fus_start,
fus_end, s,
gene_info[iso_id])
if fusion_info:
annotate_flag += 1
bed_info = '\t'.join([fus_loc, '0', s,
str(fus_start),
str(fus_start), '0,0,0'])
bed = '\t'.join([bed_info, fusion_info, g, i,
index])
if not edge: # not first or last exon
outf.write(bed + '\n')
total.add(fus)
if secondary_flag and not annotate_flag:
for gene in secondary_exon['left']:
if gene in secondary_exon['right']:
left = secondary_exon['left'][gene]
right = secondary_exon['right'][gene]
g, s = gene.split(':')
# for avoid dup, use fus_loc_new
fus_loc_new = fus_loc + '\t0\t%s' % s
outf.write('%s\t%s:%s\t%s:%s\n' % (fus_loc_new,
g, left, g,
right))
total.add(fus)
else: # first or last exon
edge_annotations.append(bed)
elif secondary_flag and secondary is not None:
li, ri = secondary
gene = ':'.join([g, s])
if li is not None:
li = str(li)
secondary_exon['left'][gene] = ':'.join([i,
li])
if ri is not None:
ri = str(ri)
secondary_exon['right'][gene] = ':'.join([i,
ri])
if edge_annotations:
for bed in edge_annotations:
outf.write(bed + '\n')
total.add(fus)
if secondary_flag and not annotate_flag:
for gene in secondary_exon['left']:
if gene in secondary_exon['right']:
left = secondary_exon['left'][gene]
right = secondary_exon['right'][gene]
g, s = gene.split(':')
# for avoid dup, use fus_loc_new
fus_loc_new = fus_loc + '\t0\t%s' % s
outf.write('%s\t%s:%s\t%s:%s\n' % (fus_loc_new,
g, left, g,
right))
print('Annotated %d fusion junctions!' % len(total))


def fix_fusion(ref_f, genome_fa, out_file, no_fix, secondary_flag=0,
def fix_fusion(ref_f, genome_fa, fusion_tmp, out_file, no_fix, secondary_flag=0,
denovo_flag=0):
"""
Realign fusion juncrions
"""
print('Start to fix fusion junctions...')
fa = check_fasta(genome_fa)
ref = parse_ref(ref_f, 2)
annotated_fusion_f = 'annotated_fusion.txt.tmp'
annotated_fusion_f = fusion_tmp
annotated_fusion_f.seek(0) ## from the start
fusions, fusion_names, fixed_flag = fix_bed(annotated_fusion_f, ref, fa,
no_fix, denovo_flag)
total = 0
Expand Down Expand Up @@ -216,6 +219,6 @@ def fix_fusion(ref_f, genome_fa, out_file, no_fix, secondary_flag=0,
if secondary_flag:
secondary_f.close()

os.remove('annotated_fusion.txt.tmp')
fusion_tmp.close()

print('Fixed %d fusion junctions!' % total)
7 changes: 5 additions & 2 deletions circ2/denovo.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import pysam
from scipy.stats import fisher_exact, binom
from .genomic_interval import Interval
import tempfile

__author__ = [
'Xiao-Ou Zhang ([email protected])',
Expand Down Expand Up @@ -66,11 +67,13 @@ def denovo(options):
print('Warning: no cufflinks directory %s!' % options['--cuff'])
print('Please run CIRCexplorer2 assembly before this step!')
ref_path = options['--ref']
# create temporary annotated fusion file
fusion_tmp = tempfile.TemporaryFile(mode='w+')
# annotate fusion junctions
annotate_fusion(ref_path, options['--bed'], denovo_flag=1)
annotate_fusion(ref_path, options['--bed'], fusion_tmp, denovo_flag=1)
# fix fusion juncrions
out_f = '%s/circularRNA_full.txt' % denovo_dir
fix_fusion(ref_path, options['--genome'], out_f,
fix_fusion(ref_path, options['--genome'], fusion_tmp, out_f,
options['--no-fix'], denovo_flag=1)
# extract novel circRNAs
extract_novel_circ(denovo_dir, options['--ref'])
Expand Down
Loading

0 comments on commit 0aa723f

Please sign in to comment.