forked from mpg-age-bioinformatics/htseq-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
getChromosome
executable file
·47 lines (43 loc) · 1.57 KB
/
getChromosome
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python
import os
import argparse
import sys
# Command-line interface: which multi-FASTA file to read, where to write the
# result, and which record to extract.
parser = argparse.ArgumentParser(
    description="Extract a single chromosome from a multi-FASTA file.")
# Every option is required. Without that, a missing option would be turned
# into the literal string "None" further down and silently used as a path or
# chromosome name.
parser.add_argument("-i", "--multifasta", required=True,
                    help="multifasta file")
parser.add_argument("-o", "--output_folder", required=True,
                    help="Output folder")
parser.add_argument("-c", "--chromosome", required=True,
                    help="Name of the chromosome (FASTA record ID) to extract")
args = parser.parse_args()

# Absolute path of the input multi-FASTA file.
multifasta = os.path.realpath(args.multifasta)

# Create the output folder on first use.
if not os.path.exists(args.output_folder):
    os.makedirs(args.output_folder)

# write_fasta builds the output path by plain concatenation, so the folder
# must carry its trailing separator.
output = os.path.realpath(args.output_folder) + "/"

# Record ID to look for in the multi-FASTA headers.
c = args.chromosome
def write_fasta(chrom,fasta_in,output_folder):
with open(fasta_in, 'r+') as f:
sys.stdout.flush()
lines = f.readlines()
for i in range(0, len(lines)):
line = lines[i]
if line[0] == ">":
print line
sys.stdout.flush()
fChr=line.split(" ")[0]
fChr=fChr[1:]
if fChr == chrom:
print line
sys.stdout.flush()
seq=str("")
s=i
code=['N','A','C','T','G']
firstbase=lines[s+1][0]
f2=open(output_folder+chrom+".fa",'w')
f2.write(line)
while firstbase in code:
s=s + 1
seq=seq+lines[s]
firstbase=lines[s+1][0]
f2.write(seq)
print seq
sys.stdout.flush()
f2.close
# Script entry point: extract the requested chromosome from the multi-FASTA
# file into the output folder.
write_fasta(chrom=c, fasta_in=multifasta, output_folder=output)