Skip to content

Commit

Permalink
Allows CDS (as well as gene) features to create new gene references f…
Browse files Browse the repository at this point in the history
…iles #55
  • Loading branch information
j23414 authored Mar 12, 2024
2 parents b9f4f4f + 8cd6a13 commit 1df2377
Showing 1 changed file with 10 additions and 1 deletion.
11 changes: 10 additions & 1 deletion scripts/newreference.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,27 @@
from Bio.SeqFeature import SeqFeature, FeatureLocation, Seq
import shutil
import argparse
import sys

def new_reference(referencefile, outgenbank, outfasta, gene):
ref = SeqIO.read(referencefile, "genbank")
startofgene = None
endofgene = None
for feature in ref.features:
if feature.type == 'source':
ref_source_feature = feature
if feature.type =='gene':
if feature.type =='gene' or feature.type == 'CDS':
a = list(feature.qualifiers.items())[0][-1][0]
if a == gene:
startofgene = int(list(feature.location)[0])
endofgene = int(list(feature.location)[-1])+1

# If user provides a --gene 'some name' is not found, print a warning and use the entire genome.
# Otherwise do not print a warning.
if(gene is not None and startofgene is None and endofgene is None):
print(f"ERROR: No '{gene}' was found under 'gene' or 'CDS' features in the GenBank file.", file=sys.stderr)
sys.exit(1)

record = ref[startofgene:endofgene]
source_feature = SeqFeature(FeatureLocation(start=0, end=len(record)), type='source',
qualifiers=ref_source_feature.qualifiers)
Expand Down

0 comments on commit 1df2377

Please sign in to comment.