Skip to content

Commit

Permalink
small bugfixes (#2)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonas-fuchs authored Oct 9, 2023
1 parent 78af9a2 commit 947d51c
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 10 deletions.
2 changes: 1 addition & 1 deletion bamdash/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""interactively visualize coverage and tracks"""
_program = "bamdash"
__version__ = "0.1"
__version__ = "0.1.1"
Binary file modified bamdash/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
26 changes: 18 additions & 8 deletions bamdash/scripts/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,14 @@ def vcf_to_df(vcf_file, ref):
variant_dict["reference"].append(rec.ref)
variant_dict["mutation"].append(rec.alts[0])
# annotate the point mutation type
base_exchange = rec.ref + rec.alts[0]
base_exchange = f"{rec.ref}{rec.alts[0]}"
if len(base_exchange) == 2:
if base_exchange in ["AG", "GA", "CT", "TC"]:
variant_dict["point mutation type"] = "transition"
variant_dict["point mutation type"].append("TRANSITION")
else:
variant_dict["point mutation type"] = "transversion"
variant_dict["point mutation type"].append("TRANSVERSION")
else:
variant_dict["point mutation type"] = "none"
variant_dict["point mutation type"].append("NONE")

# get mutation type
if len(rec.alts[0]) > len(rec.ref):
Expand Down Expand Up @@ -289,6 +289,9 @@ def annotate_vcf_df(vcf_df, cds_dict, seq):
:return: annotated df
"""

# define the potential identifiers to look for
potential_cds_identifiers = ["protein_id", "gene", "locus_tag", "product"]

proteins, as_exchange, as_effect = [], [], []

for variant in vcf_df.iterrows():
Expand All @@ -297,22 +300,29 @@ def annotate_vcf_df(vcf_df, cds_dict, seq):
# check each cds for potential mutations
for cds in cds_dict:
# check if a protein identifier is present
if "protein_id" not in cds_dict[cds]:
if not any(identifier in potential_cds_identifiers for identifier in cds_dict[cds]):
continue
start, stop = cds_dict[cds]["start"], cds_dict[cds]["stop"]
# check if pos is in range
if pos not in range(start, stop):
continue
# now we know the protein
proteins_temp.append(cds_dict[cds]["protein_id"])
for identifier in potential_cds_identifiers:
if identifier in cds_dict[cds]:
proteins_temp.append(cds_dict[cds][identifier])
break
# at the moment only check SNPs
if mut_type != "SNP":
as_exchange_temp.append("AMBIG"), as_effect_temp.append(variant[1]["type"])
continue
# now we can analyse for a potential as exchange
strand, seq_mut, codon_start = cds_dict[cds]["strand"], str(seq), int(cds_dict[cds]["codon_start"])
if "codon_start" in cds_dict[cds]:
codon_start = int(cds_dict[cds]["codon_start"])
else:
codon_start = 1
strand, seq_mut = cds_dict[cds]["strand"], str(seq)
# mutate the sequence and get the CDS nt seq
seq_mut = Seq.Seq(seq_mut[:pos] + mut + seq_mut[pos+1:])
seq_mut = Seq.Seq(seq_mut[:pos-1] + mut + seq_mut[pos:])
seq_cds, seq_mut_cds = seq[start+codon_start-2:stop], seq_mut[start+codon_start-2:stop]
# translate strand depend
if strand == "+":
Expand Down
2 changes: 1 addition & 1 deletion bamdash/scripts/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def create_vcf_plot(fig, row, vcf_df):
if "AF" not in vcf_df:
fig.update_yaxes(visible=False, row=row, col=1)
else:
fig.update_yaxes(title_text="frequency", range=[0, 1], row=row, col=1)
fig.update_yaxes(title_text="frequency", range=[0, 1.15], row=row, col=1)


def create_track_plot(fig, row, feature_dict, box_size, box_alpha):
Expand Down

0 comments on commit 947d51c

Please sign in to comment.