Add new attribute 'gene_symbol', populated from 'gene' in the JSON file

cidgoh · Apr 30, 2024 · 82f2959 · 82f2959 · miseminger · Apr 30, 2024
1 parent e18dfa9
commit 82f2959
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 1 deletion.
diff --git a/bin/functions.py b/bin/functions.py
@@ -3,7 +3,7 @@
 import logging
 
 # standard variables used by all scripts
-empty_attributes = 'ID=;Name=;alias=;gene=;protein_name=;protein_symbol=;\
+empty_attributes = 'ID=;Name=;alias=;gene=;gene_symbol=;protein_name=;protein_symbol=;\
     protein_id=;alias_protein_id=;transcript_id=;ps_filter=;ps_exc=; \
     mat_pep=;mat_pep_desc=;mat_pep_acc=;ro=;ao=;dp=;sample_size=; \
     Reference_seq=;Variant_seq=;nt_name=;aa_name=;hgvs_nt=;hgvs_aa=;hgvs_alias=; \

diff --git a/bin/vcf2gvf.py b/bin/vcf2gvf.py
@@ -84,6 +84,7 @@ def vcftogvf(vcf, strain, GENE_PROTEIN_POSITIONS_DICT, sample_size):
     json_df = map_pos_to_gene_protein(
         vcf_df['POS'].astype(int), GENE_PROTEIN_POSITIONS_DICT)
     new_gvf["gene"] = json_df["gene"]
+    new_gvf["gene_symbol"] = json_df["gene"]
     new_gvf["protein_name"] = json_df["protein_name"]
     new_gvf["protein_symbol"] = json_df["protein_symbol"]
     new_gvf["protein_id"] = json_df["protein_id"]