From 84fe73d06a41be6e3364daa8798cdfb6561727bf Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 10 Jan 2024 18:16:05 -0500 Subject: [PATCH 1/8] build: update cool-seq-tool + ga4gh.vrs versions --- Pipfile | 4 +- setup.cfg | 4 +- tests/fixtures/validators.yml | 2 +- tests/test_normalize.py | 4 -- .../test_amplification_to_cx_var.py | 2 +- variation/gnomad_vcf_to_protein_variation.py | 27 ++++++------- variation/hgvs_dup_del_mode.py | 7 ++-- variation/normalize.py | 6 +-- variation/to_copy_number_variation.py | 6 +-- variation/translate.py | 8 ++-- variation/translators/genomic_del_dup_base.py | 39 ++++++++++++------- variation/translators/genomic_delins.py | 24 +++++++----- variation/translators/genomic_insertion.py | 22 ++++++----- .../translators/genomic_reference_agree.py | 20 +++++----- variation/translators/genomic_substitution.py | 24 ++++++------ variation/translators/translator.py | 30 ++++++++------ variation/validate.py | 4 +- variation/validators/cdna_deletion.py | 20 +++++----- variation/validators/genomic_base.py | 6 +-- variation/validators/genomic_deletion.py | 8 ++-- variation/validators/genomic_delins.py | 4 +- variation/validators/genomic_insertion.py | 5 ++- .../validators/genomic_reference_agree.py | 3 +- variation/validators/genomic_substitution.py | 2 +- variation/validators/protein_deletion.py | 2 +- variation/validators/validator.py | 4 +- variation/vrs_representation.py | 17 +++++--- 27 files changed, 171 insertions(+), 133 deletions(-) diff --git a/Pipfile b/Pipfile index 5d5d2768..ea36fa69 100644 --- a/Pipfile +++ b/Pipfile @@ -20,8 +20,8 @@ black = "*" fastapi = "*" uvicorn = "*" pydantic = "==2.*" -"ga4gh.vrs" = {version = "~=2.0.0a1", extras = ["extras"]} +"ga4gh.vrs" = {version = "~=2.0.0a2", extras = ["extras"]} gene-normalizer = "~=0.3.0.dev1" boto3 = "*" -cool-seq-tool = "~=0.3.0.dev1" +cool-seq-tool = "~=0.4.0.dev1" bioutils = "*" diff --git a/setup.cfg b/setup.cfg index c132d067..edcda243 100644 --- a/setup.cfg +++ b/setup.cfg 
@@ -33,10 +33,10 @@ install_requires = fastapi uvicorn pydantic ==2.* - ga4gh.vrs[extras] ~= 2.0.0a1 + ga4gh.vrs[extras] ~= 2.0.0a2 gene-normalizer ~=0.3.0.dev1 boto3 - cool-seq-tool ~=0.3.0.dev1 + cool-seq-tool ~=0.4.0.dev1 bioutils tests_require = diff --git a/tests/fixtures/validators.yml b/tests/fixtures/validators.yml index 90674465..5b10f61b 100644 --- a/tests/fixtures/validators.yml +++ b/tests/fixtures/validators.yml @@ -116,6 +116,7 @@ genomic_delins: - query: X-70350063-AG-AGGCAGCGCATAAAGCGCATTCTCCG - query: 16-2138199-GTGAG-G - query: 1-55509715-AC-A + - query: chr6-31239170-C-CA should_not_match: - query: NC_000023.21:g.32386323delinsGA - query: NC_000007.13:g.159138664delinsAT @@ -197,7 +198,6 @@ genomic_insertion: - query: NC_000022.10:g.30051593_30051594insT - query: NC_000017.10:g.37880993_37880994insGCTTACGTGATG - query: ERBB2 g.37880993_37880994insGCTTACGTGATG - - query: chr6-31239170-C-CA should_not_match: - query: NC_000022.10:g.51304566_51304567insT - query: NC_000022.10:g.51304567_51304568insT diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 4fe49729..066da0ff 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -573,10 +573,6 @@ async def test_protein_substitution(test_handler, braf_v600e, dis3_p63a, tp53_g2 resp = await test_handler.normalize("DIS3 P63A") assertion_checks(resp, dis3_p63a) - # Case where NA priority - resp = await test_handler.normalize("TP53 G262C") - assertion_checks(resp, tp53_g262c) - @pytest.mark.asyncio async def test_polypeptide_truncation(test_handler, vhl): diff --git a/tests/to_copy_number_variation/test_amplification_to_cx_var.py b/tests/to_copy_number_variation/test_amplification_to_cx_var.py index 1e4a24b1..8f76374d 100644 --- a/tests/to_copy_number_variation/test_amplification_to_cx_var.py +++ b/tests/to_copy_number_variation/test_amplification_to_cx_var.py @@ -79,7 +79,7 @@ def test_amplification_to_cx_var( assert resp.copy_number_change is None assert 
resp.amplification_label == "BRAF Amplification" assert resp.warnings == [ - "End inter-residue coordinate (9955599320) is out of " "index on NC_000007.13" + "End inter-residue coordinate (9955599321) is out of index on NC_000007.13" ] # invalid gene diff --git a/variation/gnomad_vcf_to_protein_variation.py b/variation/gnomad_vcf_to_protein_variation.py index f6e2648b..e70cdfd6 100644 --- a/variation/gnomad_vcf_to_protein_variation.py +++ b/variation/gnomad_vcf_to_protein_variation.py @@ -4,11 +4,11 @@ from typing import Dict, List, Optional, Tuple from cool_seq_tool.handlers import SeqRepoAccess -from cool_seq_tool.mappers import MANETranscript +from cool_seq_tool.mappers import ManeTranscript from cool_seq_tool.schemas import ResidueMode from cool_seq_tool.sources import ( - MANETranscriptMappings, - UTADatabase, + ManeTranscriptMappings, + UtaDatabase, ) from variation.classify import Classify @@ -125,9 +125,9 @@ def __init__( classifier: Classify, validator: Validate, translator: Translate, - uta: UTADatabase, - mane_transcript: MANETranscript, - mane_transcript_mappings: MANETranscriptMappings, + uta: UtaDatabase, + mane_transcript: ManeTranscript, + mane_transcript_mappings: ManeTranscriptMappings, ) -> None: """Initialize the GnomadVcfToProteinVariation class @@ -401,7 +401,7 @@ async def gnomad_vcf_to_protein(self, q: str) -> NormalizeService: g_start_pos = classification_token.pos g_end_pos = classification_token.pos ref_seq, w = self.seqrepo_access.get_reference_sequence( - alt_ac, g_start_pos + alt_ac, start=g_start_pos, end=g_start_pos ) if not ref_seq: all_warnings.add(w) @@ -476,9 +476,9 @@ async def gnomad_vcf_to_protein(self, q: str) -> NormalizeService: current_mane_data, (mane_c_pos_change[0] + 1, mane_c_pos_change[1] + 1), ) - if mane_p["pos"][0] > mane_p["pos"][1]: - mane_p["pos"] = (mane_p["pos"][1], mane_p["pos"][0]) - p_ac = mane_p["refseq"] + if mane_p.pos[0] > mane_p.pos[1]: + mane_p.pos = (mane_p.pos[1], mane_p.pos[0]) + p_ac = 
mane_p.refseq aa_alt = self._get_gnomad_vcf_protein_alt( classification_token, alt_type, @@ -493,12 +493,13 @@ async def gnomad_vcf_to_protein(self, q: str) -> NormalizeService: # mane_p is 0-based, but to_vrs allele takes 1-based variation = self.to_vrs_allele( p_ac, - mane_p["pos"][0], - mane_p["pos"][1], + mane_p.pos[0], + mane_p.pos[1], "p", alt_type, [], alt=aa_alt, + residue_mode=ResidueMode.INTER_RESIDUE, ) if variation: translation_result = TranslationResult( @@ -508,7 +509,7 @@ async def gnomad_vcf_to_protein(self, q: str) -> NormalizeService: tr_copy = deepcopy(translation_result) tr_copy.vrs_seq_loc_ac = p_ac - tr_copy.vrs_seq_loc_ac_status = mane_p["status"] + tr_copy.vrs_seq_loc_ac_status = mane_p.status try: vrs_variation = tr_copy.vrs_variation diff --git a/variation/hgvs_dup_del_mode.py b/variation/hgvs_dup_del_mode.py index 0a324118..c7df9e1c 100644 --- a/variation/hgvs_dup_del_mode.py +++ b/variation/hgvs_dup_del_mode.py @@ -2,6 +2,7 @@ from typing import Dict, List, Optional, Union from cool_seq_tool.handlers import SeqRepoAccess +from cool_seq_tool.schemas import ResidueMode from ga4gh.core import ga4gh_identify from ga4gh.vrs import models, normalize @@ -138,11 +139,11 @@ def allele_mode( return None if alt_type == AltType.DUPLICATION: - # start is start - 1, end is end ref, _ = self.seqrepo_access.get_reference_sequence( vrs_seq_loc_ac, - location["start"] + 1, - location["end"] + 1, + start=location["start"], + end=location["end"], + residue_mode=ResidueMode.INTER_RESIDUE, ) if ref: diff --git a/variation/normalize.py b/variation/normalize.py index b8c6c4b2..5e350895 100644 --- a/variation/normalize.py +++ b/variation/normalize.py @@ -4,7 +4,7 @@ from urllib.parse import unquote from cool_seq_tool.handlers import SeqRepoAccess -from cool_seq_tool.sources import UTADatabase +from cool_seq_tool.sources import UtaDatabase from ga4gh.vrs import models from variation.classify import Classify @@ -38,7 +38,7 @@ def __init__( classifier: Classify, 
validator: Validate, translator: Translate, - uta: UTADatabase, + uta: UtaDatabase, ) -> None: """Initialize Normalize class. @@ -47,7 +47,7 @@ def __init__( :param classifier: Classifier class for classifying tokens :param validator: Validator class for validating valid inputs :param translator: Translating valid inputs - :param UTADatabase uta: Access to db containing alignment data + :param UtaDatabase uta: Access to db containing alignment data """ super().__init__( seqrepo_access, diff --git a/variation/to_copy_number_variation.py b/variation/to_copy_number_variation.py index ac368642..3c71f393 100644 --- a/variation/to_copy_number_variation.py +++ b/variation/to_copy_number_variation.py @@ -4,7 +4,7 @@ from urllib.parse import unquote from cool_seq_tool.handlers import SeqRepoAccess -from cool_seq_tool.sources import UTADatabase +from cool_seq_tool.sources import UtaDatabase from ga4gh.core import ga4gh_identify from ga4gh.vrs import models from gene.query import QueryHandler as GeneQueryHandler @@ -80,7 +80,7 @@ def __init__( validator: Validate, translator: Translate, gene_normalizer: GeneQueryHandler, - uta: UTADatabase, + uta: UtaDatabase, ) -> None: """Initialize theToCopyNumberVariation class @@ -673,7 +673,7 @@ def amplification_to_cx_var( else: # Validate start/end are actually on the sequence _, w = self.seqrepo_access.get_reference_sequence( - sequence_id, start, end + sequence_id, start=start, end=end ) if w: warnings.append(w) diff --git a/variation/translate.py b/variation/translate.py index ef1efeac..789d6f37 100644 --- a/variation/translate.py +++ b/variation/translate.py @@ -2,8 +2,8 @@ from typing import List, Optional from cool_seq_tool.handlers import SeqRepoAccess -from cool_seq_tool.mappers import MANETranscript -from cool_seq_tool.sources import UTADatabase +from cool_seq_tool.mappers import ManeTranscript +from cool_seq_tool.sources import UtaDatabase from ga4gh.vrs import models from variation.hgvs_dup_del_mode import HGVSDupDelMode @@ 
-43,8 +43,8 @@ class Translate: def __init__( self, seqrepo_access: SeqRepoAccess, - mane_transcript: MANETranscript, - uta: UTADatabase, + mane_transcript: ManeTranscript, + uta: UtaDatabase, vrs: VRSRepresentation, hgvs_dup_del_mode: HGVSDupDelMode, ) -> None: diff --git a/variation/translators/genomic_del_dup_base.py b/variation/translators/genomic_del_dup_base.py index e7a756c0..3103823e 100644 --- a/variation/translators/genomic_del_dup_base.py +++ b/variation/translators/genomic_del_dup_base.py @@ -109,6 +109,7 @@ async def translate( grch38_data = None vrs_variation = None vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA + residue_mode = ResidueMode.RESIDUE if do_liftover or endpoint_name == Endpoint.NORMALIZE: errors = [] @@ -126,8 +127,12 @@ async def translate( warnings += errors return None - pos0 = grch38_data.pos0 - pos1 = grch38_data.pos1 + pos0 = grch38_data.pos0 - 1 + if grch38_data.pos1 is None: + pos1 = grch38_data.pos0 + else: + pos1 = grch38_data.pos1 + residue_mode = ResidueMode.INTER_RESIDUE ac = grch38_data.ac if alt_type == AltType.DELETION: @@ -135,9 +140,10 @@ async def translate( ref = classification.matching_tokens[0].ref invalid_ref_msg = self.validate_reference_sequence( ac, - pos0 - 1, - pos0 - 1 + len(ref), + pos0, + pos0 + (len(ref) - 1), ref, + residue_mode=residue_mode, ) if invalid_ref_msg: warnings.append(invalid_ref_msg) @@ -146,6 +152,7 @@ async def translate( pos0 = classification.pos0 pos1 = classification.pos1 ac = validation_result.accession + grch38_data = DelDupData(ac=ac, pos0=pos0, pos1=pos1) assembly = ClinVarAssembly.GRCH38 else: @@ -168,10 +175,13 @@ async def translate( warnings += errors return None - pos0 = grch38_data.pos0 + ac = grch38_data.ac + pos0 = grch38_data.pos0 - 1 + if grch38_data.pos1 is None: + pos1 = grch38_data.pos0 + else: pos1 = grch38_data.pos1 - ac = grch38_data.ac - + residue_mode = ResidueMode.INTER_RESIDUE self.is_valid(classification.gene_token, ac, pos0, pos1, errors) if errors: @@ -181,10 
+191,10 @@ async def translate( mane = await self.mane_transcript.get_mane_transcript( ac, pos0, + pos1, "g", - end_pos=pos1, try_longest_compatible=True, - residue_mode=ResidueMode.RESIDUE, + residue_mode=residue_mode, gene=classification.gene_token.token if classification.gene_token else None, @@ -192,10 +202,11 @@ async def translate( if mane: # mane is 0 - based, but we are using residue - ac = mane["refseq"] - vrs_seq_loc_ac_status = mane["status"] - pos0 = mane["pos"][0] + mane["coding_start_site"] + 1 - pos1 = mane["pos"][1] + mane["coding_start_site"] + 1 + ac = mane.refseq + vrs_seq_loc_ac_status = mane.status + pos0 = mane.pos[0] + mane.coding_start_site + pos1 = mane.pos[1] + mane.coding_start_site + residue_mode = ResidueMode.INTER_RESIDUE else: return None @@ -209,7 +220,7 @@ async def translate( if alt_type == AltType.INSERTION: alt = classification.inserted_sequence - start = pos0 - 1 + start = pos0 if residue_mode == ResidueMode.INTER_RESIDUE else pos0 - 1 end = pos1 if pos1 else pos0 refget_accession = get_refget_accession(self.seqrepo_access, ac, warnings) diff --git a/variation/translators/genomic_delins.py b/variation/translators/genomic_delins.py index 15567348..a9934b78 100644 --- a/variation/translators/genomic_delins.py +++ b/variation/translators/genomic_delins.py @@ -66,40 +66,43 @@ async def translate( mane = await self.mane_transcript.get_mane_transcript( validation_result.accession, classification.pos0, + classification.pos1 + if classification.pos1 is not None + else classification.pos0, AnnotationLayer.GENOMIC, - end_pos=classification.pos1, try_longest_compatible=True, - residue_mode=ResidueMode.RESIDUE.value, + residue_mode=ResidueMode.RESIDUE, gene=gene, ) if mane: - vrs_seq_loc_ac_status = mane["status"] + vrs_seq_loc_ac_status = mane.status if gene: classification = CdnaDelInsClassification( matching_tokens=classification.matching_tokens, nomenclature=classification.nomenclature, gene_token=classification.gene_token, - 
pos0=mane["pos"][0] + 1, - pos1=mane["pos"][1] + 1, + pos0=mane.pos[0] + 1, # 1-based for classification + pos1=mane.pos[1] + 1, # 1-based for classification inserted_sequence=classification.inserted_sequence, ) - vrs_seq_loc_ac = mane["refseq"] + vrs_seq_loc_ac = mane.refseq coord_type = AnnotationLayer.CDNA validation_result.classification = classification else: - vrs_seq_loc_ac = mane["alt_ac"] + vrs_seq_loc_ac = mane.alt_ac coord_type = AnnotationLayer.GENOMIC vrs_allele = self.vrs.to_vrs_allele( vrs_seq_loc_ac, - mane["pos"][0] + 1, - mane["pos"][1] + 1, + mane.pos[0], + mane.pos[1], coord_type, AltType.DELINS, warnings, alt=classification.inserted_sequence, - cds_start=mane["coding_start_site"] if gene else None, + cds_start=mane.coding_start_site if gene else None, + residue_mode=ResidueMode.INTER_RESIDUE, ) else: vrs_seq_loc_ac = validation_result.accession @@ -111,6 +114,7 @@ async def translate( AltType.DELINS, warnings, alt=classification.inserted_sequence, + residue_mode=ResidueMode.RESIDUE, ) if vrs_allele and vrs_seq_loc_ac: diff --git a/variation/translators/genomic_insertion.py b/variation/translators/genomic_insertion.py index e9ef180e..eec2b556 100644 --- a/variation/translators/genomic_insertion.py +++ b/variation/translators/genomic_insertion.py @@ -69,40 +69,41 @@ async def translate( mane = await self.mane_transcript.get_mane_transcript( validation_result.accession, classification.pos0, + classification.pos1, AnnotationLayer.GENOMIC, - end_pos=classification.pos1, try_longest_compatible=True, - residue_mode=ResidueMode.RESIDUE.value, + residue_mode=ResidueMode.RESIDUE, gene=gene, ) if mane: - vrs_seq_loc_ac_status = mane["status"] + vrs_seq_loc_ac_status = mane.status if gene: classification = CdnaInsertionClassification( matching_tokens=classification.matching_tokens, nomenclature=classification.nomenclature, gene_token=classification.gene_token, - pos0=mane["pos"][0] + 1, - pos1=mane["pos"][1] + 1, + pos0=mane.pos[0] + 1, # 1-based for 
classification + pos1=mane.pos[1] + 1, # 1-based for classification inserted_sequence=classification.inserted_sequence, ) - vrs_seq_loc_ac = mane["refseq"] + vrs_seq_loc_ac = mane.refseq coord_type = AnnotationLayer.CDNA validation_result.classification = classification else: - vrs_seq_loc_ac = mane["alt_ac"] + vrs_seq_loc_ac = mane.alt_ac coord_type = AnnotationLayer.GENOMIC vrs_allele = self.vrs.to_vrs_allele( vrs_seq_loc_ac, - mane["pos"][0] + 1, - mane["pos"][1] + 1, + mane.pos[0], + mane.pos[1], coord_type, AltType.INSERTION, warnings, alt=classification.inserted_sequence, - cds_start=mane["coding_start_site"] if gene else None, + cds_start=mane.coding_start_site if gene else None, + residue_mode=ResidueMode.INTER_RESIDUE, ) else: vrs_seq_loc_ac = validation_result.accession @@ -114,6 +115,7 @@ async def translate( AltType.INSERTION, warnings, alt=classification.inserted_sequence, + residue_mode=ResidueMode.RESIDUE, ) if vrs_allele and vrs_seq_loc_ac: diff --git a/variation/translators/genomic_reference_agree.py b/variation/translators/genomic_reference_agree.py index d5aec47c..d4719993 100644 --- a/variation/translators/genomic_reference_agree.py +++ b/variation/translators/genomic_reference_agree.py @@ -68,38 +68,39 @@ async def translate( mane = await self.mane_transcript.get_mane_transcript( validation_result.accession, classification.pos, + classification.pos, AnnotationLayer.GENOMIC, - end_pos=classification.pos, try_longest_compatible=True, - residue_mode=ResidueMode.RESIDUE.value, + residue_mode=ResidueMode.RESIDUE, gene=gene, ) if mane: - vrs_seq_loc_ac_status = mane["status"] + vrs_seq_loc_ac_status = mane.status if gene: classification = CdnaReferenceAgreeClassification( matching_tokens=classification.matching_tokens, nomenclature=classification.nomenclature, gene_token=classification.gene_token, - pos=mane["pos"][0] + 1, + pos=mane.pos[0] + 1, # 1-based for classification ) - vrs_seq_loc_ac = mane["refseq"] + vrs_seq_loc_ac = mane.refseq coord_type 
= AnnotationLayer.CDNA validation_result.classification = classification else: - vrs_seq_loc_ac = mane["alt_ac"] + vrs_seq_loc_ac = mane.alt_ac coord_type = AnnotationLayer.GENOMIC vrs_allele = self.vrs.to_vrs_allele( vrs_seq_loc_ac, - mane["pos"][0] + 1, - mane["pos"][1] + 1, + mane.pos[0], + mane.pos[1], coord_type, AltType.REFERENCE_AGREE, warnings, - cds_start=mane["coding_start_site"] if gene else None, + cds_start=mane.coding_start_site if gene else None, + residue_mode=ResidueMode.INTER_RESIDUE, ) else: vrs_seq_loc_ac = validation_result.accession @@ -110,6 +111,7 @@ async def translate( AnnotationLayer.GENOMIC, AltType.REFERENCE_AGREE, warnings, + residue_mode=ResidueMode.RESIDUE, ) if vrs_allele and vrs_seq_loc_ac: diff --git a/variation/translators/genomic_substitution.py b/variation/translators/genomic_substitution.py index da943eba..789c6015 100644 --- a/variation/translators/genomic_substitution.py +++ b/variation/translators/genomic_substitution.py @@ -1,7 +1,7 @@ """Module for Genomic Substitution Translation.""" from typing import List, Optional -from cool_seq_tool.schemas import AnnotationLayer, ResidueMode +from cool_seq_tool.schemas import AnnotationLayer, ResidueMode, Strand from ga4gh.vrs import models from variation.schemas.app_schemas import Endpoint @@ -71,18 +71,18 @@ async def translate( mane = await self.mane_transcript.get_mane_transcript( validation_result.accession, classification.pos, + classification.pos, AnnotationLayer.GENOMIC, - end_pos=classification.pos, try_longest_compatible=True, - residue_mode=ResidueMode.RESIDUE.value, + residue_mode=ResidueMode.RESIDUE, gene=gene, ) if mane: - vrs_seq_loc_ac_status = mane["status"] + vrs_seq_loc_ac_status = mane.status if gene: - if mane["strand"] == "-": + if mane.strand == Strand.NEGATIVE: ref_rev = classification.ref[::-1] alt_rev = classification.alt[::-1] @@ -103,26 +103,27 @@ async def translate( matching_tokens=classification.matching_tokens, 
nomenclature=classification.nomenclature, gene_token=classification.gene_token, - pos=mane["pos"][0] + 1, + pos=mane.pos[0] + 1, # 1-based for classification ref=ref, alt=alt, ) - vrs_seq_loc_ac = mane["refseq"] + vrs_seq_loc_ac = mane.refseq coord_type = AnnotationLayer.CDNA validation_result.classification = classification else: - vrs_seq_loc_ac = mane["alt_ac"] + vrs_seq_loc_ac = mane.alt_ac coord_type = AnnotationLayer.GENOMIC vrs_allele = self.vrs.to_vrs_allele( vrs_seq_loc_ac, - mane["pos"][0] + 1, - mane["pos"][1] + 1, + mane.pos[0], + mane.pos[1], coord_type, AltType.SUBSTITUTION, errors, alt=classification.alt, - cds_start=mane["coding_start_site"] if gene else None, + cds_start=mane.coding_start_site if gene else None, + residue_mode=ResidueMode.INTER_RESIDUE, ) else: vrs_seq_loc_ac = validation_result.accession @@ -134,6 +135,7 @@ async def translate( AltType.SUBSTITUTION, errors, alt=classification.alt, + residue_mode=ResidueMode.RESIDUE, ) if vrs_allele and vrs_seq_loc_ac: diff --git a/variation/translators/translator.py b/variation/translators/translator.py index 6f5451c5..2f01c011 100644 --- a/variation/translators/translator.py +++ b/variation/translators/translator.py @@ -3,9 +3,9 @@ from typing import List, Optional, Union from cool_seq_tool.handlers import SeqRepoAccess -from cool_seq_tool.mappers import MANETranscript +from cool_seq_tool.mappers import ManeTranscript from cool_seq_tool.schemas import AnnotationLayer, ResidueMode -from cool_seq_tool.sources import UTADatabase +from cool_seq_tool.sources import UtaDatabase from ga4gh.vrs import models from variation.hgvs_dup_del_mode import HGVSDupDelMode @@ -28,8 +28,8 @@ class Translator(ABC): def __init__( self, seqrepo_access: SeqRepoAccess, - mane_transcript: MANETranscript, - uta: UTADatabase, + mane_transcript: ManeTranscript, + uta: UtaDatabase, vrs: VRSRepresentation, hgvs_dup_del_mode: HGVSDupDelMode, ) -> None: @@ -195,25 +195,32 @@ async def get_p_or_cdna_translation_result( mane = 
await self.mane_transcript.get_mane_transcript( validation_result.accession, start_pos, + end_pos if end_pos is not None else start_pos, coordinate_type, - end_pos=end_pos, try_longest_compatible=True, - residue_mode=ResidueMode.RESIDUE.value, + residue_mode=ResidueMode.RESIDUE, ref=ref, ) if mane: - vrs_seq_loc_ac = mane["refseq"] - vrs_seq_loc_ac_status = mane["status"] + vrs_seq_loc_ac = mane.refseq + vrs_seq_loc_ac_status = mane.status + + try: + cds_start = mane.coding_start_site + except AttributeError: + cds_start = None + vrs_allele = self.vrs.to_vrs_allele( vrs_seq_loc_ac, - mane["pos"][0] + 1, - mane["pos"][1] + 1, + mane.pos[0], + mane.pos[1], coordinate_type, alt_type, errors, - cds_start=mane.get("coding_start_site", None), + cds_start=cds_start, alt=alt, + residue_mode=ResidueMode.INTER_RESIDUE, ) if not vrs_allele: @@ -227,6 +234,7 @@ async def get_p_or_cdna_translation_result( errors, cds_start=cds_start, alt=alt, + residue_mode=ResidueMode.RESIDUE, ) if vrs_allele and vrs_seq_loc_ac: diff --git a/variation/validate.py b/variation/validate.py index 390175c4..088469df 100644 --- a/variation/validate.py +++ b/variation/validate.py @@ -2,7 +2,7 @@ from typing import List from cool_seq_tool.handlers import SeqRepoAccess -from cool_seq_tool.sources import TranscriptMappings, UTADatabase +from cool_seq_tool.sources import TranscriptMappings, UtaDatabase from gene.query import QueryHandler as GeneQueryHandler from variation.schemas.classification_response_schema import Classification @@ -39,7 +39,7 @@ def __init__( self, seqrepo_access: SeqRepoAccess, transcript_mappings: TranscriptMappings, - uta: UTADatabase, + uta: UtaDatabase, gene_normalizer: GeneQueryHandler, ) -> None: """Initialize the validate class. 
Will create an instance variable, diff --git a/variation/validators/cdna_deletion.py b/variation/validators/cdna_deletion.py index 0e5d48ce..c93ed452 100644 --- a/variation/validators/cdna_deletion.py +++ b/variation/validators/cdna_deletion.py @@ -51,14 +51,18 @@ async def get_valid_invalid_results( }: # # validate deleted sequence # HGVS deleted sequence includes start and end + start = cds_start + classification.pos0 + end = ( + cds_start + classification.pos1 + if classification.pos1 is not None + else start + ) if classification.deleted_sequence: invalid_del_seq_msg = self.validate_reference_sequence( c_ac, - cds_start + classification.pos0, - cds_start + classification.pos1 + 1 - if classification.pos1 - else None, - classification.deleted_sequence, + start, + end_pos=end, + expected_ref=classification.deleted_sequence, ) if invalid_del_seq_msg: @@ -67,10 +71,8 @@ async def get_valid_invalid_results( # Validate accession and positions invalid_ac_pos_msg = self.validate_ac_and_pos( c_ac, - cds_start + classification.pos0, - end_pos=cds_start + classification.pos1 - if classification.pos1 - else None, + start, + end_pos=end, ) if invalid_ac_pos_msg: errors.append(invalid_ac_pos_msg) diff --git a/variation/validators/genomic_base.py b/variation/validators/genomic_base.py index 7d171653..b0400d1c 100644 --- a/variation/validators/genomic_base.py +++ b/variation/validators/genomic_base.py @@ -3,7 +3,7 @@ from typing import List, Optional from cool_seq_tool.handlers import SeqRepoAccess -from cool_seq_tool.sources import UTADatabase +from cool_seq_tool.sources import UtaDatabase from variation.schemas.classification_response_schema import ( Classification, @@ -17,11 +17,11 @@ class GenomicBase: """Genomic Base class for validation methods.""" - def __init__(self, seqrepo_access: SeqRepoAccess, uta: UTADatabase) -> None: + def __init__(self, seqrepo_access: SeqRepoAccess, uta: UtaDatabase) -> None: """Initialize the Genomic base class. 
:param SeqRepoAccess seqrepo_access: Access to seqrepo - :param UTADatabase uta: Access to UTA queries + :param UtaDatabase uta: Access to UTA queries """ self.seqrepo_access = seqrepo_access self.uta = uta diff --git a/variation/validators/genomic_deletion.py b/variation/validators/genomic_deletion.py index af38d83c..7b2b3f87 100644 --- a/variation/validators/genomic_deletion.py +++ b/variation/validators/genomic_deletion.py @@ -57,7 +57,9 @@ async def get_valid_invalid_results( invalid_del_seq_message = self.validate_reference_sequence( alt_ac, classification.pos0, - classification.pos1 + 1 if classification.pos1 else None, + classification.pos1 + if classification.pos1 + else classification.pos0, classification.deleted_sequence, ) @@ -71,8 +73,8 @@ async def get_valid_invalid_results( validate_ref_msg = self.validate_reference_sequence( alt_ac, classification.pos0 - 1, - classification.pos0 - 1 + len(ref), - ref, + end_pos=classification.pos0 + (len(ref) - 1), + expected_ref=ref, ) if validate_ref_msg: diff --git a/variation/validators/genomic_delins.py b/variation/validators/genomic_delins.py index fc68c653..1e8f9b36 100644 --- a/variation/validators/genomic_delins.py +++ b/variation/validators/genomic_delins.py @@ -51,9 +51,7 @@ async def get_valid_invalid_results( invalid_ref_msg = self.validate_reference_sequence( alt_ac, classification.pos0, - classification.pos1 + 1 - if classification.pos1 - else classification.pos0, + classification.pos1 if classification.pos1 else classification.pos0, ref, ) if invalid_ref_msg: diff --git a/variation/validators/genomic_insertion.py b/variation/validators/genomic_insertion.py index a887e1b4..262128ea 100644 --- a/variation/validators/genomic_insertion.py +++ b/variation/validators/genomic_insertion.py @@ -49,7 +49,10 @@ async def get_valid_invalid_results( if ref: # gnomAD VCF provides reference, so we should validate this invalid_ref_msg = self.validate_reference_sequence( - alt_ac, classification.pos0, 
classification.pos1, ref + alt_ac, + classification.pos0, + end_pos=classification.pos1, + expected_ref=ref, ) if invalid_ref_msg: errors.append(invalid_ref_msg) diff --git a/variation/validators/genomic_reference_agree.py b/variation/validators/genomic_reference_agree.py index 3ad30b23..ec30c1da 100644 --- a/variation/validators/genomic_reference_agree.py +++ b/variation/validators/genomic_reference_agree.py @@ -32,8 +32,7 @@ async def get_valid_invalid_results( token = classification.matching_tokens[0] ref = token.ref start_pos = token.pos - end_pos = token.pos + len(ref) - + end_pos = token.pos + (len(ref) - 1) invalid_ref_msg = self.validate_reference_sequence( alt_ac, start_pos, end_pos, ref ) diff --git a/variation/validators/genomic_substitution.py b/variation/validators/genomic_substitution.py index 98fffdb4..898750b1 100644 --- a/variation/validators/genomic_substitution.py +++ b/variation/validators/genomic_substitution.py @@ -26,7 +26,7 @@ async def get_valid_invalid_results( validation_results = [] if classification.nomenclature == Nomenclature.GNOMAD_VCF: - end_pos = classification.pos + len(classification.alt) + end_pos = classification.pos + (len(classification.alt) - 1) else: # HGVS is only 1 nuc end_pos = classification.pos diff --git a/variation/validators/protein_deletion.py b/variation/validators/protein_deletion.py index 8ca2b0d7..5756fb58 100644 --- a/variation/validators/protein_deletion.py +++ b/variation/validators/protein_deletion.py @@ -84,7 +84,7 @@ async def get_valid_invalid_results( invalid_del_seq_msg = self.validate_reference_sequence( p_ac, classification.pos0, - classification.pos1 + 1, + classification.pos1, classification.deleted_sequence, ) diff --git a/variation/validators/validator.py b/variation/validators/validator.py index 270360c4..6d51a8f6 100644 --- a/variation/validators/validator.py +++ b/variation/validators/validator.py @@ -4,7 +4,7 @@ from cool_seq_tool.handlers import SeqRepoAccess from cool_seq_tool.schemas 
import ResidueMode -from cool_seq_tool.sources import TranscriptMappings, UTADatabase +from cool_seq_tool.sources import TranscriptMappings, UtaDatabase from gene.query import QueryHandler as GeneQueryHandler from gene.schemas import SourceName @@ -34,7 +34,7 @@ def __init__( self, seqrepo_access: SeqRepoAccess, transcript_mappings: TranscriptMappings, - uta: UTADatabase, + uta: UtaDatabase, gene_normalizer: GeneQueryHandler, ) -> None: """Initialize the DelIns validator. diff --git a/variation/vrs_representation.py b/variation/vrs_representation.py index ae78eff5..bde025f9 100644 --- a/variation/vrs_representation.py +++ b/variation/vrs_representation.py @@ -2,7 +2,7 @@ from typing import Dict, List, Optional, Tuple, Union from cool_seq_tool.handlers import SeqRepoAccess -from cool_seq_tool.schemas import AnnotationLayer +from cool_seq_tool.schemas import AnnotationLayer, ResidueMode from ga4gh.core import ga4gh_identify from ga4gh.vrs import models, normalize from pydantic import ValidationError @@ -155,6 +155,7 @@ def to_vrs_allele( errors: List[str], cds_start: Optional[int] = None, alt: Optional[str] = None, + residue_mode: ResidueMode = ResidueMode.RESIDUE, ) -> Optional[Dict]: """Translate accession and position to VRS Allele Object. 
@@ -166,6 +167,7 @@ def to_vrs_allele( :param errors: List of errors :param cds_start: Coding start site :param alt: Alteration + :param residue_mode: Residue mode for ``start`` and ``end`` positions :return: VRS Allele Object """ coords = self.get_start_end(coordinate, start, end, cds_start, errors) @@ -176,10 +178,15 @@ def to_vrs_allele( else: new_start, new_end = coords + if residue_mode == ResidueMode.RESIDUE: + new_start -= 1 + residue_mode = ResidueMode.INTER_RESIDUE + # Right now, this follows HGVS conventions # This will change once we support other representations if alt_type == AltType.INSERTION: state = alt + new_start += 1 new_end = new_start elif alt_type in { AltType.SUBSTITUTION, @@ -190,7 +197,9 @@ def to_vrs_allele( AltType.NONSENSE, }: if alt_type == AltType.REFERENCE_AGREE: - state, _ = self.seqrepo_access.get_reference_sequence(ac, new_start) + state, _ = self.seqrepo_access.get_reference_sequence( + ac, start=new_start, end=new_end, residue_mode=residue_mode + ) if state is None: errors.append( f"Unable to get sequence on {ac} from " f"{new_start}" @@ -203,10 +212,9 @@ def to_vrs_allele( # This accounts for MNVs new_end += len(state) - 1 - new_start -= 1 elif alt_type == AltType.DUPLICATION: ref, _ = self.seqrepo_access.get_reference_sequence( - ac, new_start, new_end + 1 + ac, start=new_start, end=new_end, residue_mode=residue_mode ) if ref is not None: state = ref + ref @@ -215,7 +223,6 @@ def to_vrs_allele( f"Unable to get sequence on {ac} from {new_start} to {new_end + 1}" ) return None - new_start -= 1 else: errors.append(f"alt_type not supported: {alt_type}") return None From 52855ab74696541e7137e245950cfd07e48d3851 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Fri, 2 Feb 2024 09:46:48 -0500 Subject: [PATCH 2/8] temp remove gnomad_vcf_to_protein --- tests/test_gnomad_vcf_to_protein.py | 291 ---------- variation/gnomad_vcf_to_protein_variation.py | 553 ------------------- variation/main.py | 22 - variation/query.py | 10 - 4 files 
changed, 876 deletions(-) delete mode 100644 tests/test_gnomad_vcf_to_protein.py delete mode 100644 variation/gnomad_vcf_to_protein_variation.py diff --git a/tests/test_gnomad_vcf_to_protein.py b/tests/test_gnomad_vcf_to_protein.py deleted file mode 100644 index 2ad39312..00000000 --- a/tests/test_gnomad_vcf_to_protein.py +++ /dev/null @@ -1,291 +0,0 @@ -"""Module for testing gnomad_vcf_to_protein works correctly""" -import pytest -from ga4gh.vrs import models - -from tests.conftest import assertion_checks -from variation.gnomad_vcf_to_protein_variation import dna_to_rna - - -@pytest.fixture(scope="module") -def test_handler(test_query_handler): - """Create test fixture for gnomad vcf to protein handler""" - return test_query_handler.gnomad_vcf_to_protein_handler - - -@pytest.fixture(scope="module") -def mmel1_l30m(): - """Create test fixture for MMEL1 L30M""" - params = { - "id": "ga4gh:VA.OqqETz467CITELOZsYDukkab7JaOWiZf", - "location": { - "id": "ga4gh:SL.Q7kfcqUWpIyEOgxcgPK1sRfgWPDv7zKA", - "end": 30, - "start": 29, - "sequenceReference": { - "type": "SequenceReference", - "refgetAccession": "SQ.iQ8F_pnsiQOLohiV2qh3OWRZiftUt8jZ", - }, - "type": "SequenceLocation", - }, - "state": {"sequence": "M", "type": "LiteralSequenceExpression"}, - "type": "Allele", - } - return models.Allele(**params) - - -@pytest.fixture(scope="module") -def cdk11a_e314del(): - """Create test fixture for CDK11A Glu314del""" - params = { - "id": "ga4gh:VA._CVnGazN6KosqrFnDx7kny-rb6yAZWtB", - "location": { - "id": "ga4gh:SL.VqI6HuIFmm4XP3ocOTaobGxwqg4m6Ooi", - "end": 321, - "start": 308, - "sequenceReference": { - "type": "SequenceReference", - "refgetAccession": "SQ.N728VSRRMHJ1SrhJgKqJOCaa3l5Z4sqm", - }, - "type": "SequenceLocation", - }, - "state": { - "length": 12, - "repeatSubunitLength": 1, - "sequence": "EEEEEEEEEEEE", - "type": "ReferenceLengthExpression", - }, - "type": "Allele", - } - return models.Allele(**params) - - -@pytest.fixture(scope="module") -def protein_insertion2(): - 
"""Create test fixture for LRP8 p.Gln25_Leu26insArg""" - params = { - "id": "ga4gh:VA.5KWhsli69ac5zyoGf40Owu4CVNKy27So", - "location": { - "id": "ga4gh:SL.I4c4NL0g3vBajHe44faZFQtrcqrbA14d", - "end": 25, - "start": 25, - "sequenceReference": { - "type": "SequenceReference", - "refgetAccession": "SQ.qgIh8--4F6IpxRwX_lVtD2BhepH5B5Ef", - }, - "type": "SequenceLocation", - }, - "state": {"sequence": "R", "type": "LiteralSequenceExpression"}, - "type": "Allele", - } - return models.Allele(**params) - - -@pytest.fixture(scope="module") -def atad3a_loc(): - """Create test fixture for ATAD3A location""" - return { - "id": "ga4gh:SL.xiP3uciIfJy_f44wNKCBvtsb35BC330Q", - "end": 7, - "start": 6, - "sequenceReference": { - "type": "SequenceReference", - "refgetAccession": "SQ.MHPOY_7fv8V9SktyvaTxulVFSK6XCxM8", - }, - "type": "SequenceLocation", - } - - -@pytest.fixture(scope="module") -def atad3a_i7v(atad3a_loc): - """Create test fixture for ATAD3A Ile7Val""" - params = { - "id": "ga4gh:VA.i_L_bjPfI4XLMIKmVklV6eDLKEl1f7PD", - "location": atad3a_loc, - "state": {"sequence": "V", "type": "LiteralSequenceExpression"}, - "type": "Allele", - } - return models.Allele(**params) - - -@pytest.fixture(scope="module") -def atad3a_i7t(atad3a_loc): - """Create test fixture for ATAD3A Ile7Thr""" - params = { - "id": "ga4gh:VA.C8QO-YAfG66yj7cEwjEhkEfSd-oCSKfc", - "location": atad3a_loc, - "state": {"sequence": "T", "type": "LiteralSequenceExpression"}, - "type": "Allele", - } - return models.Allele(**params) - - -@pytest.fixture(scope="module") -def atad3a_i7m(atad3a_loc): - """Create test fixture for ATAD3A Ile7Met""" - params = { - "id": "ga4gh:VA.Fhmv3GK3bcIJRXOkigS9QNMzAWGW3WGa", - "location": atad3a_loc, - "state": {"sequence": "M", "type": "LiteralSequenceExpression"}, - "type": "Allele", - } - return models.Allele(**params) - - -@pytest.fixture(scope="session") -def braf_v600l(braf_600loc): - """Create test fixture for BRAF Val600Leu.""" - params = { - "id": 
"ga4gh:VA.c6f1MPfquVRPZO46wVzCaGaU8QnXoHNN", - "location": braf_600loc, - "state": {"sequence": "L", "type": "LiteralSequenceExpression"}, - "type": "Allele", - } - return models.Allele(**params) - - -@pytest.fixture(scope="session") -def braf_600_reference_agree(braf_600loc): - """Create test fixture for BRAF Val600=.""" - params = { - "id": "ga4gh:VA.wS6kJNbPkRJDIWg8F4CjOMQ5mcJzD_X4", - "location": braf_600loc, - "state": {"sequence": "V", "type": "LiteralSequenceExpression"}, - "type": "Allele", - } - return models.Allele(**params) - - -@pytest.fixture(scope="module") -def kras_g12d(): - """Fixture for KRAS G12C""" - params = { - "id": "ga4gh:VA.CB571ja_KfZM_Hjn9zjjgV1an3tDWRcl", - "type": "Allele", - "location": { - "id": "ga4gh:SL.OndkjmujtyUEZSjjCv0C-gpwnVbRgfj8", - "type": "SequenceLocation", - "sequenceReference": { - "type": "SequenceReference", - "refgetAccession": "SQ.fytWhQSNGnA-86vDiQCxTSzgkk_WfQRS", - }, - "start": 11, - "end": 12, - }, - "state": {"type": "LiteralSequenceExpression", "sequence": "D"}, - } - return models.Allele(**params) - - -def test_dna_to_rna(): - """Test that dna_to_rna method works correctly.""" - resp = dna_to_rna("GTA") - assert resp == "CAU" - - resp = dna_to_rna("AAGTGACA") - assert resp == "UUCACUGU" - - -@pytest.mark.asyncio -async def test_substitution( - test_handler, - braf_v600e, - braf_v600l, - braf_600_reference_agree, - mmel1_l30m, - atad3a_i7v, - atad3a_i7t, - atad3a_i7m, - kras_g12d, -): - """Test that substitution queries return correct response""" - # Reading Frame 1, Negative Strand - resp = await test_handler.gnomad_vcf_to_protein("7-140753337-C-A") - assertion_checks(resp, braf_v600l) - assert resp.warnings == [] - - # Reading Frame 2, Negative Strand - resp = await test_handler.gnomad_vcf_to_protein("7-140753336-A-T") - assertion_checks(resp, braf_v600e) - assert resp.warnings == [] - - # Reading Frame 3, Negative Strand - resp = await test_handler.gnomad_vcf_to_protein("7-140753335-C-A") - 
assertion_checks(resp, braf_600_reference_agree) - assert resp.warnings == [] - - # Reading Frame 3, Negative Strand - resp = await test_handler.gnomad_vcf_to_protein("1-2629397-G-T") - assertion_checks(resp, mmel1_l30m) - assert resp.warnings == [] - - # Reading Frame 1, Positive Strand - resp = await test_handler.gnomad_vcf_to_protein("1-1512287-A-G") - assertion_checks(resp, atad3a_i7v) - assert resp.warnings == [] - - # Reading Frame 2, Positive Strand - resp = await test_handler.gnomad_vcf_to_protein("1-1512288-T-C") - assertion_checks(resp, atad3a_i7t) - assert resp.warnings == [] - - # Reading Frame 3, Positive Strand - resp = await test_handler.gnomad_vcf_to_protein("1-1512289-T-G") - assertion_checks(resp, atad3a_i7m) - assert resp.warnings == [] - - resp = await test_handler.gnomad_vcf_to_protein("12-25245350-C-T") - assertion_checks(resp, kras_g12d) - - -@pytest.mark.asyncio -async def test_reference_agree(test_handler, vhl_reference_agree): - """Test that reference agree queries return correct response""" - # https://www.ncbi.nlm.nih.gov/clinvar/variation/379039/?new_evidence=true - resp = await test_handler.gnomad_vcf_to_protein("3-10142030-C-C") - assertion_checks(resp, vhl_reference_agree) - assert resp.warnings == [] - - -@pytest.mark.asyncio -async def test_insertion(test_handler, protein_insertion, protein_insertion2): - """Test that insertion queries return correct response""" - resp = await test_handler.gnomad_vcf_to_protein("7-55181319-C-CGGGTTG") - assertion_checks(resp, protein_insertion) - assert resp.warnings == [] - - resp = await test_handler.gnomad_vcf_to_protein("1-53327836-A-ACGC") - assertion_checks(resp, protein_insertion2) - assert resp.warnings == [] - - -@pytest.mark.asyncio -async def test_deletion(test_handler, protein_deletion_np_range, cdk11a_e314del): - """Test that deletion queries return correct response""" - resp = await test_handler.gnomad_vcf_to_protein("17-39723966-TTGAGGGAAAACACAT-T") - assertion_checks(resp, 
protein_deletion_np_range) - assert resp.warnings == [] - - resp = await test_handler.gnomad_vcf_to_protein("1-1708855-TTCC-T") - assertion_checks(resp, cdk11a_e314del) - assert resp.warnings == [] - - -@pytest.mark.asyncio -async def test_invalid(test_handler): - """Test that invalid queries return correct response""" - resp = await test_handler.gnomad_vcf_to_protein("dummy") - assert resp.variation is None - - resp = await test_handler.gnomad_vcf_to_protein("BRAF V600E") - assert resp.variation is None - assert resp.warnings == ["BRAF V600E is not a supported gnomad vcf query"] - - resp = await test_handler.gnomad_vcf_to_protein("7-140753336-T-G") - assert resp.variation is None - assert set(resp.warnings) == { - "Expected T but found A on NC_000007.14 at position 140753336" - } - - resp = await test_handler.gnomad_vcf_to_protein("20-2-TC-TG") - assert resp.variation is None - assert resp.warnings == ["20-2-TC-TG is not a valid gnomad vcf query"] diff --git a/variation/gnomad_vcf_to_protein_variation.py b/variation/gnomad_vcf_to_protein_variation.py deleted file mode 100644 index e70cdfd6..00000000 --- a/variation/gnomad_vcf_to_protein_variation.py +++ /dev/null @@ -1,553 +0,0 @@ -"""Module for going from gnomAD VCF to VRS variation on the protein coordinate""" -from copy import deepcopy -from datetime import datetime -from typing import Dict, List, Optional, Tuple - -from cool_seq_tool.handlers import SeqRepoAccess -from cool_seq_tool.mappers import ManeTranscript -from cool_seq_tool.schemas import ResidueMode -from cool_seq_tool.sources import ( - ManeTranscriptMappings, - UtaDatabase, -) - -from variation.classify import Classify -from variation.schemas.app_schemas import Endpoint -from variation.schemas.classification_response_schema import ( - ClassificationType, - Nomenclature, -) -from variation.schemas.normalize_response_schema import ( - HGVSDupDelModeOption, - NormalizeService, - ServiceMeta, -) -from variation.schemas.token_response_schema import 
AltType, Token -from variation.schemas.translation_response_schema import TranslationResult -from variation.schemas.validation_response_schema import ValidationResult -from variation.to_vrs import ToVRS -from variation.tokenize import Tokenize -from variation.translate import Translate -from variation.utils import update_warnings_for_no_resp -from variation.validate import Validate -from variation.version import __version__ - -DNA_TO_RNA = {"T": "A", "A": "U", "G": "C", "C": "G"} - -CODON_TABLE = { - "AUA": "I", - "AUC": "I", - "AUU": "I", - "AUG": "M", - "ACA": "T", - "ACC": "T", - "ACG": "T", - "ACU": "T", - "AAC": "N", - "AAU": "N", - "AAA": "K", - "AAG": "K", - "AGC": "S", - "AGU": "S", - "AGA": "R", - "AGG": "R", - "CUA": "L", - "CUC": "L", - "CUG": "L", - "CUU": "L", - "CCA": "P", - "CCC": "P", - "CCG": "P", - "CCU": "P", - "CAC": "H", - "CAU": "H", - "CAA": "Q", - "CAG": "Q", - "CGA": "R", - "CGC": "R", - "CGG": "R", - "CGU": "R", - "GUA": "V", - "GUC": "V", - "GUG": "V", - "GUU": "V", - "GCA": "A", - "GCC": "A", - "GCG": "A", - "GCU": "A", - "GAC": "D", - "GAU": "D", - "GAA": "E", - "GAG": "E", - "GGA": "G", - "GGC": "G", - "GGG": "G", - "GGU": "G", - "UCA": "S", - "UCC": "S", - "UCG": "S", - "UCU": "S", - "UUC": "F", - "UUU": "F", - "UUA": "L", - "UUG": "L", - "UAC": "Y", - "UAU": "Y", - "UAA": "*", - "UAG": "*", - "UGC": "C", - "UGU": "C", - "UGA": "*", - "UGG": "W", -} - - -def dna_to_rna(dna_codon: str) -> str: - """Convert DNA codon to RNA codon. 
- - :param str dna_codon: DNA codon - :return: RNA codon - """ - dna_codon_list = list(dna_codon) - rna_codon = "" - for char in dna_codon_list: - rna_codon += DNA_TO_RNA[char] - return rna_codon - - -class GnomadVcfToProteinVariation(ToVRS): - """Class for translating gnomAD VCF representation to protein representation""" - - def __init__( - self, - seqrepo_access: SeqRepoAccess, - tokenizer: Tokenize, - classifier: Classify, - validator: Validate, - translator: Translate, - uta: UtaDatabase, - mane_transcript: ManeTranscript, - mane_transcript_mappings: ManeTranscriptMappings, - ) -> None: - """Initialize the GnomadVcfToProteinVariation class - - :param seqrepo_access: Access to SeqRepo - :param tokenizer: Tokenizer class for tokenizing - :param classifier: Classifier class for classifying tokens - :param validator: Validator class for validating valid inputs - :param translator: Translating valid inputs - :param uta: Access to db containing alignment data - :param mane_transcript: Access MANE Transcript information - :param mane_transcript_mappings: Mappings for MANE Transcript data - """ - super().__init__(seqrepo_access, tokenizer, classifier, validator, translator) - self.uta = uta - self.mane_transcript = mane_transcript - self.mane_transcript_mappings = mane_transcript_mappings - - async def _get_valid_results( - self, q: str, warnings: List - ) -> List[ValidationResult]: - """Get gnomad vcf validation summary - - :param q: gnomad vcf input query - :param warnings: List of warnings - :return: List of valid results for a gnomad VCF query - """ - tokens = self.tokenizer.perform(q.strip(), warnings) - if not tokens: - return None - - classification = self.classifier.perform(tokens) - if not classification: - return None - - if classification.nomenclature != Nomenclature.GNOMAD_VCF: - warnings.append(f"{q} is not a supported gnomad vcf query") - return None - - validation_summary = await self.validator.perform(classification) - if 
validation_summary.valid_results: - valid_results = validation_summary.valid_results - else: - warnings.append(f"{q} is not a valid gnomad vcf query") - valid_results = [] - return valid_results - - def _get_refseq_alt_ac_from_variation(self, variation: Dict) -> str: - """Get genomic ac from variation sequence - - :param Dict variation: VRS variation object - :return: RefSeq genomic accession - """ - # genomic ac should always be in 38 - refget_accession = variation["location"]["sequenceReference"]["refgetAccession"] - ga4gh_alias = f"ga4gh:{refget_accession}" - aliases = self.seqrepo_access.sr.translate_identifier( - ga4gh_alias, target_namespaces="refseq" - ) - return aliases[0].split("refseq:")[-1] - - def _update_gnomad_vcf_mane_c_pos( - self, - reading_frame: int, - mane_c_ac: str, - mane_c_pos_change: Tuple[int, int], - coding_start_site: int, - warnings: List, - ) -> Optional[Tuple[int, int]]: - """Return updated mane c position change for a gnomad vcf variation - depending on reading frame base - - :param int reading_frame: reading frame base - :param str mane_c_ac: Mane transcript accession - :param Tuple[int, int] mane_c_pos_change: Mane transcript position - change - :param int coding_start_site: Coding start site - :param List warnings: List of warnings - :return: Mane c pos start and end - """ - if reading_frame == 1: - # first pos - mane_c_pos_change = mane_c_pos_change[0], mane_c_pos_change[0] + 2 - elif reading_frame == 2: - # middle pos - mane_c_pos_change = mane_c_pos_change[0] - 1, mane_c_pos_change[0] + 1 - elif reading_frame == 3: - # last pos - mane_c_pos_change = mane_c_pos_change[0] - 2, mane_c_pos_change[0] - - if not self.mane_transcript._validate_index( - mane_c_ac, mane_c_pos_change, coding_start_site - ): - warnings.append( - f"{mane_c_pos_change} are not valid positions on " - f"{mane_c_ac} with coding start site " - f"{coding_start_site}" - ) - return None - return mane_c_pos_change - - def _get_gnomad_vcf_protein_alt( - self, - 
classification_token: Token, - alt_type: AltType, - reading_frame: int, - strand: str, - alt_ac: str, - g_start_pos: int, - g_end_pos: int, - ) -> Optional[str]: - """Return protein alteration that corresponds to gnomad VCF alteration - - :param classification_token: Classification token for query - :param alt_type: Alteration type - :param reading_frame: cDNA reading frame number (1, 2, 3) - :param strand: Strand for query - :param alt_ac: RefSeq genomic accession - :param g_start_pos: Genomic start position - :param g_end_pos: Genomic end position - :return: Amino acid alteration (using 1-letter codes) - """ - alt = None - residue_mode = ResidueMode.INTER_RESIDUE - if alt_type in {AltType.SUBSTITUTION, AltType.REFERENCE_AGREE}: - alt_nuc = classification_token.matching_tokens[0].alt - - ref = None - if reading_frame == 1: - # first pos - if strand == "-": - ref, _ = self.seqrepo_access.get_reference_sequence( - alt_ac, - g_start_pos - 2, - g_end_pos + 1, - residue_mode=residue_mode, - ) - alt = alt_nuc + ref[1] + ref[0] - else: - ref, _ = self.seqrepo_access.get_reference_sequence( - alt_ac, g_start_pos, g_end_pos + 3, residue_mode=residue_mode - ) - alt = alt_nuc + ref[1] + ref[2] - elif reading_frame == 2: - # middle pos - ref, _ = self.seqrepo_access.get_reference_sequence( - alt_ac, g_start_pos - 1, g_end_pos + 2, residue_mode=residue_mode - ) - - if strand == "-": - alt = ref[2] + alt_nuc + ref[0] - else: - alt = ref[0] + alt_nuc + ref[2] - elif reading_frame == 3: - # last pos - if strand == "-": - ref, _ = self.seqrepo_access.get_reference_sequence( - alt_ac, g_start_pos, g_end_pos + 3, residue_mode=residue_mode - ) - alt = ref[2] + ref[1] + alt_nuc - else: - ref, _ = self.seqrepo_access.get_reference_sequence( - alt_ac, - g_start_pos - 2, - g_end_pos + 1, - residue_mode=residue_mode, - ) - alt = ref[0] + ref[1] + alt_nuc - if alt and strand == "-": - alt = dna_to_rna(alt) - else: - alt = alt.replace("T", "U") - elif alt_type == AltType.INSERTION: - alt = 
classification_token.inserted_sequence[1:].replace("T", "U") - if strand == "-": - alt = alt[::-1] - else: - return None - - if alt is None: - return None - else: - if len(alt) % 3 != 0: - return None - - aa_alt = "" - for i in range(int(len(alt) / 3)): - aa_alt += CODON_TABLE[alt[3 * i : (3 * i) + 3]] - return aa_alt - - async def gnomad_vcf_to_protein(self, q: str) -> NormalizeService: - """Get MANE protein consequence for gnomad vcf (chr-pos-ref-alt). - Assumes using GRCh38 coordinates - - :param str q: gnomad vcf (chr-pos-ref-alt) - :return: Normalize Service containing variation and warnings - """ - q = q.strip() - warnings = [] - - valid_results = await self._get_valid_results(q, warnings) - if valid_results: - translations, warnings = await self.get_translations( - valid_results, - warnings, - Endpoint.NORMALIZE, - hgvs_dup_del_mode=HGVSDupDelModeOption.ALLELE, - ) - - if translations: - translations.sort( - key=lambda t: (t.og_ac.split(".")[0], int(t.og_ac.split(".")[1])), - reverse=True, - ) - - all_warnings = set() - checked_valid_results = [] - for translation in translations: - warnings = [] - # all gnomad vcf will be alleles with a literal seq expression - variation = translation.vrs_variation - validation_result = translation.validation_result - classification_token = validation_result.classification - - # We do not need to check the same variation that has the same - # classification - checked_tuple = ( - variation["id"], - translation.vrs_seq_loc_ac, - classification_token.classification_type.value, - ) - if checked_tuple in checked_valid_results: - continue - - checked_valid_results.append(checked_tuple) - alt_ac = self._get_refseq_alt_ac_from_variation(variation) - - # 0-based - alt_type = None - g_start_pos = None - g_end_pos = None - if ( - classification_token.classification_type - == ClassificationType.GENOMIC_DELINS - ): - g_start_pos = classification_token.pos0 - g_end_pos = ( - classification_token.pos1 - if classification_token.pos1 - else 
classification_token.pos0 - ) - - # Right now, deletions and insertions are classified as delins - # Only support simple deletions and insertions - gnomad_vcf_token = classification_token.matching_tokens[0] - ref = gnomad_vcf_token.ref - alt = gnomad_vcf_token.alt - - if ref[0] == alt[0]: - if len(alt) == 1: - alt_type = AltType.DELETION - g_start_pos += 1 - g_end_pos += 1 - elif len(ref) == 1: - alt_type = AltType.INSERTION - else: - alt_type = AltType.DELINS - elif classification_token.classification_type in { - ClassificationType.GENOMIC_SUBSTITUTION, - ClassificationType.GENOMIC_REFERENCE_AGREE, - }: - g_start_pos = classification_token.pos - g_end_pos = classification_token.pos - ref_seq, w = self.seqrepo_access.get_reference_sequence( - alt_ac, start=g_start_pos, end=g_start_pos - ) - if not ref_seq: - all_warnings.add(w) - else: - if ref_seq != classification_token.matching_tokens[0].ref: - all_warnings.add( - f"Expected {classification_token.ref} but found " - f"{ref_seq} on {alt_ac} at position {g_start_pos}" - ) - continue - - if ( - classification_token.classification_type - == ClassificationType.GENOMIC_SUBSTITUTION - ): - alt_type = AltType.SUBSTITUTION - else: - alt_type = AltType.REFERENCE_AGREE - else: - all_warnings.add( - f"{classification_token.classification_type} classification_type not supported" # noqa: E501 - ) - continue - - mane_data = self.mane_transcript_mappings.get_mane_data_from_chr_pos( # noqa: E501 - alt_ac, g_start_pos, g_end_pos - ) - - mane_data_len = len(mane_data) - g_start_pos -= 1 - g_end_pos -= 1 - - for i in range(mane_data_len): - current_mane_data = mane_data[i] - mane_c_ac = current_mane_data["RefSeq_nuc"] - mane_tx_genomic_data = await self.uta.get_mane_c_genomic_data( - mane_c_ac, alt_ac, g_start_pos, g_end_pos - ) - if not mane_tx_genomic_data: - all_warnings.add( - f"Unable to get MANE data for {mane_c_ac} using " - f"{alt_ac} at positions {g_start_pos} to {g_end_pos}" - ) - continue - - coding_start_site = 
mane_tx_genomic_data["coding_start_site"] - mane_c_pos_change = self.mane_transcript.get_mane_c_pos_change( - mane_tx_genomic_data, coding_start_site - ) - - # We use 1-based - reading_frame = self.mane_transcript._get_reading_frame( - mane_c_pos_change[0] + 1 - ) - if classification_token.classification_type in { - ClassificationType.GENOMIC_SUBSTITUTION, - ClassificationType.GENOMIC_REFERENCE_AGREE, - }: - mane_c_pos_change = self._update_gnomad_vcf_mane_c_pos( - reading_frame, - mane_c_ac, - mane_c_pos_change, - coding_start_site, - warnings, - ) - if mane_c_pos_change is None: - if len(warnings) > 0: - all_warnings.add(warnings[0]) - continue - - mane_p = self.mane_transcript._get_mane_p( - current_mane_data, - (mane_c_pos_change[0] + 1, mane_c_pos_change[1] + 1), - ) - if mane_p.pos[0] > mane_p.pos[1]: - mane_p.pos = (mane_p.pos[1], mane_p.pos[0]) - p_ac = mane_p.refseq - aa_alt = self._get_gnomad_vcf_protein_alt( - classification_token, - alt_type, - reading_frame, - mane_tx_genomic_data["strand"], - alt_ac, - g_start_pos, - g_end_pos, - ) - # Deletions don't have an aa_alt - if aa_alt or alt_type == AltType.DELETION: - # mane_p is 0-based, but to_vrs allele takes 1-based - variation = self.to_vrs_allele( - p_ac, - mane_p.pos[0], - mane_p.pos[1], - "p", - alt_type, - [], - alt=aa_alt, - residue_mode=ResidueMode.INTER_RESIDUE, - ) - if variation: - translation_result = TranslationResult( - vrs_variation=variation, - validation_result=validation_result, - ) - - tr_copy = deepcopy(translation_result) - tr_copy.vrs_seq_loc_ac = p_ac - tr_copy.vrs_seq_loc_ac_status = mane_p.status - - try: - vrs_variation = tr_copy.vrs_variation - except AttributeError as e: - warnings.append(str(e)) - vrs_variation = None - - if not vrs_variation: - continue - - return NormalizeService( - variation_query=q, - variation=vrs_variation, - warnings=[], - service_meta_=ServiceMeta( - version=__version__, - response_datetime=datetime.now(), - ), - ) - else: - all_warnings.add( - 
"Unable to get associated amino acid change" - ) - - if all_warnings: - warnings = all_warnings - else: - warnings = [f"Unable to get protein variation for {q}"] - else: - update_warnings_for_no_resp(q, warnings) - else: - update_warnings_for_no_resp(q, warnings) - - return NormalizeService( - variation_query=q, - variation=None, - warnings=warnings, - service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() - ), - ) diff --git a/variation/main.py b/variation/main.py index ce111165..1f442520 100644 --- a/variation/main.py +++ b/variation/main.py @@ -296,28 +296,6 @@ def vrs_python_translate_from( q_description = "GRCh38 gnomAD VCF (chr-pos-ref-alt) to normalize to MANE protein variation." # noqa: E501 -@app.get( - "/variation/gnomad_vcf_to_protein", - summary=g_to_p_summary, - response_description=g_to_p_response_description, - response_model_exclude_none=True, - description=g_to_p_description, - response_model=NormalizeService, - tags=[Tag.TO_PROTEIN_VARIATION], -) -async def gnomad_vcf_to_protein( - q: str = Query(..., description=q_description), -) -> NormalizeService: - """Return VRS representation for variation on protein coordinate. - - :param q: gnomad VCF to normalize to protein variation. - :return: NormalizeService for variation - """ - q = unquote(q.strip()) - resp = await query_handler.gnomad_vcf_to_protein_handler.gnomad_vcf_to_protein(q) - return resp - - hgvs_dup_del_mode_decsr = ( "This parameter determines how to interpret HGVS dup/del expressions in VRS." 
) diff --git a/variation/query.py b/variation/query.py index 6bc4e656..87add090 100644 --- a/variation/query.py +++ b/variation/query.py @@ -7,7 +7,6 @@ from gene.query import QueryHandler as GeneQueryHandler from variation.classify import Classify -from variation.gnomad_vcf_to_protein_variation import GnomadVcfToProteinVariation from variation.hgvs_dup_del_mode import HGVSDupDelMode from variation.normalize import Normalize from variation.to_copy_number_variation import ToCopyNumberVariation @@ -66,15 +65,6 @@ def __init__( self.to_vrs_handler = ToVRS(*to_vrs_params) normalize_params = to_vrs_params + [uta_db] self.normalize_handler = Normalize(*normalize_params) - - mane_transcript_mappings = cool_seq_tool.mane_transcript_mappings - to_protein_params = normalize_params + [ - mane_transcript, - mane_transcript_mappings, - ] - self.gnomad_vcf_to_protein_handler = GnomadVcfToProteinVariation( - *to_protein_params - ) self.to_copy_number_handler = ToCopyNumberVariation( *to_vrs_params + [gene_query_handler, uta_db] ) From 5729291d8378fa49612597796fb857866838950c Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 7 Feb 2024 17:36:40 -0500 Subject: [PATCH 3/8] style: use ruff == 0.2.0 --- .flake8 | 22 ---- .github/workflows/checks.yml | 13 ++- .pre-commit-config.yaml | 11 +- Pipfile | 3 +- README.md | 20 ++-- codebuild/deploy_eb_env_dev.py | 2 +- pyproject.toml | 105 +++++++++++++----- setup.cfg | 3 +- tests/conftest.py | 2 +- tests/test_classifier.py | 4 +- tests/test_hgvs_dup_del_mode.py | 28 ++--- tests/test_normalize.py | 40 +++---- tests/test_tokenizer.py | 4 +- tests/test_translator.py | 44 ++++---- tests/test_validator.py | 48 ++++---- .../test_hgvs_to_copy_number.py | 50 ++++----- .../test_parsed_to_copy_number.py | 6 +- variation/__init__.py | 2 +- variation/classifiers/__init__.py | 25 +++++ .../cdna_substitution_classifier.py | 2 + variation/classifiers/classifier.py | 3 +- .../genomic_substitution_classifier.py | 2 + 
.../classifiers/gnomad_vcf_classifier.py | 8 +- variation/classifiers/hgvs_classifier.py | 50 ++++++--- variation/classify.py | 4 +- variation/main.py | 76 +++++++------ variation/normalize.py | 19 ++-- variation/query.py | 4 +- variation/regex.py | 6 +- variation/schemas/__init__.py | 2 + variation/schemas/copy_number_schema.py | 20 ++-- .../schemas/translation_response_schema.py | 2 +- variation/to_copy_number_variation.py | 48 ++++---- variation/to_vrs.py | 5 +- variation/tokenizers/__init__.py | 22 ++++ .../cdna_and_genomic_reference_agree.py | 5 +- variation/tokenizers/cdna_deletion.py | 2 + variation/tokenizers/cdna_delins.py | 2 + variation/tokenizers/cdna_insertion.py | 2 + variation/tokenizers/cdna_substitution.py | 2 + variation/tokenizers/gene_symbol.py | 3 +- variation/tokenizers/genomic_deletion.py | 88 +++++++-------- variation/tokenizers/genomic_delins.py | 2 + variation/tokenizers/genomic_duplication.py | 87 +++++++-------- variation/tokenizers/genomic_insertion.py | 2 + variation/tokenizers/genomic_substitution.py | 2 + variation/tokenizers/hgvs.py | 6 +- variation/tokenizers/protein_deletion.py | 1 + variation/tokenizers/protein_delins.py | 1 + variation/tokenizers/protein_insertion.py | 2 + .../tokenizers/protein_reference_agree.py | 7 +- variation/tokenizers/protein_substitution.py | 6 +- variation/tokenizers/tokenizer.py | 6 +- variation/translators/__init__.py | 23 ++++ .../translators/ambiguous_translator_base.py | 48 ++++---- variation/translators/amplification.py | 4 +- variation/translators/cdna_deletion.py | 3 +- variation/translators/cdna_delins.py | 3 +- variation/translators/cdna_insertion.py | 3 +- variation/translators/cdna_reference_agree.py | 3 +- variation/translators/cdna_substitution.py | 3 +- variation/translators/genomic_del_dup_base.py | 93 ++++++++-------- variation/translators/genomic_delins.py | 4 +- variation/translators/genomic_insertion.py | 4 +- .../translators/genomic_reference_agree.py | 4 +- 
variation/translators/genomic_substitution.py | 4 +- variation/translators/protein_deletion.py | 3 +- variation/translators/protein_delins.py | 3 +- variation/translators/protein_insertion.py | 3 +- .../translators/protein_reference_agree.py | 3 +- variation/translators/protein_stop_gain.py | 3 +- variation/translators/protein_substitution.py | 3 +- variation/translators/translator.py | 17 ++- variation/utils.py | 20 ++-- variation/validate.py | 2 +- variation/validators/__init__.py | 24 ++++ variation/validators/genomic_base.py | 2 +- variation/validators/genomic_deletion.py | 56 +++++----- .../validators/genomic_deletion_ambiguous.py | 8 +- .../genomic_duplication_ambiguous.py | 8 +- variation/validators/protein_deletion.py | 33 +++--- variation/validators/validator.py | 78 ++++++------- variation/vrs_representation.py | 7 +- 83 files changed, 800 insertions(+), 603 deletions(-) delete mode 100644 .flake8 diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 1eca9f33..00000000 --- a/.flake8 +++ /dev/null @@ -1,22 +0,0 @@ -[flake8] -ignore = D205, D400, I101, ANN101, ANN002, ANN003 -max-line-length = 88 -exclude = - .git - venv - __pycache__ - source - outputs - docs/* - variation/version.py - build/* - codebuild/* -inline-quotes = " -import-order-style = pep8 -application-import-names = - variation - tests -per-file-ignores = - tests/*:ANN001, ANN2, ANN102 - setup.py:F821 - *__init__.py:F401 diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 72090fe0..ad485bc0 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -25,8 +25,13 @@ jobs: steps: - uses: actions/checkout@v3 - - name: black - uses: psf/black@stable + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + + - name: Install dependencies + run: python3 -m pip install ".[dev]" - - name: ruff - uses: chartboost/ruff-action@v1 + - name: Check style + run: python3 -m ruff check . && ruff format --check . 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c1c3d02..8e29f947 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,20 +2,15 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v1.4.0 + rev: v4.5.0 hooks: - id: check-added-large-files - id: detect-private-key - id: trailing-whitespace - id: end-of-file-fixer -- repo: https://github.com/psf/black - rev: 23.7.0 - hooks: - - id: black - language_version: python3.11 - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. - rev: v0.0.280 + rev: v0.2.0 hooks: + - id: ruff-format - id: ruff args: [ --fix, --exit-non-zero-on-fix ] diff --git a/Pipfile b/Pipfile index ea36fa69..e3d15d97 100644 --- a/Pipfile +++ b/Pipfile @@ -12,8 +12,7 @@ variation-normalizer = {editable = true, path = "."} jupyter = "*" ipykernel = "*" psycopg2-binary = "*" -ruff = "*" -black = "*" +ruff = "==0.2.0" [packages] "biocommons.seqrepo" = "*" diff --git a/README.md b/README.md index 414cfcfc..2d4e7425 100644 --- a/README.md +++ b/README.md @@ -156,20 +156,26 @@ Next, view the OpenAPI docs on your local machine: ### Init coding style tests -Code style is managed by [Ruff](https://github.com/astral-sh/ruff) and checked prior to commit. +Code style is managed by [Ruff](https://docs.astral.sh/ruff/) and checked prior to commit. + +Check style with `ruff`: + +```shell +python3 -m ruff format . && python3 -m ruff check --fix . +``` We use [pre-commit](https://pre-commit.com/#usage) to run conformance tests. This ensures: -* Check code style -* Check for added large files -* Detect AWS Credentials -* Detect Private Key +* Style correctness +* No large files +* No AWS credentials are committed +* No private keys are committed -Before first commit run: +Pre-commit *must* be installed before your first commit.
Use the following command: -```shell +```commandline pre-commit install ``` diff --git a/codebuild/deploy_eb_env_dev.py b/codebuild/deploy_eb_env_dev.py index 08ae38b3..9871a35b 100644 --- a/codebuild/deploy_eb_env_dev.py +++ b/codebuild/deploy_eb_env_dev.py @@ -43,6 +43,6 @@ "Status" ] print(eb_provisioned_product_status) -except Exception as e: # noqa: E722 +except Exception as e: print(e) print("The EB environment is already running....") diff --git a/pyproject.toml b/pyproject.toml index b472aaaf..7e578334 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,40 +2,93 @@ requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta:__legacy__" -[tool.black] -line-length = 88 - [tool.ruff] -# pycodestyle (E, W) -# Pyflakes (F) -# flake8-annotations (ANN) -# flake8-quotes (Q) -# pydocstyle (D) -# pep8-naming (N) -# isort (I) -select = ["E", "W", "F", "ANN", "Q", "D", "N", "I"] - -fixable = ["I", "F401"] +lint.select = [ + "F", # https://docs.astral.sh/ruff/rules/#pyflakes-f + "E", "W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w + "I", # https://docs.astral.sh/ruff/rules/#isort-i + "N", # https://docs.astral.sh/ruff/rules/#pep8-naming-n + "D", # https://docs.astral.sh/ruff/rules/#pydocstyle-d + "UP", # https://docs.astral.sh/ruff/rules/#pyupgrade-up + "ANN", # https://docs.astral.sh/ruff/rules/#flake8-annotations-ann + "ASYNC", # https://docs.astral.sh/ruff/rules/#flake8-async-async + "S", # https://docs.astral.sh/ruff/rules/#flake8-bandit-s + "B", # https://docs.astral.sh/ruff/rules/#flake8-bugbear-b + "A", # https://docs.astral.sh/ruff/rules/#flake8-builtins-a + "C4", # https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4 + "DTZ", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz + "T10", # https://docs.astral.sh/ruff/rules/#flake8-debugger-t10 + "EM", # https://docs.astral.sh/ruff/rules/#flake8-errmsg-em + "G", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g + "PIE", #
https://docs.astral.sh/ruff/rules/#flake8-pie-pie + "T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20 + "PT", # https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt + "Q", # https://docs.astral.sh/ruff/rules/#flake8-quotes-q + "RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse + "RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret + "SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim + "PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth + "PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh + "RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf +] +lint.fixable = [ + "I", + "F401", + "D", + "UP", + "ANN", + "B", + "C4", + "G", + "PIE", + "PT", + "RSE", + "SIM", + "RUF" +] +# ANN101 - missing-type-self +# ANN003 - missing-type-kwargs +# D203 - one-blank-line-before-class # D205 - blank-line-after-summary +# D206 - indent-with-spaces* +# D213 - multi-line-summary-second-line +# D300 - triple-single-quotes* # D400 - ends-in-period # D415 - ends-in-punctuation -# ANN101 - missing-type-self -# ANN003 - missing-type-kwargs -# E501 - line-too-long -ignore = ["D205", "D400", "D415", "ANN101", "ANN003", "E501"] +# E111 - indentation-with-invalid-multiple* +# E114 - indentation-with-invalid-multiple-comment* +# E117 - over-indented* +# E501 - line-too-long* +# W191 - tab-indentation* +# PGH003 - blanket-type-ignore +# *ignored for compatibility with formatter +lint.ignore = [ + "ANN101", "ANN003", + "D203", "D205", "D206", "D213", "D300", "D400", "D415", + "E111", "E114", "E117", "E501", + "W191", + "PGH003", + "S321", +] -[tool.ruff.flake8-quotes] -docstring-quotes = "double" +exclude = [ + "setup.py" +] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] # ANN001 - missing-type-function-argument # ANN2 - missing-return-type +# ANN201 - Missing type annotation # ANN102 - missing-type-cls # N805 - invalid-first-argument-name-for-method -# F821 - undefined-name -# F401 - 
unused-import -"tests/*" = ["ANN001", "ANN2", "ANN102"] -"setup.py" = ["F821"] -"*__init__.py" = ["F401"] -"variation/schemas/*" = ["ANN201", "N805", "ANN001"] +# S101 - assert +# B011 - assert-false +# RUF001 - ambiguous-unicode-character-string +"tests/*" = ["ANN001", "ANN102", "ANN2", "S101", "B011"] +"variation/schemas/*" = ["ANN001", "ANN201", "N805", "S101"] +"codebuild/*" = ["T201"] + +[tool.ruff.lint.flake8-bugbear] +# Allow default arguments like, e.g., `data: List[str] = fastapi.Query(None)`. +extend-immutable-calls = ["fastapi.Query"] diff --git a/setup.cfg b/setup.cfg index edcda243..7f37081a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -50,12 +50,11 @@ dev = pytest pytest-asyncio pytest-cov - ruff + ruff == 0.2.0 pre-commit jupyter ipykernel psycopg2-binary - black [tool:pytest] addopts = --ignore setup.py --ignore=codebuild/ --doctest-modules --cov-report term-missing --cov . diff --git a/tests/conftest.py b/tests/conftest.py index 50a2da80..3366709c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -558,7 +558,7 @@ def assertion_checks(normalize_response, test_variation): def cnv_assertion_checks(resp, test_fixture): """Check that actual response for to copy number matches expected""" try: - getattr(resp, "copy_number_count") + resp.copy_number_count # noqa: B018 except AttributeError: actual = resp.copy_number_change.model_dump(exclude_none=True) else: diff --git a/tests/test_classifier.py b/tests/test_classifier.py index 82f45c30..e0ca546f 100644 --- a/tests/test_classifier.py +++ b/tests/test_classifier.py @@ -1,4 +1,6 @@ """Module for testing classifiers""" +from pathlib import Path + import pytest import yaml @@ -30,7 +32,7 @@ @pytest.fixture(scope="module") def all_fixtures(): """Create fixture for classifiers""" - with open(f"{PROJECT_ROOT}/tests/fixtures/classifiers.yml") as stream: + with Path(f"{PROJECT_ROOT}/tests/fixtures/classifiers.yml").open() as stream: return yaml.safe_load(stream) diff --git a/tests/test_hgvs_dup_del_mode.py 
b/tests/test_hgvs_dup_del_mode.py index 576b9b88..aa56fe05 100644 --- a/tests/test_hgvs_dup_del_mode.py +++ b/tests/test_hgvs_dup_del_mode.py @@ -786,7 +786,7 @@ def no_variation_check(resp, q): assert resp.variation is None, q -@pytest.mark.asyncio +@pytest.mark.asyncio() async def invalid_query_list_checks(query_list, test_handler): """Check that invalid queries in query list do not normalize""" for q in query_list: @@ -794,7 +794,7 @@ async def invalid_query_list_checks(query_list, test_handler): no_variation_check(resp, q) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup1( test_handler, genomic_dup1_lse, @@ -872,7 +872,7 @@ async def test_genomic_dup1( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup2( test_handler, genomic_dup2_lse, @@ -942,7 +942,7 @@ async def test_genomic_dup2( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup3( test_handler, genomic_dup3_cx, @@ -1007,7 +1007,7 @@ async def test_genomic_dup3( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup4( test_handler, genomic_dup4_cn, @@ -1071,7 +1071,7 @@ async def test_genomic_dup4( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup5( test_handler, genomic_dup5_cn, @@ -1135,7 +1135,7 @@ async def test_genomic_dup5( assert resp.variation is None, q -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup6( test_handler, genomic_dup6_cn, @@ -1199,7 +1199,7 @@ async def test_genomic_dup6( assert resp.variation is None, q -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del1( test_handler, genomic_del1_lse, @@ -1270,7 +1270,7 @@ async def test_genomic_del1( await invalid_query_list_checks(invalid_queries, 
test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del2( test_handler, genomic_del2_lse, @@ -1356,7 +1356,7 @@ async def test_genomic_del2( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del3( test_handler, genomic_del3_dup3_cn_38, @@ -1421,7 +1421,7 @@ async def test_genomic_del3( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del4( test_handler, genomic_del4_cn, @@ -1493,7 +1493,7 @@ async def test_genomic_del4( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del5( test_handler, genomic_del5_cn_var, @@ -1554,7 +1554,7 @@ async def test_genomic_del5( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del6( test_handler, genomic_del6_cn_var, @@ -1616,7 +1616,7 @@ async def test_genomic_del6( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_parameters(test_handler): """Check that valid and invalid parameters work as intended.""" resp = await test_handler.normalize("7-140453136-A-T") diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 066da0ff..8a36f0c2 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -555,7 +555,7 @@ def gnomad_vcf_genomic_delins5(): return models.Allele(**params) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_substitution(test_handler, braf_v600e, dis3_p63a, tp53_g262c): """Test that protein substitutions normalize correctly.""" resp = await test_handler.normalize(" BRAF V600E ") @@ -574,21 +574,21 @@ async def test_protein_substitution(test_handler, braf_v600e, dis3_p63a, tp53_g2 assertion_checks(resp, dis3_p63a) -@pytest.mark.asyncio 
+@pytest.mark.asyncio() async def test_polypeptide_truncation(test_handler, vhl): """Test that polypeptide truncations normalize correctly.""" resp = await test_handler.normalize("NP_000542.1:p.Tyr185Ter") assertion_checks(resp, vhl) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_reference_agree(test_handler, vhl_reference_agree): """Test that reference agrees normalize correctly.""" resp = await test_handler.normalize("NP_000542.1:p.Pro61=") assertion_checks(resp, vhl_reference_agree) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_and_genomic_substitution( test_handler, braf_v600e_nucleotide, @@ -642,7 +642,7 @@ async def test_cdna_and_genomic_substitution( assertion_checks(resp, gnomad_vcf_genomic_sub_mnv) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_reference_agree(test_handler, cdna_reference_agree): """Test that cdna Reference Agree normalizes correctly.""" resp = await test_handler.normalize("NM_004333.4:c.1799= ") @@ -658,7 +658,7 @@ async def test_cdna_reference_agree(test_handler, cdna_reference_agree): assertion_checks(resp, cdna_reference_agree) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_reference_agree( test_handler, cdna_reference_agree, grch38_braf_genom_reference_agree ): @@ -683,7 +683,7 @@ async def test_genomic_reference_agree( assertion_checks(resp, cdna_reference_agree) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_delins(test_handler, nm_004448_cdna_delins, nm_000551): """Test that cdna DelIns normalizes correctly.""" resp = await test_handler.normalize(" NM_004448.4:c.2326_2327delinsCT ") @@ -696,7 +696,7 @@ async def test_cdna_delins(test_handler, nm_004448_cdna_delins, nm_000551): assertion_checks(resp, nm_000551) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_delins( test_handler, grch38_genomic_delins1, @@ -770,7 +770,7 @@ async def test_genomic_delins( assertion_checks(resp, genomic_del1_lse) -@pytest.mark.asyncio 
+@pytest.mark.asyncio() async def test_protein_delins(test_handler, protein_delins): """Test that Amnio Acid DelIns normalizes correctly.""" resp = await test_handler.normalize("NP_001333827.1:p.Leu747_Thr751delinsPro") @@ -786,7 +786,7 @@ async def test_protein_delins(test_handler, protein_delins): assertion_checks(resp, protein_delins) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_deletion(test_handler, protein_deletion_np_range): """Test that Protein Deletion normalizes correctly.""" resp = await test_handler.normalize("NP_004439.2:p.Leu755_Thr759del") @@ -807,10 +807,10 @@ async def test_protein_deletion(test_handler, protein_deletion_np_range): assert resp.variation is None -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_deletion(test_handler, cdna_deletion): """Test that cdna deletion normalizes correctly.""" - # https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_caid?caid=CA645372623 # noqa: E501 + # https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_caid?caid=CA645372623 q = "NM_004448.3:c.2264_2278delTGAGGGAAAACACAT" resp1 = await test_handler.normalize(q) assertion_checks(resp1, cdna_deletion) @@ -827,7 +827,7 @@ async def test_cdna_deletion(test_handler, cdna_deletion): assertion_checks(resp, cdna_deletion) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_deletion(test_handler, genomic_deletion): """Test that genomic deletion normalizes correctly""" # CA915940709 @@ -846,7 +846,7 @@ async def test_genomic_deletion(test_handler, genomic_deletion): assert resp.variation is None -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_insertion(test_handler, protein_insertion): """Test that protein insertion normalizes correctly.""" resp = await test_handler.normalize("NP_005219.2:p.Asp770_Asn771insGlyLeu") @@ -865,14 +865,14 @@ async def test_protein_insertion(test_handler, protein_insertion): assertion_checks(resp, 
protein_insertion) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_insertion(test_handler, cdna_insertion): """Test that cdna insertion normalizes correctly.""" resp = await test_handler.normalize("ENST00000331728.9:c.2049_2050insA") assertion_checks(resp, cdna_insertion) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_insertion( test_handler, genomic_insertion, grch38_genomic_insertion_variation ): @@ -890,7 +890,7 @@ async def test_genomic_insertion( assertion_checks(resp, grch38_genomic_insertion_variation) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_amplification(test_handler, braf_amplification, prpf8_amplification): """Test that amplification normalizes correctly.""" q = "BRAF Amplification" @@ -907,7 +907,7 @@ async def test_amplification(test_handler, braf_amplification, prpf8_amplificati assert resp.variation is None -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_valid_queries(test_handler): """Test that valid queries don"t throw exceptions. Used for queries that revealed bugs in service. 
@@ -932,7 +932,7 @@ async def test_valid_queries(test_handler): assert resp.variation, q -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_no_matches(test_handler): """Test no matches work correctly.""" queries = [ @@ -973,7 +973,7 @@ async def test_no_matches(test_handler): assert resp.variation is None -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_service_meta(): """Test that service meta info populates correctly.""" response = await normalize_get_response("BRAF v600e", "default") diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index 8ac3fb0c..bf3132e8 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -1,4 +1,6 @@ """Module for testing tokenizers""" +from pathlib import Path + import pytest import yaml @@ -48,7 +50,7 @@ @pytest.fixture(scope="module") def all_fixtures(): """Create fixture for tokenizers""" - with open(f"{PROJECT_ROOT}/tests/fixtures/tokenizers.yml") as stream: + with Path(f"{PROJECT_ROOT}/tests/fixtures/tokenizers.yml").open() as stream: return yaml.safe_load(stream) diff --git a/tests/test_translator.py b/tests/test_translator.py index babe94db..b3cc1338 100644 --- a/tests/test_translator.py +++ b/tests/test_translator.py @@ -1,4 +1,6 @@ """Module for testing translators""" +from pathlib import Path + import pytest import yaml @@ -33,7 +35,7 @@ @pytest.fixture(scope="module") def all_fixtures(): """Create fixture for translators""" - with open(f"{PROJECT_ROOT}/tests/fixtures/translators.yml") as stream: + with Path(f"{PROJECT_ROOT}/tests/fixtures/translators.yml").open() as stream: return yaml.safe_load(stream) @@ -92,7 +94,7 @@ async def translator_checks( assert len(translations) == len(expected), query -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_substitution( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -110,7 +112,7 @@ async def test_protein_substitution( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def 
test_cdna_substitution( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -128,7 +130,7 @@ async def test_cdna_substitution( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_substitution( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -146,7 +148,7 @@ async def test_genomic_substitution( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_stop_gain( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -164,7 +166,7 @@ async def test_protein_stop_gain( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_reference_agree( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -182,7 +184,7 @@ async def test_protein_reference_agree( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_reference_agree( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -200,7 +202,7 @@ async def test_cdna_reference_agree( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_reference_agree( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -218,7 +220,7 @@ async def test_genomic_reference_agree( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_delins( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -236,7 +238,7 @@ async def test_protein_delins( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_delins( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -254,7 +256,7 @@ async def test_cdna_delins( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_delins( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -272,7 +274,7 @@ async def test_genomic_delins( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_deletion( all_fixtures, trans_params, 
test_tokenizer, test_classifier, test_validator ): @@ -290,7 +292,7 @@ async def test_protein_deletion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_deletion( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -308,7 +310,7 @@ async def test_cdna_deletion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_deletion( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -326,7 +328,7 @@ async def test_genomic_deletion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_deletion_ambiguous( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -344,7 +346,7 @@ async def test_genomic_deletion_ambiguous( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_insertion( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -362,7 +364,7 @@ async def test_protein_insertion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_insertion( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -380,7 +382,7 @@ async def test_cdna_insertion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_insertion( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -398,7 +400,7 @@ async def test_genomic_insertion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_duplication( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -416,7 +418,7 @@ async def test_genomic_duplication( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_duplication_ambiguous( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -434,7 +436,7 @@ async def test_genomic_duplication_ambiguous( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_amplification( all_fixtures, trans_params, test_tokenizer, test_classifier, 
test_validator ): diff --git a/tests/test_validator.py b/tests/test_validator.py index b97707c6..ee07e501 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -1,4 +1,6 @@ """Module for testing validators""" +from pathlib import Path + import pytest import yaml @@ -30,7 +32,7 @@ @pytest.fixture(scope="module") def all_fixtures(): """Create fixture for validators""" - with open(f"{PROJECT_ROOT}/tests/fixtures/validators.yml") as stream: + with Path(f"{PROJECT_ROOT}/tests/fixtures/validators.yml").open() as stream: return yaml.safe_load(stream) @@ -67,9 +69,9 @@ async def validator_checks( classification ) except Exception as e: - raise Exception(f"{e}: {query}") + msg = f"{e}: {query}" + raise Exception(msg) from e else: - validator_instance is_valid = False for vr in validation_results: if vr.is_valid: @@ -79,7 +81,7 @@ async def validator_checks( assert is_valid if label == "should_match" else not is_valid, query -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_substitution( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -96,7 +98,7 @@ async def test_protein_substitution( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_substitution( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -113,7 +115,7 @@ async def test_cdna_substitution( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_substitution( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -130,7 +132,7 @@ async def test_genomic_substitution( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_stop_gain( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -147,7 +149,7 @@ async def test_protein_stop_gain( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_reference_agree( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -164,7 +166,7 @@ async def test_protein_reference_agree( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() 
async def test_cdna_reference_agree( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -181,7 +183,7 @@ async def test_cdna_reference_agree( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_reference_agree( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -198,7 +200,7 @@ async def test_genomic_reference_agree( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_delins( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -215,7 +217,7 @@ async def test_protein_delins( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_delins(all_fixtures, test_tokenizer, test_classifier, val_params): """Test that cdna delins validator works correctly""" fixture_name = "cdna_delins" @@ -230,7 +232,7 @@ async def test_cdna_delins(all_fixtures, test_tokenizer, test_classifier, val_pa ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_delins( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -247,7 +249,7 @@ async def test_genomic_delins( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_deletion( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -264,7 +266,7 @@ async def test_protein_deletion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_deletion(all_fixtures, test_tokenizer, test_classifier, val_params): """Test that cdna deletion validator works correctly""" fixture_name = "cdna_deletion" @@ -279,7 +281,7 @@ async def test_cdna_deletion(all_fixtures, test_tokenizer, test_classifier, val_ ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_deletion( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -296,7 +298,7 @@ async def test_genomic_deletion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_deletion_ambiguous( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -313,7 +315,7 @@ async def test_genomic_deletion_ambiguous( ) 
-@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_insertion( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -330,7 +332,7 @@ async def test_protein_insertion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_insertion( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -347,7 +349,7 @@ async def test_cdna_insertion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_insertion( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -364,7 +366,7 @@ async def test_genomic_insertion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_duplication( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -381,7 +383,7 @@ async def test_genomic_duplication( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_duplication_ambiguous( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -398,7 +400,7 @@ async def test_genomic_duplication_ambiguous( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_amplification(all_fixtures, test_tokenizer, test_classifier, val_params): """Test that amplification validator works correctly""" fixture_name = "amplification" diff --git a/tests/to_copy_number_variation/test_hgvs_to_copy_number.py b/tests/to_copy_number_variation/test_hgvs_to_copy_number.py index 3e950cbc..d2e3a9b5 100644 --- a/tests/to_copy_number_variation/test_hgvs_to_copy_number.py +++ b/tests/to_copy_number_variation/test_hgvs_to_copy_number.py @@ -679,7 +679,7 @@ def genomic_del6_cx_37(genomic_del6_37_loc): return models.CopyNumberChange(**params) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup1_copy_number_count( test_cnv_handler, genomic_dup1_38_cn, genomic_dup1_cn_37 ): @@ -712,7 +712,7 @@ async def test_genomic_dup1_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup1_copy_number_change( test_cnv_handler, 
genomic_dup1_cx_38, genomic_dup1_cx_37 ): @@ -735,7 +735,7 @@ async def test_genomic_dup1_copy_number_change( cnv_assertion_checks(resp, genomic_dup1_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup2_copy_number_count( test_cnv_handler, genomic_dup2_38_cn, genomic_dup2_cn_37 ): @@ -766,7 +766,7 @@ async def test_genomic_dup2_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup2_copy_number_change( test_cnv_handler, genomic_dup2_cx_38, genomic_dup2_cx_37 ): @@ -789,7 +789,7 @@ async def test_genomic_dup2_copy_number_change( cnv_assertion_checks(resp, genomic_dup2_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup3_copy_number_count( test_cnv_handler, genomic_del3_dup3_cn_38, genomic_del3_dup3_cn_37 ): @@ -820,7 +820,7 @@ async def test_genomic_dup3_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup3_copy_number_change( test_cnv_handler, genomic_dup3_cx_38, genomic_dup3_cx_37 ): @@ -843,7 +843,7 @@ async def test_genomic_dup3_copy_number_change( cnv_assertion_checks(resp, genomic_dup3_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup4_copy_number_count( test_cnv_handler, genomic_dup4_cn_38, genomic_dup4_cn_37 ): @@ -874,7 +874,7 @@ async def test_genomic_dup4_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup4_copy_number_change( test_cnv_handler, genomic_dup4_cx_38, genomic_dup4_cx_37 ): @@ -897,7 +897,7 @@ async def test_genomic_dup4_copy_number_change( cnv_assertion_checks(resp, genomic_dup4_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup5_copy_number_count( test_cnv_handler, genomic_dup5_cn_38, genomic_dup5_cn_37 ): @@ -928,7 +928,7 @@ async def test_genomic_dup5_copy_number_count( cnv_assertion_checks(resp, 
expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup5_copy_number_change( test_cnv_handler, genomic_dup5_cx_38, genomic_dup5_cx_37 ): @@ -951,7 +951,7 @@ async def test_genomic_dup5_copy_number_change( cnv_assertion_checks(resp, genomic_dup5_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup6_copy_number_count( test_cnv_handler, genomic_dup6_cn_38, genomic_dup6_cn_37 ): @@ -982,7 +982,7 @@ async def test_genomic_dup6_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup6_copy_number_change( test_cnv_handler, genomic_dup6_cx_38, genomic_dup6_cx_37 ): @@ -1005,7 +1005,7 @@ async def test_genomic_dup6_copy_number_change( cnv_assertion_checks(resp, genomic_dup6_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del1_copy_number_count( test_cnv_handler, genomic_del1_38_cn, genomic_del1_cn_37 ): @@ -1036,7 +1036,7 @@ async def test_genomic_del1_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del1_copy_number_change( test_cnv_handler, genomic_del1_cx_38, genomic_del1_cx_37 ): @@ -1059,7 +1059,7 @@ async def test_genomic_del1_copy_number_change( cnv_assertion_checks(resp, genomic_del1_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del2_copy_number_count( test_cnv_handler, genomic_del2_38_cn, genomic_del2_cn_37 ): @@ -1090,7 +1090,7 @@ async def test_genomic_del2_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del2_copy_number_change( test_cnv_handler, genomic_del2_cx_38, genomic_del2_cx_37 ): @@ -1113,7 +1113,7 @@ async def test_genomic_del2_copy_number_change( cnv_assertion_checks(resp, genomic_del2_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del3_copy_number_count( test_cnv_handler, 
genomic_del3_dup3_cn_38, genomic_del3_dup3_cn_37 ): @@ -1144,7 +1144,7 @@ async def test_genomic_del3_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del3_copy_number_change( test_cnv_handler, genomic_del3_cx_38, genomic_del3_cx_37 ): @@ -1167,7 +1167,7 @@ async def test_genomic_del3_copy_number_change( cnv_assertion_checks(resp, genomic_del3_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del4_copy_number_count( test_cnv_handler, genomic_del4_cn_38, genomic_del4_cn_37 ): @@ -1198,7 +1198,7 @@ async def test_genomic_del4_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del4_copy_number_change( test_cnv_handler, genomic_del4_cx_38, genomic_del4_cx_37 ): @@ -1221,7 +1221,7 @@ async def test_genomic_del4_copy_number_change( cnv_assertion_checks(resp, genomic_del4_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del5_copy_number_count( test_cnv_handler, genomic_del5_cn_38, genomic_del5_cn_37 ): @@ -1252,7 +1252,7 @@ async def test_genomic_del5_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del5_copy_number_change( test_cnv_handler, genomic_del5_cx_38, genomic_del5_cx_37 ): @@ -1275,7 +1275,7 @@ async def test_genomic_del5_copy_number_change( cnv_assertion_checks(resp, genomic_del5_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del6_copy_number_count( test_cnv_handler, genomic_del6_cn_38, genomic_del6_cn_37 ): @@ -1306,7 +1306,7 @@ async def test_genomic_del6_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del6_copy_number_change( test_cnv_handler, genomic_del6_cx_38, genomic_del6_cx_37 ): @@ -1329,7 +1329,7 @@ async def test_genomic_del6_copy_number_change( cnv_assertion_checks(resp, 
genomic_del6_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_invalid_cnv(test_cnv_handler): """Check that invalid input return warnings""" q = "DAG1 g.49568695dup" diff --git a/tests/to_copy_number_variation/test_parsed_to_copy_number.py b/tests/to_copy_number_variation/test_parsed_to_copy_number.py index d147b947..3e4ae807 100644 --- a/tests/to_copy_number_variation/test_parsed_to_copy_number.py +++ b/tests/to_copy_number_variation/test_parsed_to_copy_number.py @@ -303,7 +303,7 @@ def test_get_parsed_ac_chr(test_cnv_handler): test_cnv_handler._get_parsed_ac_chr("NC_00000713", False) assert ( str(e.value) == "SeqRepo unable to get translated identifiers for NC_00000713" - ) # noqa: E501 + ) def test_validate_pos(test_cnv_handler): @@ -888,7 +888,9 @@ def test_invalid(test_cnv_handler): assert resp.warnings == ["hg18 assembly is not currently supported"] # Must give both assembly + chromosome or accession - ac_assembly_chr_msg = "Must provide either `accession` or both `assembly` and `chromosome`" # noqa: E501 + ac_assembly_chr_msg = ( + "Must provide either `accession` or both `assembly` and `chromosome`" + ) with pytest.raises(ValidationError) as e: ParsedToCxVarQuery( start0=31738809, diff --git a/variation/__init__.py b/variation/__init__.py index ed2efae6..ed36bbc3 100644 --- a/variation/__init__.py +++ b/variation/__init__.py @@ -3,7 +3,7 @@ from os import environ if "VARIATION_NORM_EB_PROD" in environ: - LOG_FN = "/tmp/variation.log" + LOG_FN = "/var/variation.log" else: LOG_FN = "variation.log" diff --git a/variation/classifiers/__init__.py b/variation/classifiers/__init__.py index c174ba7c..6b68ec5f 100644 --- a/variation/classifiers/__init__.py +++ b/variation/classifiers/__init__.py @@ -21,3 +21,28 @@ from .protein_reference_agree import ProteinReferenceAgreeClassifier from .protein_stop_gain_classifier import ProteinStopGainClassifier from .protein_substitution_classifier import ProteinSubstitutionClassifier + +__all__ = [ + 
"AmplificationClassifier", + "CdnaDeletionClassifier", + "CdnaDelInsClassifier", + "CdnaInsertionClassifier", + "CdnaReferenceAgreeClassifier", + "CdnaSubstitutionClassifier", + "GenomicDeletionAmbiguousClassifier", + "GenomicDeletionClassifier", + "GenomicDelInsClassifier", + "GenomicDuplicationAmbiguousClassifier", + "GenomicDuplicationClassifier", + "GenomicInsertionClassifier", + "GenomicReferenceAgreeClassifier", + "GenomicSubstitutionClassifier", + "GnomadVcfClassifier", + "HgvsClassifier", + "ProteinDeletionClassifier", + "ProteinDelInsClassifier", + "ProteinInsertionClassifier", + "ProteinReferenceAgreeClassifier", + "ProteinStopGainClassifier", + "ProteinSubstitutionClassifier", +] diff --git a/variation/classifiers/cdna_substitution_classifier.py b/variation/classifiers/cdna_substitution_classifier.py index 6d4dce79..aad05ba1 100644 --- a/variation/classifiers/cdna_substitution_classifier.py +++ b/variation/classifiers/cdna_substitution_classifier.py @@ -48,3 +48,5 @@ def match(self, tokens: List[Token]) -> Optional[CdnaSubstitutionClassification] ref=cdna_sub_token.ref, alt=cdna_sub_token.alt, ) + + return None diff --git a/variation/classifiers/classifier.py b/variation/classifiers/classifier.py index 400a7e80..beaea3ce 100644 --- a/variation/classifiers/classifier.py +++ b/variation/classifiers/classifier.py @@ -25,7 +25,6 @@ def exact_match_candidates(self) -> List[List[TokenType]]: :return: List of list of tokens, where order matters, that represent a given classification. """ - pass def can_classify(self, tokens: List[Token]) -> bool: """Return whether or not a list of tokens can be classified by a given @@ -35,7 +34,7 @@ def can_classify(self, tokens: List[Token]) -> bool: :return: `True` if a list of tokens matches the tokens needed, where order matters, to represent a given classification. `False`, otherwise. 
""" - token_types = list(map(lambda t: t.token_type, tokens)) + token_types = [t.token_type for t in tokens] exact_matches: List[List[str]] = [] for candidate in self.exact_match_candidates(): diff --git a/variation/classifiers/genomic_substitution_classifier.py b/variation/classifiers/genomic_substitution_classifier.py index 4f39b303..0c2c47a7 100644 --- a/variation/classifiers/genomic_substitution_classifier.py +++ b/variation/classifiers/genomic_substitution_classifier.py @@ -49,3 +49,5 @@ def match(self, tokens: List[Token]) -> Optional[GenomicSubstitutionClassificati ref=genomic_sub_token.ref, alt=genomic_sub_token.alt, ) + + return None diff --git a/variation/classifiers/gnomad_vcf_classifier.py b/variation/classifiers/gnomad_vcf_classifier.py index 6ebfae89..9a5b8a3e 100644 --- a/variation/classifiers/gnomad_vcf_classifier.py +++ b/variation/classifiers/gnomad_vcf_classifier.py @@ -56,11 +56,11 @@ def match( if ref == alt: return GenomicReferenceAgreeClassification(**params) - else: - params["ref"] = ref - params["alt"] = alt - return GenomicSubstitutionClassification(**params) + params["ref"] = ref + params["alt"] = alt + + return GenomicSubstitutionClassification(**params) # delins params["pos0"] = token.pos diff --git a/variation/classifiers/hgvs_classifier.py b/variation/classifiers/hgvs_classifier.py index 073ac996..f81b8391 100644 --- a/variation/classifiers/hgvs_classifier.py +++ b/variation/classifiers/hgvs_classifier.py @@ -117,12 +117,14 @@ def _protein_classification( if params["alt"] in {"Ter", "*"}: params["alt"] = "*" return ProteinStopGainClassification(**params) - else: - return ProteinSubstitutionClassification(**params) - elif classification_type == ClassificationType.PROTEIN_REFERENCE_AGREE: + + return ProteinSubstitutionClassification(**params) + + if classification_type == ClassificationType.PROTEIN_REFERENCE_AGREE: params["pos"] = int(params["pos"]) return ProteinReferenceAgreeClassification(**params) - elif classification_type == 
ClassificationType.PROTEIN_DELINS: + + if classification_type == ClassificationType.PROTEIN_DELINS: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -130,7 +132,8 @@ def _protein_classification( else params["pos1"] ) return ProteinDelInsClassification(**params) - elif classification_type == ClassificationType.PROTEIN_DELETION: + + if classification_type == ClassificationType.PROTEIN_DELETION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -138,7 +141,8 @@ def _protein_classification( else params["pos1"] ) return ProteinDeletionClassification(**params) - elif classification_type == ClassificationType.PROTEIN_INSERTION: + + if classification_type == ClassificationType.PROTEIN_INSERTION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -170,10 +174,12 @@ def _cdna_classification( if classification_type == ClassificationType.CDNA_SUBSTITUTION: params["pos"] = int(params["pos"]) return CdnaSubstitutionClassification(**params) - elif classification_type == ClassificationType.CDNA_REFERENCE_AGREE: + + if classification_type == ClassificationType.CDNA_REFERENCE_AGREE: params["pos"] = int(params["pos"]) return CdnaReferenceAgreeClassification(**params) - elif classification_type == ClassificationType.CDNA_DELINS: + + if classification_type == ClassificationType.CDNA_DELINS: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -181,7 +187,8 @@ def _cdna_classification( else params["pos1"] ) return CdnaDelInsClassification(**params) - elif classification_type == ClassificationType.CDNA_DELETION: + + if classification_type == ClassificationType.CDNA_DELETION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -189,7 +196,8 @@ def _cdna_classification( else params["pos1"] ) return CdnaDeletionClassification(**params) - elif classification_type == ClassificationType.CDNA_INSERTION: + + if classification_type == 
ClassificationType.CDNA_INSERTION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -198,6 +206,8 @@ def _cdna_classification( ) return CdnaInsertionClassification(**params) + return None + def _genomic_classification( self, token: HgvsToken, params: Dict ) -> Optional[Classification]: @@ -220,10 +230,12 @@ def _genomic_classification( if classification_type == ClassificationType.GENOMIC_SUBSTITUTION: params["pos"] = int(params["pos"]) return GenomicSubstitutionClassification(**params) - elif classification_type == ClassificationType.GENOMIC_REFERENCE_AGREE: + + if classification_type == ClassificationType.GENOMIC_REFERENCE_AGREE: params["pos"] = int(params["pos"]) return GenomicReferenceAgreeClassification(**params) - elif classification_type == ClassificationType.GENOMIC_DELINS: + + if classification_type == ClassificationType.GENOMIC_DELINS: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -231,7 +243,8 @@ def _genomic_classification( else params["pos1"] ) return GenomicDelInsClassification(**params) - elif classification_type == ClassificationType.GENOMIC_INSERTION: + + if classification_type == ClassificationType.GENOMIC_INSERTION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -239,7 +252,8 @@ def _genomic_classification( else params["pos1"] ) return GenomicInsertionClassification(**params) - elif classification_type == ClassificationType.GENOMIC_DELETION: + + if classification_type == ClassificationType.GENOMIC_DELETION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -247,7 +261,8 @@ def _genomic_classification( else params["pos1"] ) return GenomicDeletionClassification(**params) - elif classification_type == ClassificationType.GENOMIC_DUPLICATION: + + if classification_type == ClassificationType.GENOMIC_DUPLICATION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -256,6 +271,8 @@ def 
_genomic_classification( ) return GenomicDuplicationClassification(**params) + return None + def _genomic_ambiguous_classification( self, token: HgvsToken, params: Dict ) -> Optional[Classification]: @@ -271,7 +288,8 @@ def _genomic_ambiguous_classification( """ if token.token.endswith("dup"): return self._genomic_dup_ambiguous_classification(token, params) - elif token.token.endswith("del"): + + if token.token.endswith("del"): return self._genomic_del_ambiguous_classification(token, params) return None diff --git a/variation/classify.py b/variation/classify.py index cd2db3a3..cd1a4728 100644 --- a/variation/classify.py +++ b/variation/classify.py @@ -1,5 +1,5 @@ """Module for classification.""" -from typing import List, Optional +from typing import ClassVar, List, Optional from variation.classifiers import ( AmplificationClassifier, @@ -35,7 +35,7 @@ class Classify: hgvs_classifier = HgvsClassifier() gnomad_vcf_classifier = GnomadVcfClassifier() - classifiers: List[Classifier] = [ + classifiers: ClassVar[List[Classifier]] = [ ProteinDelInsClassifier(), ProteinSubstitutionClassifier(), ProteinStopGainClassifier(), diff --git a/variation/main.py b/variation/main.py index 1f442520..3faaf70a 100644 --- a/variation/main.py +++ b/variation/main.py @@ -1,6 +1,6 @@ """Main application for FastAPI.""" +import datetime import traceback -from datetime import datetime from enum import Enum from typing import List, Optional, Union from urllib.parse import unquote @@ -68,7 +68,7 @@ class Tag(Enum): contact={ "name": "Alex H. 
Wagner", "email": "Alex.Wagner@nationwidechildrens.org", - "url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab", # noqa: E501 + "url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab", }, license={ "name": "MIT", @@ -112,8 +112,7 @@ async def to_vrs( :param q: HGVS, gnomAD VCF or Free Text description on GRCh37 or GRCh38 assembly :return: ToVRSService model for variation """ - resp = await query_handler.to_vrs_handler.to_vrs(unquote(q)) - return resp + return await query_handler.to_vrs_handler.to_vrs(unquote(q)) normalize_summary = ( @@ -148,11 +147,11 @@ async def normalize( ), baseline_copies: Optional[int] = Query( None, - description="Baseline copies for HGVS duplications and deletions represented as Copy Number Count Variation", # noqa: E501 + description="Baseline copies for HGVS duplications and deletions represented as Copy Number Count Variation", ), copy_change: Optional[models.CopyChange] = Query( None, - description="The copy change for HGVS duplications and deletions represented as Copy Number Change Variation.", # noqa: E501 + description="The copy change for HGVS duplications and deletions represented as Copy Number Change Variation.", ), ) -> NormalizeService: """Normalize and translate a HGVS, gnomAD VCF or Free Text description on GRCh37 @@ -170,13 +169,12 @@ async def normalize( query. 
:return: NormalizeService for variation """ - normalize_resp = await query_handler.normalize_handler.normalize( + return await query_handler.normalize_handler.normalize( unquote(q), hgvs_dup_del_mode=hgvs_dup_del_mode, baseline_copies=baseline_copies, copy_change=copy_change, ) - return normalize_resp @app.get( @@ -218,7 +216,8 @@ def translate_identifier( warnings=warnings, aliases=aliases, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) @@ -258,7 +257,7 @@ def vrs_python_translate_from( :return: TranslateFromService containing VRS Allele object """ variation_query = unquote(variation.strip()) - warnings = list() + warnings = [] vrs_variation = None try: resp = query_handler.vrs_python_tlr.translate_from(variation_query, fmt) @@ -280,7 +279,8 @@ def vrs_python_translate_from( warnings=warnings, variation=vrs_variation, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), vrs_python_meta_=VrsPythonMeta( version=pkg_resources.get_distribution("ga4gh.vrs").version @@ -288,12 +288,16 @@ def vrs_python_translate_from( ) -g_to_p_summary = "Given GRCh38 gnomAD VCF, return VRS Variation object on MANE protein coordinate." # noqa: E501 +g_to_p_summary = ( + "Given GRCh38 gnomAD VCF, return VRS Variation object on MANE protein coordinate." +) g_to_p_response_description = "A response to a validly-formed query." g_to_p_description = ( "Return VRS Variation object on protein coordinate for variation provided." ) -q_description = "GRCh38 gnomAD VCF (chr-pos-ref-alt) to normalize to MANE protein variation." # noqa: E501 +q_description = ( + "GRCh38 gnomAD VCF (chr-pos-ref-alt) to normalize to MANE protein variation." 
+) hgvs_dup_del_mode_decsr = ( @@ -344,11 +348,11 @@ async def vrs_python_translate_to(request_body: TranslateToQuery) -> TranslateTo """ query = request_body request_body = request_body.model_dump(by_alias=True) - warnings = list() + warnings = [] allele = _get_allele(request_body, warnings) - variations = list() + variations = [] if allele: try: variations = query_handler.vrs_python_tlr.translate_to( @@ -362,7 +366,8 @@ async def vrs_python_translate_to(request_body: TranslateToQuery) -> TranslateTo warnings=warnings, variations=variations, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), vrs_python_meta_=VrsPythonMeta( version=pkg_resources.get_distribution("ga4gh.vrs").version @@ -401,11 +406,11 @@ async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> TranslateToS """ query = request_body request_body = request_body.model_dump(by_alias=True) - warnings = list() + warnings = [] allele = _get_allele(request_body, warnings) - variations = list() + variations = [] if allele: try: variations = query_handler.vrs_python_tlr._to_hgvs( @@ -419,7 +424,8 @@ async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> TranslateToS warnings=warnings, variations=variations, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), vrs_python_meta_=VrsPythonMeta( version=pkg_resources.get_distribution("ga4gh.vrs").version @@ -452,12 +458,11 @@ async def hgvs_to_copy_number_count( :param do_liftover: Whether or not to liftover to GRCh38 assembly :return: HgvsToCopyNumberCountService """ - resp = await query_handler.to_copy_number_handler.hgvs_to_copy_number_count( + return await query_handler.to_copy_number_handler.hgvs_to_copy_number_count( unquote(hgvs_expr.strip()), baseline_copies, do_liftover, ) - return 
resp @app.get( @@ -483,12 +488,11 @@ async def hgvs_to_copy_number_change( :param do_liftover: Whether or not to liftover to GRCh38 assembly :return: HgvsToCopyNumberChangeService """ - resp = await query_handler.to_copy_number_handler.hgvs_to_copy_number_change( + return await query_handler.to_copy_number_handler.hgvs_to_copy_number_change( unquote(hgvs_expr.strip()), copy_change, do_liftover, ) - return resp @app.post( @@ -518,7 +522,8 @@ def parsed_to_cn_var(request_body: ParsedToCnVarQuery) -> ParsedToCnVarService: copy_number_count=None, warnings=["Unhandled exception. See logs for more details."], service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) else: @@ -552,7 +557,8 @@ def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> ParsedToCxVarService: copy_number_count=None, warnings=["Unhandled exception. See logs for more details."], service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) else: @@ -600,13 +606,12 @@ def amplification_to_cx_var( :return: AmplificationToCxVarService containing Copy Number Change and list of warnings """ - resp = query_handler.to_copy_number_handler.amplification_to_cx_var( + return query_handler.to_copy_number_handler.amplification_to_cx_var( gene=gene, sequence_id=sequence_id, start=start, end=end, ) - return resp @app.get( @@ -648,7 +653,10 @@ async def p_to_c( return ToCdnaService( c_data=c_data, warnings=[w] if w else [], - service_meta=ServiceMeta(version=__version__, response_datetime=datetime.now()), + service_meta=ServiceMeta( + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), + ), ) @@ -656,7 +664,7 @@ async def p_to_c( "/variation/alignment_mapper/c_to_g", summary="Translate cDNA representation to genomic representation", 
response_description="A response to a validly-formed query.", - description="Given cDNA accession and positions for codon(s), return associated genomic" # noqa: E501 + description="Given cDNA accession and positions for codon(s), return associated genomic" " accession and positions for a given target genome assembly", response_model=ToGenomicService, response_model_exclude_none=True, @@ -705,7 +713,10 @@ async def c_to_g( return ToGenomicService( g_data=g_data, warnings=[w] if w else [], - service_meta=ServiceMeta(version=__version__, response_datetime=datetime.now()), + service_meta=ServiceMeta( + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), + ), ) @@ -756,5 +767,8 @@ async def p_to_g( return ToGenomicService( g_data=g_data, warnings=[w] if w else [], - service_meta=ServiceMeta(version=__version__, response_datetime=datetime.now()), + service_meta=ServiceMeta( + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), + ), ) diff --git a/variation/normalize.py b/variation/normalize.py index 5e350895..9d887fad 100644 --- a/variation/normalize.py +++ b/variation/normalize.py @@ -1,5 +1,5 @@ """Module for Variation Normalization.""" -from datetime import datetime +import datetime from typing import List, Optional, Tuple from urllib.parse import unquote @@ -129,10 +129,12 @@ def get_hgvs_dup_del_mode( if not hgvs_dup_del_mode: hgvs_dup_del_mode = HGVSDupDelModeOption.DEFAULT - if hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_COUNT: - if not baseline_copies: - warning = f"{hgvs_dup_del_mode.value} mode requires `baseline_copies`" # noqa: E501 - return None, warning + if ( + hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_COUNT + and not baseline_copies + ): + warning = f"{hgvs_dup_del_mode.value} mode requires `baseline_copies`" + return None, warning return hgvs_dup_del_mode, warning @@ -163,7 +165,8 @@ async def normalize( "variation": variation, "warnings": warnings, 
"service_meta_": ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), } @@ -221,9 +224,7 @@ async def normalize( translation_result.vrs_seq_loc_ac_status == VrsSeqLocAcStatus.NA ): - classification_type = ( - translation_result.validation_result.classification.classification_type.value - ) + classification_type = translation_result.validation_result.classification.classification_type.value if classification_type.startswith(("protein", "cdna")): # Only supports protein/cDNA at the moment warnings.append("Unable to find MANE representation") diff --git a/variation/query.py b/variation/query.py index 87add090..404e67de 100644 --- a/variation/query.py +++ b/variation/query.py @@ -63,8 +63,8 @@ def __init__( translator, ] self.to_vrs_handler = ToVRS(*to_vrs_params) - normalize_params = to_vrs_params + [uta_db] + normalize_params = [*to_vrs_params, uta_db] self.normalize_handler = Normalize(*normalize_params) self.to_copy_number_handler = ToCopyNumberVariation( - *to_vrs_params + [gene_query_handler, uta_db] + *[*to_vrs_params, gene_query_handler, uta_db] ) diff --git a/variation/regex.py b/variation/regex.py index 7a4528c9..182e98c7 100644 --- a/variation/regex.py +++ b/variation/regex.py @@ -41,15 +41,15 @@ ) PROTEIN_INSERTION = re.compile( - r"^(?P[a-zA-z]+)(?P\d+)_(?P[a-zA-z]+)(?P\d+)ins(?P[a-zA-z]+)$" # noqa: E501 + r"^(?P[a-zA-z]+)(?P\d+)_(?P[a-zA-z]+)(?P\d+)ins(?P[a-zA-z]+)$" ) PROTEIN_DELINS = re.compile( - r"^(?P[a-zA-z]+)(?P\d+)(_(?P[a-zA-z]+)(?P\d+))?delins(?P[a-zA-z]+)$" # noqa: E501 + r"^(?P[a-zA-z]+)(?P\d+)(_(?P[a-zA-z]+)(?P\d+))?delins(?P[a-zA-z]+)$" ) PROTEIN_DELETION = re.compile( - r"^(?P[a-zA-z]+)(?P\d+)(_(?P[a-zA-z]+)(?P\d+))?del(?P[a-zA-z]+)?$" # noqa: E501 + r"^(?P[a-zA-z]+)(?P\d+)(_(?P[a-zA-z]+)(?P\d+))?del(?P[a-zA-z]+)?$" ) PROTEIN_REFERENCE_AGREE = re.compile(r"^(?P[a-zA-z]+)(?P\d+)=$") diff --git a/variation/schemas/__init__.py 
b/variation/schemas/__init__.py index 04fec28a..e6bfc5d6 100644 --- a/variation/schemas/__init__.py +++ b/variation/schemas/__init__.py @@ -1,3 +1,5 @@ """Package level import.""" from .normalize_response_schema import NormalizeService, ServiceMeta from .to_vrs_response_schema import ToVRSService + +__all__ = ["NormalizeService", "ServiceMeta", "ToVRSService"] diff --git a/variation/schemas/copy_number_schema.py b/variation/schemas/copy_number_schema.py index 991a2717..6dcbb8b0 100644 --- a/variation/schemas/copy_number_schema.py +++ b/variation/schemas/copy_number_schema.py @@ -55,17 +55,19 @@ def validate_parsed_fields(cls, v: Dict) -> Dict: - `end_pos_comparator` is required when `end_pos_type` is an Indefinite Range - End positions must be greater than start positions """ - ac_assembly_chr_msg = "Must provide either `accession` or both `assembly` and `chromosome`" # noqa: E501 + ac_assembly_chr_msg = ( + "Must provide either `accession` or both `assembly` and `chromosome`" + ) assembly = v.assembly chromosome = v.chromosome assembly_chr_set = assembly and chromosome - assert v.accession or assembly_chr_set, ac_assembly_chr_msg # noqa: E501 + assert v.accession or assembly_chr_set, ac_assembly_chr_msg if assembly_chr_set: pattern = r"^chr(X|Y|([1-9]|1[0-9]|2[0-2]))$" assert re.match( pattern, chromosome - ), f"`chromosome`, {chromosome}, does not match r'{pattern}'" # noqa: E501 + ), f"`chromosome`, {chromosome}, does not match r'{pattern}'" start0 = v.start0 start1 = v.start1 @@ -75,7 +77,7 @@ def validate_parsed_fields(cls, v: Dict) -> Dict: elif v.start_pos_type == ParsedPosType.INDEFINITE_RANGE: assert ( v.start_pos_comparator - ), "`start_pos_comparator` is required for indefinite ranges" # noqa: E501 + ), "`start_pos_comparator` is required for indefinite ranges" end0 = v.end0 end1 = v.end1 @@ -85,7 +87,7 @@ def validate_parsed_fields(cls, v: Dict) -> Dict: elif v.end_pos_type == ParsedPosType.INDEFINITE_RANGE: assert ( v.end_pos_comparator - ), 
"`end_pos_comparator` is required for indefinite ranges" # noqa: E501 + ), "`end_pos_comparator` is required for indefinite ranges" err_msg = "end positions must be greater than start" if start1 is None: @@ -216,13 +218,9 @@ def validate_fields(cls, v: Dict) -> Dict: copies_comparator = v.copies_comparator if copies_type == ParsedPosType.DEFINITE_RANGE: - assert ( - copies1 - ), "`copies1` must be provided for `copies_type == ParsedPosType.DEFINITE_RANGE`" # noqa: E501 + assert copies1, "`copies1` must be provided for `copies_type == ParsedPosType.DEFINITE_RANGE`" elif copies_type == ParsedPosType.INDEFINITE_RANGE: - assert ( - copies_comparator - ), "`copies_comparator` must be provided for `copies_type == ParsedPosType.INDEFINITE_RANGE`" # noqa: E501 + assert copies_comparator, "`copies_comparator` must be provided for `copies_type == ParsedPosType.INDEFINITE_RANGE`" return v diff --git a/variation/schemas/translation_response_schema.py b/variation/schemas/translation_response_schema.py index 2652ae85..73ab363f 100644 --- a/variation/schemas/translation_response_schema.py +++ b/variation/schemas/translation_response_schema.py @@ -21,7 +21,7 @@ class VrsSeqLocAcStatus(str, Enum): NA = "na" -AC_PRIORITY_LABELS = [m for m in VrsSeqLocAcStatus.__members__.values()] +AC_PRIORITY_LABELS = list(VrsSeqLocAcStatus.__members__.values()) class TranslationResult(BaseModel): diff --git a/variation/to_copy_number_variation.py b/variation/to_copy_number_variation.py index 3c71f393..60745779 100644 --- a/variation/to_copy_number_variation.py +++ b/variation/to_copy_number_variation.py @@ -1,5 +1,5 @@ """Module for to copy number variation translation""" -from datetime import datetime +import datetime from typing import Dict, List, NamedTuple, Optional, Tuple, Union from urllib.parse import unquote @@ -216,7 +216,8 @@ async def hgvs_to_copy_number_count( hgvs_expr=hgvs_expr, warnings=warnings, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + 
version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), copy_number_count=cn_var, ) @@ -248,7 +249,8 @@ async def hgvs_to_copy_number_change( hgvs_expr=hgvs_expr, warnings=warnings, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), copy_number_change=cx_var, ) @@ -290,9 +292,8 @@ def _get_parsed_ac( else: raise ToCopyNumberError(str(error)) else: - raise ToCopyNumberError( - f"{og_assembly.value} assembly is not currently supported" - ) + msg = f"{og_assembly.value} assembly is not currently supported" + raise ToCopyNumberError(msg) return ParsedAccessionSummary(lifted_over=lifted_over, accession=accession) @@ -324,7 +325,7 @@ def _get_parsed_ac_chr( chromosome = grch_record.split(":")[-1] if grch_record.startswith("GRCh38") or not do_liftover: - new_ac = [a for a in aliases if a.startswith("ga4gh")][0] + new_ac = next(a for a in aliases if a.startswith("ga4gh")) else: grch38_query = grch_record.replace("GRCh37", "GRCh38") aliases, error = self.seqrepo_access.translate_identifier( @@ -337,7 +338,8 @@ def _get_parsed_ac_chr( lifted_over = True new_ac = aliases[0] else: - raise ToCopyNumberError(f"Not a supported genomic accession: {accession}") + msg = f"Not a supported genomic accession: {accession}" + raise ToCopyNumberError(msg) return ParsedChromosomeSummary( accession=new_ac, chromosome=chromosome, lifted_over=lifted_over @@ -354,14 +356,15 @@ def _validate_ac_pos(self, accession: str, pos: int) -> None: try: ref = self.seqrepo_access.sr[accession][pos - 1] except ValueError as e: - raise ToCopyNumberError( - f"SeqRepo ValueError: {str(e).replace('start', 'Position')}" - ) - except KeyError: - raise ToCopyNumberError(f"Accession not found in SeqRepo: {accession}") + msg = f"SeqRepo ValueError: {str(e).replace('start', 'Position')}" + raise ToCopyNumberError(msg) from e + except KeyError as e: + msg 
= f"Accession not found in SeqRepo: {accession}" + raise ToCopyNumberError(msg) from e else: if ref == "": - raise ToCopyNumberError(f"Position ({pos}) is not valid on {accession}") + msg = f"Position ({pos}) is not valid on {accession}" + raise ToCopyNumberError(msg) from None def _get_vrs_loc_start_or_end( self, @@ -527,11 +530,10 @@ def _liftover_pos( chromosome, pos ) if not liftover: - raise ToCopyNumberError( - f"Unable to liftover: {chromosome} with pos {pos}" - ) - else: - liftover_pos[k] = liftover[0][1] + msg = f"Unable to liftover: {chromosome} with pos {pos}" + raise ToCopyNumberError(msg) + + liftover_pos[k] = liftover[0][1] return liftover_pos @@ -608,7 +610,8 @@ def parsed_to_copy_number( service_params = { "warnings": warnings, "service_meta_": ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), } @@ -645,7 +648,7 @@ def amplification_to_cx_var( :return: AmplificationToCxVarService containing Copy Number Change and list of warnings """ - warnings = list() + warnings = [] amplification_label = None variation = None try: @@ -715,6 +718,7 @@ def amplification_to_cx_var( copy_number_change=variation, warnings=warnings, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) diff --git a/variation/to_vrs.py b/variation/to_vrs.py index 9a1bd2cf..eca7b347 100644 --- a/variation/to_vrs.py +++ b/variation/to_vrs.py @@ -1,5 +1,5 @@ """Module for to_vrs endpoint.""" -from datetime import datetime +import datetime from typing import List, Optional, Tuple from urllib.parse import unquote @@ -101,7 +101,8 @@ async def to_vrs(self, q: str) -> ToVRSService: "search_term": q, "variations": variations, "service_meta_": ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + 
response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), "warnings": warnings, } diff --git a/variation/tokenizers/__init__.py b/variation/tokenizers/__init__.py index 0da19861..992e698c 100644 --- a/variation/tokenizers/__init__.py +++ b/variation/tokenizers/__init__.py @@ -18,3 +18,25 @@ from .protein_insertion import ProteinInsertion from .protein_reference_agree import ProteinReferenceAgree from .protein_substitution import ProteinSubstitution + +__all__ = [ + "CdnaGenomicReferenceAgree", + "CdnaDeletion", + "CdnaDelIns", + "CdnaInsertion", + "CdnaSubstitution", + "FreeTextCategorical", + "GeneSymbol", + "GenomicDeletion", + "GenomicDelIns", + "GenomicDuplication", + "GenomicInsertion", + "GenomicSubstitution", + "GnomadVCF", + "HGVS", + "ProteinDeletion", + "ProteinDelIns", + "ProteinInsertion", + "ProteinReferenceAgree", + "ProteinSubstitution", +] diff --git a/variation/tokenizers/cdna_and_genomic_reference_agree.py b/variation/tokenizers/cdna_and_genomic_reference_agree.py index f47ca537..ab69d65c 100644 --- a/variation/tokenizers/cdna_and_genomic_reference_agree.py +++ b/variation/tokenizers/cdna_and_genomic_reference_agree.py @@ -41,5 +41,8 @@ def match( if coordinate_type == AnnotationLayer.GENOMIC: return GenomicReferenceAgreeToken(**params) - elif coordinate_type == AnnotationLayer.CDNA: + + if coordinate_type == AnnotationLayer.CDNA: return CdnaReferenceAgreeToken(**params) + + return None diff --git a/variation/tokenizers/cdna_deletion.py b/variation/tokenizers/cdna_deletion.py index 01b6eb9a..9b949201 100644 --- a/variation/tokenizers/cdna_deletion.py +++ b/variation/tokenizers/cdna_deletion.py @@ -38,3 +38,5 @@ def match(self, input_string: str) -> Optional[CdnaDeletionToken]: pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, deleted_sequence=match_dict["deleted_sequence"], ) + + return None diff --git a/variation/tokenizers/cdna_delins.py b/variation/tokenizers/cdna_delins.py index 7169e015..beda5786 100644 --- 
a/variation/tokenizers/cdna_delins.py +++ b/variation/tokenizers/cdna_delins.py @@ -37,3 +37,5 @@ def match(self, input_string: str) -> Optional[CdnaDelInsToken]: pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, inserted_sequence=match_dict["inserted_sequence"], ) + + return None diff --git a/variation/tokenizers/cdna_insertion.py b/variation/tokenizers/cdna_insertion.py index 87ae61cd..2a0d5417 100644 --- a/variation/tokenizers/cdna_insertion.py +++ b/variation/tokenizers/cdna_insertion.py @@ -40,3 +40,5 @@ def match(self, input_string: str) -> Optional[CdnaInsertionToken]: pos1=pos1, inserted_sequence=inserted_sequence, ) + + return None diff --git a/variation/tokenizers/cdna_substitution.py b/variation/tokenizers/cdna_substitution.py index 7c166d85..59f36158 100644 --- a/variation/tokenizers/cdna_substitution.py +++ b/variation/tokenizers/cdna_substitution.py @@ -37,3 +37,5 @@ def match(self, input_string: str) -> Optional[CdnaSubstitutionToken]: ref=match_dict["ref"], alt=match_dict["alt"], ) + + return None diff --git a/variation/tokenizers/gene_symbol.py b/variation/tokenizers/gene_symbol.py index fde7cbe8..04e98c02 100644 --- a/variation/tokenizers/gene_symbol.py +++ b/variation/tokenizers/gene_symbol.py @@ -29,12 +29,11 @@ def match(self, input_string: str) -> Optional[GeneToken]: if norm_match_type != 0: gene = norm_resp.gene label = gene.label - gene_match_token = GeneToken( + return GeneToken( token=label, input_string=input_string, matched_value=label, gene=gene, ) - return gene_match_token return None diff --git a/variation/tokenizers/genomic_deletion.py b/variation/tokenizers/genomic_deletion.py index 0d2f93b3..8d9c1630 100644 --- a/variation/tokenizers/genomic_deletion.py +++ b/variation/tokenizers/genomic_deletion.py @@ -48,53 +48,53 @@ def match(self, input_string: str) -> Optional[GenomicDeletionToken]: pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, deleted_sequence=match_dict["deleted_sequence"], ) + + # Going to try 
ambiguous genomic duplications + match = GENOMIC_DELETION_AMBIGUOUS_1.match(input_string) + if match: + match_dict = match.groupdict() + pos0 = match_dict["pos0"] + pos1 = match_dict["pos1"] + pos2 = match_dict["pos2"] + pos3 = match_dict["pos3"] + + # (?_?)_(#_#), (#_#)_(?, ?), (?_?)_(?_?) are not supported + if not any(((pos0 == "?" and pos1 == "?"), (pos2 == "?" and pos3 == "?"))): + return GenomicDeletionAmbiguousToken( + input_string=og_input_string, + token=input_string, + pos0=int(pos0) if pos0 != "?" else pos0, + pos1=int(pos1) if pos1 != "?" else pos1, + pos2=int(pos2) if pos2 != "?" else pos2, + pos3=int(pos3) if pos3 != "?" else pos3, + ambiguous_regex_type=AmbiguousRegexType.REGEX_1, + ) + else: - # Going to try ambiguous genomic duplications - match = GENOMIC_DELETION_AMBIGUOUS_1.match(input_string) - if match: - match_dict = match.groupdict() - pos0 = match_dict["pos0"] - pos1 = match_dict["pos1"] - pos2 = match_dict["pos2"] - pos3 = match_dict["pos3"] - - # (?_?)_(#_#), (#_#)_(?, ?), (?_?)_(?_?) are not supported - if not any( - ((pos0 == "?" and pos1 == "?"), (pos2 == "?" and pos3 == "?")) - ): + for pattern_re, regex_type in [ + (GENOMIC_DELETION_AMBIGUOUS_2, AmbiguousRegexType.REGEX_2), + (GENOMIC_DELETION_AMBIGUOUS_3, AmbiguousRegexType.REGEX_3), + ]: + match = pattern_re.match(input_string) + + if match: + matched_pos = {} + match_dict = match.groupdict() + for k in match_dict: + v = match_dict[k] + if v: + v = int(v) if v != "?" else v + + matched_pos[k] = v + return GenomicDeletionAmbiguousToken( input_string=og_input_string, token=input_string, - pos0=int(pos0) if pos0 != "?" else pos0, - pos1=int(pos1) if pos1 != "?" else pos1, - pos2=int(pos2) if pos2 != "?" else pos2, - pos3=int(pos3) if pos3 != "?" 
else pos3, - ambiguous_regex_type=AmbiguousRegexType.REGEX_1, + pos0=matched_pos["pos0"], + pos1=matched_pos.get("pos1"), + pos2=matched_pos["pos2"], + pos3=matched_pos.get("pos3"), + ambiguous_regex_type=regex_type, ) - else: - for pattern_re, regex_type in [ - (GENOMIC_DELETION_AMBIGUOUS_2, AmbiguousRegexType.REGEX_2), - (GENOMIC_DELETION_AMBIGUOUS_3, AmbiguousRegexType.REGEX_3), - ]: - match = pattern_re.match(input_string) - - if match: - matched_pos = dict() - match_dict = match.groupdict() - for k in match_dict: - v = match_dict[k] - if v: - v = int(v) if v != "?" else v - - matched_pos[k] = v - - return GenomicDeletionAmbiguousToken( - input_string=og_input_string, - token=input_string, - pos0=matched_pos["pos0"], - pos1=matched_pos.get("pos1"), - pos2=matched_pos["pos2"], - pos3=matched_pos.get("pos3"), - ambiguous_regex_type=regex_type, - ) + return None diff --git a/variation/tokenizers/genomic_delins.py b/variation/tokenizers/genomic_delins.py index 9c7603d4..888d3145 100644 --- a/variation/tokenizers/genomic_delins.py +++ b/variation/tokenizers/genomic_delins.py @@ -39,3 +39,5 @@ def match(self, input_string: str) -> Optional[GenomicDelInsToken]: pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, inserted_sequence=match_dict["inserted_sequence"], ) + + return None diff --git a/variation/tokenizers/genomic_duplication.py b/variation/tokenizers/genomic_duplication.py index f03246ae..a20e1f6d 100644 --- a/variation/tokenizers/genomic_duplication.py +++ b/variation/tokenizers/genomic_duplication.py @@ -46,53 +46,52 @@ def match(self, input_string: str) -> Optional[GenomicDuplicationToken]: pos0=int(match_dict["pos0"]), pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, ) + + # Going to try ambiguous genomic duplications + match = GENOMIC_DUPLICATION_AMBIGUOUS_1.match(input_string) + if match: + match_dict = match.groupdict() + pos0 = match_dict["pos0"] + pos1 = match_dict["pos1"] + pos2 = match_dict["pos2"] + pos3 = match_dict["pos3"] 
+ + # (?_?)_(#_#), (#_#)_(?, ?), (?_?)_(?_?) are not supported + if not any(((pos0 == "?" and pos1 == "?"), (pos2 == "?" and pos3 == "?"))): + return GenomicDuplicationAmbiguousToken( + input_string=og_input_string, + token=input_string, + pos0=int(pos0) if pos0 != "?" else pos0, + pos1=int(pos1) if pos1 != "?" else pos1, + pos2=int(pos2) if pos2 != "?" else pos2, + pos3=int(pos3) if pos3 != "?" else pos3, + ambiguous_regex_type=AmbiguousRegexType.REGEX_1, + ) + else: - # Going to try ambiguous genomic duplications - match = GENOMIC_DUPLICATION_AMBIGUOUS_1.match(input_string) - if match: - match_dict = match.groupdict() - pos0 = match_dict["pos0"] - pos1 = match_dict["pos1"] - pos2 = match_dict["pos2"] - pos3 = match_dict["pos3"] + for pattern_re, regex_type in [ + (GENOMIC_DUPLICATION_AMBIGUOUS_2, AmbiguousRegexType.REGEX_2), + (GENOMIC_DUPLICATION_AMBIGUOUS_3, AmbiguousRegexType.REGEX_3), + ]: + match = pattern_re.match(input_string) + + if match: + matched_pos = {} + match_dict = match.groupdict() + for k in match_dict: + v = match_dict[k] + if v: + v = int(v) if v != "?" else v + + matched_pos[k] = v - # (?_?)_(#_#), (#_#)_(?, ?), (?_?)_(?_?) are not supported - if not any( - ((pos0 == "?" and pos1 == "?"), (pos2 == "?" and pos3 == "?")) - ): return GenomicDuplicationAmbiguousToken( input_string=og_input_string, token=input_string, - pos0=int(pos0) if pos0 != "?" else pos0, - pos1=int(pos1) if pos1 != "?" else pos1, - pos2=int(pos2) if pos2 != "?" else pos2, - pos3=int(pos3) if pos3 != "?" 
else pos3, - ambiguous_regex_type=AmbiguousRegexType.REGEX_1, + pos0=matched_pos["pos0"], + pos1=matched_pos.get("pos1"), + pos2=matched_pos["pos2"], + pos3=matched_pos.get("pos3"), + ambiguous_regex_type=regex_type, ) - - else: - for pattern_re, regex_type in [ - (GENOMIC_DUPLICATION_AMBIGUOUS_2, AmbiguousRegexType.REGEX_2), - (GENOMIC_DUPLICATION_AMBIGUOUS_3, AmbiguousRegexType.REGEX_3), - ]: - match = pattern_re.match(input_string) - - if match: - matched_pos = dict() - match_dict = match.groupdict() - for k in match_dict: - v = match_dict[k] - if v: - v = int(v) if v != "?" else v - - matched_pos[k] = v - - return GenomicDuplicationAmbiguousToken( - input_string=og_input_string, - token=input_string, - pos0=matched_pos["pos0"], - pos1=matched_pos.get("pos1"), - pos2=matched_pos["pos2"], - pos3=matched_pos.get("pos3"), - ambiguous_regex_type=regex_type, - ) + return None diff --git a/variation/tokenizers/genomic_insertion.py b/variation/tokenizers/genomic_insertion.py index 43576fe7..3319fd50 100644 --- a/variation/tokenizers/genomic_insertion.py +++ b/variation/tokenizers/genomic_insertion.py @@ -42,3 +42,5 @@ def match(self, input_string: str) -> Optional[GenomicInsertionToken]: pos1=pos1, inserted_sequence=inserted_sequence, ) + + return None diff --git a/variation/tokenizers/genomic_substitution.py b/variation/tokenizers/genomic_substitution.py index 80242388..3cafed25 100644 --- a/variation/tokenizers/genomic_substitution.py +++ b/variation/tokenizers/genomic_substitution.py @@ -41,3 +41,5 @@ def match(self, input_string: str) -> Optional[GenomicSubstitutionToken]: ref=match_dict["ref"], alt=match_dict["alt"], ) + + return None diff --git a/variation/tokenizers/hgvs.py b/variation/tokenizers/hgvs.py index 507d5890..c98e33a9 100644 --- a/variation/tokenizers/hgvs.py +++ b/variation/tokenizers/hgvs.py @@ -12,7 +12,7 @@ class HGVS(Tokenizer): """The HGVS tokenizer class.""" splitter = re.compile( - r"^(?P(NC_|NM_|NP_|ENSP|ENST)[^:\s]+):(?P[cgnpr])\.(?P\S+)$" # 
noqa: E501 + r"^(?P(NC_|NM_|NP_|ENSP|ENST)[^:\s]+):(?P[cgnpr])\.(?P\S+)$" ) def match(self, input_string: str) -> Optional[HgvsToken]: @@ -32,5 +32,5 @@ def match(self, input_string: str) -> Optional[HgvsToken]: coordinate_type=AnnotationLayer(match_dict["coordinate"]), change=match_dict["change"], ) - else: - return None + + return None diff --git a/variation/tokenizers/protein_deletion.py b/variation/tokenizers/protein_deletion.py index 903611aa..eb459a8b 100644 --- a/variation/tokenizers/protein_deletion.py +++ b/variation/tokenizers/protein_deletion.py @@ -92,3 +92,4 @@ def match(self, input_string: str) -> Optional[ProteinDeletionToken]: pos1=pos1, deleted_sequence=one_letter_del_seq, ) + return None diff --git a/variation/tokenizers/protein_delins.py b/variation/tokenizers/protein_delins.py index f80af492..5bfe086e 100644 --- a/variation/tokenizers/protein_delins.py +++ b/variation/tokenizers/protein_delins.py @@ -85,3 +85,4 @@ def match(self, input_string: str) -> Optional[ProteinDelInsToken]: pos1=pos1, inserted_sequence=one_letter_ins_seq, ) + return None diff --git a/variation/tokenizers/protein_insertion.py b/variation/tokenizers/protein_insertion.py index 0f7c8861..6b482eeb 100644 --- a/variation/tokenizers/protein_insertion.py +++ b/variation/tokenizers/protein_insertion.py @@ -67,3 +67,5 @@ def match(self, input_string: str) -> Optional[ProteinInsertionToken]: pos1=pos1, inserted_sequence=one_letter_ins_seq, ) + + return None diff --git a/variation/tokenizers/protein_reference_agree.py b/variation/tokenizers/protein_reference_agree.py index a14acd8a..89b74e75 100644 --- a/variation/tokenizers/protein_reference_agree.py +++ b/variation/tokenizers/protein_reference_agree.py @@ -1,4 +1,5 @@ """A module for Reference Agree Tokenization.""" +import contextlib from typing import Optional from bioutils.sequences import aa1_to_aa3, aa3_to_aa1 @@ -42,10 +43,8 @@ def match(self, input_string: str) -> Optional[ProteinReferenceAgreeToken]: aa1_to_aa3(ref) except 
KeyError: # maybe 3 letter AA code was used - try: + with contextlib.suppress(KeyError): aa1_ref = aa3_to_aa1(ref) - except KeyError: - pass else: aa1_ref = ref @@ -56,3 +55,5 @@ def match(self, input_string: str) -> Optional[ProteinReferenceAgreeToken]: pos=pos, ref=aa1_ref, ) + + return None diff --git a/variation/tokenizers/protein_substitution.py b/variation/tokenizers/protein_substitution.py index f66e28f3..dcc0fccd 100644 --- a/variation/tokenizers/protein_substitution.py +++ b/variation/tokenizers/protein_substitution.py @@ -74,5 +74,7 @@ def match( if aa1_alt == "*": return ProteinStopGainToken(**params) - else: - return ProteinSubstitutionToken(**params) + + return ProteinSubstitutionToken(**params) + + return None diff --git a/variation/tokenizers/tokenizer.py b/variation/tokenizers/tokenizer.py index 5f310913..3175d4d2 100644 --- a/variation/tokenizers/tokenizer.py +++ b/variation/tokenizers/tokenizer.py @@ -1,6 +1,6 @@ """Module for Tokenization.""" from abc import ABC, abstractmethod -from typing import Optional, Tuple +from typing import ClassVar, Dict, Optional, Tuple from cool_seq_tool.schemas import AnnotationLayer @@ -10,7 +10,9 @@ class Tokenizer(ABC): """The tokenizer class.""" - coord_types = {k: v.value for k, v in AnnotationLayer.__members__.items()} + coord_types: ClassVar[Dict[str, str]] = { + k: v.value for k, v in AnnotationLayer.__members__.items() + } @abstractmethod def match(self, input_string: str) -> Optional[Token]: diff --git a/variation/translators/__init__.py b/variation/translators/__init__.py index de52c9be..703976cc 100644 --- a/variation/translators/__init__.py +++ b/variation/translators/__init__.py @@ -19,3 +19,26 @@ from .protein_reference_agree import ProteinReferenceAgree from .protein_stop_gain import ProteinStopGain from .protein_substitution import ProteinSubstitution + +__all__ = [ + "Amplification", + "CdnaDeletion", + "CdnaDelIns", + "CdnaInsertion", + "CdnaReferenceAgree", + "CdnaSubstitution", + 
"GenomicDeletion", + "GenomicDeletionAmbiguous", + "GenomicDelIns", + "GenomicDuplication", + "GenomicDuplicationAmbiguous", + "GenomicInsertion", + "GenomicReferenceAgree", + "GenomicSubstitution", + "ProteinDeletion", + "ProteinDelIns", + "ProteinInsertion", + "ProteinReferenceAgree", + "ProteinStopGain", + "ProteinSubstitution", +] diff --git a/variation/translators/ambiguous_translator_base.py b/variation/translators/ambiguous_translator_base.py index a796b665..f287f5c2 100644 --- a/variation/translators/ambiguous_translator_base.py +++ b/variation/translators/ambiguous_translator_base.py @@ -188,29 +188,29 @@ async def translate( if w: warnings.append(w) return None + + # assembly is either 37 or 38 + if assembly == ClinVarAssembly.GRCH37: + grch38_data = await self.get_grch38_data_ambiguous( + classification, errors, validation_result.accession + ) + if errors: + warnings += errors + return None + + ac = grch38_data.ac + pos0 = grch38_data.pos0 + pos1 = grch38_data.pos1 + pos2 = grch38_data.pos2 + pos3 = grch38_data.pos3 else: - # assembly is either 37 or 38 - if assembly == ClinVarAssembly.GRCH37: - grch38_data = await self.get_grch38_data_ambiguous( - classification, errors, validation_result.accession - ) - if errors: - warnings += errors - return None - - ac = grch38_data.ac - pos0 = grch38_data.pos0 - pos1 = grch38_data.pos1 - pos2 = grch38_data.pos2 - pos3 = grch38_data.pos3 - else: - ac = validation_result.accession - pos0 = classification.pos0 - pos1 = classification.pos1 - pos2 = classification.pos2 - pos3 = classification.pos3 - - assembly = ClinVarAssembly.GRCH38 + ac = validation_result.accession + pos0 = classification.pos0 + pos1 = classification.pos1 + pos2 = classification.pos2 + pos3 = classification.pos3 + + assembly = ClinVarAssembly.GRCH38 else: ac = validation_result.accession pos0 = classification.pos0 @@ -306,5 +306,5 @@ async def translate( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None 
+ + return None diff --git a/variation/translators/amplification.py b/variation/translators/amplification.py index ef2b6411..cbd4034a 100644 --- a/variation/translators/amplification.py +++ b/variation/translators/amplification.py @@ -65,5 +65,5 @@ async def translate( return TranslationResult( vrs_variation=vrs_cx, validation_result=validation_result ) - else: - return None + + return None diff --git a/variation/translators/cdna_deletion.py b/variation/translators/cdna_deletion.py index 9eb99199..96fdb5a9 100644 --- a/variation/translators/cdna_deletion.py +++ b/variation/translators/cdna_deletion.py @@ -53,7 +53,7 @@ async def translate( cds_start = validation_result.cds_start classification: CdnaDeletionClassification = validation_result.classification - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos0, @@ -63,4 +63,3 @@ async def translate( warnings, cds_start=cds_start, ) - return translation_result diff --git a/variation/translators/cdna_delins.py b/variation/translators/cdna_delins.py index 815c30f5..1498b93c 100644 --- a/variation/translators/cdna_delins.py +++ b/variation/translators/cdna_delins.py @@ -53,7 +53,7 @@ async def translate( cds_start = validation_result.cds_start classification: CdnaDelInsClassification = validation_result.classification - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos0, @@ -64,4 +64,3 @@ async def translate( cds_start=cds_start, alt=classification.inserted_sequence, ) - return translation_result diff --git a/variation/translators/cdna_insertion.py b/variation/translators/cdna_insertion.py index 8654aca7..f0d5d029 100644 --- a/variation/translators/cdna_insertion.py +++ b/variation/translators/cdna_insertion.py @@ -53,7 +53,7 @@ async def translate( cds_start = 
validation_result.cds_start classification: CdnaInsertionClassification = validation_result.classification - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos0, @@ -64,4 +64,3 @@ async def translate( cds_start=cds_start, alt=classification.inserted_sequence, ) - return translation_result diff --git a/variation/translators/cdna_reference_agree.py b/variation/translators/cdna_reference_agree.py index d7392536..92fe1a09 100644 --- a/variation/translators/cdna_reference_agree.py +++ b/variation/translators/cdna_reference_agree.py @@ -55,7 +55,7 @@ async def translate( validation_result.classification ) - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos, @@ -65,4 +65,3 @@ async def translate( warnings, cds_start=cds_start, ) - return translation_result diff --git a/variation/translators/cdna_substitution.py b/variation/translators/cdna_substitution.py index 38a928fd..755bdf0e 100644 --- a/variation/translators/cdna_substitution.py +++ b/variation/translators/cdna_substitution.py @@ -55,7 +55,7 @@ async def translate( validation_result.classification ) - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos, @@ -67,4 +67,3 @@ async def translate( ref=classification.ref, alt=classification.alt, ) - return translation_result diff --git a/variation/translators/genomic_del_dup_base.py b/variation/translators/genomic_del_dup_base.py index 3103823e..80c879de 100644 --- a/variation/translators/genomic_del_dup_base.py +++ b/variation/translators/genomic_del_dup_base.py @@ -117,44 +117,46 @@ async def translate( if w: warnings.append(w) return None - else: - # assembly is either 37 or 38 - if assembly == 
ClinVarAssembly.GRCH37: - grch38_data = await self.get_grch38_data( - classification, errors, validation_result.accession - ) - if errors: - warnings += errors - return None - pos0 = grch38_data.pos0 - 1 - if grch38_data.pos1 is None: - pos1 = grch38_data.pos0 - else: - pos1 = grch38_data.pos1 - residue_mode = ResidueMode.INTER_RESIDUE - ac = grch38_data.ac - - if alt_type == AltType.DELETION: - if classification.nomenclature == Nomenclature.GNOMAD_VCF: - ref = classification.matching_tokens[0].ref - invalid_ref_msg = self.validate_reference_sequence( - ac, - pos0, - pos0 + (len(ref) - 1), - ref, - residue_mode=residue_mode, - ) - if invalid_ref_msg: - warnings.append(invalid_ref_msg) - return None + # assembly is either 37 or 38 + if assembly == ClinVarAssembly.GRCH37: + grch38_data = await self.get_grch38_data( + classification, errors, validation_result.accession + ) + if errors: + warnings += errors + return None + + pos0 = grch38_data.pos0 - 1 + if grch38_data.pos1 is None: + pos1 = grch38_data.pos0 else: - pos0 = classification.pos0 - pos1 = classification.pos1 - ac = validation_result.accession - grch38_data = DelDupData(ac=ac, pos0=pos0, pos1=pos1) + pos1 = grch38_data.pos1 + residue_mode = ResidueMode.INTER_RESIDUE + ac = grch38_data.ac - assembly = ClinVarAssembly.GRCH38 + if ( + alt_type == AltType.DELETION + and classification.nomenclature == Nomenclature.GNOMAD_VCF + ): + ref = classification.matching_tokens[0].ref + invalid_ref_msg = self.validate_reference_sequence( + ac, + pos0, + pos0 + (len(ref) - 1), + ref, + residue_mode=residue_mode, + ) + if invalid_ref_msg: + warnings.append(invalid_ref_msg) + return None + else: + pos0 = classification.pos0 + pos1 = classification.pos1 + ac = validation_result.accession + grch38_data = DelDupData(ac=ac, pos0=pos0, pos1=pos1) + + assembly = ClinVarAssembly.GRCH38 else: pos0 = classification.pos0 pos1 = classification.pos1 @@ -177,10 +179,7 @@ async def translate( ac = grch38_data.ac pos0 = grch38_data.pos0 - 
1 - if grch38_data.pos1 is None: - pos1 = grch38_data.pos0 - else: - pos1 = grch38_data.pos1 + pos1 = grch38_data.pos0 if grch38_data.pos1 is None else grch38_data.pos1 residue_mode = ResidueMode.INTER_RESIDUE self.is_valid(classification.gene_token, ac, pos0, pos1, errors) @@ -211,11 +210,13 @@ async def translate( return None alt = None - if classification.nomenclature == Nomenclature.GNOMAD_VCF: - if alt_type == AltType.DELETION: - pos0 -= 1 - pos1 -= 1 - alt = classification.matching_tokens[0].alt + if ( + classification.nomenclature == Nomenclature.GNOMAD_VCF + and alt_type == AltType.DELETION + ): + pos0 -= 1 + pos1 -= 1 + alt = classification.matching_tokens[0].alt if alt_type == AltType.INSERTION: alt = classification.inserted_sequence @@ -268,5 +269,5 @@ async def translate( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None + + return None diff --git a/variation/translators/genomic_delins.py b/variation/translators/genomic_delins.py index a9934b78..9a6d6dcc 100644 --- a/variation/translators/genomic_delins.py +++ b/variation/translators/genomic_delins.py @@ -125,5 +125,5 @@ async def translate( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None + + return None diff --git a/variation/translators/genomic_insertion.py b/variation/translators/genomic_insertion.py index eec2b556..03d636bc 100644 --- a/variation/translators/genomic_insertion.py +++ b/variation/translators/genomic_insertion.py @@ -126,5 +126,5 @@ async def translate( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None + + return None diff --git a/variation/translators/genomic_reference_agree.py b/variation/translators/genomic_reference_agree.py index d4719993..f1b7d4a9 100644 --- a/variation/translators/genomic_reference_agree.py +++ b/variation/translators/genomic_reference_agree.py @@ -122,5 +122,5 @@ async def translate( og_ac=validation_result.accession, 
validation_result=validation_result, ) - else: - return None + + return None diff --git a/variation/translators/genomic_substitution.py b/variation/translators/genomic_substitution.py index 789c6015..6b273337 100644 --- a/variation/translators/genomic_substitution.py +++ b/variation/translators/genomic_substitution.py @@ -146,5 +146,5 @@ async def translate( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None + + return None diff --git a/variation/translators/protein_deletion.py b/variation/translators/protein_deletion.py index 5369fd5b..70910d44 100644 --- a/variation/translators/protein_deletion.py +++ b/variation/translators/protein_deletion.py @@ -53,7 +53,7 @@ async def translate( # First will translate valid result to VRS Allele classification: ProteinDeletionClassification = validation_result.classification - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos0, @@ -62,4 +62,3 @@ async def translate( AnnotationLayer.PROTEIN, warnings, ) - return translation_result diff --git a/variation/translators/protein_delins.py b/variation/translators/protein_delins.py index e72c8713..11f1d77f 100644 --- a/variation/translators/protein_delins.py +++ b/variation/translators/protein_delins.py @@ -53,7 +53,7 @@ async def translate( # First will translate valid result to VRS Allele classification: ProteinDelInsClassification = validation_result.classification - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos0, @@ -63,4 +63,3 @@ async def translate( warnings, alt=classification.inserted_sequence, ) - return translation_result diff --git a/variation/translators/protein_insertion.py b/variation/translators/protein_insertion.py index 2bf9d0be..0dfb3983 100644 --- 
a/variation/translators/protein_insertion.py +++ b/variation/translators/protein_insertion.py @@ -55,7 +55,7 @@ async def translate( validation_result.classification ) - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos0, @@ -65,4 +65,3 @@ async def translate( warnings, alt=classification.inserted_sequence, ) - return translation_result diff --git a/variation/translators/protein_reference_agree.py b/variation/translators/protein_reference_agree.py index 2cfac144..05e9f3d8 100644 --- a/variation/translators/protein_reference_agree.py +++ b/variation/translators/protein_reference_agree.py @@ -55,7 +55,7 @@ async def translate( validation_result.classification ) - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos, @@ -65,4 +65,3 @@ async def translate( warnings, ref=classification.ref, ) - return translation_result diff --git a/variation/translators/protein_stop_gain.py b/variation/translators/protein_stop_gain.py index 1d1fc7a1..88144412 100644 --- a/variation/translators/protein_stop_gain.py +++ b/variation/translators/protein_stop_gain.py @@ -53,7 +53,7 @@ async def translate( # First will translate valid result to VRS Allele classification: ProteinStopGainClassification = validation_result.classification - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos, @@ -63,4 +63,3 @@ async def translate( warnings, alt=classification.alt, ) - return translation_result diff --git a/variation/translators/protein_substitution.py b/variation/translators/protein_substitution.py index b58213b5..a1933090 100644 --- a/variation/translators/protein_substitution.py +++ b/variation/translators/protein_substitution.py @@ 
-55,7 +55,7 @@ async def translate( validation_result.classification ) - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos, @@ -66,4 +66,3 @@ async def translate( ref=classification.ref, alt=classification.alt, ) - return translation_result diff --git a/variation/translators/translator.py b/variation/translators/translator.py index 2f01c011..9d8bb661 100644 --- a/variation/translators/translator.py +++ b/variation/translators/translator.py @@ -106,15 +106,14 @@ def is_valid( gene_end = None for ext in gene_token.gene.extensions: - if ext.name == "ensembl_locations": - if ext.value: - ensembl_loc = ext.value[0] - gene_start = ensembl_loc["start"] - gene_end = ensembl_loc["end"] - 1 + if ext.name == "ensembl_locations" and ext.value: + ensembl_loc = ext.value[0] + gene_start = ensembl_loc["start"] + gene_end = ensembl_loc["end"] - 1 if gene_start is None and gene_end is None: errors.append( - f"gene-normalizer unable to find Ensembl location for: {gene_token.token}" # noqa: E501 + f"gene-normalizer unable to find Ensembl location for: {gene_token.token}" ) for pos in [pos0, pos1, pos2, pos3]: @@ -124,7 +123,7 @@ def is_valid( if not (gene_start <= pos <= gene_end): errors.append( - f"Inter-residue position {pos} out of index on {alt_ac} on gene, {gene_token.token}" # noqa: E501 + f"Inter-residue position {pos} out of index on {alt_ac} on gene, {gene_token.token}" ) def validate_reference_sequence( @@ -245,5 +244,5 @@ async def get_p_or_cdna_translation_result( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None + + return None diff --git a/variation/utils.py b/variation/utils.py index 0f088555..f76d44ab 100644 --- a/variation/utils.py +++ b/variation/utils.py @@ -1,4 +1,5 @@ """Module for general functionality throughout the app""" +import contextlib import re from typing import Dict, List, Literal, 
Optional, Tuple, Union @@ -101,10 +102,8 @@ def get_aa1_codes(aa: str) -> Optional[str]: _aa1_to_aa3(aa) except KeyError: # see if it's 3 AA - try: + with contextlib.suppress(KeyError): aa1 = _aa3_to_aa1(aa) - except KeyError: - pass else: aa1 = aa @@ -147,11 +146,16 @@ def get_ambiguous_type( (pos0 == "?", isinstance(pos1, int), isinstance(pos2, int), pos3 is None) ): ambiguous_type = AmbiguousType.AMBIGUOUS_5 - elif ambiguous_regex_type == AmbiguousRegexType.REGEX_3: - if all( - (isinstance(pos0, int), pos1 is None, isinstance(pos2, int), pos3 == "?") - ): - ambiguous_type = AmbiguousType.AMBIGUOUS_7 + elif all( + ( + ambiguous_regex_type == AmbiguousRegexType.REGEX_3, + isinstance(pos0, int), + pos1 is None, + isinstance(pos2, int), + pos3 == "?", + ) + ): + ambiguous_type = AmbiguousType.AMBIGUOUS_7 return ambiguous_type diff --git a/variation/validate.py b/variation/validate.py index 088469df..15781ea6 100644 --- a/variation/validate.py +++ b/variation/validate.py @@ -108,7 +108,7 @@ async def perform(self, classification: Classification) -> ValidationSummary: if not found_valid_result: warnings = [ - f"Unable to find valid result for classification: {invalid_classification}" # noqa: E501 + f"Unable to find valid result for classification: {invalid_classification}" ] else: warnings = [] diff --git a/variation/validators/__init__.py b/variation/validators/__init__.py index 3215eff8..33f8f2e1 100644 --- a/variation/validators/__init__.py +++ b/variation/validators/__init__.py @@ -20,3 +20,27 @@ from .protein_reference_agree import ProteinReferenceAgree from .protein_stop_gain import ProteinStopGain from .protein_substitution import ProteinSubstitution + +__all__ = [ + "Amplification", + "CdnaDeletion", + "CdnaDelIns", + "CdnaInsertion", + "CdnaReferenceAgree", + "CdnaSubstitution", + "GenomicBase", + "GenomicDeletion", + "GenomicDeletionAmbiguous", + "GenomicDelIns", + "GenomicDuplication", + "GenomicDuplicationAmbiguous", + "GenomicInsertion", + 
"GenomicReferenceAgree", + "GenomicSubstitution", + "ProteinDeletion", + "ProteinDelIns", + "ProteinInsertion", + "ProteinReferenceAgree", + "ProteinStopGain", + "ProteinSubstitution", +] diff --git a/variation/validators/genomic_base.py b/variation/validators/genomic_base.py index b0400d1c..eafad2df 100644 --- a/variation/validators/genomic_base.py +++ b/variation/validators/genomic_base.py @@ -58,7 +58,7 @@ def get_nc_accession(self, identifier: str) -> Optional[str]: identifier ) except KeyError: - logger.warning("Data Proxy unable to get metadata" f"for {identifier}") + logger.warning("Data Proxy unable to get metadata for %s", identifier) else: aliases = [a for a in translated_identifiers if a.startswith("refseq:NC_")] if aliases: diff --git a/variation/validators/genomic_deletion.py b/variation/validators/genomic_deletion.py index 7b2b3f87..cb3c9c4f 100644 --- a/variation/validators/genomic_deletion.py +++ b/variation/validators/genomic_deletion.py @@ -47,38 +47,40 @@ async def get_valid_invalid_results( if invalid_ac_pos: errors.append(invalid_ac_pos) else: - if classification.nomenclature in { - Nomenclature.FREE_TEXT, - Nomenclature.HGVS, - }: + if ( + classification.nomenclature + in { + Nomenclature.FREE_TEXT, + Nomenclature.HGVS, + } + and classification.deleted_sequence + ): # Validate deleted sequence # HGVS deleted sequence includes start and end - if classification.deleted_sequence: - invalid_del_seq_message = self.validate_reference_sequence( - alt_ac, - classification.pos0, - classification.pos1 - if classification.pos1 - else classification.pos0, - classification.deleted_sequence, - ) - - if invalid_del_seq_message: - errors.append(invalid_del_seq_message) - - if not errors: - if classification.nomenclature == Nomenclature.GNOMAD_VCF: - # Validate reference sequence - ref = classification.matching_tokens[0].ref - validate_ref_msg = self.validate_reference_sequence( + invalid_del_seq_message = self.validate_reference_sequence( alt_ac, - 
classification.pos0 - 1, - end_pos=classification.pos0 + (len(ref) - 1), - expected_ref=ref, + classification.pos0, + classification.pos1 + if classification.pos1 + else classification.pos0, + classification.deleted_sequence, ) - if validate_ref_msg: - errors.append(validate_ref_msg) + if invalid_del_seq_message: + errors.append(invalid_del_seq_message) + + if not errors and classification.nomenclature == Nomenclature.GNOMAD_VCF: + # Validate reference sequence + ref = classification.matching_tokens[0].ref + validate_ref_msg = self.validate_reference_sequence( + alt_ac, + classification.pos0 - 1, + end_pos=classification.pos0 + (len(ref) - 1), + expected_ref=ref, + ) + + if validate_ref_msg: + errors.append(validate_ref_msg) if not errors and classification.gene_token: # Validate positions exist within gene range diff --git a/variation/validators/genomic_deletion_ambiguous.py b/variation/validators/genomic_deletion_ambiguous.py index b0ddb8a0..ab999e53 100644 --- a/variation/validators/genomic_deletion_ambiguous.py +++ b/variation/validators/genomic_deletion_ambiguous.py @@ -48,10 +48,10 @@ async def get_valid_invalid_results( if classification.ambiguous_type == AmbiguousType.AMBIGUOUS_1: start_pos = classification.pos0 end_pos = classification.pos3 - elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_2: - start_pos = classification.pos1 - end_pos = classification.pos2 - elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_5: + elif ( + classification.ambiguous_type == AmbiguousType.AMBIGUOUS_2 + or classification.ambiguous_type == AmbiguousType.AMBIGUOUS_5 + ): start_pos = classification.pos1 end_pos = classification.pos2 elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_7: diff --git a/variation/validators/genomic_duplication_ambiguous.py b/variation/validators/genomic_duplication_ambiguous.py index ea522393..9479d744 100644 --- a/variation/validators/genomic_duplication_ambiguous.py +++ 
b/variation/validators/genomic_duplication_ambiguous.py @@ -48,10 +48,10 @@ async def get_valid_invalid_results( if classification.ambiguous_type == AmbiguousType.AMBIGUOUS_1: start_pos = classification.pos0 end_pos = classification.pos3 - elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_2: - start_pos = classification.pos1 - end_pos = classification.pos2 - elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_5: + elif ( + classification.ambiguous_type == AmbiguousType.AMBIGUOUS_2 + or classification.ambiguous_type == AmbiguousType.AMBIGUOUS_5 + ): start_pos = classification.pos1 end_pos = classification.pos2 elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_7: diff --git a/variation/validators/protein_deletion.py b/variation/validators/protein_deletion.py index 5756fb58..c8fc9d0d 100644 --- a/variation/validators/protein_deletion.py +++ b/variation/validators/protein_deletion.py @@ -74,22 +74,25 @@ async def get_valid_invalid_results( errors.append(invalid_aa1_seq_msg) # Validate that deleted sequence matches expected - if classification.nomenclature in { - Nomenclature.FREE_TEXT, - Nomenclature.HGVS, - }: + if ( + classification.nomenclature + in { + Nomenclature.FREE_TEXT, + Nomenclature.HGVS, + } + and classification.deleted_sequence + and classification.pos1 is not None + ): # HGVS deleted sequence includes start and end - if classification.deleted_sequence: - if classification.pos1 is not None: - invalid_del_seq_msg = self.validate_reference_sequence( - p_ac, - classification.pos0, - classification.pos1, - classification.deleted_sequence, - ) - - if invalid_del_seq_msg: - errors.append(invalid_del_seq_msg) + invalid_del_seq_msg = self.validate_reference_sequence( + p_ac, + classification.pos0, + classification.pos1, + classification.deleted_sequence, + ) + + if invalid_del_seq_msg: + errors.append(invalid_del_seq_msg) validation_results.append( ValidationResult( diff --git a/variation/validators/validator.py 
b/variation/validators/validator.py index 6d51a8f6..f7fb8b0d 100644 --- a/variation/validators/validator.py +++ b/variation/validators/validator.py @@ -112,10 +112,7 @@ async def validate(self, classification: Classification) -> List[ValidationResul errors=errors, ) ] - validation_results = await self.get_valid_invalid_results( - classification, accessions - ) - return validation_results + return await self.get_valid_invalid_results(classification, accessions) def get_protein_accessions(self, gene_token: GeneToken, errors: List) -> List[str]: """Get accessions for variations with protein reference sequence. @@ -194,31 +191,33 @@ async def _validate_gene_pos( if gene_start_end["start"] is None and gene_start_end["end"] is None: return f"gene-normalizer unable to find Ensembl location for gene: {gene}" - else: - assembly = await self.uta.get_chr_assembly(alt_ac) - if assembly: - # Not in GRCh38 assembly. Gene normalizer only uses 38, so we - # need to liftover to GRCh37 coords - chromosome, assembly = assembly - for key in gene_start_end.keys(): - gene_pos = gene_start_end[key] - gene_pos_liftover = self.uta.liftover_38_to_37.convert_coordinate( - chromosome, gene_pos - ) - if gene_pos_liftover is None or len(gene_pos_liftover) == 0: - return f"{gene_pos} does not exist on {chromosome}" - else: - gene_start_end[key] = gene_pos_liftover[0][1] - - gene_start = gene_start_end["start"] - gene_end = gene_start_end["end"] - - for pos in [pos0, pos1, pos2, pos3]: - if pos not in ["?", None]: - if residue_mode == "residue": - pos -= 1 - if not (gene_start <= pos <= gene_end): - return f"Position {pos} out of index on {alt_ac} on gene, {gene}" # noqa: E501 + + assembly = await self.uta.get_chr_assembly(alt_ac) + if assembly: + # Not in GRCh38 assembly. 
Gene normalizer only uses 38, so we + # need to liftover to GRCh37 coords + chromosome, assembly = assembly + for key in gene_start_end: + gene_pos = gene_start_end[key] + gene_pos_liftover = self.uta.liftover_38_to_37.convert_coordinate( + chromosome, gene_pos + ) + if gene_pos_liftover is None or len(gene_pos_liftover) == 0: + return f"{gene_pos} does not exist on {chromosome}" + + gene_start_end[key] = gene_pos_liftover[0][1] + + gene_start = gene_start_end["start"] + gene_end = gene_start_end["end"] + + for pos in [pos0, pos1, pos2, pos3]: + if pos not in ["?", None]: + if residue_mode == "residue": + pos -= 1 + if not (gene_start <= pos <= gene_end): + return f"Position {pos} out of index on {alt_ac} on gene, {gene}" + + return None def validate_reference_sequence( self, @@ -302,7 +301,7 @@ def validate_ac_and_pos( else: if end_pos: if not ref_len or (end_pos - start_pos != ref_len): - msg = f"Positions ({start_pos}, {end_pos}) not valid on accession ({ac})" # noqa: E501 + msg = f"Positions ({start_pos}, {end_pos}) not valid on accession ({ac})" else: if not ref_len: msg = f"Position ({start_pos}) not valid on accession ({ac})" @@ -338,8 +337,8 @@ def validate_5_prime_to_3_prime( "should be listed from 5' to 3'" ) break - else: - prev_pos = pos + + prev_pos = pos return invalid_msg def validate_ambiguous_classification( @@ -413,15 +412,12 @@ def validate_protein_hgvs_classification( else: errors.append(f"`aa0` not valid amino acid(s): {classification.aa0}") - if hasattr(classification, "aa1"): - if classification.aa1: - aa1_codes = get_aa1_codes(classification.aa1) - if aa1_codes: - classification.aa1 = aa1_codes - else: - errors.append( - f"`aa1` not valid amino acid(s): {classification.aa1}" - ) + if hasattr(classification, "aa1") and classification.aa1: + aa1_codes = get_aa1_codes(classification.aa1) + if aa1_codes: + classification.aa1 = aa1_codes + else: + errors.append(f"`aa1` not valid amino acid(s): {classification.aa1}") if hasattr(classification, 
"inserted_sequence"): ins_codes = get_aa1_codes(classification.inserted_sequence) diff --git a/variation/vrs_representation.py b/variation/vrs_representation.py index bde025f9..1d12c4b3 100644 --- a/variation/vrs_representation.py +++ b/variation/vrs_representation.py @@ -46,10 +46,9 @@ def get_start_end( errors.append("Start/End must be valid ints") return None - if coordinate == "c": - if cds_start: - start += cds_start - end += cds_start + if coordinate == "c" and cds_start: + start += cds_start + end += cds_start return start, end @staticmethod From 716c3d94435bd2957da67f792d328f6616468778 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 7 Feb 2024 18:16:34 -0500 Subject: [PATCH 4/8] resolve noqa --- tests/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index a32eb880..ca4c4e1d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -558,11 +558,11 @@ def assertion_checks(normalize_response, test_variation): def cnv_assertion_checks(resp, test_fixture): """Check that actual response for to copy number matches expected""" try: - resp.copy_number_count # noqa: B018 + cnc = resp.copy_number_count except AttributeError: actual = resp.copy_number_change.model_dump(exclude_none=True) else: - actual = resp.copy_number_count.model_dump(exclude_none=True) + actual = cnc.model_dump(exclude_none=True) expected = test_fixture.model_dump(exclude_none=True) assert actual == expected assert resp.warnings == [] From 22c16c4b2c805226f726bc1a9114fdc1c359f80d Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 7 Feb 2024 18:41:20 -0500 Subject: [PATCH 5/8] build: modernize build configs * Use the "src" layout instead of a "flat" layout * Move build settings into pyproject.toml --- .gitignore | 5 +- pyproject.toml | 79 +++++++++++++++++-- setup.cfg | 60 -------------- setup.py | 5 -- {variation => src/variation}/__init__.py | 0 .../variation}/classifiers/__init__.py | 0 
.../classifiers/amplification_classifier.py | 0 .../classifiers/cdna_deletion_classifier.py | 0 .../classifiers/cdna_delins_classifier.py | 0 .../classifiers/cdna_insertion_classifier.py | 0 .../cdna_reference_agree_classifier.py | 0 .../cdna_substitution_classifier.py | 0 .../variation}/classifiers/classifier.py | 0 .../classifiers/genomic_deletion_ambiguous.py | 0 .../genomic_deletion_classifier.py | 0 .../classifiers/genomic_delins_classifier.py | 0 .../genomic_duplication_ambiguous.py | 0 .../genomic_duplication_classifier.py | 0 .../genomic_insertion_classifier.py | 0 .../genomic_reference_agree_classifier.py | 0 .../genomic_substitution_classifier.py | 0 .../classifiers/gnomad_vcf_classifier.py | 0 .../variation}/classifiers/hgvs_classifier.py | 0 .../protein_deletion_classifier.py | 0 .../classifiers/protein_delins_classifier.py | 0 .../protein_insertion_classifier.py | 0 .../classifiers/protein_reference_agree.py | 0 .../protein_stop_gain_classifier.py | 0 .../protein_substitution_classifier.py | 0 {variation => src/variation}/classify.py | 0 .../gnomad_vcf_to_protein_variation.py | 0 .../variation}/hgvs_dup_del_mode.py | 0 {variation => src/variation}/main.py | 0 {variation => src/variation}/normalize.py | 0 {variation => src/variation}/query.py | 0 {variation => src/variation}/regex.py | 0 .../variation}/schemas/__init__.py | 0 .../variation}/schemas/app_schemas.py | 0 .../schemas/classification_response_schema.py | 0 .../variation}/schemas/copy_number_schema.py | 0 .../schemas/gnomad_vcf_to_protein_schema.py | 0 .../schemas/hgvs_to_copy_number_schema.py | 0 .../schemas/normalize_response_schema.py | 0 .../variation}/schemas/service_schema.py | 0 .../schemas/to_vrs_response_schema.py | 0 .../schemas/token_response_schema.py | 0 .../schemas/translation_response_schema.py | 0 .../schemas/validation_response_schema.py | 0 .../variation}/schemas/variation_schema.py | 0 .../schemas/vrs_python_translator_schema.py | 0 .../variation}/to_copy_number_variation.py 
| 0 {variation => src/variation}/to_vrs.py | 0 {variation => src/variation}/tokenize.py | 0 .../variation}/tokenizers/__init__.py | 0 .../cdna_and_genomic_reference_agree.py | 0 .../variation}/tokenizers/cdna_deletion.py | 0 .../variation}/tokenizers/cdna_delins.py | 0 .../variation}/tokenizers/cdna_insertion.py | 0 .../tokenizers/cdna_substitution.py | 0 .../tokenizers/free_text_categorical.py | 0 .../variation}/tokenizers/gene_symbol.py | 0 .../variation}/tokenizers/genomic_deletion.py | 0 .../variation}/tokenizers/genomic_delins.py | 0 .../tokenizers/genomic_duplication.py | 0 .../tokenizers/genomic_insertion.py | 0 .../tokenizers/genomic_substitution.py | 0 .../variation}/tokenizers/gnomad_vcf.py | 0 .../variation}/tokenizers/hgvs.py | 0 .../variation}/tokenizers/protein_deletion.py | 0 .../variation}/tokenizers/protein_delins.py | 0 .../tokenizers/protein_insertion.py | 0 .../tokenizers/protein_reference_agree.py | 0 .../tokenizers/protein_substitution.py | 0 .../variation}/tokenizers/tokenizer.py | 0 {variation => src/variation}/translate.py | 0 .../variation}/translators/__init__.py | 0 .../translators/ambiguous_translator_base.py | 0 .../variation}/translators/amplification.py | 0 .../variation}/translators/cdna_deletion.py | 0 .../variation}/translators/cdna_delins.py | 0 .../variation}/translators/cdna_insertion.py | 0 .../translators/cdna_reference_agree.py | 0 .../translators/cdna_substitution.py | 0 .../translators/genomic_del_dup_base.py | 0 .../translators/genomic_deletion.py | 0 .../translators/genomic_deletion_ambiguous.py | 0 .../variation}/translators/genomic_delins.py | 0 .../translators/genomic_duplication.py | 0 .../genomic_duplication_ambiguous.py | 0 .../translators/genomic_insertion.py | 0 .../translators/genomic_reference_agree.py | 0 .../translators/genomic_substitution.py | 0 .../translators/protein_deletion.py | 0 .../variation}/translators/protein_delins.py | 0 .../translators/protein_insertion.py | 0 
.../translators/protein_reference_agree.py | 0 .../translators/protein_stop_gain.py | 0 .../translators/protein_substitution.py | 0 .../variation}/translators/translator.py | 0 {variation => src/variation}/utils.py | 0 {variation => src/variation}/validate.py | 0 .../variation}/validators/__init__.py | 0 .../variation}/validators/amplification.py | 0 .../variation}/validators/cdna_deletion.py | 0 .../variation}/validators/cdna_delins.py | 0 .../variation}/validators/cdna_insertion.py | 0 .../validators/cdna_reference_agree.py | 0 .../validators/cdna_substitution.py | 0 .../variation}/validators/genomic_base.py | 0 .../variation}/validators/genomic_deletion.py | 0 .../validators/genomic_deletion_ambiguous.py | 0 .../variation}/validators/genomic_delins.py | 0 .../validators/genomic_duplication.py | 0 .../genomic_duplication_ambiguous.py | 0 .../validators/genomic_insertion.py | 0 .../validators/genomic_reference_agree.py | 0 .../validators/genomic_substitution.py | 0 .../variation}/validators/protein_deletion.py | 0 .../variation}/validators/protein_delins.py | 0 .../validators/protein_insertion.py | 0 .../validators/protein_reference_agree.py | 0 .../validators/protein_stop_gain.py | 0 .../validators/protein_substitution.py | 0 .../variation}/validators/validator.py | 0 {variation => src/variation}/version.py | 0 .../variation}/vrs_representation.py | 0 126 files changed, 73 insertions(+), 76 deletions(-) delete mode 100644 setup.cfg delete mode 100644 setup.py rename {variation => src/variation}/__init__.py (100%) rename {variation => src/variation}/classifiers/__init__.py (100%) rename {variation => src/variation}/classifiers/amplification_classifier.py (100%) rename {variation => src/variation}/classifiers/cdna_deletion_classifier.py (100%) rename {variation => src/variation}/classifiers/cdna_delins_classifier.py (100%) rename {variation => src/variation}/classifiers/cdna_insertion_classifier.py (100%) rename {variation => 
src/variation}/classifiers/cdna_reference_agree_classifier.py (100%) rename {variation => src/variation}/classifiers/cdna_substitution_classifier.py (100%) rename {variation => src/variation}/classifiers/classifier.py (100%) rename {variation => src/variation}/classifiers/genomic_deletion_ambiguous.py (100%) rename {variation => src/variation}/classifiers/genomic_deletion_classifier.py (100%) rename {variation => src/variation}/classifiers/genomic_delins_classifier.py (100%) rename {variation => src/variation}/classifiers/genomic_duplication_ambiguous.py (100%) rename {variation => src/variation}/classifiers/genomic_duplication_classifier.py (100%) rename {variation => src/variation}/classifiers/genomic_insertion_classifier.py (100%) rename {variation => src/variation}/classifiers/genomic_reference_agree_classifier.py (100%) rename {variation => src/variation}/classifiers/genomic_substitution_classifier.py (100%) rename {variation => src/variation}/classifiers/gnomad_vcf_classifier.py (100%) rename {variation => src/variation}/classifiers/hgvs_classifier.py (100%) rename {variation => src/variation}/classifiers/protein_deletion_classifier.py (100%) rename {variation => src/variation}/classifiers/protein_delins_classifier.py (100%) rename {variation => src/variation}/classifiers/protein_insertion_classifier.py (100%) rename {variation => src/variation}/classifiers/protein_reference_agree.py (100%) rename {variation => src/variation}/classifiers/protein_stop_gain_classifier.py (100%) rename {variation => src/variation}/classifiers/protein_substitution_classifier.py (100%) rename {variation => src/variation}/classify.py (100%) rename {variation => src/variation}/gnomad_vcf_to_protein_variation.py (100%) rename {variation => src/variation}/hgvs_dup_del_mode.py (100%) rename {variation => src/variation}/main.py (100%) rename {variation => src/variation}/normalize.py (100%) rename {variation => src/variation}/query.py (100%) rename {variation => src/variation}/regex.py 
(100%) rename {variation => src/variation}/schemas/__init__.py (100%) rename {variation => src/variation}/schemas/app_schemas.py (100%) rename {variation => src/variation}/schemas/classification_response_schema.py (100%) rename {variation => src/variation}/schemas/copy_number_schema.py (100%) rename {variation => src/variation}/schemas/gnomad_vcf_to_protein_schema.py (100%) rename {variation => src/variation}/schemas/hgvs_to_copy_number_schema.py (100%) rename {variation => src/variation}/schemas/normalize_response_schema.py (100%) rename {variation => src/variation}/schemas/service_schema.py (100%) rename {variation => src/variation}/schemas/to_vrs_response_schema.py (100%) rename {variation => src/variation}/schemas/token_response_schema.py (100%) rename {variation => src/variation}/schemas/translation_response_schema.py (100%) rename {variation => src/variation}/schemas/validation_response_schema.py (100%) rename {variation => src/variation}/schemas/variation_schema.py (100%) rename {variation => src/variation}/schemas/vrs_python_translator_schema.py (100%) rename {variation => src/variation}/to_copy_number_variation.py (100%) rename {variation => src/variation}/to_vrs.py (100%) rename {variation => src/variation}/tokenize.py (100%) rename {variation => src/variation}/tokenizers/__init__.py (100%) rename {variation => src/variation}/tokenizers/cdna_and_genomic_reference_agree.py (100%) rename {variation => src/variation}/tokenizers/cdna_deletion.py (100%) rename {variation => src/variation}/tokenizers/cdna_delins.py (100%) rename {variation => src/variation}/tokenizers/cdna_insertion.py (100%) rename {variation => src/variation}/tokenizers/cdna_substitution.py (100%) rename {variation => src/variation}/tokenizers/free_text_categorical.py (100%) rename {variation => src/variation}/tokenizers/gene_symbol.py (100%) rename {variation => src/variation}/tokenizers/genomic_deletion.py (100%) rename {variation => src/variation}/tokenizers/genomic_delins.py (100%) rename 
{variation => src/variation}/tokenizers/genomic_duplication.py (100%) rename {variation => src/variation}/tokenizers/genomic_insertion.py (100%) rename {variation => src/variation}/tokenizers/genomic_substitution.py (100%) rename {variation => src/variation}/tokenizers/gnomad_vcf.py (100%) rename {variation => src/variation}/tokenizers/hgvs.py (100%) rename {variation => src/variation}/tokenizers/protein_deletion.py (100%) rename {variation => src/variation}/tokenizers/protein_delins.py (100%) rename {variation => src/variation}/tokenizers/protein_insertion.py (100%) rename {variation => src/variation}/tokenizers/protein_reference_agree.py (100%) rename {variation => src/variation}/tokenizers/protein_substitution.py (100%) rename {variation => src/variation}/tokenizers/tokenizer.py (100%) rename {variation => src/variation}/translate.py (100%) rename {variation => src/variation}/translators/__init__.py (100%) rename {variation => src/variation}/translators/ambiguous_translator_base.py (100%) rename {variation => src/variation}/translators/amplification.py (100%) rename {variation => src/variation}/translators/cdna_deletion.py (100%) rename {variation => src/variation}/translators/cdna_delins.py (100%) rename {variation => src/variation}/translators/cdna_insertion.py (100%) rename {variation => src/variation}/translators/cdna_reference_agree.py (100%) rename {variation => src/variation}/translators/cdna_substitution.py (100%) rename {variation => src/variation}/translators/genomic_del_dup_base.py (100%) rename {variation => src/variation}/translators/genomic_deletion.py (100%) rename {variation => src/variation}/translators/genomic_deletion_ambiguous.py (100%) rename {variation => src/variation}/translators/genomic_delins.py (100%) rename {variation => src/variation}/translators/genomic_duplication.py (100%) rename {variation => src/variation}/translators/genomic_duplication_ambiguous.py (100%) rename {variation => src/variation}/translators/genomic_insertion.py 
(100%) rename {variation => src/variation}/translators/genomic_reference_agree.py (100%) rename {variation => src/variation}/translators/genomic_substitution.py (100%) rename {variation => src/variation}/translators/protein_deletion.py (100%) rename {variation => src/variation}/translators/protein_delins.py (100%) rename {variation => src/variation}/translators/protein_insertion.py (100%) rename {variation => src/variation}/translators/protein_reference_agree.py (100%) rename {variation => src/variation}/translators/protein_stop_gain.py (100%) rename {variation => src/variation}/translators/protein_substitution.py (100%) rename {variation => src/variation}/translators/translator.py (100%) rename {variation => src/variation}/utils.py (100%) rename {variation => src/variation}/validate.py (100%) rename {variation => src/variation}/validators/__init__.py (100%) rename {variation => src/variation}/validators/amplification.py (100%) rename {variation => src/variation}/validators/cdna_deletion.py (100%) rename {variation => src/variation}/validators/cdna_delins.py (100%) rename {variation => src/variation}/validators/cdna_insertion.py (100%) rename {variation => src/variation}/validators/cdna_reference_agree.py (100%) rename {variation => src/variation}/validators/cdna_substitution.py (100%) rename {variation => src/variation}/validators/genomic_base.py (100%) rename {variation => src/variation}/validators/genomic_deletion.py (100%) rename {variation => src/variation}/validators/genomic_deletion_ambiguous.py (100%) rename {variation => src/variation}/validators/genomic_delins.py (100%) rename {variation => src/variation}/validators/genomic_duplication.py (100%) rename {variation => src/variation}/validators/genomic_duplication_ambiguous.py (100%) rename {variation => src/variation}/validators/genomic_insertion.py (100%) rename {variation => src/variation}/validators/genomic_reference_agree.py (100%) rename {variation => src/variation}/validators/genomic_substitution.py 
(100%) rename {variation => src/variation}/validators/protein_deletion.py (100%) rename {variation => src/variation}/validators/protein_delins.py (100%) rename {variation => src/variation}/validators/protein_insertion.py (100%) rename {variation => src/variation}/validators/protein_reference_agree.py (100%) rename {variation => src/variation}/validators/protein_stop_gain.py (100%) rename {variation => src/variation}/validators/protein_substitution.py (100%) rename {variation => src/variation}/validators/validator.py (100%) rename {variation => src/variation}/version.py (100%) rename {variation => src/variation}/vrs_representation.py (100%) diff --git a/.gitignore b/.gitignore index 7695a62a..12c84e4f 100644 --- a/.gitignore +++ b/.gitignore @@ -16,10 +16,7 @@ dist/ .mypy_cache/ -variation/data/seqrepo/ -variation/data/uta_20180821 -variation/data/notebooks -variation/data/*.txt +src/variation/data Pipfile.lock diff --git a/pyproject.toml b/pyproject.toml index 7e578334..39ff0c0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,77 @@ +[project] +name = "variation-normalizer" +authors = [ + {name = "Alex Wagner"}, + {name = "Kori Kuzma"}, + {name = "James Stevenson"} +] +readme = "README.md" +classifiers = [ + "Development Status :: 3 - Alpha", + "Framework :: FastAPI", + "Framework :: Pydantic", + "Framework :: Pydantic :: 2", + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", +] +requires-python = ">=3.8" +description = "VICC normalization routine for variations" +license = {file = "LICENSE"} +dependencies = [ + "biocommons.seqrepo", + "fastapi", + "uvicorn", + "pydantic ==2.*", + "ga4gh.vrs[extras] ~= 2.0.0a2", + 
"gene-normalizer ~=0.3.0.dev1", + "boto3", + "cool-seq-tool ~=0.4.0.dev1", + "bioutils" +] +dynamic = ["version"] + +[project.optional-dependencies] +test = ["pytest>=6.0", "pytest-cov", "pytest-asyncio"] +dev = ["pre-commit", "ruff==0.2.0", "psycopg2-binary", "jupyter", "ipykernel"] + +[project.urls] +Homepage = "https://github.com/cancervariants/variation-normalization" +Documentation = "https://github.com/cancervariants/variation-normalization" +Changelog = "https://github.com/cancervariants/variation-normalization/releases" +Source = "https://github.com/cancervariants/variation-normalization" +"Bug Tracker" = "https://github.com/cancervariants/variation-normalization/issues" + [build-system] -requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta:__legacy__" +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.dynamic] +version = {attr = "variation.version.__version__"} + +# Scanning for namespace packages in the ``src`` directory is true by +# default in pyproject.toml, so you do NOT need to include the +# `tool.setuptools.packages.find` if it looks like the following: +# [tool.setuptools.packages.find] +# namespaces = true +# where = ["src"] + +[tool.pytest.ini_options] +addopts = "--cov=src --cov-report term-missing" +testpaths = ["tests"] + +[tool.coverage.run] +branch = true [tool.ruff] +src = ["src"] + lint.select = [ "F", # https://docs.astral.sh/ruff/rules/#pyflakes-f "E", "W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w @@ -72,10 +141,6 @@ lint.ignore = [ "S321", ] -exclude = [ - "setup.py" -] - [tool.ruff.lint.per-file-ignores] # ANN001 - missing-type-function-argument # ANN2 - missing-return-type @@ -86,7 +151,7 @@ exclude = [ # B011 - assert-false # RUF001 - ambiguous-unicode-character-string "tests/*" = ["ANN001", "ANN102", "ANN2", "S101", "B011"] -"variation/schemas/*" = ["ANN001", "ANN201", "N805", "S101"] +"src/variation/schemas/*" = ["ANN001", "ANN201", "N805", "S101"] 
"codebuild/*" = ["T201"] [tool.ruff.lint.flake8-bugbear] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 7f37081a..00000000 --- a/setup.cfg +++ /dev/null @@ -1,60 +0,0 @@ -[metadata] -name = variation-normalizer -author = VICC -author_email = help@cancervariants.org -description = VICC normalization routine for variations -long_description = file:README.md -long_description_content_type = text/markdown -home-page = https://github.com/cancervariants/variation-normalization -license_files = LICENSE -license = MIT -project_urls = - Changelog = https://github.com/cancervariants/variation-normalization/releases - Source = https://github.com/cancervariants/variation-normalization - Tracker = https://github.com/cancervariants/variation-normalization/issues - -classifiers = - Development Status :: 3 - Alpha - Intended Audience :: Science/Research - Intended Audience :: Developers - Topic :: Scientific/Engineering :: Bio-Informatics - License :: OSI Approved :: MIT License - Programming Language :: Python :: 3 - Programming Language :: Python :: 3.7 - -[options] -packages = find: -python_requires = >=3.7 -zip_safe = False -include_package_data = True - -install_requires = - biocommons.seqrepo - fastapi - uvicorn - pydantic ==2.* - ga4gh.vrs[extras] ~= 2.0.0a2 - gene-normalizer ~=0.3.0.dev1 - boto3 - cool-seq-tool ~=0.4.0.dev1 - bioutils - -tests_require = - pytest - pytest-cov - pyyaml - pytest-asyncio - -[options.extras_require] -dev = - pytest - pytest-asyncio - pytest-cov - ruff == 0.2.0 - pre-commit - jupyter - ipykernel - psycopg2-binary - -[tool:pytest] -addopts = --ignore setup.py --ignore=codebuild/ --doctest-modules --cov-report term-missing --cov . 
diff --git a/setup.py b/setup.py deleted file mode 100644 index 7255c518..00000000 --- a/setup.py +++ /dev/null @@ -1,5 +0,0 @@ -"""Module for package and distribution.""" -import setuptools - -exec(open("variation/version.py").read()) -setuptools.setup(version=__version__) diff --git a/variation/__init__.py b/src/variation/__init__.py similarity index 100% rename from variation/__init__.py rename to src/variation/__init__.py diff --git a/variation/classifiers/__init__.py b/src/variation/classifiers/__init__.py similarity index 100% rename from variation/classifiers/__init__.py rename to src/variation/classifiers/__init__.py diff --git a/variation/classifiers/amplification_classifier.py b/src/variation/classifiers/amplification_classifier.py similarity index 100% rename from variation/classifiers/amplification_classifier.py rename to src/variation/classifiers/amplification_classifier.py diff --git a/variation/classifiers/cdna_deletion_classifier.py b/src/variation/classifiers/cdna_deletion_classifier.py similarity index 100% rename from variation/classifiers/cdna_deletion_classifier.py rename to src/variation/classifiers/cdna_deletion_classifier.py diff --git a/variation/classifiers/cdna_delins_classifier.py b/src/variation/classifiers/cdna_delins_classifier.py similarity index 100% rename from variation/classifiers/cdna_delins_classifier.py rename to src/variation/classifiers/cdna_delins_classifier.py diff --git a/variation/classifiers/cdna_insertion_classifier.py b/src/variation/classifiers/cdna_insertion_classifier.py similarity index 100% rename from variation/classifiers/cdna_insertion_classifier.py rename to src/variation/classifiers/cdna_insertion_classifier.py diff --git a/variation/classifiers/cdna_reference_agree_classifier.py b/src/variation/classifiers/cdna_reference_agree_classifier.py similarity index 100% rename from variation/classifiers/cdna_reference_agree_classifier.py rename to src/variation/classifiers/cdna_reference_agree_classifier.py diff 
--git a/variation/classifiers/cdna_substitution_classifier.py b/src/variation/classifiers/cdna_substitution_classifier.py similarity index 100% rename from variation/classifiers/cdna_substitution_classifier.py rename to src/variation/classifiers/cdna_substitution_classifier.py diff --git a/variation/classifiers/classifier.py b/src/variation/classifiers/classifier.py similarity index 100% rename from variation/classifiers/classifier.py rename to src/variation/classifiers/classifier.py diff --git a/variation/classifiers/genomic_deletion_ambiguous.py b/src/variation/classifiers/genomic_deletion_ambiguous.py similarity index 100% rename from variation/classifiers/genomic_deletion_ambiguous.py rename to src/variation/classifiers/genomic_deletion_ambiguous.py diff --git a/variation/classifiers/genomic_deletion_classifier.py b/src/variation/classifiers/genomic_deletion_classifier.py similarity index 100% rename from variation/classifiers/genomic_deletion_classifier.py rename to src/variation/classifiers/genomic_deletion_classifier.py diff --git a/variation/classifiers/genomic_delins_classifier.py b/src/variation/classifiers/genomic_delins_classifier.py similarity index 100% rename from variation/classifiers/genomic_delins_classifier.py rename to src/variation/classifiers/genomic_delins_classifier.py diff --git a/variation/classifiers/genomic_duplication_ambiguous.py b/src/variation/classifiers/genomic_duplication_ambiguous.py similarity index 100% rename from variation/classifiers/genomic_duplication_ambiguous.py rename to src/variation/classifiers/genomic_duplication_ambiguous.py diff --git a/variation/classifiers/genomic_duplication_classifier.py b/src/variation/classifiers/genomic_duplication_classifier.py similarity index 100% rename from variation/classifiers/genomic_duplication_classifier.py rename to src/variation/classifiers/genomic_duplication_classifier.py diff --git a/variation/classifiers/genomic_insertion_classifier.py 
b/src/variation/classifiers/genomic_insertion_classifier.py similarity index 100% rename from variation/classifiers/genomic_insertion_classifier.py rename to src/variation/classifiers/genomic_insertion_classifier.py diff --git a/variation/classifiers/genomic_reference_agree_classifier.py b/src/variation/classifiers/genomic_reference_agree_classifier.py similarity index 100% rename from variation/classifiers/genomic_reference_agree_classifier.py rename to src/variation/classifiers/genomic_reference_agree_classifier.py diff --git a/variation/classifiers/genomic_substitution_classifier.py b/src/variation/classifiers/genomic_substitution_classifier.py similarity index 100% rename from variation/classifiers/genomic_substitution_classifier.py rename to src/variation/classifiers/genomic_substitution_classifier.py diff --git a/variation/classifiers/gnomad_vcf_classifier.py b/src/variation/classifiers/gnomad_vcf_classifier.py similarity index 100% rename from variation/classifiers/gnomad_vcf_classifier.py rename to src/variation/classifiers/gnomad_vcf_classifier.py diff --git a/variation/classifiers/hgvs_classifier.py b/src/variation/classifiers/hgvs_classifier.py similarity index 100% rename from variation/classifiers/hgvs_classifier.py rename to src/variation/classifiers/hgvs_classifier.py diff --git a/variation/classifiers/protein_deletion_classifier.py b/src/variation/classifiers/protein_deletion_classifier.py similarity index 100% rename from variation/classifiers/protein_deletion_classifier.py rename to src/variation/classifiers/protein_deletion_classifier.py diff --git a/variation/classifiers/protein_delins_classifier.py b/src/variation/classifiers/protein_delins_classifier.py similarity index 100% rename from variation/classifiers/protein_delins_classifier.py rename to src/variation/classifiers/protein_delins_classifier.py diff --git a/variation/classifiers/protein_insertion_classifier.py b/src/variation/classifiers/protein_insertion_classifier.py similarity index 
100% rename from variation/classifiers/protein_insertion_classifier.py rename to src/variation/classifiers/protein_insertion_classifier.py diff --git a/variation/classifiers/protein_reference_agree.py b/src/variation/classifiers/protein_reference_agree.py similarity index 100% rename from variation/classifiers/protein_reference_agree.py rename to src/variation/classifiers/protein_reference_agree.py diff --git a/variation/classifiers/protein_stop_gain_classifier.py b/src/variation/classifiers/protein_stop_gain_classifier.py similarity index 100% rename from variation/classifiers/protein_stop_gain_classifier.py rename to src/variation/classifiers/protein_stop_gain_classifier.py diff --git a/variation/classifiers/protein_substitution_classifier.py b/src/variation/classifiers/protein_substitution_classifier.py similarity index 100% rename from variation/classifiers/protein_substitution_classifier.py rename to src/variation/classifiers/protein_substitution_classifier.py diff --git a/variation/classify.py b/src/variation/classify.py similarity index 100% rename from variation/classify.py rename to src/variation/classify.py diff --git a/variation/gnomad_vcf_to_protein_variation.py b/src/variation/gnomad_vcf_to_protein_variation.py similarity index 100% rename from variation/gnomad_vcf_to_protein_variation.py rename to src/variation/gnomad_vcf_to_protein_variation.py diff --git a/variation/hgvs_dup_del_mode.py b/src/variation/hgvs_dup_del_mode.py similarity index 100% rename from variation/hgvs_dup_del_mode.py rename to src/variation/hgvs_dup_del_mode.py diff --git a/variation/main.py b/src/variation/main.py similarity index 100% rename from variation/main.py rename to src/variation/main.py diff --git a/variation/normalize.py b/src/variation/normalize.py similarity index 100% rename from variation/normalize.py rename to src/variation/normalize.py diff --git a/variation/query.py b/src/variation/query.py similarity index 100% rename from variation/query.py rename to 
src/variation/query.py diff --git a/variation/regex.py b/src/variation/regex.py similarity index 100% rename from variation/regex.py rename to src/variation/regex.py diff --git a/variation/schemas/__init__.py b/src/variation/schemas/__init__.py similarity index 100% rename from variation/schemas/__init__.py rename to src/variation/schemas/__init__.py diff --git a/variation/schemas/app_schemas.py b/src/variation/schemas/app_schemas.py similarity index 100% rename from variation/schemas/app_schemas.py rename to src/variation/schemas/app_schemas.py diff --git a/variation/schemas/classification_response_schema.py b/src/variation/schemas/classification_response_schema.py similarity index 100% rename from variation/schemas/classification_response_schema.py rename to src/variation/schemas/classification_response_schema.py diff --git a/variation/schemas/copy_number_schema.py b/src/variation/schemas/copy_number_schema.py similarity index 100% rename from variation/schemas/copy_number_schema.py rename to src/variation/schemas/copy_number_schema.py diff --git a/variation/schemas/gnomad_vcf_to_protein_schema.py b/src/variation/schemas/gnomad_vcf_to_protein_schema.py similarity index 100% rename from variation/schemas/gnomad_vcf_to_protein_schema.py rename to src/variation/schemas/gnomad_vcf_to_protein_schema.py diff --git a/variation/schemas/hgvs_to_copy_number_schema.py b/src/variation/schemas/hgvs_to_copy_number_schema.py similarity index 100% rename from variation/schemas/hgvs_to_copy_number_schema.py rename to src/variation/schemas/hgvs_to_copy_number_schema.py diff --git a/variation/schemas/normalize_response_schema.py b/src/variation/schemas/normalize_response_schema.py similarity index 100% rename from variation/schemas/normalize_response_schema.py rename to src/variation/schemas/normalize_response_schema.py diff --git a/variation/schemas/service_schema.py b/src/variation/schemas/service_schema.py similarity index 100% rename from variation/schemas/service_schema.py 
rename to src/variation/schemas/service_schema.py diff --git a/variation/schemas/to_vrs_response_schema.py b/src/variation/schemas/to_vrs_response_schema.py similarity index 100% rename from variation/schemas/to_vrs_response_schema.py rename to src/variation/schemas/to_vrs_response_schema.py diff --git a/variation/schemas/token_response_schema.py b/src/variation/schemas/token_response_schema.py similarity index 100% rename from variation/schemas/token_response_schema.py rename to src/variation/schemas/token_response_schema.py diff --git a/variation/schemas/translation_response_schema.py b/src/variation/schemas/translation_response_schema.py similarity index 100% rename from variation/schemas/translation_response_schema.py rename to src/variation/schemas/translation_response_schema.py diff --git a/variation/schemas/validation_response_schema.py b/src/variation/schemas/validation_response_schema.py similarity index 100% rename from variation/schemas/validation_response_schema.py rename to src/variation/schemas/validation_response_schema.py diff --git a/variation/schemas/variation_schema.py b/src/variation/schemas/variation_schema.py similarity index 100% rename from variation/schemas/variation_schema.py rename to src/variation/schemas/variation_schema.py diff --git a/variation/schemas/vrs_python_translator_schema.py b/src/variation/schemas/vrs_python_translator_schema.py similarity index 100% rename from variation/schemas/vrs_python_translator_schema.py rename to src/variation/schemas/vrs_python_translator_schema.py diff --git a/variation/to_copy_number_variation.py b/src/variation/to_copy_number_variation.py similarity index 100% rename from variation/to_copy_number_variation.py rename to src/variation/to_copy_number_variation.py diff --git a/variation/to_vrs.py b/src/variation/to_vrs.py similarity index 100% rename from variation/to_vrs.py rename to src/variation/to_vrs.py diff --git a/variation/tokenize.py b/src/variation/tokenize.py similarity index 100% rename 
from variation/tokenize.py rename to src/variation/tokenize.py diff --git a/variation/tokenizers/__init__.py b/src/variation/tokenizers/__init__.py similarity index 100% rename from variation/tokenizers/__init__.py rename to src/variation/tokenizers/__init__.py diff --git a/variation/tokenizers/cdna_and_genomic_reference_agree.py b/src/variation/tokenizers/cdna_and_genomic_reference_agree.py similarity index 100% rename from variation/tokenizers/cdna_and_genomic_reference_agree.py rename to src/variation/tokenizers/cdna_and_genomic_reference_agree.py diff --git a/variation/tokenizers/cdna_deletion.py b/src/variation/tokenizers/cdna_deletion.py similarity index 100% rename from variation/tokenizers/cdna_deletion.py rename to src/variation/tokenizers/cdna_deletion.py diff --git a/variation/tokenizers/cdna_delins.py b/src/variation/tokenizers/cdna_delins.py similarity index 100% rename from variation/tokenizers/cdna_delins.py rename to src/variation/tokenizers/cdna_delins.py diff --git a/variation/tokenizers/cdna_insertion.py b/src/variation/tokenizers/cdna_insertion.py similarity index 100% rename from variation/tokenizers/cdna_insertion.py rename to src/variation/tokenizers/cdna_insertion.py diff --git a/variation/tokenizers/cdna_substitution.py b/src/variation/tokenizers/cdna_substitution.py similarity index 100% rename from variation/tokenizers/cdna_substitution.py rename to src/variation/tokenizers/cdna_substitution.py diff --git a/variation/tokenizers/free_text_categorical.py b/src/variation/tokenizers/free_text_categorical.py similarity index 100% rename from variation/tokenizers/free_text_categorical.py rename to src/variation/tokenizers/free_text_categorical.py diff --git a/variation/tokenizers/gene_symbol.py b/src/variation/tokenizers/gene_symbol.py similarity index 100% rename from variation/tokenizers/gene_symbol.py rename to src/variation/tokenizers/gene_symbol.py diff --git a/variation/tokenizers/genomic_deletion.py 
b/src/variation/tokenizers/genomic_deletion.py similarity index 100% rename from variation/tokenizers/genomic_deletion.py rename to src/variation/tokenizers/genomic_deletion.py diff --git a/variation/tokenizers/genomic_delins.py b/src/variation/tokenizers/genomic_delins.py similarity index 100% rename from variation/tokenizers/genomic_delins.py rename to src/variation/tokenizers/genomic_delins.py diff --git a/variation/tokenizers/genomic_duplication.py b/src/variation/tokenizers/genomic_duplication.py similarity index 100% rename from variation/tokenizers/genomic_duplication.py rename to src/variation/tokenizers/genomic_duplication.py diff --git a/variation/tokenizers/genomic_insertion.py b/src/variation/tokenizers/genomic_insertion.py similarity index 100% rename from variation/tokenizers/genomic_insertion.py rename to src/variation/tokenizers/genomic_insertion.py diff --git a/variation/tokenizers/genomic_substitution.py b/src/variation/tokenizers/genomic_substitution.py similarity index 100% rename from variation/tokenizers/genomic_substitution.py rename to src/variation/tokenizers/genomic_substitution.py diff --git a/variation/tokenizers/gnomad_vcf.py b/src/variation/tokenizers/gnomad_vcf.py similarity index 100% rename from variation/tokenizers/gnomad_vcf.py rename to src/variation/tokenizers/gnomad_vcf.py diff --git a/variation/tokenizers/hgvs.py b/src/variation/tokenizers/hgvs.py similarity index 100% rename from variation/tokenizers/hgvs.py rename to src/variation/tokenizers/hgvs.py diff --git a/variation/tokenizers/protein_deletion.py b/src/variation/tokenizers/protein_deletion.py similarity index 100% rename from variation/tokenizers/protein_deletion.py rename to src/variation/tokenizers/protein_deletion.py diff --git a/variation/tokenizers/protein_delins.py b/src/variation/tokenizers/protein_delins.py similarity index 100% rename from variation/tokenizers/protein_delins.py rename to src/variation/tokenizers/protein_delins.py diff --git 
a/variation/tokenizers/protein_insertion.py b/src/variation/tokenizers/protein_insertion.py similarity index 100% rename from variation/tokenizers/protein_insertion.py rename to src/variation/tokenizers/protein_insertion.py diff --git a/variation/tokenizers/protein_reference_agree.py b/src/variation/tokenizers/protein_reference_agree.py similarity index 100% rename from variation/tokenizers/protein_reference_agree.py rename to src/variation/tokenizers/protein_reference_agree.py diff --git a/variation/tokenizers/protein_substitution.py b/src/variation/tokenizers/protein_substitution.py similarity index 100% rename from variation/tokenizers/protein_substitution.py rename to src/variation/tokenizers/protein_substitution.py diff --git a/variation/tokenizers/tokenizer.py b/src/variation/tokenizers/tokenizer.py similarity index 100% rename from variation/tokenizers/tokenizer.py rename to src/variation/tokenizers/tokenizer.py diff --git a/variation/translate.py b/src/variation/translate.py similarity index 100% rename from variation/translate.py rename to src/variation/translate.py diff --git a/variation/translators/__init__.py b/src/variation/translators/__init__.py similarity index 100% rename from variation/translators/__init__.py rename to src/variation/translators/__init__.py diff --git a/variation/translators/ambiguous_translator_base.py b/src/variation/translators/ambiguous_translator_base.py similarity index 100% rename from variation/translators/ambiguous_translator_base.py rename to src/variation/translators/ambiguous_translator_base.py diff --git a/variation/translators/amplification.py b/src/variation/translators/amplification.py similarity index 100% rename from variation/translators/amplification.py rename to src/variation/translators/amplification.py diff --git a/variation/translators/cdna_deletion.py b/src/variation/translators/cdna_deletion.py similarity index 100% rename from variation/translators/cdna_deletion.py rename to 
src/variation/translators/cdna_deletion.py diff --git a/variation/translators/cdna_delins.py b/src/variation/translators/cdna_delins.py similarity index 100% rename from variation/translators/cdna_delins.py rename to src/variation/translators/cdna_delins.py diff --git a/variation/translators/cdna_insertion.py b/src/variation/translators/cdna_insertion.py similarity index 100% rename from variation/translators/cdna_insertion.py rename to src/variation/translators/cdna_insertion.py diff --git a/variation/translators/cdna_reference_agree.py b/src/variation/translators/cdna_reference_agree.py similarity index 100% rename from variation/translators/cdna_reference_agree.py rename to src/variation/translators/cdna_reference_agree.py diff --git a/variation/translators/cdna_substitution.py b/src/variation/translators/cdna_substitution.py similarity index 100% rename from variation/translators/cdna_substitution.py rename to src/variation/translators/cdna_substitution.py diff --git a/variation/translators/genomic_del_dup_base.py b/src/variation/translators/genomic_del_dup_base.py similarity index 100% rename from variation/translators/genomic_del_dup_base.py rename to src/variation/translators/genomic_del_dup_base.py diff --git a/variation/translators/genomic_deletion.py b/src/variation/translators/genomic_deletion.py similarity index 100% rename from variation/translators/genomic_deletion.py rename to src/variation/translators/genomic_deletion.py diff --git a/variation/translators/genomic_deletion_ambiguous.py b/src/variation/translators/genomic_deletion_ambiguous.py similarity index 100% rename from variation/translators/genomic_deletion_ambiguous.py rename to src/variation/translators/genomic_deletion_ambiguous.py diff --git a/variation/translators/genomic_delins.py b/src/variation/translators/genomic_delins.py similarity index 100% rename from variation/translators/genomic_delins.py rename to src/variation/translators/genomic_delins.py diff --git 
a/variation/translators/genomic_duplication.py b/src/variation/translators/genomic_duplication.py similarity index 100% rename from variation/translators/genomic_duplication.py rename to src/variation/translators/genomic_duplication.py diff --git a/variation/translators/genomic_duplication_ambiguous.py b/src/variation/translators/genomic_duplication_ambiguous.py similarity index 100% rename from variation/translators/genomic_duplication_ambiguous.py rename to src/variation/translators/genomic_duplication_ambiguous.py diff --git a/variation/translators/genomic_insertion.py b/src/variation/translators/genomic_insertion.py similarity index 100% rename from variation/translators/genomic_insertion.py rename to src/variation/translators/genomic_insertion.py diff --git a/variation/translators/genomic_reference_agree.py b/src/variation/translators/genomic_reference_agree.py similarity index 100% rename from variation/translators/genomic_reference_agree.py rename to src/variation/translators/genomic_reference_agree.py diff --git a/variation/translators/genomic_substitution.py b/src/variation/translators/genomic_substitution.py similarity index 100% rename from variation/translators/genomic_substitution.py rename to src/variation/translators/genomic_substitution.py diff --git a/variation/translators/protein_deletion.py b/src/variation/translators/protein_deletion.py similarity index 100% rename from variation/translators/protein_deletion.py rename to src/variation/translators/protein_deletion.py diff --git a/variation/translators/protein_delins.py b/src/variation/translators/protein_delins.py similarity index 100% rename from variation/translators/protein_delins.py rename to src/variation/translators/protein_delins.py diff --git a/variation/translators/protein_insertion.py b/src/variation/translators/protein_insertion.py similarity index 100% rename from variation/translators/protein_insertion.py rename to src/variation/translators/protein_insertion.py diff --git 
a/variation/translators/protein_reference_agree.py b/src/variation/translators/protein_reference_agree.py similarity index 100% rename from variation/translators/protein_reference_agree.py rename to src/variation/translators/protein_reference_agree.py diff --git a/variation/translators/protein_stop_gain.py b/src/variation/translators/protein_stop_gain.py similarity index 100% rename from variation/translators/protein_stop_gain.py rename to src/variation/translators/protein_stop_gain.py diff --git a/variation/translators/protein_substitution.py b/src/variation/translators/protein_substitution.py similarity index 100% rename from variation/translators/protein_substitution.py rename to src/variation/translators/protein_substitution.py diff --git a/variation/translators/translator.py b/src/variation/translators/translator.py similarity index 100% rename from variation/translators/translator.py rename to src/variation/translators/translator.py diff --git a/variation/utils.py b/src/variation/utils.py similarity index 100% rename from variation/utils.py rename to src/variation/utils.py diff --git a/variation/validate.py b/src/variation/validate.py similarity index 100% rename from variation/validate.py rename to src/variation/validate.py diff --git a/variation/validators/__init__.py b/src/variation/validators/__init__.py similarity index 100% rename from variation/validators/__init__.py rename to src/variation/validators/__init__.py diff --git a/variation/validators/amplification.py b/src/variation/validators/amplification.py similarity index 100% rename from variation/validators/amplification.py rename to src/variation/validators/amplification.py diff --git a/variation/validators/cdna_deletion.py b/src/variation/validators/cdna_deletion.py similarity index 100% rename from variation/validators/cdna_deletion.py rename to src/variation/validators/cdna_deletion.py diff --git a/variation/validators/cdna_delins.py b/src/variation/validators/cdna_delins.py similarity index 
100% rename from variation/validators/cdna_delins.py rename to src/variation/validators/cdna_delins.py diff --git a/variation/validators/cdna_insertion.py b/src/variation/validators/cdna_insertion.py similarity index 100% rename from variation/validators/cdna_insertion.py rename to src/variation/validators/cdna_insertion.py diff --git a/variation/validators/cdna_reference_agree.py b/src/variation/validators/cdna_reference_agree.py similarity index 100% rename from variation/validators/cdna_reference_agree.py rename to src/variation/validators/cdna_reference_agree.py diff --git a/variation/validators/cdna_substitution.py b/src/variation/validators/cdna_substitution.py similarity index 100% rename from variation/validators/cdna_substitution.py rename to src/variation/validators/cdna_substitution.py diff --git a/variation/validators/genomic_base.py b/src/variation/validators/genomic_base.py similarity index 100% rename from variation/validators/genomic_base.py rename to src/variation/validators/genomic_base.py diff --git a/variation/validators/genomic_deletion.py b/src/variation/validators/genomic_deletion.py similarity index 100% rename from variation/validators/genomic_deletion.py rename to src/variation/validators/genomic_deletion.py diff --git a/variation/validators/genomic_deletion_ambiguous.py b/src/variation/validators/genomic_deletion_ambiguous.py similarity index 100% rename from variation/validators/genomic_deletion_ambiguous.py rename to src/variation/validators/genomic_deletion_ambiguous.py diff --git a/variation/validators/genomic_delins.py b/src/variation/validators/genomic_delins.py similarity index 100% rename from variation/validators/genomic_delins.py rename to src/variation/validators/genomic_delins.py diff --git a/variation/validators/genomic_duplication.py b/src/variation/validators/genomic_duplication.py similarity index 100% rename from variation/validators/genomic_duplication.py rename to src/variation/validators/genomic_duplication.py diff 
--git a/variation/validators/genomic_duplication_ambiguous.py b/src/variation/validators/genomic_duplication_ambiguous.py similarity index 100% rename from variation/validators/genomic_duplication_ambiguous.py rename to src/variation/validators/genomic_duplication_ambiguous.py diff --git a/variation/validators/genomic_insertion.py b/src/variation/validators/genomic_insertion.py similarity index 100% rename from variation/validators/genomic_insertion.py rename to src/variation/validators/genomic_insertion.py diff --git a/variation/validators/genomic_reference_agree.py b/src/variation/validators/genomic_reference_agree.py similarity index 100% rename from variation/validators/genomic_reference_agree.py rename to src/variation/validators/genomic_reference_agree.py diff --git a/variation/validators/genomic_substitution.py b/src/variation/validators/genomic_substitution.py similarity index 100% rename from variation/validators/genomic_substitution.py rename to src/variation/validators/genomic_substitution.py diff --git a/variation/validators/protein_deletion.py b/src/variation/validators/protein_deletion.py similarity index 100% rename from variation/validators/protein_deletion.py rename to src/variation/validators/protein_deletion.py diff --git a/variation/validators/protein_delins.py b/src/variation/validators/protein_delins.py similarity index 100% rename from variation/validators/protein_delins.py rename to src/variation/validators/protein_delins.py diff --git a/variation/validators/protein_insertion.py b/src/variation/validators/protein_insertion.py similarity index 100% rename from variation/validators/protein_insertion.py rename to src/variation/validators/protein_insertion.py diff --git a/variation/validators/protein_reference_agree.py b/src/variation/validators/protein_reference_agree.py similarity index 100% rename from variation/validators/protein_reference_agree.py rename to src/variation/validators/protein_reference_agree.py diff --git 
a/variation/validators/protein_stop_gain.py b/src/variation/validators/protein_stop_gain.py similarity index 100% rename from variation/validators/protein_stop_gain.py rename to src/variation/validators/protein_stop_gain.py diff --git a/variation/validators/protein_substitution.py b/src/variation/validators/protein_substitution.py similarity index 100% rename from variation/validators/protein_substitution.py rename to src/variation/validators/protein_substitution.py diff --git a/variation/validators/validator.py b/src/variation/validators/validator.py similarity index 100% rename from variation/validators/validator.py rename to src/variation/validators/validator.py diff --git a/variation/version.py b/src/variation/version.py similarity index 100% rename from variation/version.py rename to src/variation/version.py diff --git a/variation/vrs_representation.py b/src/variation/vrs_representation.py similarity index 100% rename from variation/vrs_representation.py rename to src/variation/vrs_representation.py From 2b3a492929cee89b7df20a00750dde796666862b Mon Sep 17 00:00:00 2001 From: korikuzma Date: Wed, 7 Feb 2024 19:01:04 -0500 Subject: [PATCH 6/8] ruff fix --- tests/to_copy_number_variation/test_hgvs_to_copy_number.py | 1 - tests/to_copy_number_variation/test_parsed_to_copy_number.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/to_copy_number_variation/test_hgvs_to_copy_number.py b/tests/to_copy_number_variation/test_hgvs_to_copy_number.py index d2e3a9b5..c454e015 100644 --- a/tests/to_copy_number_variation/test_hgvs_to_copy_number.py +++ b/tests/to_copy_number_variation/test_hgvs_to_copy_number.py @@ -3,7 +3,6 @@ import pytest from ga4gh.vrs import models - from tests.conftest import cnv_assertion_checks diff --git a/tests/to_copy_number_variation/test_parsed_to_copy_number.py b/tests/to_copy_number_variation/test_parsed_to_copy_number.py index 3e4ae807..2eeadc35 100644 --- a/tests/to_copy_number_variation/test_parsed_to_copy_number.py 
+++ b/tests/to_copy_number_variation/test_parsed_to_copy_number.py @@ -4,8 +4,8 @@ import pytest from ga4gh.vrs import models from pydantic import ValidationError - from tests.conftest import cnv_assertion_checks + from variation.schemas.copy_number_schema import ( ClinVarAssembly, Comparator, From 65740f12e6d23d7897a1ad32bbabdfb63d4b1d18 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Mon, 12 Feb 2024 20:57:11 -0500 Subject: [PATCH 7/8] fix: handle cases where gene_context is not found * Also handles cases where only protein or cdna gene is found and adds warnings if there is a mismatch when both exist --- src/variation/gnomad_vcf_to_protein_variation.py | 10 +++++++++- tests/test_gnomad_vcf_to_protein.py | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/variation/gnomad_vcf_to_protein_variation.py b/src/variation/gnomad_vcf_to_protein_variation.py index 77a39aff..d8ab6434 100644 --- a/src/variation/gnomad_vcf_to_protein_variation.py +++ b/src/variation/gnomad_vcf_to_protein_variation.py @@ -581,11 +581,19 @@ async def gnomad_vcf_to_protein(self, vcf_query: str) -> GnomadVcfToProteinServi except GnomadVcfToProteinError as e: warnings.append(str(e)) + if p_data.gene and c_data.gene and p_data.gene != c_data.gene: + warnings.append( + f"Protein gene ({p_data.gene}) and cDNA gene ({c_data.gene}) mismatch" + ) + else: + gene = p_data.gene or c_data.gene + gene_context = self._get_gene_context(gene) if gene else None + return GnomadVcfToProteinService( variation_query=vcf_query, variation=variation, vrs_ref_allele_seq=self._get_vrs_ref_allele_seq(variation.location, p_ac), - gene_context=self._get_gene_context(p_data.gene), + gene_context=gene_context, warnings=warnings, service_meta_=ServiceMeta( version=__version__, diff --git a/tests/test_gnomad_vcf_to_protein.py b/tests/test_gnomad_vcf_to_protein.py index a5a793f6..ea0078b0 100644 --- a/tests/test_gnomad_vcf_to_protein.py +++ b/tests/test_gnomad_vcf_to_protein.py @@ -388,6 +388,12 @@ async 
def test_delins(test_handler, delins_pos, delins_neg): assert resp.vrs_ref_allele_seq == "PRLLFPTNSSSHLVALQGQP" assert resp.gene_context + # CA16602420. Example where protein gene not found, but cDNA gene found + resp = await test_handler.gnomad_vcf_to_protein("7-140453136-AC-TT") + assert resp.variation + assert resp.gene_context + assert resp.warnings == [] + @pytest.mark.asyncio() async def test_invalid(test_handler): From b504f4cd1af09d9bcb0d74cf17ba1fd12c152760 Mon Sep 17 00:00:00 2001 From: korikuzma Date: Mon, 12 Feb 2024 21:03:34 -0500 Subject: [PATCH 8/8] define gene --- src/variation/gnomad_vcf_to_protein_variation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/variation/gnomad_vcf_to_protein_variation.py b/src/variation/gnomad_vcf_to_protein_variation.py index d8ab6434..2cb8e4b0 100644 --- a/src/variation/gnomad_vcf_to_protein_variation.py +++ b/src/variation/gnomad_vcf_to_protein_variation.py @@ -585,8 +585,7 @@ async def gnomad_vcf_to_protein(self, vcf_query: str) -> GnomadVcfToProteinServi warnings.append( f"Protein gene ({p_data.gene}) and cDNA gene ({c_data.gene}) mismatch" ) - else: - gene = p_data.gene or c_data.gene + gene = p_data.gene or c_data.gene gene_context = self._get_gene_context(gene) if gene else None return GnomadVcfToProteinService(