diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 1eca9f33..00000000 --- a/.flake8 +++ /dev/null @@ -1,22 +0,0 @@ -[flake8] -ignore = D205, D400, I101, ANN101, ANN002, ANN003 -max-line-length = 88 -exclude = - .git - venv - __pycache__ - source - outputs - docs/* - variation/version.py - build/* - codebuild/* -inline-quotes = " -import-order-style = pep8 -application-import-names = - variation - tests -per-file-ignores = - tests/*:ANN001, ANN2, ANN102 - setup.py:F821 - *__init__.py:F401 diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 72090fe0..ad485bc0 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -25,8 +25,13 @@ jobs: steps: - uses: actions/checkout@v3 - - name: black - uses: psf/black@stable + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + + - name: Install dependencies + run: python3 -m pip install ".[dev]" - - name: ruff - uses: chartboost/ruff-action@v1 + - name: Check style + run: python3 -m ruff check . && ruff format --check . diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c1c3d02..8e29f947 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,20 +2,15 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v1.4.0 + rev: v4.5.0 hooks: - id: check-added-large-files - id: detect-private-key - id: trailing-whitespace - id: end-of-file-fixer -- repo: https://github.com/psf/black - rev: 23.7.0 - hooks: - - id: black - language_version: python3.11 - repo: https://github.com/astral-sh/ruff-pre-commit - # Ruff version. 
- rev: v0.0.280 + rev: v0.2.0 hooks: + - id: ruff-format - id: ruff args: [ --fix, --exit-non-zero-on-fix ] diff --git a/Pipfile b/Pipfile index ea36fa69..e3d15d97 100644 --- a/Pipfile +++ b/Pipfile @@ -12,8 +12,7 @@ variation-normalizer = {editable = true, path = "."} jupyter = "*" ipykernel = "*" psycopg2-binary = "*" -ruff = "*" -black = "*" +ruff = "==0.2.0" [packages] "biocommons.seqrepo" = "*" diff --git a/README.md b/README.md index 414cfcfc..2d4e7425 100644 --- a/README.md +++ b/README.md @@ -156,20 +156,26 @@ Next, view the OpenAPI docs on your local machine: ### Init coding style tests -Code style is managed by [Ruff](https://github.com/astral-sh/ruff) and checked prior to commit. +Code style is managed by [Ruff](https://docs.astral.sh/ruff/) and checked prior to commit. + +Check style with `ruff`: + +```shell +python3 -m ruff format . && python3 -m ruff check --fix . +``` We use [pre-commit](https://pre-commit.com/#usage) to run conformance tests. This ensures: -* Check code style -* Check for added large files -* Detect AWS Credentials -* Detect Private Key +* Style correctness +* No large files +* No AWS credentials are present +* No private key is present -Before first commit run: +Pre-commit *must* be installed before your first commit. 
Use the following command: -```shell +```commandline pre-commit install ``` diff --git a/codebuild/deploy_eb_env_dev.py b/codebuild/deploy_eb_env_dev.py index 08ae38b3..9871a35b 100644 --- a/codebuild/deploy_eb_env_dev.py +++ b/codebuild/deploy_eb_env_dev.py @@ -43,6 +43,6 @@ "Status" ] print(eb_provisioned_product_status) -except Exception as e: # noqa: E722 +except Exception as e: print(e) print("The EB environment is already running....") diff --git a/pyproject.toml b/pyproject.toml index b472aaaf..7e578334 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,40 +2,93 @@ requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta:__legacy__" -[tool.black] -line-length = 88 - [tool.ruff] -# pycodestyle (E, W) -# Pyflakes (F) -# flake8-annotations (ANN) -# flake8-quotes (Q) -# pydocstyle (D) -# pep8-naming (N) -# isort (I) -select = ["E", "W", "F", "ANN", "Q", "D", "N", "I"] - -fixable = ["I", "F401"] +lint.select = [ + "F", # https://docs.astral.sh/ruff/rules/#pyflakes-f + "E", "W", # https://docs.astral.sh/ruff/rules/#pycodestyle-e-w + "I", # https://docs.astral.sh/ruff/rules/#isort-i + "N", # https://docs.astral.sh/ruff/rules/#pep8-naming-n + "D", # https://docs.astral.sh/ruff/rules/#pydocstyle-d + "UP", # https://docs.astral.sh/ruff/rules/#pyupgrade-up + "ANN", # https://docs.astral.sh/ruff/rules/#flake8-annotations-ann + "ASYNC", # https://docs.astral.sh/ruff/rules/#flake8-async-async + "S", # https://docs.astral.sh/ruff/rules/#flake8-bandit-s + "B", # https://docs.astral.sh/ruff/rules/#flake8-bugbear-b + "A", # https://docs.astral.sh/ruff/rules/#flake8-builtins-a + "C4", # https://docs.astral.sh/ruff/rules/#flake8-comprehensions-c4 + "DTZ", # https://docs.astral.sh/ruff/rules/#flake8-datetimez-dtz + "T10", # https://docs.astral.sh/ruff/rules/#flake8-debugger-t10 + "EM", # https://docs.astral.sh/ruff/rules/#flake8-errmsg-em + "G", # https://docs.astral.sh/ruff/rules/#flake8-logging-format-g + "PIE", # 
https://docs.astral.sh/ruff/rules/#flake8-pie-pie + "T20", # https://docs.astral.sh/ruff/rules/#flake8-print-t20 + "PT", # https://docs.astral.sh/ruff/rules/#flake8-pytest-style-pt + "Q", # https://docs.astral.sh/ruff/rules/#flake8-quotes-q + "RSE", # https://docs.astral.sh/ruff/rules/#flake8-raise-rse + "RET", # https://docs.astral.sh/ruff/rules/#flake8-return-ret + "SIM", # https://docs.astral.sh/ruff/rules/#flake8-simplify-sim + "PTH", # https://docs.astral.sh/ruff/rules/#flake8-use-pathlib-pth + "PGH", # https://docs.astral.sh/ruff/rules/#pygrep-hooks-pgh + "RUF", # https://docs.astral.sh/ruff/rules/#ruff-specific-rules-ruf +] +lint.fixable = [ + "I", + "F401", + "D", + "UP", + "ANN", + "B", + "C4", + "G", + "PIE", + "PT", + "RSE", + "SIM", + "RUF" +] +# ANN101 - missing-type-self +# ANN003 - missing-type-kwargs +# D203 - one-blank-line-before-class # D205 - blank-line-after-summary +# D206 - indent-with-spaces* +# D213 - multi-line-summary-second-line +# D300 - triple-single-quotes* # D400 - ends-in-period # D415 - ends-in-punctuation -# ANN101 - missing-type-self -# ANN003 - missing-type-kwargs -# E501 - line-too-long -ignore = ["D205", "D400", "D415", "ANN101", "ANN003", "E501"] +# E111 - indentation-with-invalid-multiple* +# E114 - indentation-with-invalid-multiple-comment* +# E117 - over-indented* +# E501 - line-too-long* +# W191 - tab-indentation* +# PGH003 - blanket-type-ignore +# *ignored for compatibility with formatter +lint.ignore = [ + "ANN101", "ANN003", + "D203", "D205", "D206", "D213", "D300", "D400", "D415", + "E111", "E114", "E117", "E501", + "W191", + "PGH003", + "S321", +] -[tool.ruff.flake8-quotes] -docstring-quotes = "double" +exclude = [ + "setup.py" +] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] # ANN001 - missing-type-function-argument # ANN2 - missing-return-type +# ANN201 - Missing type annotation # ANN102 - missing-type-cls # N805 - invalid-first-argument-name-for-method -# F821 - undefined-name -# F401 - 
unused-import -"tests/*" = ["ANN001", "ANN2", "ANN102"] -"setup.py" = ["F821"] -"*__init__.py" = ["F401"] -"variation/schemas/*" = ["ANN201", "N805", "ANN001"] +# S101 - assert +# B011 - assert-false +# RUF001 - ambiguous-unicode-character-string +"tests/*" = ["ANN001", "ANN102", "ANN2", "S101", "B011"] +"variation/schemas/*" = ["ANN001", "ANN201", "N805", "S101"] +"codebuild/*" = ["T201"] + +[tool.ruff.lint.flake8-bugbear] +# Allow default arguments like, e.g., `data: List[str] = fastapi.Query(None)`. +extend-immutable-calls = ["fastapi.Query"] diff --git a/setup.cfg b/setup.cfg index edcda243..7f37081a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -50,12 +50,11 @@ dev = pytest pytest-asyncio pytest-cov - ruff + ruff == 0.2.0 pre-commit jupyter ipykernel psycopg2-binary - black [tool:pytest] addopts = --ignore setup.py --ignore=codebuild/ --doctest-modules --cov-report term-missing --cov . diff --git a/tests/conftest.py b/tests/conftest.py index d2ab6472..ca4c4e1d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -558,11 +558,11 @@ def assertion_checks(normalize_response, test_variation): def cnv_assertion_checks(resp, test_fixture): """Check that actual response for to copy number matches expected""" try: - getattr(resp, "copy_number_count") + cnc = resp.copy_number_count except AttributeError: actual = resp.copy_number_change.model_dump(exclude_none=True) else: - actual = resp.copy_number_count.model_dump(exclude_none=True) + actual = cnc.model_dump(exclude_none=True) expected = test_fixture.model_dump(exclude_none=True) assert actual == expected assert resp.warnings == [] diff --git a/tests/test_classifier.py b/tests/test_classifier.py index 82f45c30..e0ca546f 100644 --- a/tests/test_classifier.py +++ b/tests/test_classifier.py @@ -1,4 +1,6 @@ """Module for testing classifiers""" +from pathlib import Path + import pytest import yaml @@ -30,7 +32,7 @@ @pytest.fixture(scope="module") def all_fixtures(): """Create fixture for classifiers""" - with 
open(f"{PROJECT_ROOT}/tests/fixtures/classifiers.yml") as stream: + with Path(f"{PROJECT_ROOT}/tests/fixtures/classifiers.yml").open() as stream: return yaml.safe_load(stream) diff --git a/tests/test_gnomad_vcf_to_protein.py b/tests/test_gnomad_vcf_to_protein.py index 559e47cc..a5a793f6 100644 --- a/tests/test_gnomad_vcf_to_protein.py +++ b/tests/test_gnomad_vcf_to_protein.py @@ -263,7 +263,7 @@ def delins_neg(): return models.Allele(**params) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_substitution( test_handler, braf_v600e, @@ -327,7 +327,7 @@ async def test_substitution( assertion_checks(resp, multi_nuc_sub_neg) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_reference_agree(test_handler, vhl_reference_agree): """Test that reference agree queries return correct response""" # https://www.ncbi.nlm.nih.gov/clinvar/variation/379039/?new_evidence=true @@ -338,7 +338,7 @@ async def test_reference_agree(test_handler, vhl_reference_agree): assert resp.warnings == [] -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_insertion(test_handler, protein_insertion, protein_insertion2): """Test that insertion queries return correct response""" # positive strand (CA645561585) @@ -356,7 +356,7 @@ async def test_insertion(test_handler, protein_insertion, protein_insertion2): assert resp.warnings == [] -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_deletion(test_handler, protein_deletion_np_range, cdk11a_e314del): """Test that deletion queries return correct response""" resp = await test_handler.gnomad_vcf_to_protein("17-39723966-TTGAGGGAAAACACAT-T") @@ -371,7 +371,7 @@ async def test_deletion(test_handler, protein_deletion_np_range, cdk11a_e314del) assert resp.warnings == [] -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_delins(test_handler, delins_pos, delins_neg): """Test that delins queries return correct response""" # CA645561524, Positive Strand @@ -389,7 +389,7 @@ async def test_delins(test_handler, 
delins_pos, delins_neg): assert resp.gene_context -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_invalid(test_handler): """Test that invalid queries return correct response""" resp = await test_handler.gnomad_vcf_to_protein("BRAF V600E") diff --git a/tests/test_hgvs_dup_del_mode.py b/tests/test_hgvs_dup_del_mode.py index 576b9b88..aa56fe05 100644 --- a/tests/test_hgvs_dup_del_mode.py +++ b/tests/test_hgvs_dup_del_mode.py @@ -786,7 +786,7 @@ def no_variation_check(resp, q): assert resp.variation is None, q -@pytest.mark.asyncio +@pytest.mark.asyncio() async def invalid_query_list_checks(query_list, test_handler): """Check that invalid queries in query list do not normalize""" for q in query_list: @@ -794,7 +794,7 @@ async def invalid_query_list_checks(query_list, test_handler): no_variation_check(resp, q) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup1( test_handler, genomic_dup1_lse, @@ -872,7 +872,7 @@ async def test_genomic_dup1( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup2( test_handler, genomic_dup2_lse, @@ -942,7 +942,7 @@ async def test_genomic_dup2( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup3( test_handler, genomic_dup3_cx, @@ -1007,7 +1007,7 @@ async def test_genomic_dup3( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup4( test_handler, genomic_dup4_cn, @@ -1071,7 +1071,7 @@ async def test_genomic_dup4( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup5( test_handler, genomic_dup5_cn, @@ -1135,7 +1135,7 @@ async def test_genomic_dup5( assert resp.variation is None, q -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup6( test_handler, 
genomic_dup6_cn, @@ -1199,7 +1199,7 @@ async def test_genomic_dup6( assert resp.variation is None, q -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del1( test_handler, genomic_del1_lse, @@ -1270,7 +1270,7 @@ async def test_genomic_del1( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del2( test_handler, genomic_del2_lse, @@ -1356,7 +1356,7 @@ async def test_genomic_del2( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del3( test_handler, genomic_del3_dup3_cn_38, @@ -1421,7 +1421,7 @@ async def test_genomic_del3( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del4( test_handler, genomic_del4_cn, @@ -1493,7 +1493,7 @@ async def test_genomic_del4( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del5( test_handler, genomic_del5_cn_var, @@ -1554,7 +1554,7 @@ async def test_genomic_del5( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del6( test_handler, genomic_del6_cn_var, @@ -1616,7 +1616,7 @@ async def test_genomic_del6( await invalid_query_list_checks(invalid_queries, test_handler) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_parameters(test_handler): """Check that valid and invalid parameters work as intended.""" resp = await test_handler.normalize("7-140453136-A-T") diff --git a/tests/test_normalize.py b/tests/test_normalize.py index 066da0ff..8a36f0c2 100644 --- a/tests/test_normalize.py +++ b/tests/test_normalize.py @@ -555,7 +555,7 @@ def gnomad_vcf_genomic_delins5(): return models.Allele(**params) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_substitution(test_handler, braf_v600e, 
dis3_p63a, tp53_g262c): """Test that protein substitutions normalize correctly.""" resp = await test_handler.normalize(" BRAF V600E ") @@ -574,21 +574,21 @@ async def test_protein_substitution(test_handler, braf_v600e, dis3_p63a, tp53_g2 assertion_checks(resp, dis3_p63a) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_polypeptide_truncation(test_handler, vhl): """Test that polypeptide truncations normalize correctly.""" resp = await test_handler.normalize("NP_000542.1:p.Tyr185Ter") assertion_checks(resp, vhl) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_reference_agree(test_handler, vhl_reference_agree): """Test that reference agrees normalize correctly.""" resp = await test_handler.normalize("NP_000542.1:p.Pro61=") assertion_checks(resp, vhl_reference_agree) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_and_genomic_substitution( test_handler, braf_v600e_nucleotide, @@ -642,7 +642,7 @@ async def test_cdna_and_genomic_substitution( assertion_checks(resp, gnomad_vcf_genomic_sub_mnv) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_reference_agree(test_handler, cdna_reference_agree): """Test that cdna Reference Agree normalizes correctly.""" resp = await test_handler.normalize("NM_004333.4:c.1799= ") @@ -658,7 +658,7 @@ async def test_cdna_reference_agree(test_handler, cdna_reference_agree): assertion_checks(resp, cdna_reference_agree) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_reference_agree( test_handler, cdna_reference_agree, grch38_braf_genom_reference_agree ): @@ -683,7 +683,7 @@ async def test_genomic_reference_agree( assertion_checks(resp, cdna_reference_agree) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_delins(test_handler, nm_004448_cdna_delins, nm_000551): """Test that cdna DelIns normalizes correctly.""" resp = await test_handler.normalize(" NM_004448.4:c.2326_2327delinsCT ") @@ -696,7 +696,7 @@ async def test_cdna_delins(test_handler, 
nm_004448_cdna_delins, nm_000551): assertion_checks(resp, nm_000551) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_delins( test_handler, grch38_genomic_delins1, @@ -770,7 +770,7 @@ async def test_genomic_delins( assertion_checks(resp, genomic_del1_lse) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_delins(test_handler, protein_delins): """Test that Amnio Acid DelIns normalizes correctly.""" resp = await test_handler.normalize("NP_001333827.1:p.Leu747_Thr751delinsPro") @@ -786,7 +786,7 @@ async def test_protein_delins(test_handler, protein_delins): assertion_checks(resp, protein_delins) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_deletion(test_handler, protein_deletion_np_range): """Test that Protein Deletion normalizes correctly.""" resp = await test_handler.normalize("NP_004439.2:p.Leu755_Thr759del") @@ -807,10 +807,10 @@ async def test_protein_deletion(test_handler, protein_deletion_np_range): assert resp.variation is None -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_deletion(test_handler, cdna_deletion): """Test that cdna deletion normalizes correctly.""" - # https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_caid?caid=CA645372623 # noqa: E501 + # https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/by_caid?caid=CA645372623 q = "NM_004448.3:c.2264_2278delTGAGGGAAAACACAT" resp1 = await test_handler.normalize(q) assertion_checks(resp1, cdna_deletion) @@ -827,7 +827,7 @@ async def test_cdna_deletion(test_handler, cdna_deletion): assertion_checks(resp, cdna_deletion) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_deletion(test_handler, genomic_deletion): """Test that genomic deletion normalizes correctly""" # CA915940709 @@ -846,7 +846,7 @@ async def test_genomic_deletion(test_handler, genomic_deletion): assert resp.variation is None -@pytest.mark.asyncio +@pytest.mark.asyncio() async def 
test_protein_insertion(test_handler, protein_insertion): """Test that protein insertion normalizes correctly.""" resp = await test_handler.normalize("NP_005219.2:p.Asp770_Asn771insGlyLeu") @@ -865,14 +865,14 @@ async def test_protein_insertion(test_handler, protein_insertion): assertion_checks(resp, protein_insertion) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_insertion(test_handler, cdna_insertion): """Test that cdna insertion normalizes correctly.""" resp = await test_handler.normalize("ENST00000331728.9:c.2049_2050insA") assertion_checks(resp, cdna_insertion) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_insertion( test_handler, genomic_insertion, grch38_genomic_insertion_variation ): @@ -890,7 +890,7 @@ async def test_genomic_insertion( assertion_checks(resp, grch38_genomic_insertion_variation) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_amplification(test_handler, braf_amplification, prpf8_amplification): """Test that amplification normalizes correctly.""" q = "BRAF Amplification" @@ -907,7 +907,7 @@ async def test_amplification(test_handler, braf_amplification, prpf8_amplificati assert resp.variation is None -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_valid_queries(test_handler): """Test that valid queries don"t throw exceptions. Used for queries that revealed bugs in service. 
@@ -932,7 +932,7 @@ async def test_valid_queries(test_handler): assert resp.variation, q -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_no_matches(test_handler): """Test no matches work correctly.""" queries = [ @@ -973,7 +973,7 @@ async def test_no_matches(test_handler): assert resp.variation is None -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_service_meta(): """Test that service meta info populates correctly.""" response = await normalize_get_response("BRAF v600e", "default") diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index 8ac3fb0c..bf3132e8 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -1,4 +1,6 @@ """Module for testing tokenizers""" +from pathlib import Path + import pytest import yaml @@ -48,7 +50,7 @@ @pytest.fixture(scope="module") def all_fixtures(): """Create fixture for tokenizers""" - with open(f"{PROJECT_ROOT}/tests/fixtures/tokenizers.yml") as stream: + with Path(f"{PROJECT_ROOT}/tests/fixtures/tokenizers.yml").open() as stream: return yaml.safe_load(stream) diff --git a/tests/test_translator.py b/tests/test_translator.py index babe94db..b3cc1338 100644 --- a/tests/test_translator.py +++ b/tests/test_translator.py @@ -1,4 +1,6 @@ """Module for testing translators""" +from pathlib import Path + import pytest import yaml @@ -33,7 +35,7 @@ @pytest.fixture(scope="module") def all_fixtures(): """Create fixture for translators""" - with open(f"{PROJECT_ROOT}/tests/fixtures/translators.yml") as stream: + with Path(f"{PROJECT_ROOT}/tests/fixtures/translators.yml").open() as stream: return yaml.safe_load(stream) @@ -92,7 +94,7 @@ async def translator_checks( assert len(translations) == len(expected), query -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_substitution( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -110,7 +112,7 @@ async def test_protein_substitution( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def 
test_cdna_substitution( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -128,7 +130,7 @@ async def test_cdna_substitution( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_substitution( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -146,7 +148,7 @@ async def test_genomic_substitution( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_stop_gain( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -164,7 +166,7 @@ async def test_protein_stop_gain( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_reference_agree( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -182,7 +184,7 @@ async def test_protein_reference_agree( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_reference_agree( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -200,7 +202,7 @@ async def test_cdna_reference_agree( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_reference_agree( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -218,7 +220,7 @@ async def test_genomic_reference_agree( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_delins( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -236,7 +238,7 @@ async def test_protein_delins( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_delins( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -254,7 +256,7 @@ async def test_cdna_delins( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_delins( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -272,7 +274,7 @@ async def test_genomic_delins( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_deletion( all_fixtures, trans_params, 
test_tokenizer, test_classifier, test_validator ): @@ -290,7 +292,7 @@ async def test_protein_deletion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_deletion( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -308,7 +310,7 @@ async def test_cdna_deletion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_deletion( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -326,7 +328,7 @@ async def test_genomic_deletion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_deletion_ambiguous( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -344,7 +346,7 @@ async def test_genomic_deletion_ambiguous( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_insertion( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -362,7 +364,7 @@ async def test_protein_insertion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_insertion( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -380,7 +382,7 @@ async def test_cdna_insertion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_insertion( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -398,7 +400,7 @@ async def test_genomic_insertion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_duplication( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -416,7 +418,7 @@ async def test_genomic_duplication( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_duplication_ambiguous( all_fixtures, trans_params, test_tokenizer, test_classifier, test_validator ): @@ -434,7 +436,7 @@ async def test_genomic_duplication_ambiguous( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_amplification( all_fixtures, trans_params, test_tokenizer, test_classifier, 
test_validator ): diff --git a/tests/test_validator.py b/tests/test_validator.py index b97707c6..ee07e501 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -1,4 +1,6 @@ """Module for testing validators""" +from pathlib import Path + import pytest import yaml @@ -30,7 +32,7 @@ @pytest.fixture(scope="module") def all_fixtures(): """Create fixture for validators""" - with open(f"{PROJECT_ROOT}/tests/fixtures/validators.yml") as stream: + with Path(f"{PROJECT_ROOT}/tests/fixtures/validators.yml").open() as stream: return yaml.safe_load(stream) @@ -67,9 +69,9 @@ async def validator_checks( classification ) except Exception as e: - raise Exception(f"{e}: {query}") + msg = f"{e}: {query}" + raise Exception(msg) from e else: - validator_instance is_valid = False for vr in validation_results: if vr.is_valid: @@ -79,7 +81,7 @@ async def validator_checks( assert is_valid if label == "should_match" else not is_valid, query -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_substitution( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -96,7 +98,7 @@ async def test_protein_substitution( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_substitution( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -113,7 +115,7 @@ async def test_cdna_substitution( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_substitution( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -130,7 +132,7 @@ async def test_genomic_substitution( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_stop_gain( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -147,7 +149,7 @@ async def test_protein_stop_gain( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_reference_agree( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -164,7 +166,7 @@ async def test_protein_reference_agree( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() 
async def test_cdna_reference_agree( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -181,7 +183,7 @@ async def test_cdna_reference_agree( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_reference_agree( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -198,7 +200,7 @@ async def test_genomic_reference_agree( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_delins( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -215,7 +217,7 @@ async def test_protein_delins( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_delins(all_fixtures, test_tokenizer, test_classifier, val_params): """Test that cdna delins validator works correctly""" fixture_name = "cdna_delins" @@ -230,7 +232,7 @@ async def test_cdna_delins(all_fixtures, test_tokenizer, test_classifier, val_pa ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_delins( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -247,7 +249,7 @@ async def test_genomic_delins( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_deletion( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -264,7 +266,7 @@ async def test_protein_deletion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_deletion(all_fixtures, test_tokenizer, test_classifier, val_params): """Test that cdna deletion validator works correctly""" fixture_name = "cdna_deletion" @@ -279,7 +281,7 @@ async def test_cdna_deletion(all_fixtures, test_tokenizer, test_classifier, val_ ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_deletion( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -296,7 +298,7 @@ async def test_genomic_deletion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_deletion_ambiguous( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -313,7 +315,7 @@ async def test_genomic_deletion_ambiguous( ) 
-@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_protein_insertion( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -330,7 +332,7 @@ async def test_protein_insertion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_cdna_insertion( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -347,7 +349,7 @@ async def test_cdna_insertion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_insertion( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -364,7 +366,7 @@ async def test_genomic_insertion( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_duplication( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -381,7 +383,7 @@ async def test_genomic_duplication( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_duplication_ambiguous( all_fixtures, test_tokenizer, test_classifier, val_params ): @@ -398,7 +400,7 @@ async def test_genomic_duplication_ambiguous( ) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_amplification(all_fixtures, test_tokenizer, test_classifier, val_params): """Test that amplification validator works correctly""" fixture_name = "amplification" diff --git a/tests/to_copy_number_variation/test_hgvs_to_copy_number.py b/tests/to_copy_number_variation/test_hgvs_to_copy_number.py index 3e950cbc..d2e3a9b5 100644 --- a/tests/to_copy_number_variation/test_hgvs_to_copy_number.py +++ b/tests/to_copy_number_variation/test_hgvs_to_copy_number.py @@ -679,7 +679,7 @@ def genomic_del6_cx_37(genomic_del6_37_loc): return models.CopyNumberChange(**params) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup1_copy_number_count( test_cnv_handler, genomic_dup1_38_cn, genomic_dup1_cn_37 ): @@ -712,7 +712,7 @@ async def test_genomic_dup1_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup1_copy_number_change( test_cnv_handler, 
genomic_dup1_cx_38, genomic_dup1_cx_37 ): @@ -735,7 +735,7 @@ async def test_genomic_dup1_copy_number_change( cnv_assertion_checks(resp, genomic_dup1_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup2_copy_number_count( test_cnv_handler, genomic_dup2_38_cn, genomic_dup2_cn_37 ): @@ -766,7 +766,7 @@ async def test_genomic_dup2_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup2_copy_number_change( test_cnv_handler, genomic_dup2_cx_38, genomic_dup2_cx_37 ): @@ -789,7 +789,7 @@ async def test_genomic_dup2_copy_number_change( cnv_assertion_checks(resp, genomic_dup2_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup3_copy_number_count( test_cnv_handler, genomic_del3_dup3_cn_38, genomic_del3_dup3_cn_37 ): @@ -820,7 +820,7 @@ async def test_genomic_dup3_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup3_copy_number_change( test_cnv_handler, genomic_dup3_cx_38, genomic_dup3_cx_37 ): @@ -843,7 +843,7 @@ async def test_genomic_dup3_copy_number_change( cnv_assertion_checks(resp, genomic_dup3_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup4_copy_number_count( test_cnv_handler, genomic_dup4_cn_38, genomic_dup4_cn_37 ): @@ -874,7 +874,7 @@ async def test_genomic_dup4_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup4_copy_number_change( test_cnv_handler, genomic_dup4_cx_38, genomic_dup4_cx_37 ): @@ -897,7 +897,7 @@ async def test_genomic_dup4_copy_number_change( cnv_assertion_checks(resp, genomic_dup4_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup5_copy_number_count( test_cnv_handler, genomic_dup5_cn_38, genomic_dup5_cn_37 ): @@ -928,7 +928,7 @@ async def test_genomic_dup5_copy_number_count( cnv_assertion_checks(resp, 
expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup5_copy_number_change( test_cnv_handler, genomic_dup5_cx_38, genomic_dup5_cx_37 ): @@ -951,7 +951,7 @@ async def test_genomic_dup5_copy_number_change( cnv_assertion_checks(resp, genomic_dup5_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup6_copy_number_count( test_cnv_handler, genomic_dup6_cn_38, genomic_dup6_cn_37 ): @@ -982,7 +982,7 @@ async def test_genomic_dup6_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_dup6_copy_number_change( test_cnv_handler, genomic_dup6_cx_38, genomic_dup6_cx_37 ): @@ -1005,7 +1005,7 @@ async def test_genomic_dup6_copy_number_change( cnv_assertion_checks(resp, genomic_dup6_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del1_copy_number_count( test_cnv_handler, genomic_del1_38_cn, genomic_del1_cn_37 ): @@ -1036,7 +1036,7 @@ async def test_genomic_del1_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del1_copy_number_change( test_cnv_handler, genomic_del1_cx_38, genomic_del1_cx_37 ): @@ -1059,7 +1059,7 @@ async def test_genomic_del1_copy_number_change( cnv_assertion_checks(resp, genomic_del1_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del2_copy_number_count( test_cnv_handler, genomic_del2_38_cn, genomic_del2_cn_37 ): @@ -1090,7 +1090,7 @@ async def test_genomic_del2_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del2_copy_number_change( test_cnv_handler, genomic_del2_cx_38, genomic_del2_cx_37 ): @@ -1113,7 +1113,7 @@ async def test_genomic_del2_copy_number_change( cnv_assertion_checks(resp, genomic_del2_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del3_copy_number_count( test_cnv_handler, 
genomic_del3_dup3_cn_38, genomic_del3_dup3_cn_37 ): @@ -1144,7 +1144,7 @@ async def test_genomic_del3_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del3_copy_number_change( test_cnv_handler, genomic_del3_cx_38, genomic_del3_cx_37 ): @@ -1167,7 +1167,7 @@ async def test_genomic_del3_copy_number_change( cnv_assertion_checks(resp, genomic_del3_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del4_copy_number_count( test_cnv_handler, genomic_del4_cn_38, genomic_del4_cn_37 ): @@ -1198,7 +1198,7 @@ async def test_genomic_del4_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del4_copy_number_change( test_cnv_handler, genomic_del4_cx_38, genomic_del4_cx_37 ): @@ -1221,7 +1221,7 @@ async def test_genomic_del4_copy_number_change( cnv_assertion_checks(resp, genomic_del4_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del5_copy_number_count( test_cnv_handler, genomic_del5_cn_38, genomic_del5_cn_37 ): @@ -1252,7 +1252,7 @@ async def test_genomic_del5_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del5_copy_number_change( test_cnv_handler, genomic_del5_cx_38, genomic_del5_cx_37 ): @@ -1275,7 +1275,7 @@ async def test_genomic_del5_copy_number_change( cnv_assertion_checks(resp, genomic_del5_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del6_copy_number_count( test_cnv_handler, genomic_del6_cn_38, genomic_del6_cn_37 ): @@ -1306,7 +1306,7 @@ async def test_genomic_del6_copy_number_count( cnv_assertion_checks(resp, expected) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_genomic_del6_copy_number_change( test_cnv_handler, genomic_del6_cx_38, genomic_del6_cx_37 ): @@ -1329,7 +1329,7 @@ async def test_genomic_del6_copy_number_change( cnv_assertion_checks(resp, 
genomic_del6_cx_38) -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_invalid_cnv(test_cnv_handler): """Check that invalid input return warnings""" q = "DAG1 g.49568695dup" diff --git a/tests/to_copy_number_variation/test_parsed_to_copy_number.py b/tests/to_copy_number_variation/test_parsed_to_copy_number.py index d147b947..3e4ae807 100644 --- a/tests/to_copy_number_variation/test_parsed_to_copy_number.py +++ b/tests/to_copy_number_variation/test_parsed_to_copy_number.py @@ -303,7 +303,7 @@ def test_get_parsed_ac_chr(test_cnv_handler): test_cnv_handler._get_parsed_ac_chr("NC_00000713", False) assert ( str(e.value) == "SeqRepo unable to get translated identifiers for NC_00000713" - ) # noqa: E501 + ) def test_validate_pos(test_cnv_handler): @@ -888,7 +888,9 @@ def test_invalid(test_cnv_handler): assert resp.warnings == ["hg18 assembly is not currently supported"] # Must give both assembly + chromosome or accession - ac_assembly_chr_msg = "Must provide either `accession` or both `assembly` and `chromosome`" # noqa: E501 + ac_assembly_chr_msg = ( + "Must provide either `accession` or both `assembly` and `chromosome`" + ) with pytest.raises(ValidationError) as e: ParsedToCxVarQuery( start0=31738809, diff --git a/variation/__init__.py b/variation/__init__.py index ed2efae6..ed36bbc3 100644 --- a/variation/__init__.py +++ b/variation/__init__.py @@ -3,7 +3,7 @@ from os import environ if "VARIATION_NORM_EB_PROD" in environ: - LOG_FN = "/tmp/variation.log" + LOG_FN = "/var/variation.log" else: LOG_FN = "variation.log" diff --git a/variation/classifiers/__init__.py b/variation/classifiers/__init__.py index c174ba7c..6b68ec5f 100644 --- a/variation/classifiers/__init__.py +++ b/variation/classifiers/__init__.py @@ -21,3 +21,28 @@ from .protein_reference_agree import ProteinReferenceAgreeClassifier from .protein_stop_gain_classifier import ProteinStopGainClassifier from .protein_substitution_classifier import ProteinSubstitutionClassifier + +__all__ = [ + 
"AmplificationClassifier", + "CdnaDeletionClassifier", + "CdnaDelInsClassifier", + "CdnaInsertionClassifier", + "CdnaReferenceAgreeClassifier", + "CdnaSubstitutionClassifier", + "GenomicDeletionAmbiguousClassifier", + "GenomicDeletionClassifier", + "GenomicDelInsClassifier", + "GenomicDuplicationAmbiguousClassifier", + "GenomicDuplicationClassifier", + "GenomicInsertionClassifier", + "GenomicReferenceAgreeClassifier", + "GenomicSubstitutionClassifier", + "GnomadVcfClassifier", + "HgvsClassifier", + "ProteinDeletionClassifier", + "ProteinDelInsClassifier", + "ProteinInsertionClassifier", + "ProteinReferenceAgreeClassifier", + "ProteinStopGainClassifier", + "ProteinSubstitutionClassifier", +] diff --git a/variation/classifiers/cdna_substitution_classifier.py b/variation/classifiers/cdna_substitution_classifier.py index 6d4dce79..aad05ba1 100644 --- a/variation/classifiers/cdna_substitution_classifier.py +++ b/variation/classifiers/cdna_substitution_classifier.py @@ -48,3 +48,5 @@ def match(self, tokens: List[Token]) -> Optional[CdnaSubstitutionClassification] ref=cdna_sub_token.ref, alt=cdna_sub_token.alt, ) + + return None diff --git a/variation/classifiers/classifier.py b/variation/classifiers/classifier.py index 400a7e80..beaea3ce 100644 --- a/variation/classifiers/classifier.py +++ b/variation/classifiers/classifier.py @@ -25,7 +25,6 @@ def exact_match_candidates(self) -> List[List[TokenType]]: :return: List of list of tokens, where order matters, that represent a given classification. """ - pass def can_classify(self, tokens: List[Token]) -> bool: """Return whether or not a list of tokens can be classified by a given @@ -35,7 +34,7 @@ def can_classify(self, tokens: List[Token]) -> bool: :return: `True` if a list of tokens matches the tokens needed, where order matters, to represent a given classification. `False`, otherwise. 
""" - token_types = list(map(lambda t: t.token_type, tokens)) + token_types = [t.token_type for t in tokens] exact_matches: List[List[str]] = [] for candidate in self.exact_match_candidates(): diff --git a/variation/classifiers/genomic_substitution_classifier.py b/variation/classifiers/genomic_substitution_classifier.py index 4f39b303..0c2c47a7 100644 --- a/variation/classifiers/genomic_substitution_classifier.py +++ b/variation/classifiers/genomic_substitution_classifier.py @@ -49,3 +49,5 @@ def match(self, tokens: List[Token]) -> Optional[GenomicSubstitutionClassificati ref=genomic_sub_token.ref, alt=genomic_sub_token.alt, ) + + return None diff --git a/variation/classifiers/gnomad_vcf_classifier.py b/variation/classifiers/gnomad_vcf_classifier.py index 6ebfae89..9a5b8a3e 100644 --- a/variation/classifiers/gnomad_vcf_classifier.py +++ b/variation/classifiers/gnomad_vcf_classifier.py @@ -56,11 +56,11 @@ def match( if ref == alt: return GenomicReferenceAgreeClassification(**params) - else: - params["ref"] = ref - params["alt"] = alt - return GenomicSubstitutionClassification(**params) + params["ref"] = ref + params["alt"] = alt + + return GenomicSubstitutionClassification(**params) # delins params["pos0"] = token.pos diff --git a/variation/classifiers/hgvs_classifier.py b/variation/classifiers/hgvs_classifier.py index 073ac996..f81b8391 100644 --- a/variation/classifiers/hgvs_classifier.py +++ b/variation/classifiers/hgvs_classifier.py @@ -117,12 +117,14 @@ def _protein_classification( if params["alt"] in {"Ter", "*"}: params["alt"] = "*" return ProteinStopGainClassification(**params) - else: - return ProteinSubstitutionClassification(**params) - elif classification_type == ClassificationType.PROTEIN_REFERENCE_AGREE: + + return ProteinSubstitutionClassification(**params) + + if classification_type == ClassificationType.PROTEIN_REFERENCE_AGREE: params["pos"] = int(params["pos"]) return ProteinReferenceAgreeClassification(**params) - elif classification_type == 
ClassificationType.PROTEIN_DELINS: + + if classification_type == ClassificationType.PROTEIN_DELINS: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -130,7 +132,8 @@ def _protein_classification( else params["pos1"] ) return ProteinDelInsClassification(**params) - elif classification_type == ClassificationType.PROTEIN_DELETION: + + if classification_type == ClassificationType.PROTEIN_DELETION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -138,7 +141,8 @@ def _protein_classification( else params["pos1"] ) return ProteinDeletionClassification(**params) - elif classification_type == ClassificationType.PROTEIN_INSERTION: + + if classification_type == ClassificationType.PROTEIN_INSERTION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -170,10 +174,12 @@ def _cdna_classification( if classification_type == ClassificationType.CDNA_SUBSTITUTION: params["pos"] = int(params["pos"]) return CdnaSubstitutionClassification(**params) - elif classification_type == ClassificationType.CDNA_REFERENCE_AGREE: + + if classification_type == ClassificationType.CDNA_REFERENCE_AGREE: params["pos"] = int(params["pos"]) return CdnaReferenceAgreeClassification(**params) - elif classification_type == ClassificationType.CDNA_DELINS: + + if classification_type == ClassificationType.CDNA_DELINS: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -181,7 +187,8 @@ def _cdna_classification( else params["pos1"] ) return CdnaDelInsClassification(**params) - elif classification_type == ClassificationType.CDNA_DELETION: + + if classification_type == ClassificationType.CDNA_DELETION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -189,7 +196,8 @@ def _cdna_classification( else params["pos1"] ) return CdnaDeletionClassification(**params) - elif classification_type == ClassificationType.CDNA_INSERTION: + + if classification_type == 
ClassificationType.CDNA_INSERTION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -198,6 +206,8 @@ def _cdna_classification( ) return CdnaInsertionClassification(**params) + return None + def _genomic_classification( self, token: HgvsToken, params: Dict ) -> Optional[Classification]: @@ -220,10 +230,12 @@ def _genomic_classification( if classification_type == ClassificationType.GENOMIC_SUBSTITUTION: params["pos"] = int(params["pos"]) return GenomicSubstitutionClassification(**params) - elif classification_type == ClassificationType.GENOMIC_REFERENCE_AGREE: + + if classification_type == ClassificationType.GENOMIC_REFERENCE_AGREE: params["pos"] = int(params["pos"]) return GenomicReferenceAgreeClassification(**params) - elif classification_type == ClassificationType.GENOMIC_DELINS: + + if classification_type == ClassificationType.GENOMIC_DELINS: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -231,7 +243,8 @@ def _genomic_classification( else params["pos1"] ) return GenomicDelInsClassification(**params) - elif classification_type == ClassificationType.GENOMIC_INSERTION: + + if classification_type == ClassificationType.GENOMIC_INSERTION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -239,7 +252,8 @@ def _genomic_classification( else params["pos1"] ) return GenomicInsertionClassification(**params) - elif classification_type == ClassificationType.GENOMIC_DELETION: + + if classification_type == ClassificationType.GENOMIC_DELETION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -247,7 +261,8 @@ def _genomic_classification( else params["pos1"] ) return GenomicDeletionClassification(**params) - elif classification_type == ClassificationType.GENOMIC_DUPLICATION: + + if classification_type == ClassificationType.GENOMIC_DUPLICATION: params["pos0"] = int(params["pos0"]) params["pos1"] = ( int(params["pos1"]) @@ -256,6 +271,8 @@ def 
_genomic_classification( ) return GenomicDuplicationClassification(**params) + return None + def _genomic_ambiguous_classification( self, token: HgvsToken, params: Dict ) -> Optional[Classification]: @@ -271,7 +288,8 @@ def _genomic_ambiguous_classification( """ if token.token.endswith("dup"): return self._genomic_dup_ambiguous_classification(token, params) - elif token.token.endswith("del"): + + if token.token.endswith("del"): return self._genomic_del_ambiguous_classification(token, params) return None diff --git a/variation/classify.py b/variation/classify.py index cd2db3a3..cd1a4728 100644 --- a/variation/classify.py +++ b/variation/classify.py @@ -1,5 +1,5 @@ """Module for classification.""" -from typing import List, Optional +from typing import ClassVar, List, Optional from variation.classifiers import ( AmplificationClassifier, @@ -35,7 +35,7 @@ class Classify: hgvs_classifier = HgvsClassifier() gnomad_vcf_classifier = GnomadVcfClassifier() - classifiers: List[Classifier] = [ + classifiers: ClassVar[List[Classifier]] = [ ProteinDelInsClassifier(), ProteinSubstitutionClassifier(), ProteinStopGainClassifier(), diff --git a/variation/gnomad_vcf_to_protein_variation.py b/variation/gnomad_vcf_to_protein_variation.py index e71dea54..77a39aff 100644 --- a/variation/gnomad_vcf_to_protein_variation.py +++ b/variation/gnomad_vcf_to_protein_variation.py @@ -1,5 +1,5 @@ """Module for translating VCF-like to protein VRS Allele representation""" -from datetime import datetime +import datetime from typing import List, Optional, Tuple from cool_seq_tool.handlers import SeqRepoAccess @@ -193,16 +193,17 @@ async def _get_valid_result( """ tokens = self.tokenizer.perform(vcf_query, warnings) if not tokens: - raise GnomadVcfToProteinError("No tokens found") + msg = "No tokens found" + raise GnomadVcfToProteinError(msg) classification = self.classifier.perform(tokens) if not classification: - raise GnomadVcfToProteinError("No classification found") + msg = "No classification 
found" + raise GnomadVcfToProteinError(msg) if classification.nomenclature != Nomenclature.GNOMAD_VCF: - raise GnomadVcfToProteinError( - f"{vcf_query} is not a gnomAD VCF-like query (`chr-pos-ref-alt`)" - ) + msg = f"{vcf_query} is not a gnomAD VCF-like query (`chr-pos-ref-alt`)" + raise GnomadVcfToProteinError(msg) validation_summary = await self.validator.perform(classification) valid_results = validation_summary.valid_results @@ -213,10 +214,9 @@ async def _get_valid_result( reverse=True, ) return valid_results[0] - else: - raise GnomadVcfToProteinError( - f"{vcf_query} is not a valid gnomad vcf query" - ) + + msg = f"{vcf_query} is not a valid gnomad vcf query" + raise GnomadVcfToProteinError(msg) @staticmethod def _get_alt_type_and_prefix_match( @@ -359,7 +359,8 @@ def _dna_to_aa(dna_seq: str, strand: Strand) -> str: elif char == "C": rna_seq += "G" else: - raise ValueError(f"{char} is not a supported nucleotide") + msg = f"{char} is not a supported nucleotide" + raise ValueError(msg) else: # We only need to replace T/U for DNA->RNA rna_seq = dna_seq.replace("T", "U") @@ -398,7 +399,8 @@ def _get_protein_representation( try: variation = normalize(variation, self.seqrepo_access) except (KeyError, AttributeError) as e: - raise GnomadVcfToProteinError(f"VRS-Python unable to normalize allele: {e}") + msg = f"VRS-Python unable to normalize allele: {e}" + raise GnomadVcfToProteinError(msg) from e # Add VRS digests for VRS Allele and VRS Sequence Location variation.id = ga4gh_identify(variation) @@ -459,7 +461,8 @@ async def gnomad_vcf_to_protein(self, vcf_query: str) -> GnomadVcfToProteinServi variation=variation, warnings=warnings, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) @@ -498,7 +501,8 @@ async def gnomad_vcf_to_protein(self, vcf_query: str) -> GnomadVcfToProteinServi variation=variation, warnings=warnings, 
service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) @@ -516,7 +520,8 @@ async def gnomad_vcf_to_protein(self, vcf_query: str) -> GnomadVcfToProteinServi variation=variation, warnings=warnings, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) @@ -538,7 +543,8 @@ async def gnomad_vcf_to_protein(self, vcf_query: str) -> GnomadVcfToProteinServi variation=variation, warnings=warnings, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) @@ -582,6 +588,7 @@ async def gnomad_vcf_to_protein(self, vcf_query: str) -> GnomadVcfToProteinServi gene_context=self._get_gene_context(p_data.gene), warnings=warnings, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) diff --git a/variation/main.py b/variation/main.py index d83b87a3..927846de 100644 --- a/variation/main.py +++ b/variation/main.py @@ -1,6 +1,6 @@ """Main application for FastAPI.""" +import datetime import traceback -from datetime import datetime from enum import Enum from typing import List, Optional, Union from urllib.parse import unquote @@ -69,7 +69,7 @@ class Tag(Enum): contact={ "name": "Alex H. 
Wagner", "email": "Alex.Wagner@nationwidechildrens.org", - "url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab", # noqa: E501 + "url": "https://www.nationwidechildrens.org/specialties/institute-for-genomic-medicine/research-labs/wagner-lab", }, license={ "name": "MIT", @@ -113,8 +113,7 @@ async def to_vrs( :param q: HGVS, gnomAD VCF or Free Text description on GRCh37 or GRCh38 assembly :return: ToVRSService model for variation """ - resp = await query_handler.to_vrs_handler.to_vrs(unquote(q)) - return resp + return await query_handler.to_vrs_handler.to_vrs(unquote(q)) normalize_summary = ( @@ -149,11 +148,11 @@ async def normalize( ), baseline_copies: Optional[int] = Query( None, - description="Baseline copies for HGVS duplications and deletions represented as Copy Number Count Variation", # noqa: E501 + description="Baseline copies for HGVS duplications and deletions represented as Copy Number Count Variation", ), copy_change: Optional[models.CopyChange] = Query( None, - description="The copy change for HGVS duplications and deletions represented as Copy Number Change Variation.", # noqa: E501 + description="The copy change for HGVS duplications and deletions represented as Copy Number Change Variation.", ), ) -> NormalizeService: """Normalize and translate a HGVS, gnomAD VCF or Free Text description on GRCh37 @@ -171,13 +170,12 @@ async def normalize( query. 
:return: NormalizeService for variation """ - normalize_resp = await query_handler.normalize_handler.normalize( + return await query_handler.normalize_handler.normalize( unquote(q), hgvs_dup_del_mode=hgvs_dup_del_mode, baseline_copies=baseline_copies, copy_change=copy_change, ) - return normalize_resp @app.get( @@ -219,7 +217,8 @@ def translate_identifier( warnings=warnings, aliases=aliases, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) @@ -259,7 +258,7 @@ def vrs_python_translate_from( :return: TranslateFromService containing VRS Allele object """ variation_query = unquote(variation.strip()) - warnings = list() + warnings = [] vrs_variation = None try: resp = query_handler.vrs_python_tlr.translate_from(variation_query, fmt) @@ -281,7 +280,8 @@ def vrs_python_translate_from( warnings=warnings, variation=vrs_variation, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), vrs_python_meta_=VrsPythonMeta( version=pkg_resources.get_distribution("ga4gh.vrs").version @@ -289,12 +289,16 @@ def vrs_python_translate_from( ) -g_to_p_summary = "Given GRCh38 gnomAD VCF, return VRS Variation object on MANE protein coordinate." # noqa: E501 +g_to_p_summary = ( + "Given GRCh38 gnomAD VCF, return VRS Variation object on MANE protein coordinate." +) g_to_p_response_description = "A response to a validly-formed query." g_to_p_description = ( "Return VRS Variation object on protein coordinate for variation provided." ) -q_description = "GRCh38 gnomAD VCF (chr-pos-ref-alt) to normalize to MANE protein variation." # noqa: E501 +q_description = ( + "GRCh38 gnomAD VCF (chr-pos-ref-alt) to normalize to MANE protein variation." 
+) @app.get( @@ -315,8 +319,7 @@ async def gnomad_vcf_to_protein( :return: GnomadVcfToProteinService for variation """ q = unquote(q.strip()) - resp = await query_handler.gnomad_vcf_to_protein_handler.gnomad_vcf_to_protein(q) - return resp + return await query_handler.gnomad_vcf_to_protein_handler.gnomad_vcf_to_protein(q) hgvs_dup_del_mode_decsr = ( @@ -367,11 +370,11 @@ async def vrs_python_translate_to(request_body: TranslateToQuery) -> TranslateTo """ query = request_body request_body = request_body.model_dump(by_alias=True) - warnings = list() + warnings = [] allele = _get_allele(request_body, warnings) - variations = list() + variations = [] if allele: try: variations = query_handler.vrs_python_tlr.translate_to( @@ -385,7 +388,8 @@ async def vrs_python_translate_to(request_body: TranslateToQuery) -> TranslateTo warnings=warnings, variations=variations, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), vrs_python_meta_=VrsPythonMeta( version=pkg_resources.get_distribution("ga4gh.vrs").version @@ -424,11 +428,11 @@ async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> TranslateToS """ query = request_body request_body = request_body.model_dump(by_alias=True) - warnings = list() + warnings = [] allele = _get_allele(request_body, warnings) - variations = list() + variations = [] if allele: try: variations = query_handler.vrs_python_tlr._to_hgvs( @@ -442,7 +446,8 @@ async def vrs_python_to_hgvs(request_body: TranslateToHGVSQuery) -> TranslateToS warnings=warnings, variations=variations, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), vrs_python_meta_=VrsPythonMeta( version=pkg_resources.get_distribution("ga4gh.vrs").version @@ -475,12 +480,11 @@ async def hgvs_to_copy_number_count( :param 
do_liftover: Whether or not to liftover to GRCh38 assembly :return: HgvsToCopyNumberCountService """ - resp = await query_handler.to_copy_number_handler.hgvs_to_copy_number_count( + return await query_handler.to_copy_number_handler.hgvs_to_copy_number_count( unquote(hgvs_expr.strip()), baseline_copies, do_liftover, ) - return resp @app.get( @@ -506,12 +510,11 @@ async def hgvs_to_copy_number_change( :param do_liftover: Whether or not to liftover to GRCh38 assembly :return: HgvsToCopyNumberChangeService """ - resp = await query_handler.to_copy_number_handler.hgvs_to_copy_number_change( + return await query_handler.to_copy_number_handler.hgvs_to_copy_number_change( unquote(hgvs_expr.strip()), copy_change, do_liftover, ) - return resp @app.post( @@ -541,7 +544,8 @@ def parsed_to_cn_var(request_body: ParsedToCnVarQuery) -> ParsedToCnVarService: copy_number_count=None, warnings=["Unhandled exception. See logs for more details."], service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) else: @@ -575,7 +579,8 @@ def parsed_to_cx_var(request_body: ParsedToCxVarQuery) -> ParsedToCxVarService: copy_number_count=None, warnings=["Unhandled exception. 
See logs for more details."], service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) else: @@ -623,13 +628,12 @@ def amplification_to_cx_var( :return: AmplificationToCxVarService containing Copy Number Change and list of warnings """ - resp = query_handler.to_copy_number_handler.amplification_to_cx_var( + return query_handler.to_copy_number_handler.amplification_to_cx_var( gene=gene, sequence_id=sequence_id, start=start, end=end, ) - return resp @app.get( @@ -671,7 +675,10 @@ async def p_to_c( return ToCdnaService( c_data=c_data, warnings=[w] if w else [], - service_meta=ServiceMeta(version=__version__, response_datetime=datetime.now()), + service_meta=ServiceMeta( + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), + ), ) @@ -679,7 +686,7 @@ async def p_to_c( "/variation/alignment_mapper/c_to_g", summary="Translate cDNA representation to genomic representation", response_description="A response to a validly-formed query.", - description="Given cDNA accession and positions for codon(s), return associated genomic" # noqa: E501 + description="Given cDNA accession and positions for codon(s), return associated genomic" " accession and positions for a given target genome assembly", response_model=ToGenomicService, response_model_exclude_none=True, @@ -728,7 +735,10 @@ async def c_to_g( return ToGenomicService( g_data=g_data, warnings=[w] if w else [], - service_meta=ServiceMeta(version=__version__, response_datetime=datetime.now()), + service_meta=ServiceMeta( + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), + ), ) @@ -779,5 +789,8 @@ async def p_to_g( return ToGenomicService( g_data=g_data, warnings=[w] if w else [], - service_meta=ServiceMeta(version=__version__, response_datetime=datetime.now()), + service_meta=ServiceMeta( + version=__version__, + 
response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), + ), ) diff --git a/variation/normalize.py b/variation/normalize.py index 37b05fc5..699d7f81 100644 --- a/variation/normalize.py +++ b/variation/normalize.py @@ -1,5 +1,5 @@ """Module for Variation Normalization.""" -from datetime import datetime +import datetime from typing import List, Optional, Tuple from urllib.parse import unquote @@ -129,10 +129,12 @@ def get_hgvs_dup_del_mode( if not hgvs_dup_del_mode: hgvs_dup_del_mode = HGVSDupDelModeOption.DEFAULT - if hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_COUNT: - if not baseline_copies: - warning = f"{hgvs_dup_del_mode.value} mode requires `baseline_copies`" # noqa: E501 - return None, warning + if ( + hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_COUNT + and not baseline_copies + ): + warning = f"{hgvs_dup_del_mode.value} mode requires `baseline_copies`" + return None, warning return hgvs_dup_del_mode, warning @@ -163,7 +165,8 @@ async def normalize( "variation": variation, "warnings": warnings, "service_meta_": ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), } @@ -221,9 +224,7 @@ async def normalize( translation_result.vrs_seq_loc_ac_status == VrsSeqLocAcStatus.NA ): - classification_type = ( - translation_result.validation_result.classification.classification_type.value - ) + classification_type = translation_result.validation_result.classification.classification_type.value if classification_type.startswith(("protein", "cdna")): # Only supports protein/cDNA at the moment warnings.append("Unable to find MANE representation") diff --git a/variation/query.py b/variation/query.py index 60c6a4f3..c0c68a3e 100644 --- a/variation/query.py +++ b/variation/query.py @@ -64,11 +64,10 @@ def __init__( translator, ] self.to_vrs_handler = ToVRS(*to_vrs_params) - normalize_params = to_vrs_params + [uta_db] - 
self.normalize_handler = Normalize(*normalize_params) + self.normalize_handler = Normalize(*[*to_vrs_params, uta_db]) self.gnomad_vcf_to_protein_handler = GnomadVcfToProteinVariation( - *to_vrs_params + [mane_transcript, gene_query_handler] + *[*to_vrs_params, mane_transcript, gene_query_handler] ) self.to_copy_number_handler = ToCopyNumberVariation( - *to_vrs_params + [gene_query_handler, uta_db] + *[*to_vrs_params, gene_query_handler, uta_db] ) diff --git a/variation/regex.py b/variation/regex.py index 7a4528c9..182e98c7 100644 --- a/variation/regex.py +++ b/variation/regex.py @@ -41,15 +41,15 @@ ) PROTEIN_INSERTION = re.compile( - r"^(?P[a-zA-z]+)(?P\d+)_(?P[a-zA-z]+)(?P\d+)ins(?P[a-zA-z]+)$" # noqa: E501 + r"^(?P[a-zA-z]+)(?P\d+)_(?P[a-zA-z]+)(?P\d+)ins(?P[a-zA-z]+)$" ) PROTEIN_DELINS = re.compile( - r"^(?P[a-zA-z]+)(?P\d+)(_(?P[a-zA-z]+)(?P\d+))?delins(?P[a-zA-z]+)$" # noqa: E501 + r"^(?P[a-zA-z]+)(?P\d+)(_(?P[a-zA-z]+)(?P\d+))?delins(?P[a-zA-z]+)$" ) PROTEIN_DELETION = re.compile( - r"^(?P[a-zA-z]+)(?P\d+)(_(?P[a-zA-z]+)(?P\d+))?del(?P[a-zA-z]+)?$" # noqa: E501 + r"^(?P[a-zA-z]+)(?P\d+)(_(?P[a-zA-z]+)(?P\d+))?del(?P[a-zA-z]+)?$" ) PROTEIN_REFERENCE_AGREE = re.compile(r"^(?P[a-zA-z]+)(?P\d+)=$") diff --git a/variation/schemas/__init__.py b/variation/schemas/__init__.py index 04fec28a..e6bfc5d6 100644 --- a/variation/schemas/__init__.py +++ b/variation/schemas/__init__.py @@ -1,3 +1,5 @@ """Package level import.""" from .normalize_response_schema import NormalizeService, ServiceMeta from .to_vrs_response_schema import ToVRSService + +__all__ = ["NormalizeService", "ServiceMeta", "ToVRSService"] diff --git a/variation/schemas/copy_number_schema.py b/variation/schemas/copy_number_schema.py index 991a2717..6dcbb8b0 100644 --- a/variation/schemas/copy_number_schema.py +++ b/variation/schemas/copy_number_schema.py @@ -55,17 +55,19 @@ def validate_parsed_fields(cls, v: Dict) -> Dict: - `end_pos_comparator` is required when `end_pos_type` is an Indefinite Range - End 
positions must be greater than start positions """ - ac_assembly_chr_msg = "Must provide either `accession` or both `assembly` and `chromosome`" # noqa: E501 + ac_assembly_chr_msg = ( + "Must provide either `accession` or both `assembly` and `chromosome`" + ) assembly = v.assembly chromosome = v.chromosome assembly_chr_set = assembly and chromosome - assert v.accession or assembly_chr_set, ac_assembly_chr_msg # noqa: E501 + assert v.accession or assembly_chr_set, ac_assembly_chr_msg if assembly_chr_set: pattern = r"^chr(X|Y|([1-9]|1[0-9]|2[0-2]))$" assert re.match( pattern, chromosome - ), f"`chromosome`, {chromosome}, does not match r'{pattern}'" # noqa: E501 + ), f"`chromosome`, {chromosome}, does not match r'{pattern}'" start0 = v.start0 start1 = v.start1 @@ -75,7 +77,7 @@ def validate_parsed_fields(cls, v: Dict) -> Dict: elif v.start_pos_type == ParsedPosType.INDEFINITE_RANGE: assert ( v.start_pos_comparator - ), "`start_pos_comparator` is required for indefinite ranges" # noqa: E501 + ), "`start_pos_comparator` is required for indefinite ranges" end0 = v.end0 end1 = v.end1 @@ -85,7 +87,7 @@ def validate_parsed_fields(cls, v: Dict) -> Dict: elif v.end_pos_type == ParsedPosType.INDEFINITE_RANGE: assert ( v.end_pos_comparator - ), "`end_pos_comparator` is required for indefinite ranges" # noqa: E501 + ), "`end_pos_comparator` is required for indefinite ranges" err_msg = "end positions must be greater than start" if start1 is None: @@ -216,13 +218,9 @@ def validate_fields(cls, v: Dict) -> Dict: copies_comparator = v.copies_comparator if copies_type == ParsedPosType.DEFINITE_RANGE: - assert ( - copies1 - ), "`copies1` must be provided for `copies_type == ParsedPosType.DEFINITE_RANGE`" # noqa: E501 + assert copies1, "`copies1` must be provided for `copies_type == ParsedPosType.DEFINITE_RANGE`" elif copies_type == ParsedPosType.INDEFINITE_RANGE: - assert ( - copies_comparator - ), "`copies_comparator` must be provided for `copies_type == 
ParsedPosType.INDEFINITE_RANGE`" # noqa: E501 + assert copies_comparator, "`copies_comparator` must be provided for `copies_type == ParsedPosType.INDEFINITE_RANGE`" return v diff --git a/variation/schemas/translation_response_schema.py b/variation/schemas/translation_response_schema.py index 2652ae85..73ab363f 100644 --- a/variation/schemas/translation_response_schema.py +++ b/variation/schemas/translation_response_schema.py @@ -21,7 +21,7 @@ class VrsSeqLocAcStatus(str, Enum): NA = "na" -AC_PRIORITY_LABELS = [m for m in VrsSeqLocAcStatus.__members__.values()] +AC_PRIORITY_LABELS = list(VrsSeqLocAcStatus.__members__.values()) class TranslationResult(BaseModel): diff --git a/variation/to_copy_number_variation.py b/variation/to_copy_number_variation.py index 3c71f393..60745779 100644 --- a/variation/to_copy_number_variation.py +++ b/variation/to_copy_number_variation.py @@ -1,5 +1,5 @@ """Module for to copy number variation translation""" -from datetime import datetime +import datetime from typing import Dict, List, NamedTuple, Optional, Tuple, Union from urllib.parse import unquote @@ -216,7 +216,8 @@ async def hgvs_to_copy_number_count( hgvs_expr=hgvs_expr, warnings=warnings, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), copy_number_count=cn_var, ) @@ -248,7 +249,8 @@ async def hgvs_to_copy_number_change( hgvs_expr=hgvs_expr, warnings=warnings, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), copy_number_change=cx_var, ) @@ -290,9 +292,8 @@ def _get_parsed_ac( else: raise ToCopyNumberError(str(error)) else: - raise ToCopyNumberError( - f"{og_assembly.value} assembly is not currently supported" - ) + msg = f"{og_assembly.value} assembly is not currently supported" + raise ToCopyNumberError(msg) return 
ParsedAccessionSummary(lifted_over=lifted_over, accession=accession) @@ -324,7 +325,7 @@ def _get_parsed_ac_chr( chromosome = grch_record.split(":")[-1] if grch_record.startswith("GRCh38") or not do_liftover: - new_ac = [a for a in aliases if a.startswith("ga4gh")][0] + new_ac = next(a for a in aliases if a.startswith("ga4gh")) else: grch38_query = grch_record.replace("GRCh37", "GRCh38") aliases, error = self.seqrepo_access.translate_identifier( @@ -337,7 +338,8 @@ def _get_parsed_ac_chr( lifted_over = True new_ac = aliases[0] else: - raise ToCopyNumberError(f"Not a supported genomic accession: {accession}") + msg = f"Not a supported genomic accession: {accession}" + raise ToCopyNumberError(msg) return ParsedChromosomeSummary( accession=new_ac, chromosome=chromosome, lifted_over=lifted_over @@ -354,14 +356,15 @@ def _validate_ac_pos(self, accession: str, pos: int) -> None: try: ref = self.seqrepo_access.sr[accession][pos - 1] except ValueError as e: - raise ToCopyNumberError( - f"SeqRepo ValueError: {str(e).replace('start', 'Position')}" - ) - except KeyError: - raise ToCopyNumberError(f"Accession not found in SeqRepo: {accession}") + msg = f"SeqRepo ValueError: {str(e).replace('start', 'Position')}" + raise ToCopyNumberError(msg) from e + except KeyError as e: + msg = f"Accession not found in SeqRepo: {accession}" + raise ToCopyNumberError(msg) from e else: if ref == "": - raise ToCopyNumberError(f"Position ({pos}) is not valid on {accession}") + msg = f"Position ({pos}) is not valid on {accession}" + raise ToCopyNumberError(msg) from None def _get_vrs_loc_start_or_end( self, @@ -527,11 +530,10 @@ def _liftover_pos( chromosome, pos ) if not liftover: - raise ToCopyNumberError( - f"Unable to liftover: {chromosome} with pos {pos}" - ) - else: - liftover_pos[k] = liftover[0][1] + msg = f"Unable to liftover: {chromosome} with pos {pos}" + raise ToCopyNumberError(msg) + + liftover_pos[k] = liftover[0][1] return liftover_pos @@ -608,7 +610,8 @@ def 
parsed_to_copy_number( service_params = { "warnings": warnings, "service_meta_": ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), } @@ -645,7 +648,7 @@ def amplification_to_cx_var( :return: AmplificationToCxVarService containing Copy Number Change and list of warnings """ - warnings = list() + warnings = [] amplification_label = None variation = None try: @@ -715,6 +718,7 @@ def amplification_to_cx_var( copy_number_change=variation, warnings=warnings, service_meta_=ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), ) diff --git a/variation/to_vrs.py b/variation/to_vrs.py index 9a1bd2cf..eca7b347 100644 --- a/variation/to_vrs.py +++ b/variation/to_vrs.py @@ -1,5 +1,5 @@ """Module for to_vrs endpoint.""" -from datetime import datetime +import datetime from typing import List, Optional, Tuple from urllib.parse import unquote @@ -101,7 +101,8 @@ async def to_vrs(self, q: str) -> ToVRSService: "search_term": q, "variations": variations, "service_meta_": ServiceMeta( - version=__version__, response_datetime=datetime.now() + version=__version__, + response_datetime=datetime.datetime.now(tz=datetime.timezone.utc), ), "warnings": warnings, } diff --git a/variation/tokenizers/__init__.py b/variation/tokenizers/__init__.py index 0da19861..992e698c 100644 --- a/variation/tokenizers/__init__.py +++ b/variation/tokenizers/__init__.py @@ -18,3 +18,25 @@ from .protein_insertion import ProteinInsertion from .protein_reference_agree import ProteinReferenceAgree from .protein_substitution import ProteinSubstitution + +__all__ = [ + "CdnaGenomicReferenceAgree", + "CdnaDeletion", + "CdnaDelIns", + "CdnaInsertion", + "CdnaSubstitution", + "FreeTextCategorical", + "GeneSymbol", + "GenomicDeletion", + "GenomicDelIns", + "GenomicDuplication", + "GenomicInsertion", + 
"GenomicSubstitution", + "GnomadVCF", + "HGVS", + "ProteinDeletion", + "ProteinDelIns", + "ProteinInsertion", + "ProteinReferenceAgree", + "ProteinSubstitution", +] diff --git a/variation/tokenizers/cdna_and_genomic_reference_agree.py b/variation/tokenizers/cdna_and_genomic_reference_agree.py index f47ca537..ab69d65c 100644 --- a/variation/tokenizers/cdna_and_genomic_reference_agree.py +++ b/variation/tokenizers/cdna_and_genomic_reference_agree.py @@ -41,5 +41,8 @@ def match( if coordinate_type == AnnotationLayer.GENOMIC: return GenomicReferenceAgreeToken(**params) - elif coordinate_type == AnnotationLayer.CDNA: + + if coordinate_type == AnnotationLayer.CDNA: return CdnaReferenceAgreeToken(**params) + + return None diff --git a/variation/tokenizers/cdna_deletion.py b/variation/tokenizers/cdna_deletion.py index 01b6eb9a..9b949201 100644 --- a/variation/tokenizers/cdna_deletion.py +++ b/variation/tokenizers/cdna_deletion.py @@ -38,3 +38,5 @@ def match(self, input_string: str) -> Optional[CdnaDeletionToken]: pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, deleted_sequence=match_dict["deleted_sequence"], ) + + return None diff --git a/variation/tokenizers/cdna_delins.py b/variation/tokenizers/cdna_delins.py index 7169e015..beda5786 100644 --- a/variation/tokenizers/cdna_delins.py +++ b/variation/tokenizers/cdna_delins.py @@ -37,3 +37,5 @@ def match(self, input_string: str) -> Optional[CdnaDelInsToken]: pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, inserted_sequence=match_dict["inserted_sequence"], ) + + return None diff --git a/variation/tokenizers/cdna_insertion.py b/variation/tokenizers/cdna_insertion.py index 87ae61cd..2a0d5417 100644 --- a/variation/tokenizers/cdna_insertion.py +++ b/variation/tokenizers/cdna_insertion.py @@ -40,3 +40,5 @@ def match(self, input_string: str) -> Optional[CdnaInsertionToken]: pos1=pos1, inserted_sequence=inserted_sequence, ) + + return None diff --git a/variation/tokenizers/cdna_substitution.py 
b/variation/tokenizers/cdna_substitution.py index 7c166d85..59f36158 100644 --- a/variation/tokenizers/cdna_substitution.py +++ b/variation/tokenizers/cdna_substitution.py @@ -37,3 +37,5 @@ def match(self, input_string: str) -> Optional[CdnaSubstitutionToken]: ref=match_dict["ref"], alt=match_dict["alt"], ) + + return None diff --git a/variation/tokenizers/gene_symbol.py b/variation/tokenizers/gene_symbol.py index fde7cbe8..04e98c02 100644 --- a/variation/tokenizers/gene_symbol.py +++ b/variation/tokenizers/gene_symbol.py @@ -29,12 +29,11 @@ def match(self, input_string: str) -> Optional[GeneToken]: if norm_match_type != 0: gene = norm_resp.gene label = gene.label - gene_match_token = GeneToken( + return GeneToken( token=label, input_string=input_string, matched_value=label, gene=gene, ) - return gene_match_token return None diff --git a/variation/tokenizers/genomic_deletion.py b/variation/tokenizers/genomic_deletion.py index 0d2f93b3..8d9c1630 100644 --- a/variation/tokenizers/genomic_deletion.py +++ b/variation/tokenizers/genomic_deletion.py @@ -48,53 +48,53 @@ def match(self, input_string: str) -> Optional[GenomicDeletionToken]: pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, deleted_sequence=match_dict["deleted_sequence"], ) + + # Going to try ambiguous genomic duplications + match = GENOMIC_DELETION_AMBIGUOUS_1.match(input_string) + if match: + match_dict = match.groupdict() + pos0 = match_dict["pos0"] + pos1 = match_dict["pos1"] + pos2 = match_dict["pos2"] + pos3 = match_dict["pos3"] + + # (?_?)_(#_#), (#_#)_(?, ?), (?_?)_(?_?) are not supported + if not any(((pos0 == "?" and pos1 == "?"), (pos2 == "?" and pos3 == "?"))): + return GenomicDeletionAmbiguousToken( + input_string=og_input_string, + token=input_string, + pos0=int(pos0) if pos0 != "?" else pos0, + pos1=int(pos1) if pos1 != "?" else pos1, + pos2=int(pos2) if pos2 != "?" else pos2, + pos3=int(pos3) if pos3 != "?" 
else pos3, + ambiguous_regex_type=AmbiguousRegexType.REGEX_1, + ) + else: - # Going to try ambiguous genomic duplications - match = GENOMIC_DELETION_AMBIGUOUS_1.match(input_string) - if match: - match_dict = match.groupdict() - pos0 = match_dict["pos0"] - pos1 = match_dict["pos1"] - pos2 = match_dict["pos2"] - pos3 = match_dict["pos3"] - - # (?_?)_(#_#), (#_#)_(?, ?), (?_?)_(?_?) are not supported - if not any( - ((pos0 == "?" and pos1 == "?"), (pos2 == "?" and pos3 == "?")) - ): + for pattern_re, regex_type in [ + (GENOMIC_DELETION_AMBIGUOUS_2, AmbiguousRegexType.REGEX_2), + (GENOMIC_DELETION_AMBIGUOUS_3, AmbiguousRegexType.REGEX_3), + ]: + match = pattern_re.match(input_string) + + if match: + matched_pos = {} + match_dict = match.groupdict() + for k in match_dict: + v = match_dict[k] + if v: + v = int(v) if v != "?" else v + + matched_pos[k] = v + return GenomicDeletionAmbiguousToken( input_string=og_input_string, token=input_string, - pos0=int(pos0) if pos0 != "?" else pos0, - pos1=int(pos1) if pos1 != "?" else pos1, - pos2=int(pos2) if pos2 != "?" else pos2, - pos3=int(pos3) if pos3 != "?" else pos3, - ambiguous_regex_type=AmbiguousRegexType.REGEX_1, + pos0=matched_pos["pos0"], + pos1=matched_pos.get("pos1"), + pos2=matched_pos["pos2"], + pos3=matched_pos.get("pos3"), + ambiguous_regex_type=regex_type, ) - else: - for pattern_re, regex_type in [ - (GENOMIC_DELETION_AMBIGUOUS_2, AmbiguousRegexType.REGEX_2), - (GENOMIC_DELETION_AMBIGUOUS_3, AmbiguousRegexType.REGEX_3), - ]: - match = pattern_re.match(input_string) - - if match: - matched_pos = dict() - match_dict = match.groupdict() - for k in match_dict: - v = match_dict[k] - if v: - v = int(v) if v != "?" 
else v - - matched_pos[k] = v - - return GenomicDeletionAmbiguousToken( - input_string=og_input_string, - token=input_string, - pos0=matched_pos["pos0"], - pos1=matched_pos.get("pos1"), - pos2=matched_pos["pos2"], - pos3=matched_pos.get("pos3"), - ambiguous_regex_type=regex_type, - ) + return None diff --git a/variation/tokenizers/genomic_delins.py b/variation/tokenizers/genomic_delins.py index 9c7603d4..888d3145 100644 --- a/variation/tokenizers/genomic_delins.py +++ b/variation/tokenizers/genomic_delins.py @@ -39,3 +39,5 @@ def match(self, input_string: str) -> Optional[GenomicDelInsToken]: pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, inserted_sequence=match_dict["inserted_sequence"], ) + + return None diff --git a/variation/tokenizers/genomic_duplication.py b/variation/tokenizers/genomic_duplication.py index f03246ae..a20e1f6d 100644 --- a/variation/tokenizers/genomic_duplication.py +++ b/variation/tokenizers/genomic_duplication.py @@ -46,53 +46,52 @@ def match(self, input_string: str) -> Optional[GenomicDuplicationToken]: pos0=int(match_dict["pos0"]), pos1=int(match_dict["pos1"]) if match_dict["pos1"] else None, ) + + # Going to try ambiguous genomic duplications + match = GENOMIC_DUPLICATION_AMBIGUOUS_1.match(input_string) + if match: + match_dict = match.groupdict() + pos0 = match_dict["pos0"] + pos1 = match_dict["pos1"] + pos2 = match_dict["pos2"] + pos3 = match_dict["pos3"] + + # (?_?)_(#_#), (#_#)_(?, ?), (?_?)_(?_?) are not supported + if not any(((pos0 == "?" and pos1 == "?"), (pos2 == "?" and pos3 == "?"))): + return GenomicDuplicationAmbiguousToken( + input_string=og_input_string, + token=input_string, + pos0=int(pos0) if pos0 != "?" else pos0, + pos1=int(pos1) if pos1 != "?" else pos1, + pos2=int(pos2) if pos2 != "?" else pos2, + pos3=int(pos3) if pos3 != "?" 
else pos3, + ambiguous_regex_type=AmbiguousRegexType.REGEX_1, + ) + else: - # Going to try ambiguous genomic duplications - match = GENOMIC_DUPLICATION_AMBIGUOUS_1.match(input_string) - if match: - match_dict = match.groupdict() - pos0 = match_dict["pos0"] - pos1 = match_dict["pos1"] - pos2 = match_dict["pos2"] - pos3 = match_dict["pos3"] + for pattern_re, regex_type in [ + (GENOMIC_DUPLICATION_AMBIGUOUS_2, AmbiguousRegexType.REGEX_2), + (GENOMIC_DUPLICATION_AMBIGUOUS_3, AmbiguousRegexType.REGEX_3), + ]: + match = pattern_re.match(input_string) + + if match: + matched_pos = {} + match_dict = match.groupdict() + for k in match_dict: + v = match_dict[k] + if v: + v = int(v) if v != "?" else v + + matched_pos[k] = v - # (?_?)_(#_#), (#_#)_(?, ?), (?_?)_(?_?) are not supported - if not any( - ((pos0 == "?" and pos1 == "?"), (pos2 == "?" and pos3 == "?")) - ): return GenomicDuplicationAmbiguousToken( input_string=og_input_string, token=input_string, - pos0=int(pos0) if pos0 != "?" else pos0, - pos1=int(pos1) if pos1 != "?" else pos1, - pos2=int(pos2) if pos2 != "?" else pos2, - pos3=int(pos3) if pos3 != "?" else pos3, - ambiguous_regex_type=AmbiguousRegexType.REGEX_1, + pos0=matched_pos["pos0"], + pos1=matched_pos.get("pos1"), + pos2=matched_pos["pos2"], + pos3=matched_pos.get("pos3"), + ambiguous_regex_type=regex_type, ) - - else: - for pattern_re, regex_type in [ - (GENOMIC_DUPLICATION_AMBIGUOUS_2, AmbiguousRegexType.REGEX_2), - (GENOMIC_DUPLICATION_AMBIGUOUS_3, AmbiguousRegexType.REGEX_3), - ]: - match = pattern_re.match(input_string) - - if match: - matched_pos = dict() - match_dict = match.groupdict() - for k in match_dict: - v = match_dict[k] - if v: - v = int(v) if v != "?" 
else v - - matched_pos[k] = v - - return GenomicDuplicationAmbiguousToken( - input_string=og_input_string, - token=input_string, - pos0=matched_pos["pos0"], - pos1=matched_pos.get("pos1"), - pos2=matched_pos["pos2"], - pos3=matched_pos.get("pos3"), - ambiguous_regex_type=regex_type, - ) + return None diff --git a/variation/tokenizers/genomic_insertion.py b/variation/tokenizers/genomic_insertion.py index 43576fe7..3319fd50 100644 --- a/variation/tokenizers/genomic_insertion.py +++ b/variation/tokenizers/genomic_insertion.py @@ -42,3 +42,5 @@ def match(self, input_string: str) -> Optional[GenomicInsertionToken]: pos1=pos1, inserted_sequence=inserted_sequence, ) + + return None diff --git a/variation/tokenizers/genomic_substitution.py b/variation/tokenizers/genomic_substitution.py index 80242388..3cafed25 100644 --- a/variation/tokenizers/genomic_substitution.py +++ b/variation/tokenizers/genomic_substitution.py @@ -41,3 +41,5 @@ def match(self, input_string: str) -> Optional[GenomicSubstitutionToken]: ref=match_dict["ref"], alt=match_dict["alt"], ) + + return None diff --git a/variation/tokenizers/hgvs.py b/variation/tokenizers/hgvs.py index 507d5890..c98e33a9 100644 --- a/variation/tokenizers/hgvs.py +++ b/variation/tokenizers/hgvs.py @@ -12,7 +12,7 @@ class HGVS(Tokenizer): """The HGVS tokenizer class.""" splitter = re.compile( - r"^(?P(NC_|NM_|NP_|ENSP|ENST)[^:\s]+):(?P[cgnpr])\.(?P\S+)$" # noqa: E501 + r"^(?P(NC_|NM_|NP_|ENSP|ENST)[^:\s]+):(?P[cgnpr])\.(?P\S+)$" ) def match(self, input_string: str) -> Optional[HgvsToken]: @@ -32,5 +32,5 @@ def match(self, input_string: str) -> Optional[HgvsToken]: coordinate_type=AnnotationLayer(match_dict["coordinate"]), change=match_dict["change"], ) - else: - return None + + return None diff --git a/variation/tokenizers/protein_deletion.py b/variation/tokenizers/protein_deletion.py index 903611aa..eb459a8b 100644 --- a/variation/tokenizers/protein_deletion.py +++ b/variation/tokenizers/protein_deletion.py @@ -92,3 +92,4 @@ def 
match(self, input_string: str) -> Optional[ProteinDeletionToken]: pos1=pos1, deleted_sequence=one_letter_del_seq, ) + return None diff --git a/variation/tokenizers/protein_delins.py b/variation/tokenizers/protein_delins.py index f80af492..5bfe086e 100644 --- a/variation/tokenizers/protein_delins.py +++ b/variation/tokenizers/protein_delins.py @@ -85,3 +85,4 @@ def match(self, input_string: str) -> Optional[ProteinDelInsToken]: pos1=pos1, inserted_sequence=one_letter_ins_seq, ) + return None diff --git a/variation/tokenizers/protein_insertion.py b/variation/tokenizers/protein_insertion.py index 0f7c8861..6b482eeb 100644 --- a/variation/tokenizers/protein_insertion.py +++ b/variation/tokenizers/protein_insertion.py @@ -67,3 +67,5 @@ def match(self, input_string: str) -> Optional[ProteinInsertionToken]: pos1=pos1, inserted_sequence=one_letter_ins_seq, ) + + return None diff --git a/variation/tokenizers/protein_reference_agree.py b/variation/tokenizers/protein_reference_agree.py index a14acd8a..89b74e75 100644 --- a/variation/tokenizers/protein_reference_agree.py +++ b/variation/tokenizers/protein_reference_agree.py @@ -1,4 +1,5 @@ """A module for Reference Agree Tokenization.""" +import contextlib from typing import Optional from bioutils.sequences import aa1_to_aa3, aa3_to_aa1 @@ -42,10 +43,8 @@ def match(self, input_string: str) -> Optional[ProteinReferenceAgreeToken]: aa1_to_aa3(ref) except KeyError: # maybe 3 letter AA code was used - try: + with contextlib.suppress(KeyError): aa1_ref = aa3_to_aa1(ref) - except KeyError: - pass else: aa1_ref = ref @@ -56,3 +55,5 @@ def match(self, input_string: str) -> Optional[ProteinReferenceAgreeToken]: pos=pos, ref=aa1_ref, ) + + return None diff --git a/variation/tokenizers/protein_substitution.py b/variation/tokenizers/protein_substitution.py index f66e28f3..dcc0fccd 100644 --- a/variation/tokenizers/protein_substitution.py +++ b/variation/tokenizers/protein_substitution.py @@ -74,5 +74,7 @@ def match( if aa1_alt == "*": 
return ProteinStopGainToken(**params) - else: - return ProteinSubstitutionToken(**params) + + return ProteinSubstitutionToken(**params) + + return None diff --git a/variation/tokenizers/tokenizer.py b/variation/tokenizers/tokenizer.py index 5f310913..3175d4d2 100644 --- a/variation/tokenizers/tokenizer.py +++ b/variation/tokenizers/tokenizer.py @@ -1,6 +1,6 @@ """Module for Tokenization.""" from abc import ABC, abstractmethod -from typing import Optional, Tuple +from typing import ClassVar, Dict, Optional, Tuple from cool_seq_tool.schemas import AnnotationLayer @@ -10,7 +10,9 @@ class Tokenizer(ABC): """The tokenizer class.""" - coord_types = {k: v.value for k, v in AnnotationLayer.__members__.items()} + coord_types: ClassVar[Dict[str, str]] = { + k: v.value for k, v in AnnotationLayer.__members__.items() + } @abstractmethod def match(self, input_string: str) -> Optional[Token]: diff --git a/variation/translators/__init__.py b/variation/translators/__init__.py index de52c9be..703976cc 100644 --- a/variation/translators/__init__.py +++ b/variation/translators/__init__.py @@ -19,3 +19,26 @@ from .protein_reference_agree import ProteinReferenceAgree from .protein_stop_gain import ProteinStopGain from .protein_substitution import ProteinSubstitution + +__all__ = [ + "Amplification", + "CdnaDeletion", + "CdnaDelIns", + "CdnaInsertion", + "CdnaReferenceAgree", + "CdnaSubstitution", + "GenomicDeletion", + "GenomicDeletionAmbiguous", + "GenomicDelIns", + "GenomicDuplication", + "GenomicDuplicationAmbiguous", + "GenomicInsertion", + "GenomicReferenceAgree", + "GenomicSubstitution", + "ProteinDeletion", + "ProteinDelIns", + "ProteinInsertion", + "ProteinReferenceAgree", + "ProteinStopGain", + "ProteinSubstitution", +] diff --git a/variation/translators/ambiguous_translator_base.py b/variation/translators/ambiguous_translator_base.py index a796b665..f287f5c2 100644 --- a/variation/translators/ambiguous_translator_base.py +++ 
b/variation/translators/ambiguous_translator_base.py @@ -188,29 +188,29 @@ async def translate( if w: warnings.append(w) return None + + # assembly is either 37 or 38 + if assembly == ClinVarAssembly.GRCH37: + grch38_data = await self.get_grch38_data_ambiguous( + classification, errors, validation_result.accession + ) + if errors: + warnings += errors + return None + + ac = grch38_data.ac + pos0 = grch38_data.pos0 + pos1 = grch38_data.pos1 + pos2 = grch38_data.pos2 + pos3 = grch38_data.pos3 else: - # assembly is either 37 or 38 - if assembly == ClinVarAssembly.GRCH37: - grch38_data = await self.get_grch38_data_ambiguous( - classification, errors, validation_result.accession - ) - if errors: - warnings += errors - return None - - ac = grch38_data.ac - pos0 = grch38_data.pos0 - pos1 = grch38_data.pos1 - pos2 = grch38_data.pos2 - pos3 = grch38_data.pos3 - else: - ac = validation_result.accession - pos0 = classification.pos0 - pos1 = classification.pos1 - pos2 = classification.pos2 - pos3 = classification.pos3 - - assembly = ClinVarAssembly.GRCH38 + ac = validation_result.accession + pos0 = classification.pos0 + pos1 = classification.pos1 + pos2 = classification.pos2 + pos3 = classification.pos3 + + assembly = ClinVarAssembly.GRCH38 else: ac = validation_result.accession pos0 = classification.pos0 @@ -306,5 +306,5 @@ async def translate( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None + + return None diff --git a/variation/translators/amplification.py b/variation/translators/amplification.py index ef2b6411..cbd4034a 100644 --- a/variation/translators/amplification.py +++ b/variation/translators/amplification.py @@ -65,5 +65,5 @@ async def translate( return TranslationResult( vrs_variation=vrs_cx, validation_result=validation_result ) - else: - return None + + return None diff --git a/variation/translators/cdna_deletion.py b/variation/translators/cdna_deletion.py index 9eb99199..96fdb5a9 100644 --- 
a/variation/translators/cdna_deletion.py +++ b/variation/translators/cdna_deletion.py @@ -53,7 +53,7 @@ async def translate( cds_start = validation_result.cds_start classification: CdnaDeletionClassification = validation_result.classification - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos0, @@ -63,4 +63,3 @@ async def translate( warnings, cds_start=cds_start, ) - return translation_result diff --git a/variation/translators/cdna_delins.py b/variation/translators/cdna_delins.py index 815c30f5..1498b93c 100644 --- a/variation/translators/cdna_delins.py +++ b/variation/translators/cdna_delins.py @@ -53,7 +53,7 @@ async def translate( cds_start = validation_result.cds_start classification: CdnaDelInsClassification = validation_result.classification - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos0, @@ -64,4 +64,3 @@ async def translate( cds_start=cds_start, alt=classification.inserted_sequence, ) - return translation_result diff --git a/variation/translators/cdna_insertion.py b/variation/translators/cdna_insertion.py index 8654aca7..f0d5d029 100644 --- a/variation/translators/cdna_insertion.py +++ b/variation/translators/cdna_insertion.py @@ -53,7 +53,7 @@ async def translate( cds_start = validation_result.cds_start classification: CdnaInsertionClassification = validation_result.classification - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos0, @@ -64,4 +64,3 @@ async def translate( cds_start=cds_start, alt=classification.inserted_sequence, ) - return translation_result diff --git a/variation/translators/cdna_reference_agree.py b/variation/translators/cdna_reference_agree.py index 
d7392536..92fe1a09 100644 --- a/variation/translators/cdna_reference_agree.py +++ b/variation/translators/cdna_reference_agree.py @@ -55,7 +55,7 @@ async def translate( validation_result.classification ) - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos, @@ -65,4 +65,3 @@ async def translate( warnings, cds_start=cds_start, ) - return translation_result diff --git a/variation/translators/cdna_substitution.py b/variation/translators/cdna_substitution.py index 38a928fd..755bdf0e 100644 --- a/variation/translators/cdna_substitution.py +++ b/variation/translators/cdna_substitution.py @@ -55,7 +55,7 @@ async def translate( validation_result.classification ) - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos, @@ -67,4 +67,3 @@ async def translate( ref=classification.ref, alt=classification.alt, ) - return translation_result diff --git a/variation/translators/genomic_del_dup_base.py b/variation/translators/genomic_del_dup_base.py index 3103823e..80c879de 100644 --- a/variation/translators/genomic_del_dup_base.py +++ b/variation/translators/genomic_del_dup_base.py @@ -117,44 +117,46 @@ async def translate( if w: warnings.append(w) return None - else: - # assembly is either 37 or 38 - if assembly == ClinVarAssembly.GRCH37: - grch38_data = await self.get_grch38_data( - classification, errors, validation_result.accession - ) - if errors: - warnings += errors - return None - pos0 = grch38_data.pos0 - 1 - if grch38_data.pos1 is None: - pos1 = grch38_data.pos0 - else: - pos1 = grch38_data.pos1 - residue_mode = ResidueMode.INTER_RESIDUE - ac = grch38_data.ac - - if alt_type == AltType.DELETION: - if classification.nomenclature == Nomenclature.GNOMAD_VCF: - ref = classification.matching_tokens[0].ref - invalid_ref_msg = 
self.validate_reference_sequence( - ac, - pos0, - pos0 + (len(ref) - 1), - ref, - residue_mode=residue_mode, - ) - if invalid_ref_msg: - warnings.append(invalid_ref_msg) - return None + # assembly is either 37 or 38 + if assembly == ClinVarAssembly.GRCH37: + grch38_data = await self.get_grch38_data( + classification, errors, validation_result.accession + ) + if errors: + warnings += errors + return None + + pos0 = grch38_data.pos0 - 1 + if grch38_data.pos1 is None: + pos1 = grch38_data.pos0 else: - pos0 = classification.pos0 - pos1 = classification.pos1 - ac = validation_result.accession - grch38_data = DelDupData(ac=ac, pos0=pos0, pos1=pos1) + pos1 = grch38_data.pos1 + residue_mode = ResidueMode.INTER_RESIDUE + ac = grch38_data.ac - assembly = ClinVarAssembly.GRCH38 + if ( + alt_type == AltType.DELETION + and classification.nomenclature == Nomenclature.GNOMAD_VCF + ): + ref = classification.matching_tokens[0].ref + invalid_ref_msg = self.validate_reference_sequence( + ac, + pos0, + pos0 + (len(ref) - 1), + ref, + residue_mode=residue_mode, + ) + if invalid_ref_msg: + warnings.append(invalid_ref_msg) + return None + else: + pos0 = classification.pos0 + pos1 = classification.pos1 + ac = validation_result.accession + grch38_data = DelDupData(ac=ac, pos0=pos0, pos1=pos1) + + assembly = ClinVarAssembly.GRCH38 else: pos0 = classification.pos0 pos1 = classification.pos1 @@ -177,10 +179,7 @@ async def translate( ac = grch38_data.ac pos0 = grch38_data.pos0 - 1 - if grch38_data.pos1 is None: - pos1 = grch38_data.pos0 - else: - pos1 = grch38_data.pos1 + pos1 = grch38_data.pos0 if grch38_data.pos1 is None else grch38_data.pos1 residue_mode = ResidueMode.INTER_RESIDUE self.is_valid(classification.gene_token, ac, pos0, pos1, errors) @@ -211,11 +210,13 @@ async def translate( return None alt = None - if classification.nomenclature == Nomenclature.GNOMAD_VCF: - if alt_type == AltType.DELETION: - pos0 -= 1 - pos1 -= 1 - alt = classification.matching_tokens[0].alt + if ( + 
classification.nomenclature == Nomenclature.GNOMAD_VCF + and alt_type == AltType.DELETION + ): + pos0 -= 1 + pos1 -= 1 + alt = classification.matching_tokens[0].alt if alt_type == AltType.INSERTION: alt = classification.inserted_sequence @@ -268,5 +269,5 @@ async def translate( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None + + return None diff --git a/variation/translators/genomic_delins.py b/variation/translators/genomic_delins.py index a9934b78..9a6d6dcc 100644 --- a/variation/translators/genomic_delins.py +++ b/variation/translators/genomic_delins.py @@ -125,5 +125,5 @@ async def translate( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None + + return None diff --git a/variation/translators/genomic_insertion.py b/variation/translators/genomic_insertion.py index eec2b556..03d636bc 100644 --- a/variation/translators/genomic_insertion.py +++ b/variation/translators/genomic_insertion.py @@ -126,5 +126,5 @@ async def translate( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None + + return None diff --git a/variation/translators/genomic_reference_agree.py b/variation/translators/genomic_reference_agree.py index d4719993..f1b7d4a9 100644 --- a/variation/translators/genomic_reference_agree.py +++ b/variation/translators/genomic_reference_agree.py @@ -122,5 +122,5 @@ async def translate( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None + + return None diff --git a/variation/translators/genomic_substitution.py b/variation/translators/genomic_substitution.py index 789c6015..6b273337 100644 --- a/variation/translators/genomic_substitution.py +++ b/variation/translators/genomic_substitution.py @@ -146,5 +146,5 @@ async def translate( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None + + return None diff --git 
a/variation/translators/protein_deletion.py b/variation/translators/protein_deletion.py index 5369fd5b..70910d44 100644 --- a/variation/translators/protein_deletion.py +++ b/variation/translators/protein_deletion.py @@ -53,7 +53,7 @@ async def translate( # First will translate valid result to VRS Allele classification: ProteinDeletionClassification = validation_result.classification - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos0, @@ -62,4 +62,3 @@ async def translate( AnnotationLayer.PROTEIN, warnings, ) - return translation_result diff --git a/variation/translators/protein_delins.py b/variation/translators/protein_delins.py index e72c8713..11f1d77f 100644 --- a/variation/translators/protein_delins.py +++ b/variation/translators/protein_delins.py @@ -53,7 +53,7 @@ async def translate( # First will translate valid result to VRS Allele classification: ProteinDelInsClassification = validation_result.classification - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos0, @@ -63,4 +63,3 @@ async def translate( warnings, alt=classification.inserted_sequence, ) - return translation_result diff --git a/variation/translators/protein_insertion.py b/variation/translators/protein_insertion.py index 2bf9d0be..0dfb3983 100644 --- a/variation/translators/protein_insertion.py +++ b/variation/translators/protein_insertion.py @@ -55,7 +55,7 @@ async def translate( validation_result.classification ) - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos0, @@ -65,4 +65,3 @@ async def translate( warnings, alt=classification.inserted_sequence, ) - return translation_result diff --git 
a/variation/translators/protein_reference_agree.py b/variation/translators/protein_reference_agree.py index 2cfac144..05e9f3d8 100644 --- a/variation/translators/protein_reference_agree.py +++ b/variation/translators/protein_reference_agree.py @@ -55,7 +55,7 @@ async def translate( validation_result.classification ) - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos, @@ -65,4 +65,3 @@ async def translate( warnings, ref=classification.ref, ) - return translation_result diff --git a/variation/translators/protein_stop_gain.py b/variation/translators/protein_stop_gain.py index 1d1fc7a1..88144412 100644 --- a/variation/translators/protein_stop_gain.py +++ b/variation/translators/protein_stop_gain.py @@ -53,7 +53,7 @@ async def translate( # First will translate valid result to VRS Allele classification: ProteinStopGainClassification = validation_result.classification - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos, @@ -63,4 +63,3 @@ async def translate( warnings, alt=classification.alt, ) - return translation_result diff --git a/variation/translators/protein_substitution.py b/variation/translators/protein_substitution.py index b58213b5..a1933090 100644 --- a/variation/translators/protein_substitution.py +++ b/variation/translators/protein_substitution.py @@ -55,7 +55,7 @@ async def translate( validation_result.classification ) - translation_result = await self.get_p_or_cdna_translation_result( + return await self.get_p_or_cdna_translation_result( endpoint_name, validation_result, classification.pos, @@ -66,4 +66,3 @@ async def translate( ref=classification.ref, alt=classification.alt, ) - return translation_result diff --git a/variation/translators/translator.py b/variation/translators/translator.py index 2f01c011..9d8bb661 
100644 --- a/variation/translators/translator.py +++ b/variation/translators/translator.py @@ -106,15 +106,14 @@ def is_valid( gene_end = None for ext in gene_token.gene.extensions: - if ext.name == "ensembl_locations": - if ext.value: - ensembl_loc = ext.value[0] - gene_start = ensembl_loc["start"] - gene_end = ensembl_loc["end"] - 1 + if ext.name == "ensembl_locations" and ext.value: + ensembl_loc = ext.value[0] + gene_start = ensembl_loc["start"] + gene_end = ensembl_loc["end"] - 1 if gene_start is None and gene_end is None: errors.append( - f"gene-normalizer unable to find Ensembl location for: {gene_token.token}" # noqa: E501 + f"gene-normalizer unable to find Ensembl location for: {gene_token.token}" ) for pos in [pos0, pos1, pos2, pos3]: @@ -124,7 +123,7 @@ def is_valid( if not (gene_start <= pos <= gene_end): errors.append( - f"Inter-residue position {pos} out of index on {alt_ac} on gene, {gene_token.token}" # noqa: E501 + f"Inter-residue position {pos} out of index on {alt_ac} on gene, {gene_token.token}" ) def validate_reference_sequence( @@ -245,5 +244,5 @@ async def get_p_or_cdna_translation_result( og_ac=validation_result.accession, validation_result=validation_result, ) - else: - return None + + return None diff --git a/variation/utils.py b/variation/utils.py index 0f088555..f76d44ab 100644 --- a/variation/utils.py +++ b/variation/utils.py @@ -1,4 +1,5 @@ """Module for general functionality throughout the app""" +import contextlib import re from typing import Dict, List, Literal, Optional, Tuple, Union @@ -101,10 +102,8 @@ def get_aa1_codes(aa: str) -> Optional[str]: _aa1_to_aa3(aa) except KeyError: # see if it's 3 AA - try: + with contextlib.suppress(KeyError): aa1 = _aa3_to_aa1(aa) - except KeyError: - pass else: aa1 = aa @@ -147,11 +146,16 @@ def get_ambiguous_type( (pos0 == "?", isinstance(pos1, int), isinstance(pos2, int), pos3 is None) ): ambiguous_type = AmbiguousType.AMBIGUOUS_5 - elif ambiguous_regex_type == AmbiguousRegexType.REGEX_3: - if 
all( - (isinstance(pos0, int), pos1 is None, isinstance(pos2, int), pos3 == "?") - ): - ambiguous_type = AmbiguousType.AMBIGUOUS_7 + elif all( + ( + ambiguous_regex_type == AmbiguousRegexType.REGEX_3, + isinstance(pos0, int), + pos1 is None, + isinstance(pos2, int), + pos3 == "?", + ) + ): + ambiguous_type = AmbiguousType.AMBIGUOUS_7 return ambiguous_type diff --git a/variation/validate.py b/variation/validate.py index 088469df..15781ea6 100644 --- a/variation/validate.py +++ b/variation/validate.py @@ -108,7 +108,7 @@ async def perform(self, classification: Classification) -> ValidationSummary: if not found_valid_result: warnings = [ - f"Unable to find valid result for classification: {invalid_classification}" # noqa: E501 + f"Unable to find valid result for classification: {invalid_classification}" ] else: warnings = [] diff --git a/variation/validators/__init__.py b/variation/validators/__init__.py index 3215eff8..33f8f2e1 100644 --- a/variation/validators/__init__.py +++ b/variation/validators/__init__.py @@ -20,3 +20,27 @@ from .protein_reference_agree import ProteinReferenceAgree from .protein_stop_gain import ProteinStopGain from .protein_substitution import ProteinSubstitution + +__all__ = [ + "Amplification", + "CdnaDeletion", + "CdnaDelIns", + "CdnaInsertion", + "CdnaReferenceAgree", + "CdnaSubstitution", + "GenomicBase", + "GenomicDeletion", + "GenomicDeletionAmbiguous", + "GenomicDelIns", + "GenomicDuplication", + "GenomicDuplicationAmbiguous", + "GenomicInsertion", + "GenomicReferenceAgree", + "GenomicSubstitution", + "ProteinDeletion", + "ProteinDelIns", + "ProteinInsertion", + "ProteinReferenceAgree", + "ProteinStopGain", + "ProteinSubstitution", +] diff --git a/variation/validators/genomic_base.py b/variation/validators/genomic_base.py index b0400d1c..eafad2df 100644 --- a/variation/validators/genomic_base.py +++ b/variation/validators/genomic_base.py @@ -58,7 +58,7 @@ def get_nc_accession(self, identifier: str) -> Optional[str]: identifier ) except 
KeyError: - logger.warning("Data Proxy unable to get metadata" f"for {identifier}") + logger.warning("Data Proxy unable to get metadata for %s", identifier) else: aliases = [a for a in translated_identifiers if a.startswith("refseq:NC_")] if aliases: diff --git a/variation/validators/genomic_deletion.py b/variation/validators/genomic_deletion.py index 7b2b3f87..cb3c9c4f 100644 --- a/variation/validators/genomic_deletion.py +++ b/variation/validators/genomic_deletion.py @@ -47,38 +47,40 @@ async def get_valid_invalid_results( if invalid_ac_pos: errors.append(invalid_ac_pos) else: - if classification.nomenclature in { - Nomenclature.FREE_TEXT, - Nomenclature.HGVS, - }: + if ( + classification.nomenclature + in { + Nomenclature.FREE_TEXT, + Nomenclature.HGVS, + } + and classification.deleted_sequence + ): # Validate deleted sequence # HGVS deleted sequence includes start and end - if classification.deleted_sequence: - invalid_del_seq_message = self.validate_reference_sequence( - alt_ac, - classification.pos0, - classification.pos1 - if classification.pos1 - else classification.pos0, - classification.deleted_sequence, - ) - - if invalid_del_seq_message: - errors.append(invalid_del_seq_message) - - if not errors: - if classification.nomenclature == Nomenclature.GNOMAD_VCF: - # Validate reference sequence - ref = classification.matching_tokens[0].ref - validate_ref_msg = self.validate_reference_sequence( + invalid_del_seq_message = self.validate_reference_sequence( alt_ac, - classification.pos0 - 1, - end_pos=classification.pos0 + (len(ref) - 1), - expected_ref=ref, + classification.pos0, + classification.pos1 + if classification.pos1 + else classification.pos0, + classification.deleted_sequence, ) - if validate_ref_msg: - errors.append(validate_ref_msg) + if invalid_del_seq_message: + errors.append(invalid_del_seq_message) + + if not errors and classification.nomenclature == Nomenclature.GNOMAD_VCF: + # Validate reference sequence + ref = 
classification.matching_tokens[0].ref + validate_ref_msg = self.validate_reference_sequence( + alt_ac, + classification.pos0 - 1, + end_pos=classification.pos0 + (len(ref) - 1), + expected_ref=ref, + ) + + if validate_ref_msg: + errors.append(validate_ref_msg) if not errors and classification.gene_token: # Validate positions exist within gene range diff --git a/variation/validators/genomic_deletion_ambiguous.py b/variation/validators/genomic_deletion_ambiguous.py index b0ddb8a0..ab999e53 100644 --- a/variation/validators/genomic_deletion_ambiguous.py +++ b/variation/validators/genomic_deletion_ambiguous.py @@ -48,10 +48,10 @@ async def get_valid_invalid_results( if classification.ambiguous_type == AmbiguousType.AMBIGUOUS_1: start_pos = classification.pos0 end_pos = classification.pos3 - elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_2: - start_pos = classification.pos1 - end_pos = classification.pos2 - elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_5: + elif ( + classification.ambiguous_type == AmbiguousType.AMBIGUOUS_2 + or classification.ambiguous_type == AmbiguousType.AMBIGUOUS_5 + ): start_pos = classification.pos1 end_pos = classification.pos2 elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_7: diff --git a/variation/validators/genomic_duplication_ambiguous.py b/variation/validators/genomic_duplication_ambiguous.py index ea522393..9479d744 100644 --- a/variation/validators/genomic_duplication_ambiguous.py +++ b/variation/validators/genomic_duplication_ambiguous.py @@ -48,10 +48,10 @@ async def get_valid_invalid_results( if classification.ambiguous_type == AmbiguousType.AMBIGUOUS_1: start_pos = classification.pos0 end_pos = classification.pos3 - elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_2: - start_pos = classification.pos1 - end_pos = classification.pos2 - elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_5: + elif ( + classification.ambiguous_type == AmbiguousType.AMBIGUOUS_2 + or 
classification.ambiguous_type == AmbiguousType.AMBIGUOUS_5 + ): start_pos = classification.pos1 end_pos = classification.pos2 elif classification.ambiguous_type == AmbiguousType.AMBIGUOUS_7: diff --git a/variation/validators/protein_deletion.py b/variation/validators/protein_deletion.py index 5756fb58..c8fc9d0d 100644 --- a/variation/validators/protein_deletion.py +++ b/variation/validators/protein_deletion.py @@ -74,22 +74,25 @@ async def get_valid_invalid_results( errors.append(invalid_aa1_seq_msg) # Validate that deleted sequence matches expected - if classification.nomenclature in { - Nomenclature.FREE_TEXT, - Nomenclature.HGVS, - }: + if ( + classification.nomenclature + in { + Nomenclature.FREE_TEXT, + Nomenclature.HGVS, + } + and classification.deleted_sequence + and classification.pos1 is not None + ): # HGVS deleted sequence includes start and end - if classification.deleted_sequence: - if classification.pos1 is not None: - invalid_del_seq_msg = self.validate_reference_sequence( - p_ac, - classification.pos0, - classification.pos1, - classification.deleted_sequence, - ) - - if invalid_del_seq_msg: - errors.append(invalid_del_seq_msg) + invalid_del_seq_msg = self.validate_reference_sequence( + p_ac, + classification.pos0, + classification.pos1, + classification.deleted_sequence, + ) + + if invalid_del_seq_msg: + errors.append(invalid_del_seq_msg) validation_results.append( ValidationResult( diff --git a/variation/validators/validator.py b/variation/validators/validator.py index 6d51a8f6..f7fb8b0d 100644 --- a/variation/validators/validator.py +++ b/variation/validators/validator.py @@ -112,10 +112,7 @@ async def validate(self, classification: Classification) -> List[ValidationResul errors=errors, ) ] - validation_results = await self.get_valid_invalid_results( - classification, accessions - ) - return validation_results + return await self.get_valid_invalid_results(classification, accessions) def get_protein_accessions(self, gene_token: GeneToken, errors: 
List) -> List[str]: """Get accessions for variations with protein reference sequence. @@ -194,31 +191,33 @@ async def _validate_gene_pos( if gene_start_end["start"] is None and gene_start_end["end"] is None: return f"gene-normalizer unable to find Ensembl location for gene: {gene}" - else: - assembly = await self.uta.get_chr_assembly(alt_ac) - if assembly: - # Not in GRCh38 assembly. Gene normalizer only uses 38, so we - # need to liftover to GRCh37 coords - chromosome, assembly = assembly - for key in gene_start_end.keys(): - gene_pos = gene_start_end[key] - gene_pos_liftover = self.uta.liftover_38_to_37.convert_coordinate( - chromosome, gene_pos - ) - if gene_pos_liftover is None or len(gene_pos_liftover) == 0: - return f"{gene_pos} does not exist on {chromosome}" - else: - gene_start_end[key] = gene_pos_liftover[0][1] - - gene_start = gene_start_end["start"] - gene_end = gene_start_end["end"] - - for pos in [pos0, pos1, pos2, pos3]: - if pos not in ["?", None]: - if residue_mode == "residue": - pos -= 1 - if not (gene_start <= pos <= gene_end): - return f"Position {pos} out of index on {alt_ac} on gene, {gene}" # noqa: E501 + + assembly = await self.uta.get_chr_assembly(alt_ac) + if assembly: + # Not in GRCh38 assembly. 
Gene normalizer only uses 38, so we + # need to liftover to GRCh37 coords + chromosome, assembly = assembly + for key in gene_start_end: + gene_pos = gene_start_end[key] + gene_pos_liftover = self.uta.liftover_38_to_37.convert_coordinate( + chromosome, gene_pos + ) + if gene_pos_liftover is None or len(gene_pos_liftover) == 0: + return f"{gene_pos} does not exist on {chromosome}" + + gene_start_end[key] = gene_pos_liftover[0][1] + + gene_start = gene_start_end["start"] + gene_end = gene_start_end["end"] + + for pos in [pos0, pos1, pos2, pos3]: + if pos not in ["?", None]: + if residue_mode == "residue": + pos -= 1 + if not (gene_start <= pos <= gene_end): + return f"Position {pos} out of index on {alt_ac} on gene, {gene}" + + return None def validate_reference_sequence( self, @@ -302,7 +301,7 @@ def validate_ac_and_pos( else: if end_pos: if not ref_len or (end_pos - start_pos != ref_len): - msg = f"Positions ({start_pos}, {end_pos}) not valid on accession ({ac})" # noqa: E501 + msg = f"Positions ({start_pos}, {end_pos}) not valid on accession ({ac})" else: if not ref_len: msg = f"Position ({start_pos}) not valid on accession ({ac})" @@ -338,8 +337,8 @@ def validate_5_prime_to_3_prime( "should be listed from 5' to 3'" ) break - else: - prev_pos = pos + + prev_pos = pos return invalid_msg def validate_ambiguous_classification( @@ -413,15 +412,12 @@ def validate_protein_hgvs_classification( else: errors.append(f"`aa0` not valid amino acid(s): {classification.aa0}") - if hasattr(classification, "aa1"): - if classification.aa1: - aa1_codes = get_aa1_codes(classification.aa1) - if aa1_codes: - classification.aa1 = aa1_codes - else: - errors.append( - f"`aa1` not valid amino acid(s): {classification.aa1}" - ) + if hasattr(classification, "aa1") and classification.aa1: + aa1_codes = get_aa1_codes(classification.aa1) + if aa1_codes: + classification.aa1 = aa1_codes + else: + errors.append(f"`aa1` not valid amino acid(s): {classification.aa1}") if hasattr(classification, 
"inserted_sequence"): ins_codes = get_aa1_codes(classification.inserted_sequence) diff --git a/variation/vrs_representation.py b/variation/vrs_representation.py index bde025f9..1d12c4b3 100644 --- a/variation/vrs_representation.py +++ b/variation/vrs_representation.py @@ -46,10 +46,9 @@ def get_start_end( errors.append("Start/End must be valid ints") return None - if coordinate == "c": - if cds_start: - start += cds_start - end += cds_start + if coordinate == "c" and cds_start: + start += cds_start + end += cds_start return start, end @staticmethod