Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: return MANE gene(s) in normalize endpoint for genomic queries #576

Merged
merged 2 commits into from
Jul 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 33 additions & 9 deletions src/variation/hgvs_dup_del_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.schemas import ResidueMode
from ga4gh.core import ga4gh_identify
from ga4gh.core import entity_models, ga4gh_identify
from ga4gh.vrs import models, normalize

from variation.schemas.normalize_response_schema import HGVSDupDelModeOption
Expand Down Expand Up @@ -49,6 +49,7 @@ def default_mode(
baseline_copies: int | None = None,
copy_change: models.CopyChange | None = None,
alt: str | None = None,
extensions: list[entity_models.Extension] | None = None,
) -> dict | None:
"""Use default characteristics to return a variation.
If baseline_copies not provided and endpoints are ambiguous - copy_number_change
Expand All @@ -65,31 +66,40 @@ def default_mode(
:param baseline_copies: Baseline copies for Copy Number Count variation
:param copy_change: copy change for Copy Number Change Variation
:param alt: Alteration
:param extensions: List of extensions for variation
:raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``.
:return: VRS Variation object represented as a dict
"""
_check_supported_alt_type(alt_type)

variation = None
if not baseline_copies and alt_type in AMBIGUOUS_REGIONS:
variation = self.copy_number_change_mode(alt_type, location, copy_change)
variation = self.copy_number_change_mode(
alt_type, location, copy_change, extensions=extensions
)
elif baseline_copies:
variation = self.copy_number_count_mode(alt_type, location, baseline_copies)
variation = self.copy_number_count_mode(
alt_type, location, baseline_copies, extensions=extensions
)
else:
variation = self.allele_mode(location, alt_type, vrs_seq_loc_ac, alt)
variation = self.allele_mode(
location, alt_type, vrs_seq_loc_ac, alt, extensions=extensions
)
return variation

def copy_number_count_mode(
self,
alt_type: AltType,
location: dict,
baseline_copies: int,
extensions: list[entity_models.Extension] | None = None,
) -> dict:
"""Return a VRS Copy Number Variation.

:param alt_type: The type of alteration. Must be one of ``DELS_DUPS``.
:param location: VRS SequenceLocation
:param baseline_copies: Baseline copies number
:param extensions: List of extensions for variation
:raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``.
:return: VRS Copy Number object represented as a dict
"""
Expand All @@ -98,7 +108,9 @@ def copy_number_count_mode(
copies = baseline_copies - 1 if alt_type in DELS else baseline_copies + 1
seq_loc = models.SequenceLocation(**location)
seq_loc.id = ga4gh_identify(seq_loc)
cn = models.CopyNumberCount(copies=copies, location=seq_loc)
cn = models.CopyNumberCount(
copies=copies, location=seq_loc, extensions=extensions
)
cn.id = ga4gh_identify(cn)
return cn.model_dump(exclude_none=True)

Expand All @@ -107,12 +119,14 @@ def copy_number_change_mode(
alt_type: AltType,
location: dict,
copy_change: models.CopyChange | None = None,
extensions: list[entity_models.Extension] | None = None,
) -> dict:
"""Return copy number change variation

:param alt_type: The type of alteration. Must be one of ``DELS_DUPS``.
:param location: VRS SequenceLocation
:param copy_change: The copy change
:param extensions: List of extensions for variation
:raises ValueError: If ``alt_type`` not one of ``DELS_DUPS``.
:return: Copy Number Change variation as a dict
"""
Expand All @@ -127,7 +141,9 @@ def copy_number_change_mode(

seq_loc = models.SequenceLocation(**location)
seq_loc.id = ga4gh_identify(seq_loc)
cx = models.CopyNumberChange(location=seq_loc, copyChange=copy_change)
cx = models.CopyNumberChange(
location=seq_loc, copyChange=copy_change, extensions=extensions
)
cx.id = ga4gh_identify(cx)
return cx.model_dump(exclude_none=True)

Expand All @@ -137,6 +153,7 @@ def allele_mode(
alt_type: AltType,
vrs_seq_loc_ac: str,
alt: str,
extensions: list[entity_models.Extension] | None = None,
) -> dict | None:
"""Return a VRS Allele with a normalized LiteralSequenceExpression or
ReferenceLengthExpression.
Expand All @@ -145,6 +162,7 @@ def allele_mode(
:param alt_type: Alteration type
:param vrs_seq_loc_ac: Accession used in VRS Sequence Location
:param alt: Alteration
:param extensions: List of extensions for variation
:return: VRS Allele object represented as a dict
"""
if alt_type in AMBIGUOUS_REGIONS:
Expand All @@ -168,6 +186,7 @@ def allele_mode(
allele = models.Allele(
location=models.SequenceLocation(**location),
state=models.LiteralSequenceExpression(sequence=state),
extensions=extensions,
)

try:
Expand All @@ -189,6 +208,7 @@ def interpret_variation(
baseline_copies: int | None = None,
copy_change: models.CopyChange | None = None,
alt: str | None = None,
extensions: list[entity_models.Extension] | None = None,
) -> dict:
"""Interpret variation using HGVSDupDelMode

Expand All @@ -201,6 +221,7 @@ def interpret_variation(
:param baseline_copies: Baseline copies number
:param copy_change: The copy change
:param alt: The alteration
:param extensions: List of extensions for variation
:return: VRS Variation object
"""
variation = None
Expand All @@ -212,21 +233,24 @@ def interpret_variation(
baseline_copies=baseline_copies,
copy_change=copy_change,
alt=alt,
extensions=extensions,
)
elif hgvs_dup_del_mode == HGVSDupDelModeOption.ALLELE:
variation = self.allele_mode(location, alt_type, vrs_seq_loc_ac, alt)
variation = self.allele_mode(
location, alt_type, vrs_seq_loc_ac, alt, extensions=extensions
)
elif hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_COUNT:
if baseline_copies:
variation = self.copy_number_count_mode(
alt_type, location, baseline_copies
alt_type, location, baseline_copies, extensions=extensions
)
else:
errors.append(
"`baseline_copies` must be provided for Copy Number Count Variation"
)
elif hgvs_dup_del_mode == HGVSDupDelModeOption.COPY_NUMBER_CHANGE:
variation = self.copy_number_change_mode(
alt_type, location, copy_change=copy_change
alt_type, location, copy_change=copy_change, extensions=extensions
)

if not variation:
Expand Down
27 changes: 21 additions & 6 deletions src/variation/translators/genomic_del_dup_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import NamedTuple

from cool_seq_tool.schemas import ResidueMode
from cool_seq_tool.schemas import ManeGeneData, ResidueMode
from ga4gh.vrs import models
from pydantic import StrictInt, StrictStr, ValidationError

Expand Down Expand Up @@ -30,6 +30,7 @@ class DelDupData(NamedTuple):
ac: StrictStr
pos0: StrictInt
pos1: StrictInt | None
mane_genes: list[ManeGeneData] | None


class GenomicDelDupTranslator(Translator):
Expand All @@ -51,32 +52,34 @@ async def get_grch38_data(
:param ac: Genomic RefSeq accession
:return: Data on GRCh38 assembly if successful liftover. Else, `None`
"""
pos0, pos1, new_ac = None, None, None
pos0, pos1, new_ac, mane_genes = None, None, None, None

if classification.pos1:
# `g_to_grch38` returns inter-residue coordinates, but we want residue
# here, so we increment start by 1
grch38_pos = await self.mane_transcript.g_to_grch38(
ac, classification.pos0 + 1, classification.pos1
ac, classification.pos0 + 1, classification.pos1, get_mane_genes=True
)
if grch38_pos:
pos0, pos1 = grch38_pos.pos
new_ac = grch38_pos.ac
mane_genes = grch38_pos.mane_genes
else:
# `g_to_grch38` returns inter-residue coordinates, but we want residue
# here, so we increment start by 1
grch38_pos = await self.mane_transcript.g_to_grch38(
ac, classification.pos0 + 1, classification.pos0
ac, classification.pos0 + 1, classification.pos0, get_mane_genes=True
)
if grch38_pos:
pos0, _ = grch38_pos.pos
new_ac = grch38_pos.ac
mane_genes = grch38_pos.mane_genes

if not new_ac:
errors.append(f"Unable to find a GRCh38 accession for: {ac}")

try:
data = DelDupData(ac=new_ac, pos0=pos0, pos1=pos1)
data = DelDupData(ac=new_ac, pos0=pos0, pos1=pos1, mane_genes=mane_genes)
except ValidationError:
data = None
return data
Expand Down Expand Up @@ -114,6 +117,7 @@ async def translate(
vrs_variation = None
vrs_seq_loc_ac_status = VrsSeqLocAcStatus.NA
residue_mode = ResidueMode.RESIDUE
mane_genes = None

if do_liftover or endpoint_name == Endpoint.NORMALIZE:
errors = []
Expand All @@ -131,6 +135,7 @@ async def translate(
warnings += errors
return None

mane_genes = grch38_data.mane_genes
pos0 = grch38_data.pos0 - 1
if grch38_data.pos1 is None:
pos1 = grch38_data.pos0
Expand Down Expand Up @@ -158,7 +163,15 @@ async def translate(
pos0 = classification.pos0
pos1 = classification.pos1
ac = validation_result.accession
grch38_data = DelDupData(ac=ac, pos0=pos0, pos1=pos1)
# `g_to_grch38` returns inter-residue coordinates, but we want residue
# here, so we increment start by 1
_grch38_data = await self.mane_transcript.g_to_grch38(
ac, pos0 + 1, pos0, get_mane_genes=True
)
mane_genes = _grch38_data.mane_genes
grch38_data = DelDupData(
ac=ac, pos0=pos0, pos1=pos1, mane_genes=mane_genes
)

assembly = ClinVarAssembly.GRCH38
else:
Expand All @@ -184,6 +197,7 @@ async def translate(
ac = grch38_data.ac
pos0 = grch38_data.pos0 - 1
pos1 = grch38_data.pos0 if grch38_data.pos1 is None else grch38_data.pos1
mane_genes = grch38_data.mane_genes
residue_mode = ResidueMode.INTER_RESIDUE
self.is_valid(classification.gene_token, ac, pos0, pos1, errors)

Expand Down Expand Up @@ -246,6 +260,7 @@ async def translate(
baseline_copies=baseline_copies,
copy_change=copy_change,
alt=alt,
extensions=self._mane_gene_extensions(mane_genes),
)
elif endpoint_name == Endpoint.HGVS_TO_COPY_NUMBER_COUNT:
vrs_variation = self.hgvs_dup_del_mode.copy_number_count_mode(
Expand Down
3 changes: 3 additions & 0 deletions src/variation/translators/genomic_delins.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,11 @@ async def translate(
vrs_seq_loc_ac = mane.refseq
coord_type = AnnotationLayer.CDNA
validation_result.classification = classification
extensions = None
else:
vrs_seq_loc_ac = mane.ac
coord_type = AnnotationLayer.GENOMIC
extensions = self._mane_gene_extensions(mane.mane_genes)

vrs_allele = self.vrs.to_vrs_allele(
vrs_seq_loc_ac,
Expand All @@ -102,6 +104,7 @@ async def translate(
alt=classification.inserted_sequence,
cds_start=mane.coding_start_site if gene else None,
residue_mode=ResidueMode.INTER_RESIDUE,
extensions=extensions,
)
else:
vrs_seq_loc_ac = validation_result.accession
Expand Down
3 changes: 3 additions & 0 deletions src/variation/translators/genomic_insertion.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,11 @@ async def translate(
vrs_seq_loc_ac = mane.refseq
coord_type = AnnotationLayer.CDNA
validation_result.classification = classification
extensions = None
else:
vrs_seq_loc_ac = mane.ac
coord_type = AnnotationLayer.GENOMIC
extensions = self._mane_gene_extensions(mane.mane_genes)

vrs_allele = self.vrs.to_vrs_allele(
vrs_seq_loc_ac,
Expand All @@ -103,6 +105,7 @@ async def translate(
alt=classification.inserted_sequence,
cds_start=mane.coding_start_site if gene else None,
residue_mode=ResidueMode.INTER_RESIDUE,
extensions=extensions,
)
else:
vrs_seq_loc_ac = validation_result.accession
Expand Down
3 changes: 3 additions & 0 deletions src/variation/translators/genomic_reference_agree.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,11 @@ async def translate(
vrs_seq_loc_ac = mane.refseq
coord_type = AnnotationLayer.CDNA
validation_result.classification = classification
extensions = None
else:
vrs_seq_loc_ac = mane.ac
coord_type = AnnotationLayer.GENOMIC
extensions = self._mane_gene_extensions(mane.mane_genes)

vrs_allele = self.vrs.to_vrs_allele(
vrs_seq_loc_ac,
Expand All @@ -100,6 +102,7 @@ async def translate(
warnings,
cds_start=mane.coding_start_site if gene else None,
residue_mode=ResidueMode.INTER_RESIDUE,
extensions=extensions,
)
else:
vrs_seq_loc_ac = validation_result.accession
Expand Down
3 changes: 3 additions & 0 deletions src/variation/translators/genomic_substitution.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,11 @@ async def translate(
vrs_seq_loc_ac = mane.refseq
coord_type = AnnotationLayer.CDNA
validation_result.classification = classification
extensions = None
else:
vrs_seq_loc_ac = mane.ac
coord_type = AnnotationLayer.GENOMIC
extensions = self._mane_gene_extensions(mane.mane_genes)

vrs_allele = self.vrs.to_vrs_allele(
vrs_seq_loc_ac,
Expand All @@ -123,6 +125,7 @@ async def translate(
alt=classification.alt,
cds_start=mane.coding_start_site if gene else None,
residue_mode=ResidueMode.INTER_RESIDUE,
extensions=extensions,
)
else:
vrs_seq_loc_ac = validation_result.accession
Expand Down
25 changes: 24 additions & 1 deletion src/variation/translators/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

from cool_seq_tool.handlers import SeqRepoAccess
from cool_seq_tool.mappers import ManeTranscript
from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
from cool_seq_tool.schemas import AnnotationLayer, ManeGeneData, ResidueMode
from cool_seq_tool.sources import UtaDatabase
from ga4gh.core import entity_models
from ga4gh.vrs import models

from variation.hgvs_dup_del_mode import HGVSDupDelMode
Expand Down Expand Up @@ -253,3 +254,25 @@ async def get_p_or_cdna_translation_result(
)

return None

@staticmethod
def _mane_gene_extensions(
    mane_genes: list[ManeGeneData] | None,
) -> list[entity_models.Extension] | None:
    """Transform MANE gene data into a list of extensions.

    This is only used in Genomic translators.

    :param mane_genes: Optional list of MANE gene data. May be ``None`` (or
        empty) when no MANE gene data was found for the genomic query.
    :return: Single-element list holding one extension named ``mane_genes``
        whose value is ``mane_genes``, if any MANE gene data was provided.
        Otherwise, ``None``
    """
    # ``None`` and an empty list are treated alike: no extension is produced,
    # so the resulting variation carries no ``extensions`` field at all.
    if not mane_genes:
        return None

    return [entity_models.Extension(name="mane_genes", value=mane_genes)]
Loading