Skip to content

Commit

Permalink
fix!: fix protein ins unknown repeat notation
Browse files Browse the repository at this point in the history
  • Loading branch information
nokara26 committed Sep 25, 2024
1 parent c2e025d commit 3e29a99
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
11 changes: 8 additions & 3 deletions src/clj_hgvs/mutation.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -1745,6 +1745,7 @@
;;;
;;; e.g. Lys23_Leu24insArgSerGln
;;; Arg78_Gly79insX[23]
;;; Cys103_Met104insX[5]

(defrecord ProteinInsertion [ref-start coord-start ref-end coord-end alts]
Mutation
Expand All @@ -1759,7 +1760,7 @@
(coord/format coord-end)
"ins"
(if (every? #(= % "Xaa") alts)
(count alts)
(str "X[" (count alts) "]")
(cond->> alts
(= amino-acid-format :short) (map ->short-amino-acid)))])))
(plain [this]
Expand Down Expand Up @@ -1790,10 +1791,14 @@
[s]
(condp re-matches s
#"([A-Z*]([a-z]{2})?)+" (mapv ->long-amino-acid (re-seq #"[A-Z*](?:[a-z]{2})?" s))
#"\d+" (vec (repeat (intl/parse-long s) "Xaa"))))
#"X\[\d+\]" (-> (re-find #"X\[(\d+)\]" s)
second
intl/parse-long
(repeat "Xaa")
vec)))

(def ^:private protein-insertion-re
#"([A-Z](?:[a-z]{2})?)(\d+)_([A-Z](?:[a-z]{2})?)(\d+)ins([\da-zA-Z*]+)")
#"([A-Z](?:[a-z]{2})?)(\d+)_([A-Z](?:[a-z]{2})?)(\d+)ins([\da-zA-Z*\[\]]+)")

(defn parse-protein-insertion
[s]
Expand Down
2 changes: 1 addition & 1 deletion test/clj_hgvs/mutation_test.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -1327,7 +1327,7 @@
"Leu" (coord/protein-coordinate 24)
["Arg" "Ser" "Ter"]))

(def protein-insertion3s "Arg78_Gly79ins5")
(def protein-insertion3s "Arg78_Gly79insX[5]")
;; Insertion record built from ref "Arg" at protein coordinate 78, ref "Gly" at
;; protein coordinate 79, and five "Xaa" placeholder residues as the alts.
;; NOTE(review): presumably the record counterpart of `protein-insertion3s`
;; (the HGVS string form) -- confirm against the tests that use both.
(def protein-insertion3 (mut/protein-insertion "Arg" (coord/protein-coordinate 78)
"Gly" (coord/protein-coordinate 79)
["Xaa" "Xaa" "Xaa" "Xaa" "Xaa"]))
Expand Down

0 comments on commit 3e29a99

Please sign in to comment.