Skip to content

Commit

Permalink
fix: CLIN-2119 manage column mc with multiple entries
Browse files Browse the repository at this point in the history
  • Loading branch information
meek0 committed Oct 16, 2024
1 parent fbb7c58 commit dc9da62
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ case class Clinvar(rc: RuntimeETLContext) extends SimpleETLP(rc) {
)
.withColumn("clndisdbincl", split(concat_ws("", col("clndisdbincl")), "\\|"))
.withColumn("clndnincl", split(concat_ws("", col("clndnincl")), "\\|"))
.withColumn("mc", split(concat_ws("|", col("mc")), "\\|"))
.withColumn("mc", fusion_udf(col("mc")))
.withColumn("inheritance", inheritance_udf(col("origin")))
.drop("clin_sig_original", "clndn")

Expand Down Expand Up @@ -114,6 +114,10 @@ case class Clinvar(rc: RuntimeETLContext) extends SimpleETLP(rc) {
}
}

/**
 * Flattens an array of pipe-delimited strings into one array of individual tokens.
 *
 * All elements are joined with "|" and the joined string is split on "|" again, so
 * `["SO:0001627|intron_variant", "SO:0001589|frameshift_variant"]` becomes
 * `["SO:0001627", "intron_variant", "SO:0001589", "frameshift_variant"]`.
 *
 * A null input (SQL NULL in the column) yields null rather than raising a
 * NullPointerException inside the UDF.
 */
val fusion_udf: UserDefinedFunction = udf { array: Seq[String] =>
  // Typed as Seq rather than mutable.WrappedArray: Seq is the Scala-side type for an
  // ArrayType column and does not tie the UDF to the Scala 2.12 runtime class.
  // Option(...) maps a null array to None; orNull hands SQL NULL back to Spark.
  Option(array).map(_.mkString("|").split("\\|")).orNull
}

implicit class DataFrameOps(df: DataFrame) {
def withInterpretations: DataFrame = {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ case class NormalizedClinvar(chromosome: String = "2",
af_tgp: Double = 0.01118,
clnvc: String = "single_nucleotide_variant",
clnhgvs: List[String] = List("NC_000002.12:g.69359261T>A"),
mc: List[String] = List("SO:0001627", "intron_variant"),
mc: List[String] = List("SO:0001627", "intron_variant", "SO:0001589", "frameshift_variant"),
af_esp: Double = 0.01415,
clndisdbincl: List[String] = List(""),
conditions: List[String] = List("Congenital myasthenic syndrome 12", "not specified", "not provided"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ case class RawClinvar(contigName: String = "2",
INFO_AF_TGP: Double = 0.01118,
INFO_CLNVC: String = "single_nucleotide_variant",
INFO_CLNHGVS: List[String] = List("NC_000002.12:g.69359261T>A"),
INFO_MC: List[String] = List("SO:0001627|intron_variant"),
INFO_MC: List[String] = List("SO:0001627|intron_variant", "SO:0001589|frameshift_variant"),
INFO_CLNSIGCONF: Option[List[String]] = None,
INFO_AF_ESP: Double = 0.01415,
INFO_CLNDISDBINCL: Option[List[String]] = None,
Expand Down

0 comments on commit dc9da62

Please sign in to comment.