diff --git a/README.md b/README.md index 174b6c6..9e99c4b 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Osita is an implementation of the [Optimal String Alignment distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Optimal_string_alignment_distance) algorithm. It implements the standard version of the algorithm and an extension of it where the substitution cost has been replaced by a function which calculates the keyboard distance between characters using the Euclidean distance -between keys on a QWERTY-keyboard. +between keys on a QWERTY or AZERTY-keyboard. You can also supply your own substitution cost function. ## Installation @@ -26,7 +26,7 @@ import nl.gn0s1s.osita.Osita._ osa("abcde", "abcde") // val res0: Double = 0.0 osa("abcde", "abcd") // val res1: Double = 1.0 -osaWithSubstitutionCost("abc", "agc")(weightedKeyboardSubstitutionCost) // val res2: Double = 1.118033988749895 +osaWithSubstitutionCost("abc", "agc")(qwertySubstitutionCost) // val res2: Double = 1.118033988749895 ``` diff --git a/src/main/scala/nl/gn0s1s/osita/Osita.scala b/src/main/scala/nl/gn0s1s/osita/Osita.scala index acc5123..6dadc11 100644 --- a/src/main/scala/nl/gn0s1s/osita/Osita.scala +++ b/src/main/scala/nl/gn0s1s/osita/Osita.scala @@ -13,11 +13,19 @@ object Osita { .map(_.toCharArray) } + private val azertyKeyboardGrid: Array[Array[Char]] = { + """azertyuiop + |qsdfghjklm + |wxcvbn""".stripMargin + .split('\n') + .map(_.toCharArray) + } + private def simpleSubstitutionCost[A](a: A, b: A): Double = if (a == b) 0.0D else 1.0D - private def findPos(c: Char): (Double, Double) = { - val t = qwertyKeyboardGrid.map(row => row.indexOf(c)) + private def findPos(c: Char, keyboardGrid: Array[Array[Char]]): (Double, Double) = { + val t = keyboardGrid.map(row => row.indexOf(c)) val column = t.max val row = t.indexOf(column) // compensate for the difference between rows on a keyboard @@ -35,7 +43,21 @@ object Osita { def euclideanDistance(p1: (Double, Double), p2: (Double, Double)): Double = scala.math.sqrt(scala.math.pow(p1._1 - p2._1, 2) + scala.math.pow(p1._2 - p2._2, 2)) - euclideanDistance(findPos(a), findPos(b)) + euclideanDistance(findPos(a, qwertyKeyboardGrid), findPos(b, qwertyKeyboardGrid)) + } + + def qwertySubstitutionCost(a: Char, b: Char): Double = { + def euclideanDistance(p1: (Double, Double), p2: (Double, Double)): Double = + scala.math.sqrt(scala.math.pow(p1._1 - p2._1, 2) + scala.math.pow(p1._2 - p2._2, 2)) + + euclideanDistance(findPos(a, qwertyKeyboardGrid), findPos(b, qwertyKeyboardGrid)) + } + + def azertySubstitutionCost(a: Char, b: Char): Double = { + def euclideanDistance(p1: (Double, Double), p2: (Double, Double)): Double = + scala.math.sqrt(scala.math.pow(p1._1 - p2._1, 2) + scala.math.pow(p1._2 - p2._2, 2)) + + euclideanDistance(findPos(a, azertyKeyboardGrid), findPos(b, azertyKeyboardGrid)) } // https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#cite_note-Boytsov-7 diff --git a/src/test/scala/nl/gn0s1s/osita/OsitaSuite.scala b/src/test/scala/nl/gn0s1s/osita/OsitaSuite.scala index c16e37f..e85f472 100644 --- a/src/test/scala/nl/gn0s1s/osita/OsitaSuite.scala +++ b/src/test/scala/nl/gn0s1s/osita/OsitaSuite.scala @@ -45,4 +45,9 @@ class OsitaSuite extends ScalaCheckSuite { property("osaWithKeyboard counts transpositions correctly") { osaWithSubstitutionCost("abc", "acb")(weightedKeyboardSubstitutionCost) == 1 } + + property("osaWithKeyboard with azerty works") { + assertEquals(osaWithSubstitutionCost("mop", "nop")(qwertySubstitutionCost), 1D) + assertEquals(osaWithSubstitutionCost("mop", "nop")(azertySubstitutionCost), 2D) + } }