-
Notifications
You must be signed in to change notification settings - Fork 1
/
damerau_levenshtein.go
107 lines (93 loc) · 2.78 KB
/
damerau_levenshtein.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
package disfun
// DamerauLevenshtein computes the Damerau-Levenshtein distance between two strings. The returned value - distance - is the number of insertions, deletions, substitutions, and transpositions it takes to transform one string (s1) into another (s2). Each step in the transformation "costs" one distance point. It is similar to the Optimal String Alignment, algorithm, but is more complex because it allows multiple edits on substrings.
//
// References:
// This implementation is based off of the one found on Wikipedia at
// http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Distance_with_adjacent_transpositions
// as well as KevinStern's Java implementation found at
// https://github.com/KevinStern/software-and-algorithms.
func DamerauLevenshtein(s1, s2 string) (distance int) {
// index by code point, not byte
r1 := []rune(s1)
r2 := []rune(s2)
// the maximum possible distance
inf := len(r1) + len(r2)
// if one string is blank, we needs insertions
// for all characters in the other one
if len(r1) == 0 {
return len(r2)
}
if len(r2) == 0 {
return len(r1)
}
// construct the edit-tracking matrix
matrix := make([][]int, len(r1))
for i := range matrix {
matrix[i] = make([]int, len(r2))
}
// seen characters
seenRunes := make(map[rune]int)
if r1[0] != r2[0] {
matrix[0][0] = 1
}
seenRunes[r1[0]] = 0
for i := 1; i < len(r1); i++ {
deleteDist := matrix[i-1][0] + 1
insertDist := (i+1)*1 + 1
var matchDist int
if r1[i] == r2[0] {
matchDist = i
} else {
matchDist = i + 1
}
matrix[i][0] = minInt32(minInt32(deleteDist, insertDist), matchDist)
}
for j := 1; j < len(r2); j++ {
deleteDist := (j + 1) * 2
insertDist := matrix[0][j-1] + 1
var matchDist int
if r1[0] == r2[j] {
matchDist = j
} else {
matchDist = j + 1
}
matrix[0][j] = minInt32(minInt32(deleteDist, insertDist), matchDist)
}
for i := 1; i < len(r1); i++ {
var maxSrcMatchIndex int
if r1[i] == r2[0] {
maxSrcMatchIndex = 0
} else {
maxSrcMatchIndex = -1
}
for j := 1; j < len(r2); j++ {
swapIndex, ok := seenRunes[r2[j]]
jSwap := maxSrcMatchIndex
deleteDist := matrix[i-1][j] + 1
insertDist := matrix[i][j-1] + 1
matchDist := matrix[i-1][j-1]
if r1[i] != r2[j] {
matchDist++
} else {
maxSrcMatchIndex = j
}
// for transpositions
var swapDist int
if ok && jSwap != -1 {
iSwap := swapIndex
var preSwapCost int
if iSwap == 0 && jSwap == 0 {
preSwapCost = 0
} else {
preSwapCost = matrix[maxInt32(0, iSwap-1)][maxInt32(0, jSwap-1)]
}
swapDist = i + j + preSwapCost - iSwap - jSwap - 1
} else {
swapDist = inf
}
matrix[i][j] = minInt32(minInt32(minInt32(deleteDist, insertDist), matchDist), swapDist)
}
seenRunes[r1[i]] = i
}
return matrix[len(r1)-1][len(r2)-1]
}