-
Notifications
You must be signed in to change notification settings - Fork 1
/
soundex.go
72 lines (60 loc) · 1.4 KB
/
soundex.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
package disfun
import "strings"
// Soundex computes the Soundex phonetic representation of the input string. It
// attempts to encode homophones with the same characters. More information can
// be found at http://en.wikipedia.org/wiki/Soundex.
func Soundex(s1 string) string {
if len(s1) == 0 {
return ""
}
// we should work with all uppercase
s1 = strings.ToUpper(s1)
input := NewString(s1)
// the encoded value
enc := input.Slice(0, 1)
c := ""
prev := ""
hw := false
for i := 0; i < input.RuneCount(); i++ {
switch rune(input.At(i)) {
case 'B', 'F', 'P', 'V':
c = "1"
case 'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z':
c = "2"
case 'D', 'T':
c = "3"
case 'L':
c = "4"
case 'M', 'N':
c = "5"
case 'R':
c = "6"
case 'H', 'W':
hw = true
default:
c = ""
}
// don't encode the first position, but we need its code value
// to prevent repeats
if c != "" && c != prev && i > 0 {
// if the next encoded digit is different, we can add it right away
// if it is the same, though, it must not have been preceded
// by an 'H' or a 'W'
if enc[len(enc)-1:len(enc)] != c || !hw {
enc = enc + c
}
// we're done when we reach four encoded characters
if len(enc) == 4 {
break
}
}
prev = c
hw = false
}
// if we've fallen short of 4 "real" encoded characters,
// it gets padded with zeros
for len(enc) < 4 {
enc = enc + "0"
}
return enc
}