forked from im-kulikov/hrw
-
Notifications
You must be signed in to change notification settings - Fork 4
/
hrw.go
129 lines (105 loc) · 3.16 KB
/
hrw.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// Package hrw implements Rendezvous hashing.
// https://en.wikipedia.org/wiki/Rendezvous_hashing.
package hrw
import (
"sort"
"github.com/twmb/murmur3"
"golang.org/x/exp/constraints"
)
// Hash hashes the data in a fashion similar to the
// [Hashable] interface. Can be used as a hash function
// required for the HRW hashing algorithm.
func Hash(data []byte) uint64 {
return murmur3.Sum64(data)
}
// Hashable is something that can be hashed.
type Hashable interface{ Hash() uint64 }
// HashableBytes implements Hashable interface over
// raw data. Use [WrapBytes] to instantiate a correct
// byte slice wrapper.
type HashableBytes []byte
func (h HashableBytes) Hash() uint64 {
return murmur3.Sum64(h)
}
// WrapBytes creates [HashableBytes] that implements
// [Hashable] interface over a raw data.
// Can be used for [Sort] and [SortWeighted].
func WrapBytes(b []byte) HashableBytes {
return b
}
// Sort defines and sorts the scores for the provided hashable
// entities against the provided hashable object (in its general
// sense).
// See [Hashable], [HashableBytes] and https://en.wikipedia.org/wiki/Rendezvous_hashing.
func Sort[V, P Hashable](vv []V, object P) {
oHash := object.Hash()
var s sliceToSort[V, uint64]
s.s = vv
s.distances = make([]uint64, len(vv))
for i := range vv {
s.distances[i] = distance(vv[i].Hash(), oHash)
}
sort.Stable(&s)
}
// SortWeighted is the same as [Sort] but allows using weights for
// a slice being sorted. A weight is applied to the corresponding
// element's score in the resulting slice. A weight allows modifying
// the default (equal to any element) probability of an element to
// win HRW sorting.
// Value slice's length and weight slice's length MUST be the same.
// Weights MUST be in [0.0; 1.0] range.
func SortWeighted[V, P Hashable, W constraints.Float](vv []V, weights []W, object P) {
if len(vv) != len(weights) {
return
}
if allSameF(weights) {
Sort(vv, object)
return
}
oHash := object.Hash()
var s sliceToSort[V, W]
s.s = vv
s.distances = make([]W, len(vv))
for i := range vv {
// the distance is a bad characteristic in our case (we sort in ascending order)
// so a bigger weight should lower the distance more
s.distances[i] = W(distance(vv[i].Hash(), oHash)) / weights[i]
}
sort.Stable(&s)
}
type distancesValue interface {
constraints.Unsigned | constraints.Float
}
type sliceToSort[V any, W distancesValue] struct {
s []V
distances []W
}
func (s *sliceToSort[V, _]) Len() int {
return len(s.s)
}
func (s *sliceToSort[V, _]) Less(i, j int) bool {
return s.distances[i] < s.distances[j]
}
func (s *sliceToSort[V, _]) Swap(i, j int) {
s.s[i], s.s[j] = s.s[j], s.s[i]
s.distances[i], s.distances[j] = s.distances[j], s.distances[i]
}
func distance(x uint64, y uint64) uint64 {
acc := x ^ y
// here used mmh3 64 bit finalizer
// https://github.com/aappleby/smhasher/blob/61a0530f28277f2e850bfc39600ce61d02b518de/src/MurmurHash3.cpp#L81
acc ^= acc >> 33
acc = acc * 0xff51afd7ed558ccd
acc ^= acc >> 33
acc = acc * 0xc4ceb9fe1a85ec53
acc ^= acc >> 33
return acc
}
func allSameF[W constraints.Float](fs []W) bool {
for i := range fs {
if fs[i] != fs[0] {
return false
}
}
return true
}