-
Notifications
You must be signed in to change notification settings - Fork 0
/
weighted_tfidf.go
93 lines (80 loc) · 2.97 KB
/
weighted_tfidf.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
package fragbag
import (
"fmt"
"github.com/TuftsBCB/seq"
"github.com/TuftsBCB/structure"
)
// Compile-time checks that *weightedTfIdf provides the method set of every
// library interface it is expected to satisfy.
var (
	_ WeightedLibrary  = (*weightedTfIdf)(nil)
	_ StructureLibrary = (*weightedTfIdf)(nil)
	_ SequenceLibrary  = (*weightedTfIdf)(nil)
)
// weightedTfIdf wraps any fragment library so that all BOWs are weighted
// according to a simple tf-idf scheme.
//
// A weightedTfIdf declares the methods of both the Structure and Sequence
// library interfaces, but each of those methods type-asserts the wrapped
// library to the corresponding interface. Only the methods matching what the
// wrapped library actually implements will work; the others panic on the
// failed type assertion.
type weightedTfIdf struct {
// Library is the wrapped fragment library; its methods are promoted onto
// weightedTfIdf through embedding.
Library
// FragIDFs holds the inverse document frequencies, indexed by fragment
// number. NewWeightedTfIdf requires exactly one entry per fragment of the
// wrapped library; makeWeightedTfIdf leaves it nil.
FragIDFs []float32
}
// NewWeightedTfIdf wraps any fragment library and stores a list of inverse
// document frequencies for each fragment in the wrapped library.
//
// lib is the library to wrap and idfs holds one inverse document frequency
// per fragment, indexed by fragment number. The idfs slice is stored as
// given (it is not copied).
//
// An error is returned if lib is nil or if the number of weights does not
// equal lib.Size(). On error the returned library is nil.
//
// Note that the returned library has the methods of both the Structure and
// Sequence library interfaces; those methods delegate to the wrapped library
// via a type assertion, so only the ones the wrapped library implements will
// work.
//
// When computing a BOW from this library, the AddWeights method should be
// applied to the regular unweighted BOW. Note that this is done for you if
// you're using the bow sub-package.
func NewWeightedTfIdf(lib Library, idfs []float32) (WeightedLibrary, error) {
	// Calling Size on a nil interface value would panic; since this
	// constructor already reports failures through its error result, reject
	// a nil library the same way.
	if lib == nil {
		return nil, fmt.Errorf("Cannot wrap a nil library with weights.")
	}
	if numFrags := lib.Size(); len(idfs) != numFrags {
		return nil, fmt.Errorf("Cannot wrap library with weights since the "+
			"library has %d fragments but %d weights were given.",
			numFrags, len(idfs))
	}
	return &weightedTfIdf{Library: lib, FragIDFs: idfs}, nil
}
// SubLibrary returns the fragment library wrapped by this weighted library.
func (lib *weightedTfIdf) SubLibrary() Library {
	wrapped := lib.Library
	return wrapped
}
// AddWeights converts a raw fragment frequency into its tf-idf weight by
// multiplying it with the inverse document frequency stored for fragNum.
// The idf values are part of the library's representation, so nothing is
// computed here beyond the product.
//
// fragNum must be a valid index into FragIDFs; otherwise the lookup panics.
func (lib *weightedTfIdf) AddWeights(fragNum int, frequency float32) float32 {
	idf := lib.FragIDFs[fragNum]
	return frequency * idf
}
// Tag returns the tag identifying this kind of fragment library, which is
// always libTagWeightedTfIdf.
func (lib *weightedTfIdf) Tag() string {
	tag := libTagWeightedTfIdf
	return tag
}
// makeWeightedTfIdf builds a weightedTfIdf around an empty sub library
// created by makeEmptySubLibrary from subTags. The weights (FragIDFs) are
// left nil.
//
// At least one sub-tag is required; an error is returned when none is given
// or when makeEmptySubLibrary fails.
func makeWeightedTfIdf(subTags ...string) (Library, error) {
	if len(subTags) < 1 {
		const msg = "The weighted-tfidf fragment library must " +
			"have a sub-tag specified for its sub fragment library."
		return nil, fmt.Errorf(msg)
	}

	inner, err := makeEmptySubLibrary(subTags...)
	if err != nil {
		return nil, err
	}
	wrapper := &weightedTfIdf{Library: inner}
	return wrapper, nil
}
// BestStructureFragment delegates to the wrapped library's
// BestStructureFragment method and returns its result.
//
// The wrapped library is type-asserted to StructureLibrary; if it does not
// implement that interface, the assertion panics.
func (lib *weightedTfIdf) BestStructureFragment(atoms []structure.Coords) int {
	structLib := lib.Library.(StructureLibrary)
	return structLib.BestStructureFragment(atoms)
}
// Atoms delegates to the wrapped library's Atoms method for the given
// fragment number and returns its result.
//
// The wrapped library is type-asserted to StructureLibrary; if it does not
// implement that interface, the assertion panics.
func (lib *weightedTfIdf) Atoms(fragNum int) []structure.Coords {
	structLib := lib.Library.(StructureLibrary)
	return structLib.Atoms(fragNum)
}
// BestSequenceFragment delegates to the wrapped library's
// BestSequenceFragment method and returns its result.
//
// The wrapped library is type-asserted to SequenceLibrary; if it does not
// implement that interface, the assertion panics.
func (lib *weightedTfIdf) BestSequenceFragment(s seq.Sequence) int {
	seqLib := lib.Library.(SequenceLibrary)
	return seqLib.BestSequenceFragment(s)
}
// AlignmentProb delegates to the wrapped library's AlignmentProb method for
// the given fragment number and sequence, and returns its result.
//
// The wrapped library is type-asserted to SequenceLibrary; if it does not
// implement that interface, the assertion panics.
func (lib *weightedTfIdf) AlignmentProb(fragNum int, s seq.Sequence) seq.Prob {
	seqLib := lib.Library.(SequenceLibrary)
	return seqLib.AlignmentProb(fragNum, s)
}