Skip to content

Commit

Permalink
Update src
Browse files Browse the repository at this point in the history
  • Loading branch information
FuexFollets committed Jan 31, 2024
1 parent f37a682 commit b4b4e27
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 37 deletions.
32 changes: 0 additions & 32 deletions src/lexocraft/cereal_annoy_index.hpp

This file was deleted.

4 changes: 2 additions & 2 deletions src/lexocraft/llm/text_completion.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,8 @@ namespace lc {
TextCompleter& load(const std::filesystem::path& filepath);

// Special member functions of TextCompleter.
TextCompleter();
// NOTE(review): the copy constructor and copy assignment operator are each
// declared twice below, first as `= delete` and then as `= default`. A class
// cannot redeclare a member like this, so this looks like a rendered diff
// (old line followed by its replacement) — confirm that only the `= default`
// pair exists in the real header.
TextCompleter(const TextCompleter&) = delete;
TextCompleter& operator=(const TextCompleter&) = delete;
TextCompleter(const TextCompleter&) = default;
TextCompleter& operator=(const TextCompleter&) = default;
// Moves are compiler-generated.
TextCompleter(TextCompleter&&) = default;
TextCompleter& operator=(TextCompleter&&) = default;

Expand Down
6 changes: 6 additions & 0 deletions src/lexocraft/llm/vector_database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ namespace lc {
return 1 - (difference.squaredNorm() / (WORD_VECTOR_DIMENSIONS * 4));
}

/// Exposes the components of this word's vector as a raw, read-only float
/// buffer.
///
/// @return The pointer produced by `vector.data()` on this const object.
///         Ownership stays with `vector`; presumably the pointer is only
///         usable while this WordVector and its storage are left untouched —
///         confirm against the declared type of `vector`.
const float* WordVector::data() const {
    const float* const first_component = vector.data();
    return first_component;
}

VectorDatabase::VectorDatabase(const std::vector<WordVector>& words) : words(words) {
for (const WordVector& word: words) {
word_map [word.word] = word;
Expand All @@ -66,6 +70,8 @@ namespace lc {
const WordVector new_word {std::string {word}, randomize_vector};
words.push_back(new_word);
word_map [word] = new_word;
const int index = words.size() - 1;
annoy_index->add_item(index, new_word.vector.data());
}

void VectorDatabase::add_word(const WordVector& word, bool replace_existing) {
Expand Down
16 changes: 13 additions & 3 deletions src/lexocraft/llm/vector_database.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
#include <string>
#include <vector>

#include <annoy/annoylib.h>
#include <annoy/kissrandom.h>
#include <cereal/types/memory.hpp>
#include <cereal/types/string.hpp>
#include <cereal/types/vector.hpp>
#include <Eigen/Eigen>
#include <mapbox/eternal.hpp>
#include <tsl/robin_map.h>
#include <annoy/annoylib.h>

#include <lexocraft/cereal_eigen.hpp>

Expand Down Expand Up @@ -43,6 +45,8 @@ namespace lc {
[[nodiscard]] float
similarity(const Eigen::VectorXf& other) const; /* Magnitude squared (0.0 - 1.0) */

/// Returns a read-only pointer to the float components held in `vector`.
/// The result is marked [[nodiscard]]: calling this and ignoring the pointer
/// has no purpose.
[[nodiscard]] const float* data() const;

template <class Archive>
void serialize(Archive& archive) {
archive(word, vector);
Expand All @@ -59,6 +63,10 @@ namespace lc {
*/

// Hash map from a word's text to its WordVector.
using RobinMap_t = tsl::robin_map<std::string, WordVector>;
// Concrete Annoy index type used by this database. The template parameters of
// Annoy::AnnoyIndex are, in order, <S, T, Distance, Random, ThreadedBuildPolicy>:
//   S                   = int   (type of the item id passed to add_item)
//   T                   = float (type of the vector components passed to add_item)
//   Distance            = Annoy::Euclidean
//   Random              = Annoy::Kiss64Random
//   ThreadedBuildPolicy = Annoy::AnnoyIndexSingleThreadedBuildPolicy
using AnnoyIndex_t = Annoy::AnnoyIndex<int, float, Annoy::Euclidean, Annoy::Kiss64Random,
Annoy::AnnoyIndexSingleThreadedBuildPolicy>;

// all default constructors
VectorDatabase() = default;
Expand All @@ -69,8 +77,10 @@ namespace lc {

explicit VectorDatabase(const std::vector<WordVector>& words);

// Every stored WordVector, in insertion order.
// NOTE(review): `words` is declared twice below, without and with `{}`. This
// looks like a rendered diff (old line followed by its replacement) — confirm
// that only one declaration exists in the real header.
std::vector<WordVector> words;
std::vector<WordVector> words {};
// Lookup table from a word's text to its WordVector.
RobinMap_t word_map {};
// Annoy index, created eagerly with WordVector::WORD_VECTOR_DIMENSIONS as its
// only constructor argument (presumably the length of each indexed vector —
// confirm against the Annoy API).
// NOTE(review): this is held through a shared_ptr, so a copied VectorDatabase
// would presumably point at the same index object while `words` and `word_map`
// are duplicated — confirm that sharing is intended.
std::shared_ptr<AnnoyIndex_t> annoy_index {
std::make_shared<AnnoyIndex_t>(WordVector::WORD_VECTOR_DIMENSIONS)};

void add_word(const std::string& word, bool randomize_vector = true);
void add_word(const WordVector& word, bool replace_existing = true);
Expand Down Expand Up @@ -102,7 +112,7 @@ namespace lc {

// Serialization hook: forwards this object's state to `archive`
// (presumably a cereal archive, given the cereal headers this file includes).
//
// NOTE(review): both `archive(words);` and `archive(words, annoy_index);` are
// present, so as written `words` is handed to the archive twice. This looks
// like a rendered diff (old line followed by its replacement) — confirm which
// line is live before relying on the stored layout.
// NOTE(review): archiving `annoy_index` needs the archive to know how to
// handle a std::shared_ptr<AnnoyIndex_t>; confirm that support is provided
// somewhere in the project.
template <class Archive>
void serialize(Archive& archive) {
archive(words);
archive(words, annoy_index);
}
};

Expand Down

0 comments on commit b4b4e27

Please sign in to comment.