Skip to content

Commit

Permalink
Setup library and basic example
Browse files Browse the repository at this point in the history
  • Loading branch information
Gegy committed Nov 28, 2023
1 parent d771c28 commit 8e92a14
Show file tree
Hide file tree
Showing 6 changed files with 603 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
/build/

/.idea/
/cmake-build-debug/
34 changes: 34 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# CMake 3.15 is the oldest release this project is configured against.
cmake_minimum_required(VERSION 3.15)

# Both languages are needed: the library is built from C++ and the example from C.
project(translatador LANGUAGES C CXX)

# Compile every C++ target as C++17 and refuse to fall back to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_STANDARD 17)

# Make sure that we have pulled submodules
# (https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html).
#
# This only applies when Git is available and the source tree is a Git checkout.
# The update runs while CMake configures the project and can be disabled with
# -DGIT_SUBMODULE=OFF.
find_package(Git QUIET)
if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
  option(GIT_SUBMODULE "Check submodules during build" ON)
  if(GIT_SUBMODULE)
    message(STATUS "Submodule update")
    execute_process(
      COMMAND "${GIT_EXECUTABLE}" submodule update --init --recursive
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
      RESULT_VARIABLE GIT_SUBMOD_RESULT
    )
    # Any non-zero exit status (or a failure to launch git) aborts configuration.
    if(NOT GIT_SUBMOD_RESULT EQUAL 0)
      message(FATAL_ERROR "git submodule update --init --recursive failed with ${GIT_SUBMOD_RESULT}, please checkout submodules")
    endif()
  endif()
endif()

# WASM-compatible Bergamot allows us to ship a library without dependency on MKL/CUDA.
# NOTE(review): this is a normal (non-cache) variable; whether the subproject's own
# option() of the same name honours it depends on policy CMP0077 as set inside
# bergamot-translator - confirm the setting actually takes effect on a fresh configure.
set(USE_WASM_COMPATIBLE_SOURCE ON)

add_subdirectory(extern/bergamot-translator)

# NOTE(review): GIT_SUBMODULE is the name of the cache option declared earlier in this
# file; setting it as a *target property* does not change that option. If the intent
# was to stop the subproject from re-running its own submodule update, this line
# presumably has no effect - confirm.
set_property(TARGET bergamot-translator PROPERTY GIT_SUBMODULE OFF)

# The translatador static library, built from a single C++ source file.
add_library(translatador STATIC
  "src/translatador.cpp"
)

# The public header directory is propagated to every consumer of the library.
target_include_directories(translatador
  PUBLIC
    "${PROJECT_SOURCE_DIR}/include"
)

# Bergamot is linked privately: it is not part of translatador's usage requirements.
target_link_libraries(translatador
  PRIVATE
    bergamot-translator
)

# Example programs that consume the library.
add_subdirectory(examples)
2 changes: 2 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Minimal C program demonstrating the translatador API.
add_executable(translatador-example
  "simple.c"
)
target_link_libraries(translatador-example
  PRIVATE
    translatador
)
71 changes: 71 additions & 0 deletions examples/simple.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#include <translatador.h>
#include <stdio.h>

/**
 * Reads the entire contents of a file into a newly allocated buffer.
 *
 * On success, *result points to a malloc()-allocated buffer holding the file's bytes (the
 * caller must free() it) and the number of bytes read is returned. The buffer is not
 * NUL-terminated.
 *
 * On failure (the file cannot be opened, measured or fully read, memory cannot be
 * allocated, or the size does not fit in an int) a message is written to stderr,
 * *result is set to NULL and -1 is returned.
 *
 * \param file_name path of the file to read
 * \param result receives the allocated buffer, or NULL on failure
 * \return the size of the file in bytes, or -1 on failure
 */
int read_file(const char* file_name, char** result) {
    char* buffer = NULL;
    long size = -1;

    *result = NULL;

    FILE* file = fopen(file_name, "rb");
    if (file) {
        if (fseek(file, 0, SEEK_END) == 0) {
            size = ftell(file);
        }

        /* Reject sizes ftell could not report, or that would be truncated by the int return type. */
        if (size >= 0 && (long) (int) size == size && fseek(file, 0, SEEK_SET) == 0) {
            /* malloc(0) may legally return NULL, so always request at least one byte. */
            buffer = malloc(size > 0 ? (size_t) size : 1);
        }

        /* A short read means the buffer would be partially uninitialised: treat it as a failure. */
        if (buffer && size > 0 && fread(buffer, 1, (size_t) size, file) != (size_t) size) {
            free(buffer);
            buffer = NULL;
        }

        fclose(file);
    }

    if (!buffer) {
        fprintf(stderr, "Failed to read file: %s\n", file_name);
        return -1;
    }

    *result = buffer;
    return (int) size;
}

int main(int argc, char* argv[]) {
if (argc != 4) {
printf("Usage: <model file> <short-list file> <vocab file>");
return 0;
}

const char* model_file = argv[1];
const char* short_list_file = argv[2];
const char* vocab_file = argv[3];

char* model_buffer;
int model_size = read_file(model_file, &model_buffer);
char* short_list_buffer;
int short_list_size = read_file(short_list_file, &short_list_buffer);
char* vocab_buffer;
int vocab_size = read_file(vocab_file, &vocab_buffer);

const char* yaml_config =
"beam-size: 1\n"
"normalize: 1.0\n"
"word-penalty: 0\n"
"max-length-break: 128\n"
"mini-batch-words: 1024\n"
"workspace: 128\n"
"max-length-factor: 2.0\n"
"skip-cost: true\n"
"gemm-precision: int8shiftAlphaAll\n"
"alignment: soft\n";

const TrlModel* model = trl_create_model(yaml_config, model_buffer, model_size, vocab_buffer, vocab_size, 0, 0, short_list_buffer, short_list_size);
if (!model) {
printf("Failed to create model: %s", trl_get_last_error());
return 0;
}
free(model_buffer);
free(vocab_buffer);
free(short_list_buffer);

const TrlString* source = trl_create_string("Hello from the C programming language!");
const TrlString* target;

if (trl_translate(model, &source, &target, 1)) {
printf("Failed to translate text: %s", trl_get_last_error());
return 0;
}

printf("%s -> %s\n", trl_get_string_utf(source), trl_get_string_utf(target));

trl_destroy_string(source);
trl_destroy_string(target);
trl_destroy_model(model);

return 0;
}
107 changes: 107 additions & 0 deletions include/translatador.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#ifndef TRANSLATADOR_H
#define TRANSLATADOR_H

#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

/** Status code returned by trl_translate when translation succeeded. */
#define TRL_OK 0
/** Status code returned by trl_translate when translation failed. */
#define TRL_ERROR 1

/**
 * \brief A model that supports translation between a source and target language.
 *
 * The type is only forward-declared in this header; instances are obtained from
 * \ref trl_create_model or \ref trl_clone_model and handled through pointers.
 *
 * A single instance should not be used from multiple threads.
 */
typedef struct TrlModel TrlModel;

/**
 * \brief A wrapper around a string that can be, or has been, translated.
 *
 * The type is only forward-declared in this header; instances are handled through pointers.
 *
 * A wrapped string may carry additional metadata from translation, so strings should be kept
 * in this form for as long as possible if they need to be passed through multiple models
 * via a pivot language.
 */
typedef struct TrlString TrlString;

/**
 * \brief Returns a string describing the last error to occur. If none has occurred since the
 * library was initialized, or since this function was last called, null will be returned.
 *
 * The caller is expected to free() this memory after use.
 *
 * \return an error string, or null
 */
/* `(void)` rather than `()`: in C an empty list declares a function with unspecified parameters. */
char* trl_get_last_error(void);

/**
 * \brief Loads a translation model from in-memory binaries and an optional configuration.
 *
 * If the given data is malformed, null is returned and an error message should be
 * accessible through \ref trl_get_last_error.
 *
 * None of the passed memory is owned by the library after this call: the caller remains
 * responsible for freeing it once it is no longer required.
 *
 * The returned model should be released with \ref trl_destroy_model once it is no longer needed.
 *
 * \param yaml_config optional Bergamot YAML configuration used to load this model, or null to use
 *        defaults (<https://github.com/mozilla/firefox-translations-models/blob/main/evals/translators/bergamot.config.yml>)
 * \param model model binary to load
 * \param model_size size of the model binary
 * \param source_vocab vocabulary of the source language to load
 * \param source_vocab_size size of the source vocabulary
 * \param target_vocab optional vocabulary of the target language to load, or null to use a
 *        vocabulary shared between source and target
 * \param target_vocab_size size of the target vocabulary, or 0 if shared
 * \param short_list optional short list to load
 * \param short_list_size size of the short list, or 0 if unused
 * \return the loaded model, or null if the model failed to load
 */
const TrlModel* trl_create_model(
    const char* yaml_config,
    const char* model,
    size_t model_size,
    const char* source_vocab,
    size_t source_vocab_size,
    const char* target_vocab,
    size_t target_vocab_size,
    const char* short_list,
    size_t short_list_size
);

/**
 * \brief Takes a copy of the given translation model.
 *
 * As \ref TrlModel is not thread-safe, a copy might be made so that it can be used from
 * another thread.
 *
 * NOTE(review): presumably the copy must be released with \ref trl_destroy_model, and null
 * may be returned on failure - neither is stated here; confirm against the implementation.
 *
 * \param model the model to clone
 * \return a new model instance
 */
const TrlModel* trl_clone_model(const TrlModel* model);

/**
 * \brief Tears down and frees the memory held by the given \ref TrlModel.
 *
 * \param model the model to destroy
 */
void trl_destroy_model(const TrlModel* model);

/**
 * \brief Wraps the given plain string for use in translation. The string is copied.
 *
 * \param utf plain string to wrap
 * \return a new \ref TrlString that can be used for translation
 */
const TrlString* trl_create_string(const char* utf);

/**
 * \brief Unwraps the plain string held by the given \ref TrlString.
 *
 * \param string the string to unwrap
 * \return a reference to the plain string held by the given \ref TrlString
 */
const char* trl_get_string_utf(const TrlString* string);

/**
 * \brief Tears down and frees the memory held by the given \ref TrlString.
 *
 * \param string the string to destroy
 */
void trl_destroy_string(const TrlString* string);

/**
 * \brief Translates the given source strings into the target language using the given model.
 *
 * If an error occurs, the target will not be modified, and the error message will be
 * accessible through \ref trl_get_last_error.
 *
 * NOTE(review): presumably `source` and `target` are both arrays of `count` entries, and the
 * strings written to `target` are owned by the caller (to be released with
 * \ref trl_destroy_string) - confirm against the implementation.
 *
 * \param model the model to use for translation
 * \param source the source strings to translate
 * \param target a pointer to place translated strings
 * \param count the number of strings to translate
 * \return \ref TRL_OK if translation was successful, or \ref TRL_ERROR if not
 */
int trl_translate(const TrlModel* model, const TrlString* const* source, const TrlString** target, size_t count);

#ifdef __cplusplus
}
#endif

#endif
Loading

0 comments on commit 8e92a14

Please sign in to comment.