-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
603 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
/build/ | ||
|
||
/.idea/ | ||
/cmake-build-debug/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
cmake_minimum_required(VERSION 3.15)

# The library itself is C++; the example program under examples/ is plain C.
project("translatador" LANGUAGES C CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Make sure that we have pulled submodules (https://cliutils.gitlab.io/modern-cmake/chapters/projects/submodule.html)
find_package(Git QUIET)
if (GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
    option(GIT_SUBMODULE "Check submodules during build" ON)
    if (GIT_SUBMODULE)
        message(STATUS "Submodule update")
        execute_process(
            COMMAND "${GIT_EXECUTABLE}" submodule update --init --recursive
            WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
            RESULT_VARIABLE GIT_SUBMOD_RESULT)
        if (NOT GIT_SUBMOD_RESULT EQUAL "0")
            message(FATAL_ERROR "git submodule update --init --recursive failed with ${GIT_SUBMOD_RESULT}, please checkout submodules")
        endif ()
    endif ()
endif ()

# Subprojects that declare an older cmake_minimum_required() leave policy CMP0077 unset, in which
# case option() may discard a normal variable of the same name on the first configure.
# Defaulting the policy to NEW makes the plain set() overrides below take effect reliably.
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

# WASM-compatible Bergamot allows us to ship a library without dependency on MKL/CUDA
set(USE_WASM_COMPATIBLE_SOURCE ON)

# The update above is already --recursive, so the subproject does not need to repeat it.
# This has to be a variable set *before* add_subdirectory(); a target property named
# GIT_SUBMODULE is never read by anything.
# NOTE(review): presumes bergamot-translator guards its own submodule update with a
# GIT_SUBMODULE option - confirm against extern/bergamot-translator/CMakeLists.txt.
set(GIT_SUBMODULE OFF)

add_subdirectory(extern/bergamot-translator)

# Static C wrapper library around Bergamot; consumers only need the public include directory.
add_library(translatador STATIC "src/translatador.cpp")
target_include_directories(translatador PUBLIC "${PROJECT_SOURCE_DIR}/include")
target_link_libraries(translatador PRIVATE bergamot-translator)

add_subdirectory(examples)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Minimal C program demonstrating the translatador C API.
add_executable(translatador-example
    "simple.c"
)
target_link_libraries(translatador-example
    PRIVATE
        translatador
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
#include <translatador.h>

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
|
||
/**
 * Reads the entire contents of a file into a newly allocated buffer.
 *
 * \param file_name path of the file to read (opened in binary mode)
 * \param result receives the allocated buffer on success, or NULL on failure;
 *               the caller owns the buffer and must free() it
 * \return the number of bytes read, or -1 if the file could not be opened,
 *         measured, allocated for or read, or if it is too large for an int
 */
int read_file(const char* file_name, char** result) {
    *result = NULL;

    FILE* file = fopen(file_name, "rb");
    if (!file) {
        return -1;
    }

    /* Determine the file length by seeking to the end; ftell reports a long, not an int. */
    long length = -1;
    if (fseek(file, 0, SEEK_END) == 0) {
        length = ftell(file);
    }
    if (length < 0 || length > INT_MAX || fseek(file, 0, SEEK_SET) != 0) {
        fclose(file);
        return -1;
    }

    /* Always request at least one byte so an empty file still yields a valid, freeable buffer. */
    char* buffer = (char*) malloc(length > 0 ? (size_t) length : 1);
    if (!buffer) {
        fclose(file);
        return -1;
    }

    /* A short read means the file changed or an I/O error occurred. */
    if (length > 0 && fread(buffer, 1, (size_t) length, file) != (size_t) length) {
        free(buffer);
        fclose(file);
        return -1;
    }

    fclose(file);
    *result = buffer;
    return (int) length;
}
|
||
int main(int argc, char* argv[]) { | ||
if (argc != 4) { | ||
printf("Usage: <model file> <short-list file> <vocab file>"); | ||
return 0; | ||
} | ||
|
||
const char* model_file = argv[1]; | ||
const char* short_list_file = argv[2]; | ||
const char* vocab_file = argv[3]; | ||
|
||
char* model_buffer; | ||
int model_size = read_file(model_file, &model_buffer); | ||
char* short_list_buffer; | ||
int short_list_size = read_file(short_list_file, &short_list_buffer); | ||
char* vocab_buffer; | ||
int vocab_size = read_file(vocab_file, &vocab_buffer); | ||
|
||
const char* yaml_config = | ||
"beam-size: 1\n" | ||
"normalize: 1.0\n" | ||
"word-penalty: 0\n" | ||
"max-length-break: 128\n" | ||
"mini-batch-words: 1024\n" | ||
"workspace: 128\n" | ||
"max-length-factor: 2.0\n" | ||
"skip-cost: true\n" | ||
"gemm-precision: int8shiftAlphaAll\n" | ||
"alignment: soft\n"; | ||
|
||
const TrlModel* model = trl_create_model(yaml_config, model_buffer, model_size, vocab_buffer, vocab_size, 0, 0, short_list_buffer, short_list_size); | ||
if (!model) { | ||
printf("Failed to create model: %s", trl_get_last_error()); | ||
return 0; | ||
} | ||
free(model_buffer); | ||
free(vocab_buffer); | ||
free(short_list_buffer); | ||
|
||
const TrlString* source = trl_create_string("Hello from the C programming language!"); | ||
const TrlString* target; | ||
|
||
if (trl_translate(model, &source, &target, 1)) { | ||
printf("Failed to translate text: %s", trl_get_last_error()); | ||
return 0; | ||
} | ||
|
||
printf("%s -> %s\n", trl_get_string_utf(source), trl_get_string_utf(target)); | ||
|
||
trl_destroy_string(source); | ||
trl_destroy_string(target); | ||
trl_destroy_model(model); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
#ifndef TRANSLATADOR_H
#define TRANSLATADOR_H

#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

/** \brief Status code returned when an operation succeeds. */
#define TRL_OK 0
/** \brief Status code returned when an operation fails; see \ref trl_get_last_error. */
#define TRL_ERROR 1

/**
 * \brief A model that supports translation between a source and target language.
 * Should not be used from multiple threads.
 */
typedef struct TrlModel TrlModel;

/**
 * \brief A wrapper around a string that can be or has been translated.
 * May contain additional metadata from translation, so strings should be kept in this form as long as possible if they
 * need to be passed through multiple models through a pivot language.
 */
typedef struct TrlString TrlString;

/**
 * \brief Returns a string describing the last error to occur. If none has occurred since the library was initialized,
 * or since this function was last called, null will be returned.
 *
 * The caller is expected to free() this memory after use.
 *
 * \return an error string, or null
 */
char* trl_get_last_error(void);

/**
 * \brief Loads a translation model from the given binaries and configurations.
 * If the given data is malformed, null will be returned, and an error message should be accessible through
 * \ref trl_get_last_error.
 *
 * This function does not take ownership of any of the passed memory, and this should be freed by the caller when no
 * longer required.
 *
 * \ref trl_destroy_model should be used once the model is no longer needed.
 *
 * \param yaml_config optional Bergamot YAML configuration to be used to load this model, or null to use defaults (<https://github.com/mozilla/firefox-translations-models/blob/main/evals/translators/bergamot.config.yml>)
 * \param model model binary to load
 * \param model_size size of the model binary
 * \param source_vocab vocabulary of the source language to load
 * \param source_vocab_size size of the source vocabulary
 * \param target_vocab optional vocabulary of the target language to load, or null to use a shared vocabulary between source and target
 * \param target_vocab_size size of the target vocabulary, or 0 if shared
 * \param short_list optional short list to load
 * \param short_list_size size of the short list, or 0 if unused
 * \return the loaded model, or null if the model failed to load
 */
const TrlModel* trl_create_model(
        const char* yaml_config,
        const char* model, size_t model_size,
        const char* source_vocab, size_t source_vocab_size,
        const char* target_vocab, size_t target_vocab_size,
        const char* short_list, size_t short_list_size);

/**
 * \brief Takes a copy of the given translation model. As \ref TrlModel is not thread-safe, the copy might be used
 * from another thread.
 *
 * \param model the model to clone
 * \return a new model instance
 */
const TrlModel* trl_clone_model(const TrlModel* model);

/**
 * \brief Tears down and frees the memory held by the given \ref TrlModel.
 * \param model the model to destroy
 */
void trl_destroy_model(const TrlModel* model);

/**
 * \brief Wraps the given string by copying for use in translation.
 * \param utf plain string to wrap
 * \return a new \ref TrlString that can be used for translation
 */
const TrlString* trl_create_string(const char* utf);

/**
 * \brief Unwraps the plain string held by the given \ref TrlString.
 * \param string the string to unwrap
 * \return a reference to the plain string held by the given \ref TrlString
 */
const char* trl_get_string_utf(const TrlString* string);

/**
 * \brief Tears down and frees the memory held by the given \ref TrlString.
 * \param string the string to destroy
 */
void trl_destroy_string(const TrlString* string);

/**
 * \brief Translates the given source strings into the target language using the given model.
 * If an error occurs, the target will not be modified, and the error message will be accessible through
 * \ref trl_get_last_error.
 *
 * \param model the model to use for translation
 * \param source the source strings to translate
 * \param target a pointer to place translated strings
 * \param count the number of strings to translate
 * \return \ref TRL_OK if translation was successful, or \ref TRL_ERROR if not
 */
int trl_translate(const TrlModel* model, const TrlString* const* source, const TrlString** target, size_t count);

#ifdef __cplusplus
}
#endif

#endif
Oops, something went wrong.