Skip to content

Commit

Permalink
Add bindings for LoRA adapters. Closes juncongmoo#88
Browse files Browse the repository at this point in the history
  • Loading branch information
abetlen committed Apr 18, 2023
1 parent 3f68e95 commit 35abf89
Showing 1 changed file with 19 additions and 1 deletion.
20 changes: 19 additions & 1 deletion llama_cpp/llama_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,9 @@ class llama_context_params(Structure):
# Each constant below wraps its integer value in a ctypes.c_int.
LLAMA_FTYPE_MOSTLY_F16 = ctypes.c_int(1) # except 1d tensors
LLAMA_FTYPE_MOSTLY_Q4_0 = ctypes.c_int(2) # except 1d tensors
LLAMA_FTYPE_MOSTLY_Q4_1 = ctypes.c_int(3) # except 1d tensors
# NOTE(review): the two assignments that follow bind the same name to the same
# value (4); they look like the before/after sides of a formatting-only diff
# hunk -- confirm that only one of them exists in the real file.
LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(4) # tok_embeddings.weight and output.weight are F16
LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(
4
) # tok_embeddings.weight and output.weight are F16

# Functions

Expand Down Expand Up @@ -175,6 +177,22 @@ def llama_model_quantize(fname_inp: bytes, fname_out: bytes, itype: c_int) -> c_
_lib.llama_model_quantize.restype = c_int


# Apply a LoRA adapter to a model that has already been loaded.
# path_base_model is the path to a higher quality model used as the base for
# the layers the adapter modifies; it can be NULL to use the currently loaded
# model instead.
# Reload the model before applying a different adapter, otherwise the new
# adapter is applied on top of the previous one.
# Returns 0 on success.
def llama_apply_lora_from_file(
    ctx: llama_context_p, path_lora: bytes, path_base_model: bytes, n_threads: c_int
) -> c_int:
    # Forward the arguments unchanged to the native library and hand back its
    # return code as-is.
    status = _lib.llama_apply_lora_from_file(
        ctx,
        path_lora,
        path_base_model,
        n_threads,
    )
    return status


# ctypes signature for llama_apply_lora_from_file; the entries are listed in
# the same order the Python wrapper passes its arguments.
_lib.llama_apply_lora_from_file.argtypes = [
    llama_context_p,  # ctx
    c_char_p,  # path_lora
    c_char_p,  # path_base_model
    c_int,  # n_threads
]
_lib.llama_apply_lora_from_file.restype = c_int


# Returns the KV cache that will contain the context for the
# ongoing prediction with the model.
def llama_get_kv_cache(ctx: llama_context_p):
Expand Down

0 comments on commit 35abf89

Please sign in to comment.