[CI-Examples] Add Candle ML framework example
Candle is a minimalist ML framework for Rust with a focus on performance
and ease of use. This commit adds two Candle examples: simple matrix
multiplication (to quickly test functionality) and Quantized LLaMA (to
test performance).

Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
dimakuv committed Jul 12, 2024
1 parent c4adc12 commit dd23d1d
Showing 6 changed files with 204 additions and 0 deletions.
7 changes: 7 additions & 0 deletions CI-Examples/candle/.gitignore
@@ -0,0 +1,7 @@
/candle_matmul
/candle_quantized
/src

# model
/*.bin
/*.json
88 changes: 88 additions & 0 deletions CI-Examples/candle/Makefile
@@ -0,0 +1,88 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine)

ifeq ($(DEBUG),1)
GRAMINE_LOG_LEVEL = debug
else
GRAMINE_LOG_LEVEL = error
endif

SRCDIR = src

.PHONY: all
all: candle_matmul candle_matmul.manifest candle_quantized candle_quantized.manifest
ifeq ($(SGX),1)
all: candle_matmul.manifest.sgx candle_matmul.sig candle_quantized.manifest.sgx candle_quantized.sig
endif

######################### Simple Matrix Multiplication #########################

$(SRCDIR)/candle_matmul/target/debug/candle_matmul:
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	cargo new candle_matmul && cd candle_matmul && \
	cargo add --git https://github.com/huggingface/candle.git candle-core && \
	cp ../../prepared_matmul_src/main.rs ./src/main.rs && \
	cargo build

candle_matmul: $(SRCDIR)/candle_matmul/target/debug/candle_matmul
	cp $< $@

candle_matmul.manifest: candle_matmul.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

candle_matmul.manifest.sgx candle_matmul.sig: candle_matmul_sgx_sign
	@:

.INTERMEDIATE: candle_matmul_sgx_sign
candle_matmul_sgx_sign: candle_matmul.manifest candle_matmul
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

############################## Quantized LLaMA #################################

llama-2-7b.ggmlv3.q4_0.bin:
	../common_tools/download --output $@ \
		--sha256 bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5 \
		--url https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/$@

tokenizer.json:
	../common_tools/download --output $@ \
		--sha256 8eea70c4866c4f1320ba096fc986ac82038a8374dbe135212ba7628835b4a6f1 \
		--url https://huggingface.co/hf-internal-testing/llama-tokenizer/raw/main/$@

$(SRCDIR)/candle_quantized/target/release/examples/quantized: llama-2-7b.ggmlv3.q4_0.bin tokenizer.json
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	git clone https://github.com/huggingface/candle.git candle_quantized && \
	cd candle_quantized && \
	cargo build --example quantized --release

candle_quantized: $(SRCDIR)/candle_quantized/target/release/examples/quantized
	cp $< $@

candle_quantized.manifest: candle_quantized.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

candle_quantized.manifest.sgx candle_quantized.sig: candle_quantized_sgx_sign
	@:

.INTERMEDIATE: candle_quantized_sgx_sign
candle_quantized_sgx_sign: candle_quantized.manifest candle_quantized
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

.PHONY: clean
clean:
	$(RM) *.token *.sig *.manifest.sgx *.manifest candle_matmul candle_quantized

.PHONY: distclean
distclean: clean
	$(RM) -r $(SRCDIR) *.tar.gz *.bin *.json
33 changes: 33 additions & 0 deletions CI-Examples/candle/README.md
@@ -0,0 +1,33 @@
# Candle

Candle is a minimalist ML framework for Rust with a focus on performance
(including GPU support) and ease of use: https://github.com/huggingface/candle

This directory contains the Makefile and the manifest templates for the most
recent version of Candle as of this writing (v0.6.0).

# Warning

Building the `candle_quantized` app downloads ~4GB of data (the model plus the
tokenizer). The Makefile does this automatically.
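
For reference, the model download the Makefile performs is equivalent to the
following sketch (URL and SHA256 hash taken from the Makefile; `wget` and
`sha256sum` are assumptions here, the build actually uses the repo's
`../common_tools/download` helper):

```sh
# hedged sketch: fetch the quantized LLaMA model and verify its checksum
wget -O llama-2-7b.ggmlv3.q4_0.bin \
    "https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/llama-2-7b.ggmlv3.q4_0.bin"
echo "bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5  llama-2-7b.ggmlv3.q4_0.bin" \
    | sha256sum -c -
```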

# Quick Start

```sh
# build Candle (uses Rust Cargo) and the final manifest
make SGX=1

# run simple matrix multiplication
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started
./candle_matmul
gramine-direct ./candle_matmul
gramine-sgx ./candle_matmul

# run Quantized LLaMA (a quantized version of the LLaMA model)
# note that for the Gramine runs, the command-line arguments are already defined in the manifest
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#check-out-our-examples
RAYON_NUM_THREADS=36 ./candle_quantized \
--model llama-2-7b.ggmlv3.q4_0.bin --tokenizer tokenizer.json --sample-len 200
RAYON_NUM_THREADS=36 gramine-direct ./candle_quantized
RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized
```
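
The manifest templates also read an `EDMM` environment variable when the
manifests are generated (`env.get('EDMM', '0')` in the templates). Assuming the
variable is visible to `gramine-manifest` during the build, EDMM can be enabled
like this (a sketch, not part of this commit):

```sh
# hedged sketch: regenerate the manifests with SGX EDMM enabled
make clean
EDMM=1 make SGX=1
```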
25 changes: 25 additions & 0 deletions CI-Examples/candle/candle_matmul.manifest.template
@@ -0,0 +1,25 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

libos.entrypoint = "/candle_matmul"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"

fs.mounts = [
  { path = "/candle_matmul", uri = "file:candle_matmul" },
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  { path = "{{ arch_libdir }}/libgcc_s.so.1", uri = "file:{{ arch_libdir }}/libgcc_s.so.1" },
]

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '16' }}
sgx.enclave_size = "1G"

sgx.trusted_files = [
  "file:candle_matmul",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
]
37 changes: 37 additions & 0 deletions CI-Examples/candle/candle_quantized.manifest.template
@@ -0,0 +1,37 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

libos.entrypoint = "/candle_quantized"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"
loader.env.RAYON_NUM_THREADS = { passthrough = true }

loader.argv = [ "candle_quantized", "--model", "llama-2-7b.ggmlv3.q4_0.bin",
                "--tokenizer", "tokenizer.json", "--sample-len", "200" ]

fs.mounts = [
  { path = "/candle_quantized", uri = "file:candle_quantized" },
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" },

  { path = "/llama-2-7b.ggmlv3.q4_0.bin", uri = "file:llama-2-7b.ggmlv3.q4_0.bin" },
  { path = "/tokenizer.json", uri = "file:tokenizer.json" },
]

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '256' }}
sgx.enclave_size = "32G"

sgx.trusted_files = [
  "file:candle_quantized",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libcrypto.so.3",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
  "file:{{ arch_libdir }}/libssl.so.3",

  "file:llama-2-7b.ggmlv3.q4_0.bin",
  "file:tokenizer.json",
]
14 changes: 14 additions & 0 deletions CI-Examples/candle/prepared_matmul_src/main.rs
@@ -0,0 +1,14 @@
// example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started

use candle_core::{Device, Tensor};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // run on the CPU (Candle also supports GPU backends)
    let device = Device::Cpu;

    // two random matrices: `a` is 2x3, `b` is 3x4
    let a = Tensor::randn(0f32, 1., (2, 3), &device)?;
    let b = Tensor::randn(0f32, 1., (3, 4), &device)?;

    // the matrix product `c` has shape 2x4
    let c = a.matmul(&b)?;
    println!("{c}");
    Ok(())
}
