[CI-Examples] Add Candle ML framework example
Candle is a minimalist ML framework for Rust with a focus on performance
and ease of use. This commit adds two Candle examples: simple matrix
multiplication (to quickly test functionality) and Quantized LLaMA (to
test performance).

Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
dimakuv committed Jul 12, 2024
1 parent c4adc12 commit dd23d1d
Showing 6 changed files with 204 additions and 0 deletions.
7 changes: 7 additions & 0 deletions CI-Examples/candle/.gitignore
@@ -0,0 +1,7 @@
/candle_matmul
/candle_quantized
/src

# model
/*.bin
/*.json
88 changes: 88 additions & 0 deletions CI-Examples/candle/Makefile
@@ -0,0 +1,88 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine)

ifeq ($(DEBUG),1)
GRAMINE_LOG_LEVEL = debug
else
GRAMINE_LOG_LEVEL = error
endif

SRCDIR = src

.PHONY: all
all: candle_matmul candle_matmul.manifest candle_quantized candle_quantized.manifest
ifeq ($(SGX),1)
all: candle_matmul.manifest.sgx candle_matmul.sig candle_quantized.manifest.sgx candle_quantized.sig
endif

######################### Simple Matrix Multiplication #########################

$(SRCDIR)/candle_matmul/target/debug/candle_matmul:
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	cargo new candle_matmul && cd candle_matmul && \
	cargo add --git https://github.com/huggingface/candle.git candle-core && \
	cp ../../prepared_matmul_src/main.rs ./src/main.rs && \
	cargo build

candle_matmul: $(SRCDIR)/candle_matmul/target/debug/candle_matmul
	cp $< $@

candle_matmul.manifest: candle_matmul.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

candle_matmul.manifest.sgx candle_matmul.sig: candle_matmul_sgx_sign
	@:

.INTERMEDIATE: candle_matmul_sgx_sign
candle_matmul_sgx_sign: candle_matmul.manifest candle_matmul
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

############################## Quantized LLaMA #################################

llama-2-7b.ggmlv3.q4_0.bin:
	../common_tools/download --output $@ \
		--sha256 bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5 \
		--url https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/$@

tokenizer.json:
	../common_tools/download --output $@ \
		--sha256 8eea70c4866c4f1320ba096fc986ac82038a8374dbe135212ba7628835b4a6f1 \
		--url https://huggingface.co/hf-internal-testing/llama-tokenizer/raw/main/$@

$(SRCDIR)/candle_quantized/target/release/examples/quantized: llama-2-7b.ggmlv3.q4_0.bin tokenizer.json
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	git clone https://github.com/huggingface/candle.git candle_quantized && \
	cd candle_quantized && \
	cargo build --example quantized --release

candle_quantized: $(SRCDIR)/candle_quantized/target/release/examples/quantized
	cp $< $@

candle_quantized.manifest: candle_quantized.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

candle_quantized.manifest.sgx candle_quantized.sig: candle_quantized_sgx_sign
	@:

.INTERMEDIATE: candle_quantized_sgx_sign
candle_quantized_sgx_sign: candle_quantized.manifest candle_quantized
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

.PHONY: clean
clean:
	$(RM) *.token *.sig *.manifest.sgx *.manifest candle_matmul candle_quantized

.PHONY: distclean
distclean: clean
	$(RM) -r $(SRCDIR) *.tar.gz *.bin *.json
33 changes: 33 additions & 0 deletions CI-Examples/candle/README.md
@@ -0,0 +1,33 @@
# Candle

Candle is a minimalist ML framework for Rust with a focus on performance
(including GPU support) and ease of use: https://github.com/huggingface/candle

This directory contains the Makefile and the manifest templates for the most
recent version of Candle as of this writing (v0.6.0).

# Warning

Building the `candle_quantized` app downloads ~4GB of data (the model plus the
tokenizer). The Makefile does this automatically.
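
For reference, the model download the Makefile performs is equivalent to the
following sketch (URL and SHA256 hash taken from the Makefile; `wget` and
`sha256sum` are assumptions here, the build actually uses the repo's
`../common_tools/download` helper):

```sh
# hedged sketch: fetch the quantized LLaMA model and verify its checksum
wget -O llama-2-7b.ggmlv3.q4_0.bin \
    "https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/llama-2-7b.ggmlv3.q4_0.bin"
echo "bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5  llama-2-7b.ggmlv3.q4_0.bin" \
    | sha256sum -c -
```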

# Quick Start

```sh
# build Candle (uses Rust Cargo) and the final manifest
make SGX=1

# run simple matrix multiplication
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started
./candle_matmul
gramine-direct ./candle_matmul
gramine-sgx ./candle_matmul

# run Quantized LLaMA (a quantized version of the LLaMA model)
# note that for the Gramine runs, the command-line arguments are already defined in the manifest
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#check-out-our-examples
RAYON_NUM_THREADS=36 ./candle_quantized \
--model llama-2-7b.ggmlv3.q4_0.bin --tokenizer tokenizer.json --sample-len 200
RAYON_NUM_THREADS=36 gramine-direct ./candle_quantized
RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized
```
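
The manifest templates also read an `EDMM` environment variable when the
manifests are generated (`env.get('EDMM', '0')` in the templates). Assuming the
variable is visible to `gramine-manifest` during the build, EDMM can be enabled
like this (a sketch, not part of this commit):

```sh
# hedged sketch: regenerate the manifests with SGX EDMM enabled
make clean
EDMM=1 make SGX=1
```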
25 changes: 25 additions & 0 deletions CI-Examples/candle/candle_matmul.manifest.template
@@ -0,0 +1,25 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

libos.entrypoint = "/candle_matmul"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"

fs.mounts = [
  { path = "/candle_matmul", uri = "file:candle_matmul" },
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  { path = "{{ arch_libdir }}/libgcc_s.so.1", uri = "file:{{ arch_libdir }}/libgcc_s.so.1" },
]

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '16' }}
sgx.enclave_size = "1G"

sgx.trusted_files = [
  "file:candle_matmul",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
]
37 changes: 37 additions & 0 deletions CI-Examples/candle/candle_quantized.manifest.template
@@ -0,0 +1,37 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

libos.entrypoint = "/candle_quantized"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"
loader.env.RAYON_NUM_THREADS = { passthrough = true }

loader.argv = [ "candle_quantized", "--model", "llama-2-7b.ggmlv3.q4_0.bin",
                "--tokenizer", "tokenizer.json", "--sample-len", "200" ]

fs.mounts = [
  { path = "/candle_quantized", uri = "file:candle_quantized" },
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" },

  { path = "/llama-2-7b.ggmlv3.q4_0.bin", uri = "file:llama-2-7b.ggmlv3.q4_0.bin" },
  { path = "/tokenizer.json", uri = "file:tokenizer.json" },
]

sgx.debug = true
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '256' }}
sgx.enclave_size = "32G"

sgx.trusted_files = [
  "file:candle_quantized",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libcrypto.so.3",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
  "file:{{ arch_libdir }}/libssl.so.3",

  "file:llama-2-7b.ggmlv3.q4_0.bin",
  "file:tokenizer.json",
]
14 changes: 14 additions & 0 deletions CI-Examples/candle/prepared_matmul_src/main.rs
@@ -0,0 +1,14 @@
// example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started

use candle_core::{Device, Tensor};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // run on the CPU (Candle also supports GPU backends)
    let device = Device::Cpu;

    // two random matrices: `a` is 2x3, `b` is 3x4
    let a = Tensor::randn(0f32, 1., (2, 3), &device)?;
    let b = Tensor::randn(0f32, 1., (3, 4), &device)?;

    // the matrix product `c` has shape 2x4
    let c = a.matmul(&b)?;
    println!("{c}");
    Ok(())
}
