This repository has been archived by the owner on Jun 24, 2024. It is now read-only.

Commit 5e4b35f
Merge branch 'develop' into feat/ggml-update
philpax committed Nov 12, 2023
2 parents fcbfb4d + 52c2bb6
Showing 16 changed files with 629 additions and 97 deletions.
102 changes: 46 additions & 56 deletions Cargo.lock

(Generated file; diff not rendered.)

13 changes: 9 additions & 4 deletions Cargo.toml
@@ -6,7 +6,7 @@ members = [
     "crates/llm",
     "crates/llm-base",
     "crates/models/*",
-    "binaries/*"
+    "binaries/*",
 ]
 resolver = "2"
 default-members = ["binaries/llm-cli", "crates/llm"]
@@ -27,12 +27,12 @@ anyhow = "1.0"
 rustyline = { version = "11.0.0", features = ["derive"] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = { version = "1.0" }
-spinoff = { version = "0.7.0", default-features = false, features = ["dots2"] }
+spinoff = { version = "0.8.0", default-features = false, features = ["dots2"] }
 clap = { version = "4.1.8", features = ["derive"] }
 memmap2 = "0.5.10"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 tracing = { version = "0.1", features = ["log"] }
-llm-samplers = "=0.0.6"
+llm-samplers = "=0.0.7"
 
 # Config for 'cargo dist'
 [workspace.metadata.dist]
@@ -45,7 +45,12 @@ ci = ["github"]
 # The installers to generate for each app
 installers = ["shell", "powershell"]
 # Target platforms to build apps for (Rust target-triple syntax)
-targets = ["x86_64-unknown-linux-gnu", "x86_64-apple-darwin", "x86_64-pc-windows-msvc", "aarch64-apple-darwin"]
+targets = [
+    "x86_64-unknown-linux-gnu",
+    "x86_64-apple-darwin",
+    "x86_64-pc-windows-msvc",
+    "aarch64-apple-darwin",
+]
 
 # The profile that 'cargo dist' will build with
 [profile.dist]
1 change: 1 addition & 0 deletions README.md
@@ -287,6 +287,7 @@ Absolutely! Please see the [contributing guide](./doc/CONTRIBUTING.md).
 inference API on your local machine using `llm`.
 - [secondbrain](https://github.com/juliooa/secondbrain): Desktop app to download and run LLMs locally in your computer using `llm`.
 - [floneum](https://floneum.com/): A graph editor for local AI workflows.
+- [poly](https://github.com/pixelspark/poly): A versatile LLM serving back-end with tasks, streaming completion, memory retrieval, and more.
 
 #### Libraries
 
15 changes: 12 additions & 3 deletions binaries/llm-cli/src/cli_args.rs
@@ -290,6 +290,15 @@ pub struct Generate {
     /// top_p - The probability for the top tokens are added until the result is greater or equal to P and at least min_keep tokens have been seen.
     /// p(0.95): The cumulative probability after which no more tokens are kept for sampling.
     /// min_keep(1): Minimum tokens to keep. Setting this to 0 is not recommended.
+    ///
+    /// top_a (default: disabled) - This sampler prunes tokens that don't meet a threshold based on the most probable token. The formula is `a1 * pow(max_prob, a2)`. See https://github.com/BlinkDL/RWKV-LM#the-top-a-sampling-method for more information.
+    /// a1(0.0): Threshold scale. A reasonable value is 0.2. Setting either a1 or a2 to 0 disables the sampler.
+    /// a2(0.0): Threshold power. A reasonable value is 2.
+    /// min_keep(1): Minimum tokens to keep. Setting this to 0 is not recommended.
+    ///
+    /// min_p (default: disabled) - This sampler prunes tokens that don't meet a certain percentage of the most probable token. For example if `p` is `0.05` then after `min_keep` is satisfied, other tokens must be at least 5% of the most probable token. See https://github.com/ggerganov/llama.cpp/issues/3483 for more information.
+    /// p(0.0): Probability threshold. 0.05 to 0.2 are good starting values to try. Setting this to 0 disables the sampler.
+    /// min_keep(1): Minimum tokens to keep. Setting this to 0 is not recommended.
     #[arg(long = "sampler", short = 's', verbatim_doc_comment)]
     pub sampler_options: Vec<String>,

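Both new samplers prune relative to the most probable token: `top_a` drops any token whose probability falls below `a1 * max_prob^a2`, while `min_p` drops tokens below the fixed fraction `p` of `max_prob`; in both cases the `min_keep` most probable tokens always survive. A minimal sketch of the two pruning rules over a plain probability slice (illustrative only, not the `llm-samplers` API the CLI actually uses):

```rust
/// top_a: keep tokens with prob >= a1 * max_prob^a2.
fn top_a_keep(probs: &[f32], a1: f32, a2: f32, min_keep: usize) -> Vec<usize> {
    let max_prob = probs.iter().copied().fold(f32::NEG_INFINITY, f32::max);
    keep_above(probs, a1 * max_prob.powf(a2), min_keep)
}

/// min_p: keep tokens with prob >= p * max_prob.
fn min_p_keep(probs: &[f32], p: f32, min_keep: usize) -> Vec<usize> {
    let max_prob = probs.iter().copied().fold(f32::NEG_INFINITY, f32::max);
    keep_above(probs, p * max_prob, min_keep)
}

/// Shared helper: always retain the `min_keep` most probable tokens,
/// then every token at or above `threshold`.
fn keep_above(probs: &[f32], threshold: f32, min_keep: usize) -> Vec<usize> {
    let mut order: Vec<usize> = (0..probs.len()).collect();
    order.sort_by(|&a, &b| probs[b].total_cmp(&probs[a])); // descending by probability
    order
        .into_iter()
        .enumerate()
        .filter(|&(rank, idx)| rank < min_keep || probs[idx] >= threshold)
        .map(|(_, idx)| idx)
        .collect()
}
```

For example, with `a1 = 0.2`, `a2 = 2`, and a top probability of 0.6, the top_a cutoff is `0.2 * 0.6^2 = 0.072`; with `p = 0.05`, the min_p cutoff is `0.03`.
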
@@ -533,7 +542,7 @@ impl ModelLoad {
 let tokenizer_source = match self.model_and_tokenizer.to_source() {
     Ok(vs) => vs,
     Err(err) => {
-        if let Some(sp) = sp.take() {
+        if let Some(mut sp) = sp.take() {
             sp.fail(&format!("Failed to load tokenizer: {}", err));
         }
         return Err(err);
@@ -586,7 +595,7 @@
     file_size,
     tensor_count,
 } => {
-    if let Some(sp) = sp.take() {
+    if let Some(mut sp) = sp.take() {
         sp.success(&format!(
             "Loaded {tensor_count} tensors ({}) after {}ms",
             bytesize::to_string(file_size, false),
@@ -601,7 +610,7 @@
 if model.is_err() {
     // If we've failed at loading the model, we probably haven't stopped the spinner yet.
     // Cancel it now if needed.
-    if let Some(sp) = sp {
+    if let Some(mut sp) = sp {
         sp.fail("Failed to load model")
     }
 }
2 changes: 1 addition & 1 deletion binaries/llm-cli/src/interactive.rs
@@ -141,7 +141,7 @@ fn feed_prompt_with_spinner(
         prompt.insert(0, '\n');
     }
 
-    let sp = spinoff::Spinner::new(spinoff::spinners::Dots2, "".to_string(), None);
+    let mut sp = spinoff::Spinner::new(spinoff::spinners::Dots2, "".to_string(), None);
     let result = session.feed_prompt(
         model,
         &prompt,
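The `mut` bindings added here and in `cli_args.rs` follow from the `spinoff` bump from 0.7.0 to 0.8.0 in `Cargo.toml`: as the diff shows, `Spinner` methods such as `success` and `fail` now require a mutable handle. A minimal sketch of the 0.8-style usage, assuming only the constructor and methods visible in this diff:

```rust
fn main() {
    // spinoff 0.8: the spinner handle must be mutable to update or stop it.
    let mut sp = spinoff::Spinner::new(spinoff::spinners::Dots2, "Working...".to_string(), None);
    std::thread::sleep(std::time::Duration::from_secs(1)); // stand-in for real work
    sp.success("Done"); // takes &mut self in 0.8, hence the new `mut` bindings
}
```
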
10 changes: 5 additions & 5 deletions binaries/llm-test/src/inference.rs
@@ -92,14 +92,14 @@ fn run_inference(
 // Takes the most likely element from the logits, except if they've appeared in `previous_tokens`
 // at all
 #[derive(Debug, Default)]
-struct DeterministicSampler(SampleGreedy<TokenId>);
+struct DeterministicSampler(SampleGreedy);
 
-impl Sampler<TokenId, f32> for DeterministicSampler {
+impl Sampler for DeterministicSampler {
     fn sample<'a>(
         &mut self,
-        res: &mut dyn HasSamplerResources<TokenId = TokenId>,
-        logits: &'a mut Logits<TokenId, f32>,
-    ) -> anyhow::Result<&'a mut Logits<TokenId, f32>> {
+        res: &mut dyn HasSamplerResources,
+        logits: &'a mut Logits,
+    ) -> anyhow::Result<&'a mut Logits> {
         let mut flat_bias = Default::default();
 
         // This might look a little weird, but it's necessary because the resource
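With llm-samplers 0.0.7, `Sampler`, `Logits`, and `HasSamplerResources` lose their `TokenId`/`f32` type parameters, which is all this hunk tracks. The behaviour the test sampler implements is a greedy argmax over the logits that excludes every previously generated token (apparently via a flat bias, per the truncated hunk); it reduces to a standalone sketch like this (hypothetical helper, not the crate's API):

```rust
/// Pick the highest-logit token id that has not already appeared in
/// `previous_tokens`. A sketch of DeterministicSampler's intent over plain
/// slices; the real test wraps llm-samplers' SampleGreedy plus a flat bias.
fn deterministic_sample(logits: &[f32], previous_tokens: &[u32]) -> Option<u32> {
    logits
        .iter()
        .enumerate()
        .filter(|(id, _)| !previous_tokens.contains(&(*id as u32)))
        .max_by(|(_, a), (_, b)| a.total_cmp(b))
        .map(|(id, _)| id as u32)
}
```

With logits `[1.0, 3.0, 2.0]` and `previous_tokens = [1]`, for instance, this returns `Some(2)` even though token 1 has the highest logit.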
(Diffs for the remaining 10 changed files not shown.)
