Skip to content

Commit

Permalink
add QuantFactory's quantized llama-3 models
Browse files Browse the repository at this point in the history
  • Loading branch information
cameronfyfe committed May 6, 2024
1 parent dda0388 commit b840f54
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 0 deletions.
56 changes: 56 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,20 @@ nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__TheBloke__Llam

```present ./scripts/list-cmds.sh github:cameronfyfe/nix-run-ai
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q2_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_L_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q6_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q8_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__TheBloke__CapybaraHermes-2_5-Mistral-7B-GGUF__capybarahermes-2_5-mistral-7b_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__TheBloke__Llama-2-13B-chat-GGUF__llama-2-13b-chat_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__TheBloke__Llama-2-7B-Chat-GGUF__llama-2-7b-chat_Q2_K_gguf
Expand All @@ -55,6 +69,20 @@ nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__TheBloke__Llama-2
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__microsoft__Phi-3-mini-4k-instruct-gguf__Phi-3-mini-4k-instruct-fp16_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__microsoft__Phi-3-mini-4k-instruct-gguf__Phi-3-mini-4k-instruct-q4_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q2_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_L_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q6_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q8_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__TheBloke__CapybaraHermes-2_5-Mistral-7B-GGUF__capybarahermes-2_5-mistral-7b_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__TheBloke__Llama-2-13B-chat-GGUF__llama-2-13b-chat_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__TheBloke__Llama-2-7B-Chat-GGUF__llama-2-7b-chat_Q2_K_gguf
Expand All @@ -72,6 +100,20 @@ nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__TheBloke__Llama-
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__microsoft__Phi-3-mini-4k-instruct-gguf__Phi-3-mini-4k-instruct-fp16_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__microsoft__Phi-3-mini-4k-instruct-gguf__Phi-3-mini-4k-instruct-q4_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q2_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_L_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q6_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q8_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__TheBloke__CapybaraHermes-2_5-Mistral-7B-GGUF__capybarahermes-2_5-mistral-7b_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__TheBloke__Llama-2-13B-chat-GGUF__llama-2-13b-chat_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__TheBloke__Llama-2-7B-Chat-GGUF__llama-2-7b-chat_Q2_K_gguf
Expand All @@ -89,6 +131,20 @@ nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__TheBloke__Llam
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__microsoft__Phi-3-mini-4k-instruct-gguf__Phi-3-mini-4k-instruct-fp16_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__microsoft__Phi-3-mini-4k-instruct-gguf__Phi-3-mini-4k-instruct-q4_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q2_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_L_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q6_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q8_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__TheBloke__CapybaraHermes-2_5-Mistral-7B-GGUF__capybarahermes-2_5-mistral-7b_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__TheBloke__Llama-2-13B-chat-GGUF__llama-2-13b-chat_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__TheBloke__Llama-2-7B-Chat-GGUF__llama-2-7b-chat_Q2_K_gguf
Expand Down
22 changes: 22 additions & 0 deletions models.nix
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,27 @@
};
};
};
# QuantFactory's GGUF quantizations of Meta-Llama-3-8B-Instruct, hosted on Hugging Face.
QuantFactory = {
"Meta-Llama-3-8B-Instruct-GGUF" = {
# Pinned Hugging Face repository revision the model files are fetched from,
# so downloads stay reproducible even if the repo is updated.
commit = "071f0830e6b83051d4626c5d95c1b33ebcbedad0";
# Map of model filename -> SRI sha256 hash of the downloaded .gguf file.
# Entries cover the quantization levels Q2_K through Q8_0 (smaller Q number
# = smaller file / lower precision).
models = {
"Meta-Llama-3-8B-Instruct.Q2_K.gguf" = "sha256-fAgV4g8g5fM31h511LIA594yTQjUv2EtX5V/xA/DA3A=";
"Meta-Llama-3-8B-Instruct.Q3_K_L.gguf" = "sha256-FBFZGjtAXvRTE+klYOeiiSARSioRpueteaNtm1jMAIQ=";
"Meta-Llama-3-8B-Instruct.Q3_K_M.gguf" = "sha256-7Vc34DGAzUXzgrm+mvklEGAUj5vlUjDbf8TTc0vf2PU=";
"Meta-Llama-3-8B-Instruct.Q3_K_S.gguf" = "sha256-d0ukIu6sMLI5DnKWBpSzXrp0as2CeFstZEyScW7Uebs=";
"Meta-Llama-3-8B-Instruct.Q4_0.gguf" = "sha256-Gd7Zlv5sYCVNx1RNeCJ27/QQRu1Cql8tAAXcRX5cCJU=";
"Meta-Llama-3-8B-Instruct.Q4_1.gguf" = "sha256-/T5dxB2bIe4x1hBPHPi5MnAQEZqSvDGEdTpkUS3wmCM=";
"Meta-Llama-3-8B-Instruct.Q4_K_M.gguf" = "sha256-ZHorZMvNvmcEMtBQLrslkrNt02TVGp73oTh7ekNleB8=";
"Meta-Llama-3-8B-Instruct.Q4_K_S.gguf" = "sha256-U+7/6dAY7dB13LdWL6FFnS70WXYZ8gdVJGdurQFsCj0=";
"Meta-Llama-3-8B-Instruct.Q5_0.gguf" = "sha256-BFpYhrzGDCHHsvFLx9+lhL469h8tjyqWMC0OWyBQl8s=";
"Meta-Llama-3-8B-Instruct.Q5_1.gguf" = "sha256-t16P5Lp0uwC4qA4J9tj0MCIsBR9AVxwUDVaA4FgFMvY=";
"Meta-Llama-3-8B-Instruct.Q5_K_M.gguf" = "sha256-1rT4iaAK/JDhIWu1z4FLFxHtGHBFPH3Yt/FcxgAaBl4=";
"Meta-Llama-3-8B-Instruct.Q5_K_S.gguf" = "sha256-T+QXDmnCf+Plij+EVu5SJyELnNhRF/VtLi/p+tTznYU=";
"Meta-Llama-3-8B-Instruct.Q6_K.gguf" = "sha256-t7rUVhjip2zB6JoPu5Oiysm/QQ4nphnIAk7W21Oqm0o=";
"Meta-Llama-3-8B-Instruct.Q8_0.gguf" = "sha256-jJZqnsJbp74PklLeTmiU3EBSayibaVJRcuNQh7g0UeI=";
};
};
};
};
}

0 comments on commit b840f54

Please sign in to comment.