Skip to content

Commit

Permalink
add QuantFactory's quantized llama-3 models
Browse files Browse the repository at this point in the history
  • Loading branch information
cameronfyfe committed May 6, 2024
1 parent dda0388 commit b840f54
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 0 deletions.
56 changes: 56 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,20 @@ nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__TheBloke__Llam

```present ./scripts/list-cmds.sh github:cameronfyfe/nix-run-ai
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q2_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_L_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q6_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q8_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__TheBloke__CapybaraHermes-2_5-Mistral-7B-GGUF__capybarahermes-2_5-mistral-7b_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__TheBloke__Llama-2-13B-chat-GGUF__llama-2-13b-chat_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__TheBloke__Llama-2-7B-Chat-GGUF__llama-2-7b-chat_Q2_K_gguf
Expand All @@ -55,6 +69,20 @@ nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__TheBloke__Llama-2
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__microsoft__Phi-3-mini-4k-instruct-gguf__Phi-3-mini-4k-instruct-fp16_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cpu__HF__microsoft__Phi-3-mini-4k-instruct-gguf__Phi-3-mini-4k-instruct-q4_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q2_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_L_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q6_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q8_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__TheBloke__CapybaraHermes-2_5-Mistral-7B-GGUF__capybarahermes-2_5-mistral-7b_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__TheBloke__Llama-2-13B-chat-GGUF__llama-2-13b-chat_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__TheBloke__Llama-2-7B-Chat-GGUF__llama-2-7b-chat_Q2_K_gguf
Expand All @@ -72,6 +100,20 @@ nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__TheBloke__Llama-
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__microsoft__Phi-3-mini-4k-instruct-gguf__Phi-3-mini-4k-instruct-fp16_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__cli__cuda__HF__microsoft__Phi-3-mini-4k-instruct-gguf__Phi-3-mini-4k-instruct-q4_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q2_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_L_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q6_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q8_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__TheBloke__CapybaraHermes-2_5-Mistral-7B-GGUF__capybarahermes-2_5-mistral-7b_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__TheBloke__Llama-2-13B-chat-GGUF__llama-2-13b-chat_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__TheBloke__Llama-2-7B-Chat-GGUF__llama-2-7b-chat_Q2_K_gguf
Expand All @@ -89,6 +131,20 @@ nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__TheBloke__Llam
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__microsoft__Phi-3-mini-4k-instruct-gguf__Phi-3-mini-4k-instruct-fp16_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cpu__HF__microsoft__Phi-3-mini-4k-instruct-gguf__Phi-3-mini-4k-instruct-q4_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q2_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_L_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q3_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q4_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_1_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q5_K_S_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q6_K_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__QuantFactory__Meta-Llama-3-8B-Instruct-GGUF__Meta-Llama-3-8B-Instruct_Q8_0_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__TheBloke__CapybaraHermes-2_5-Mistral-7B-GGUF__capybarahermes-2_5-mistral-7b_Q4_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__TheBloke__Llama-2-13B-chat-GGUF__llama-2-13b-chat_Q5_K_M_gguf
nix run github:cameronfyfe/nix-run-ai#llama-cpp__server__cuda__HF__TheBloke__Llama-2-7B-Chat-GGUF__llama-2-7b-chat_Q2_K_gguf
Expand Down
22 changes: 22 additions & 0 deletions models.nix
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,27 @@
};
};
};
# QuantFactory's GGUF quantizations of Meta-Llama-3-8B-Instruct, hosted on Hugging Face.
QuantFactory = {
"Meta-Llama-3-8B-Instruct-GGUF" = {
# Pinned Hugging Face repository revision the model files are fetched from,
# so downloads stay reproducible even if the repo is updated.
commit = "071f0830e6b83051d4626c5d95c1b33ebcbedad0";
# Map of model filename -> SRI sha256 hash of the downloaded .gguf file.
# Entries cover the quantization levels Q2_K through Q8_0 (smaller Q number
# = smaller file / lower precision).
models = {
"Meta-Llama-3-8B-Instruct.Q2_K.gguf" = "sha256-fAgV4g8g5fM31h511LIA594yTQjUv2EtX5V/xA/DA3A=";
"Meta-Llama-3-8B-Instruct.Q3_K_L.gguf" = "sha256-FBFZGjtAXvRTE+klYOeiiSARSioRpueteaNtm1jMAIQ=";
"Meta-Llama-3-8B-Instruct.Q3_K_M.gguf" = "sha256-7Vc34DGAzUXzgrm+mvklEGAUj5vlUjDbf8TTc0vf2PU=";
"Meta-Llama-3-8B-Instruct.Q3_K_S.gguf" = "sha256-d0ukIu6sMLI5DnKWBpSzXrp0as2CeFstZEyScW7Uebs=";
"Meta-Llama-3-8B-Instruct.Q4_0.gguf" = "sha256-Gd7Zlv5sYCVNx1RNeCJ27/QQRu1Cql8tAAXcRX5cCJU=";
"Meta-Llama-3-8B-Instruct.Q4_1.gguf" = "sha256-/T5dxB2bIe4x1hBPHPi5MnAQEZqSvDGEdTpkUS3wmCM=";
"Meta-Llama-3-8B-Instruct.Q4_K_M.gguf" = "sha256-ZHorZMvNvmcEMtBQLrslkrNt02TVGp73oTh7ekNleB8=";
"Meta-Llama-3-8B-Instruct.Q4_K_S.gguf" = "sha256-U+7/6dAY7dB13LdWL6FFnS70WXYZ8gdVJGdurQFsCj0=";
"Meta-Llama-3-8B-Instruct.Q5_0.gguf" = "sha256-BFpYhrzGDCHHsvFLx9+lhL469h8tjyqWMC0OWyBQl8s=";
"Meta-Llama-3-8B-Instruct.Q5_1.gguf" = "sha256-t16P5Lp0uwC4qA4J9tj0MCIsBR9AVxwUDVaA4FgFMvY=";
"Meta-Llama-3-8B-Instruct.Q5_K_M.gguf" = "sha256-1rT4iaAK/JDhIWu1z4FLFxHtGHBFPH3Yt/FcxgAaBl4=";
"Meta-Llama-3-8B-Instruct.Q5_K_S.gguf" = "sha256-T+QXDmnCf+Plij+EVu5SJyELnNhRF/VtLi/p+tTznYU=";
"Meta-Llama-3-8B-Instruct.Q6_K.gguf" = "sha256-t7rUVhjip2zB6JoPu5Oiysm/QQ4nphnIAk7W21Oqm0o=";
"Meta-Llama-3-8B-Instruct.Q8_0.gguf" = "sha256-jJZqnsJbp74PklLeTmiU3EBSayibaVJRcuNQh7g0UeI=";
};
};
};
};
}

0 comments on commit b840f54

Please sign in to comment.