Default context size (n_ctx) and max tokens (n_predict) increased from 512 to 2048

mgonzs13 · Dec 12, 2023 · 9ddeef2
1 parent 693f616
commit 9ddeef2
Show file tree

Hide file tree

Showing 5 changed files with 10 additions and 10 deletions.
diff --git a/README.md b/README.md
@@ -33,11 +33,11 @@ def generate_launch_description():
 
     return LaunchDescription([
         create_llama_launch(
-            n_ctx=512, # context of the LLM in tokens
+            n_ctx=2048, # context of the LLM in tokens
             n_batch=8, # batch size in tokens
             n_gpu_layers=0, # layers to load in GPU
             n_threads=4, # threads
-            n_predict=512, # max tokens (prompt tokens + predicted tokens
+            n_predict=2048, # max tokens (prompt tokens + predicted tokens). -1 == inf
 
             model_repo="TheBloke/Marcoroni-7B-v3-GGUF", # Hugging Face repo
             model_filename="marcoroni-7b-v3.Q4_K_M.gguf", # model file

diff --git a/llama_bringup/launch/cybertron.launch.py b/llama_bringup/launch/cybertron.launch.py
@@ -29,11 +29,11 @@ def generate_launch_description():
 
     return LaunchDescription([
         create_llama_launch(
-            n_ctx=512,
+            n_ctx=2048,
             n_batch=8,
             n_gpu_layers=0,
             n_threads=4,
-            n_predict=512,
+            n_predict=2048,
 
             model_repo="TheBloke/MetaMath-Cybertron-Starling-GGUF",
             model_filename="metamath-cybertron-starling.Q4_K_M.gguf",

diff --git a/llama_bringup/launch/hermes.launch.py b/llama_bringup/launch/hermes.launch.py
@@ -29,11 +29,11 @@ def generate_launch_description():
 
     return LaunchDescription([
         create_llama_launch(
-            n_ctx=512,
+            n_ctx=2048,
             n_batch=8,
             n_gpu_layers=0,
             n_threads=4,
-            n_predict=512,
+            n_predict=2048,
 
             model_repo="TheBloke/OpenHermes-2.5-neural-chat-v3-3-Slerp-GGUF",
             model_filename="openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf",

diff --git a/llama_bringup/launch/marcoroni.launch.py b/llama_bringup/launch/marcoroni.launch.py
@@ -29,11 +29,11 @@ def generate_launch_description():
 
     return LaunchDescription([
         create_llama_launch(
-            n_ctx=512,
+            n_ctx=2048,
             n_batch=8,
             n_gpu_layers=0,
             n_threads=4,
-            n_predict=512,
+            n_predict=2048,
 
             model_repo="TheBloke/Marcoroni-7B-v3-GGUF",
             model_filename="marcoroni-7b-v3.Q4_K_M.gguf",

diff --git a/llama_bringup/launch/neural-chat.launch.py b/llama_bringup/launch/neural-chat.launch.py
@@ -29,11 +29,11 @@ def generate_launch_description():
 
     return LaunchDescription([
         create_llama_launch(
-            n_ctx=512,
+            n_ctx=2048,
             n_batch=8,
             n_gpu_layers=0,
             n_threads=4,
-            n_predict=512,
+            n_predict=2048,
 
             model_repo="janhq/neural-chat-7b-v3-3-slerp-GGUF",
             model_filename="neural-chat-7b-v3-3-slerp.Q4_K_M.gguf",