diff --git a/README.md b/README.md
index 2cd7d3d..404ce79 100644
--- a/README.md
+++ b/README.md
@@ -33,11 +33,11 @@ def generate_launch_description():
 
     return LaunchDescription([
         create_llama_launch(
-            n_ctx=512, # context of the LLM in tokens
+            n_ctx=2048, # context of the LLM in tokens
             n_batch=8, # batch size in tokens
             n_gpu_layers=0, # layers to load in GPU
             n_threads=4, # threads
-            n_predict=512, # max tokens (prompt tokens + predicted tokens)
+            n_predict=2048, # max tokens (prompt tokens + predicted tokens). -1 == inf
             model_repo="TheBloke/Marcoroni-7B-v3-GGUF", # Hugging Face repo
             model_filename="marcoroni-7b-v3.Q4_K_M.gguf", # model file
diff --git a/llama_bringup/launch/cybertron.launch.py b/llama_bringup/launch/cybertron.launch.py
index fed5563..bfe2a1c 100644
--- a/llama_bringup/launch/cybertron.launch.py
+++ b/llama_bringup/launch/cybertron.launch.py
@@ -29,11 +29,11 @@ def generate_launch_description():
 
     return LaunchDescription([
         create_llama_launch(
-            n_ctx=512,
+            n_ctx=2048,
             n_batch=8,
             n_gpu_layers=0,
             n_threads=4,
-            n_predict=512,
+            n_predict=2048,
             model_repo="TheBloke/MetaMath-Cybertron-Starling-GGUF",
             model_filename="metamath-cybertron-starling.Q4_K_M.gguf",
diff --git a/llama_bringup/launch/hermes.launch.py b/llama_bringup/launch/hermes.launch.py
index 79bfbbc..e763c1e 100644
--- a/llama_bringup/launch/hermes.launch.py
+++ b/llama_bringup/launch/hermes.launch.py
@@ -29,11 +29,11 @@ def generate_launch_description():
 
     return LaunchDescription([
         create_llama_launch(
-            n_ctx=512,
+            n_ctx=2048,
             n_batch=8,
             n_gpu_layers=0,
             n_threads=4,
-            n_predict=512,
+            n_predict=2048,
             model_repo="TheBloke/OpenHermes-2.5-neural-chat-v3-3-Slerp-GGUF",
             model_filename="openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf",
diff --git a/llama_bringup/launch/marcoroni.launch.py b/llama_bringup/launch/marcoroni.launch.py
index 0ab4c70..430381f 100644
--- a/llama_bringup/launch/marcoroni.launch.py
+++ b/llama_bringup/launch/marcoroni.launch.py
@@ -29,11 +29,11 @@ def generate_launch_description():
 
     return LaunchDescription([
         create_llama_launch(
-            n_ctx=512,
+            n_ctx=2048,
             n_batch=8,
             n_gpu_layers=0,
             n_threads=4,
-            n_predict=512,
+            n_predict=2048,
             model_repo="TheBloke/Marcoroni-7B-v3-GGUF",
             model_filename="marcoroni-7b-v3.Q4_K_M.gguf",
diff --git a/llama_bringup/launch/neural-chat.launch.py b/llama_bringup/launch/neural-chat.launch.py
index acb90e3..53b8e3b 100644
--- a/llama_bringup/launch/neural-chat.launch.py
+++ b/llama_bringup/launch/neural-chat.launch.py
@@ -29,11 +29,11 @@ def generate_launch_description():
 
     return LaunchDescription([
         create_llama_launch(
-            n_ctx=512,
+            n_ctx=2048,
             n_batch=8,
             n_gpu_layers=0,
             n_threads=4,
-            n_predict=512,
+            n_predict=2048,
             model_repo="janhq/neural-chat-7b-v3-3-slerp-GGUF",
             model_filename="neural-chat-7b-v3-3-slerp.Q4_K_M.gguf",
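
The change is identical across the README and all four launch files: n_ctx and n_predict are raised from 512 to 2048 tokens, and the README comment now notes that n_predict=-1 means unlimited generation. For reference, below is a minimal sketch of what one complete launch file would look like after this diff. The import path llama_bringup.utils for create_llama_launch is an assumption (the diff only shows the call site, not the imports); the parameter values and the model repo/filename are copied verbatim from marcoroni.launch.py above.

# Hypothetical reconstruction of a post-diff launch file; the
# llama_bringup.utils import path is assumed, not shown in the diff.
from launch import LaunchDescription
from llama_bringup.utils import create_llama_launch


def generate_launch_description():

    return LaunchDescription([
        create_llama_launch(
            n_ctx=2048,      # context of the LLM in tokens
            n_batch=8,       # batch size in tokens
            n_gpu_layers=0,  # layers to load in GPU (0 = CPU only)
            n_threads=4,     # threads
            n_predict=2048,  # max tokens (prompt tokens + predicted tokens). -1 == inf
            model_repo="TheBloke/Marcoroni-7B-v3-GGUF",    # Hugging Face repo
            model_filename="marcoroni-7b-v3.Q4_K_M.gguf",  # model file
        )
    ])

In practical terms, the bump from 512 to 2048 lets prompts plus completions grow four times longer before hitting the context and prediction limits, while the CPU-oriented defaults (n_gpu_layers=0, n_threads=4) are left unchanged.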