Skip to content

Commit

Permalink
batch inference, tensor parallelism (work in progress)
Browse files Browse the repository at this point in the history
  • Loading branch information
jaysonfrancis committed Oct 30, 2024
1 parent 93685bc commit 682ea2f
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 125 deletions.
122 changes: 0 additions & 122 deletions test/tensor_parallel_example.py

This file was deleted.

2 changes: 1 addition & 1 deletion torchtitan/models/llama/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
llama3_configs = {
"debugmodel": ModelArgs(dim=256, n_layers=8, n_heads=16, rope_theta=500000),
"3B": ModelArgs(
dim=3072, # 256, # 3072,
dim=3072,
n_layers=28,
n_heads=24,
n_kv_heads=8,
Expand Down
4 changes: 2 additions & 2 deletions train_configs/llama3_3b.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

[job]
dump_folder = "./outputs"
description = "Llama 3 3B inference"
description = "Llama 3 3B training"

[profiling]
enable_profiling = false
Expand All @@ -27,7 +27,7 @@ lr = 3e-4

[training]
batch_size = 1
seq_len = 1024 # 8192
seq_len = 8192
warmup_steps = 200 # lr scheduler warm up
max_norm = 1.0 # grad norm clipping
steps = 1000
Expand Down

0 comments on commit 682ea2f

Please sign in to comment.