Commit

Update CodeLlama configs (and fix a couple Phi3 ones) (#1358)
joecummings authored Aug 17, 2024
1 parent 367e9ab commit 8bb3a6f
Showing 7 changed files with 61 additions and 65 deletions.
36 changes: 17 additions & 19 deletions recipes/configs/code_llama2/7B_full_low_memory.yaml
@@ -1,5 +1,5 @@
# Config for single device full finetuning in full_finetune_single_device.py
# using a Code-Llama2 7B model
# using a CodeLlama 7B model
#
# This config assumes that you've run the following command before launching
# this run:
@@ -19,22 +19,16 @@
#
# This config works only for training on single device.

# Model arguments
model:
_component_: torchtune.models.code_llama2.code_llama2_7b

# Tokenizer
tokenizer:
_component_: torchtune.models.llama2.llama2_tokenizer
path: /tmp/CodeLlama-7b-hf/tokenizer.model

# Dataset
dataset:
_component_: torchtune.datasets.alpaca_dataset
seed: null
shuffle: True

# Model Arguments
model:
_component_: torchtune.models.code_llama2.code_llama2_7b

# Checkpointer
checkpointer:
_component_: torchtune.utils.FullModelHFCheckpointer
checkpoint_dir: /tmp/CodeLlama-7b-hf
@@ -48,32 +42,36 @@ checkpointer:
model_type: LLAMA2
resume_from_checkpoint: False

# Dataset
dataset:
_component_: torchtune.datasets.alpaca_dataset
seed: null
shuffle: True

# Fine-tuning arguments
epochs: 1
max_steps_per_epoch: null
batch_size: 2
epochs: 3
gradient_accumulation_steps: 1
optimizer:
_component_: bitsandbytes.optim.PagedAdamW
lr: 2e-5
optimizer_in_bwd: True
loss:
_component_: torch.nn.CrossEntropyLoss
max_steps_per_epoch: null
gradient_accumulation_steps: 1
compile: False

# Training environment
# Training env
device: cuda

# Memory management
enable_activation_checkpointing: True

# Reduced precision
dtype: bf16

# Logging
output_dir: /tmp/codellama_finetune_output
metric_logger:
_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
output_dir: /tmp/code_llama2_finetune
log_dir: /tmp/CodeLlama-7b-hf/logs
log_every_n_steps: 1
log_peak_memory_stats: False
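
For orientation, the _component_ entries in these configs are dotted import paths that the recipe resolves into Python objects, with the sibling keys passed as constructor arguments. Below is a minimal sketch of that pattern, assuming a hypothetical instantiate() helper built on importlib; it is not torchtune's actual config loader, and the optimizer line is left as a comment because it needs model parameters (and an installed bitsandbytes) at call time.

import importlib

def instantiate(spec: dict, **extra):
    # Resolve {"_component_": "pkg.mod.Attr", ...} into Attr(**remaining_keys, **extra).
    # Hypothetical helper for illustration; torchtune ships its own config utilities.
    spec = dict(spec)
    module_path, _, attr = spec.pop("_component_").rpartition(".")
    cls = getattr(importlib.import_module(module_path), attr)
    return cls(**spec, **extra)

# Mirrors the loss and optimizer nodes of 7B_full_low_memory.yaml.
loss_fn = instantiate({"_component_": "torch.nn.CrossEntropyLoss"})
# optimizer = instantiate({"_component_": "bitsandbytes.optim.PagedAdamW", "lr": 2e-5},
#                         params=model.parameters())
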
38 changes: 19 additions & 19 deletions recipes/configs/code_llama2/7B_lora_single_device.yaml
@@ -1,5 +1,5 @@
# Config for single device full finetuning in full_finetune_single_device.py
# using a Code-Llama2 7B model
# using a CodeLlama 7B model
#
# This config assumes that you've run the following command before launching
# this run:
@@ -29,13 +29,7 @@ tokenizer:
_component_: torchtune.models.llama2.llama2_tokenizer
path: /tmp/CodeLlama-7b-hf/tokenizer.model

# Dataset
dataset:
_component_: torchtune.datasets.alpaca_cleaned_dataset
seed: null
shuffle: True


# Checkpointer
checkpointer:
_component_: torchtune.utils.FullModelHFCheckpointer
checkpoint_dir: /tmp/CodeLlama-7b-hf
@@ -51,45 +45,51 @@ checkpointer:
resume_from_checkpoint: False
save_adapter_weights_only: False

# Dataset
dataset:
_component_: torchtune.datasets.alpaca_cleaned_dataset
seed: null
shuffle: True

# Fine-tuning arguments
batch_size: 2
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 64
compile: False

batch_size: 2
gradient_accumulation_steps: 16
optimizer:
_component_: torch.optim.AdamW
weight_decay: 0.01
lr: 3e-4

lr_scheduler:
_component_: torchtune.modules.get_cosine_schedule_with_warmup
num_warmup_steps: 100

loss:
_component_: torch.nn.CrossEntropyLoss
compile: False


# Training environment
# Training env
device: cuda

# Memory management
enable_activation_checkpointing: True
dtype: bf16

# Logging
output_dir: /tmp/codellama_lora_finetune_output
metric_logger:
_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
output_dir: /tmp/lora_code_llama2_finetune_output
log_dir: /tmp/CodeLlama-7b-hf/logs
log_every_n_steps: 1
log_peak_memory_stats: False

# Showcase the usage of PyTorch profiler
# Set enabled to False as it's only needed for debugging training
profiler:
_component_: torchtune.utils.setup_torch_profiler
enabled: False

#Output directory of trace artifacts
output_dir: ${output_dir}/profiling_outputs
output_dir: /tmp/CodeLlama-7b-hf/profiling_outputs

#`torch.profiler.ProfilerActivity` types to trace
cpu: True
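
Both LoRA configs pair AdamW with a cosine schedule and 100 warmup steps. As a rough sketch of what such a schedule does, here is a minimal warmup-plus-cosine-decay re-implementation on top of torch.optim.lr_scheduler.LambdaLR; it is illustrative only, not torchtune's get_cosine_schedule_with_warmup, and the total step count is an assumed placeholder.

import math
import torch

def warmup_cosine(optimizer, num_warmup_steps, num_training_steps):
    # Linear warmup from 0 to the base LR, then cosine decay back toward 0.
    def lr_lambda(step):
        if step < num_warmup_steps:
            return step / max(1, num_warmup_steps)
        progress = (step - num_warmup_steps) / max(1, num_training_steps - num_warmup_steps)
        return 0.5 * (1.0 + math.cos(math.pi * progress))
    return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

# lr, weight_decay, and warmup match the config; 1000 total steps is a made-up example.
params = [torch.nn.Parameter(torch.zeros(1))]
opt = torch.optim.AdamW(params, lr=3e-4, weight_decay=0.01)
sched = warmup_cosine(opt, num_warmup_steps=100, num_training_steps=1000)
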
42 changes: 20 additions & 22 deletions recipes/configs/code_llama2/7B_qlora_single_device.yaml
@@ -1,5 +1,5 @@
# Config for single device QLoRA finetuning in lora_finetune_single_device.py
# using a Code-Llama2 7B model
# using a CodeLlama 7B model
#
# This config assumes that you've run the following command before launching
# this run:
@@ -18,8 +18,8 @@
# Model Arguments
model:
_component_: torchtune.models.code_llama2.qlora_code_llama2_7b
lora_attn_modules: ['q_proj', 'v_proj']
apply_lora_to_mlp: False
lora_attn_modules: ['q_proj', 'v_proj', 'k_proj', 'output_proj']
apply_lora_to_mlp: True
apply_lora_to_output: False
lora_rank: 8
lora_alpha: 16
@@ -29,13 +29,7 @@ tokenizer:
_component_: torchtune.models.llama2.llama2_tokenizer
path: /tmp/CodeLlama-7b-hf/tokenizer.model

# Dataset
dataset:
_component_: torchtune.datasets.alpaca_cleaned_dataset
seed: null
shuffle: True


# Checkpointer
checkpointer:
_component_: torchtune.utils.FullModelHFCheckpointer
checkpoint_dir: /tmp/CodeLlama-7b-hf
@@ -51,36 +45,40 @@ checkpointer:
resume_from_checkpoint: False
save_adapter_weights_only: False

# Fine-tuning arguments and training
batch_size: 2
# Dataset
dataset:
_component_: torchtune.datasets.alpaca_cleaned_dataset
seed: null
shuffle: True

# Fine-tuning arguments
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 64
compile: False

batch_size: 2
gradient_accumulation_steps: 16
optimizer:
_component_: torch.optim.AdamW
weight_decay: 0.01
lr: 3e-4

lr_scheduler:
_component_: torchtune.modules.get_cosine_schedule_with_warmup
num_warmup_steps: 100

loss:
_component_: torch.nn.CrossEntropyLoss
compile: False


# Training environment
# Training env
device: cuda

# Memory management
enable_activation_checkpointing: True
dtype: bf16

# Logging
output_dir: /tmp/codellama_qlora_finetune_output
metric_logger:
_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: ${output_dir}
output_dir: /tmp/qlora_code_llama2_finetune_output
log_dir: /tmp/CodeLlama-7b-hf/logs
log_every_n_steps: 1
log_peak_memory_stats: False

@@ -91,7 +89,7 @@ profiler:
enabled: False

#Output directory of trace artifacts
output_dir: ${output_dir}/profiling_outputs
output_dir: /tmp/CodeLlama-7b-hf/profiling_outputs

#`torch.profiler.ProfilerActivity` types to trace
cpu: True
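
The QLoRA config now attaches adapters to all four attention projections and the MLP instead of just q_proj and v_proj, which raises the trainable-parameter count but typically improves adaptation quality. Conceptually, each listed module becomes a frozen linear layer plus a trainable low-rank update scaled by lora_alpha / lora_rank. The sketch below is a generic LoRA linear layer for illustration only; it is not torchtune's implementation and omits the 4-bit base-weight quantization that QLoRA adds.

import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    # Frozen base projection plus a trainable low-rank update (illustrative sketch).
    def __init__(self, in_features, out_features, rank=8, alpha=16):
        super().__init__()
        self.base = nn.Linear(in_features, out_features, bias=False)
        self.base.weight.requires_grad_(False)   # base weight stays frozen
        self.lora_a = nn.Linear(in_features, rank, bias=False)
        self.lora_b = nn.Linear(rank, out_features, bias=False)
        nn.init.zeros_(self.lora_b.weight)        # adapter starts as a no-op
        self.scaling = alpha / rank               # lora_alpha / lora_rank

    def forward(self, x):
        return self.base(x) + self.scaling * self.lora_b(self.lora_a(x))

# rank/alpha match the config above; the 4096 width is only an example dimension.
layer = LoRALinear(4096, 4096, rank=8, alpha=16)
trainable = sum(p.numel() for p in layer.parameters() if p.requires_grad)
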
2 changes: 1 addition & 1 deletion recipes/configs/phi3/mini_full.yaml
@@ -64,7 +64,7 @@ enable_activation_checkpointing: True
dtype: bf16

# Logging
output_dir: /tmp/phi3_full_finetune_output
output_dir: /tmp/phi3_finetune_output
metric_logger:
_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: /tmp/Phi-3-mini-4k-instruct/logs
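
The Phi3 fixes in this commit only correct output_dir values so artifacts from different recipes do not collide. For context, the DiskLogger configured alongside it writes scalar metrics to files under log_dir; the stand-in below shows the idea only (file naming and record format are assumptions, this is not torchtune's DiskLogger).

import json
import time
from pathlib import Path

class SimpleDiskLogger:
    # Append step/metric records as JSON lines under log_dir (illustrative only).
    def __init__(self, log_dir: str):
        self.dir = Path(log_dir)
        self.dir.mkdir(parents=True, exist_ok=True)
        self.file = self.dir / f"metrics_{int(time.time())}.jsonl"

    def log_dict(self, metrics: dict, step: int) -> None:
        with self.file.open("a") as f:
            f.write(json.dumps({"step": step, **metrics}) + "\n")

logger = SimpleDiskLogger("/tmp/Phi-3-mini-4k-instruct/logs")
logger.log_dict({"loss": 2.31, "lr": 2e-5}, step=1)
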
2 changes: 1 addition & 1 deletion recipes/configs/phi3/mini_full_low_memory.yaml
@@ -68,7 +68,7 @@ enable_activation_checkpointing: True
dtype: bf16

# Logging
output_dir: /tmp/phi3_lora_finetune_output
output_dir: /tmp/phi3_finetune_output
metric_logger:
_component_: torchtune.utils.metric_logging.DiskLogger
log_dir: /tmp/Phi-3-mini-4k-instruct/logs
2 changes: 1 addition & 1 deletion recipes/configs/phi3/mini_lora.yaml
@@ -55,7 +55,7 @@ shuffle: True
epochs: 1
max_steps_per_epoch: null
batch_size: 2
gradient_accumulation_steps: 32
gradient_accumulation_steps: 16
optimizer:
_component_: torch.optim.AdamW
weight_decay: 0.01
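
Dropping gradient_accumulation_steps from 32 to 16 halves the effective batch size: with batch_size: 2, each optimizer step now averages gradients over 2 x 16 = 32 samples instead of 64. A minimal sketch of the accumulation pattern these recipes rely on (generic PyTorch, not the recipe's actual training loop):

import torch

batch_size, grad_accum_steps = 2, 16                   # values from mini_lora.yaml
effective_batch_size = batch_size * grad_accum_steps   # 32 samples per optimizer step

model = torch.nn.Linear(8, 1)
opt = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.01)

for micro_step in range(grad_accum_steps):
    x, y = torch.randn(batch_size, 8), torch.randn(batch_size, 1)
    loss = torch.nn.functional.mse_loss(model(x), y)
    (loss / grad_accum_steps).backward()   # scale so gradients average over the window
opt.step()                                 # single parameter update per window
opt.zero_grad()
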
4 changes: 2 additions & 2 deletions recipes/configs/phi3/mini_lora_single_device.yaml
@@ -52,8 +52,8 @@ shuffle: True
# Fine-tuning arguments
epochs: 1
max_steps_per_epoch: null
gradient_accumulation_steps: 64
batch_size: 2
gradient_accumulation_steps: 16
optimizer:
_component_: torch.optim.AdamW
weight_decay: 0.01
@@ -69,8 +69,8 @@ compile: False
device: cuda

# Memory management
dtype: bf16
enable_activation_checkpointing: True
dtype: bf16

# Logging
output_dir: /tmp/phi3_lora_finetune_output
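
The last config swaps the order of dtype and enable_activation_checkpointing (cosmetic) and lowers gradient accumulation from 64 to 16 like the others. As a reminder of what enable_activation_checkpointing: True buys, checkpointed blocks recompute their activations during the backward pass instead of caching them, trading extra compute for lower peak memory. A generic sketch with torch.utils.checkpoint, not torchtune's wiring:

import torch
from torch.utils.checkpoint import checkpoint

# A toy block run in bf16, loosely matching dtype: bf16 in the config.
block = torch.nn.Sequential(
    torch.nn.Linear(256, 256), torch.nn.GELU(), torch.nn.Linear(256, 256)
).to(dtype=torch.bfloat16)

x = torch.randn(4, 256, dtype=torch.bfloat16, requires_grad=True)

# Activations inside the block are recomputed during backward rather than stored.
out = checkpoint(block, x, use_reentrant=False)
out.sum().backward()
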
