train.py
import os
import pathlib
from dataclasses import dataclass, field
from typing import Optional

import torch
import transformers
from peft import get_peft_model_state_dict
from transformers import Trainer

from llmzoo.datasets.datasets import make_supervised_data_module
from llmzoo.models import build_model
from llmzoo.utils import safe_save_model_for_hf_trainer
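# The llmzoo.* imports are local modules provided by this repository.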


@dataclass
class ModelArguments:
    model_name_or_path: Optional[str] = field(default="facebook/opt-125m")
    lora: Optional[bool] = field(default=False)
    lora_r: Optional[int] = field(default=16)
    lora_alpha: Optional[int] = field(default=16)
    lora_dropout: Optional[float] = field(default=0.05)


@dataclass
class DataArguments:
    data_path: str = field(default=None, metadata={"help": "Path to the training data."})


@dataclass
class TrainingArguments(transformers.TrainingArguments):
    cache_dir: Optional[str] = field(default=None)
    optim: str = field(default="adamw_torch")
    model_max_length: int = field(
        default=512,
        metadata={"help": "Maximum sequence length. Sequences will be right padded (and possibly truncated)."},
    )
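
# Note: TrainingArguments extends transformers.TrainingArguments, so all of the
# standard Hugging Face trainer flags (e.g. --output_dir, --learning_rate,
# --num_train_epochs) are accepted in addition to the fields defined above.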


def train():
    parser = transformers.HfArgumentParser((ModelArguments, DataArguments, TrainingArguments))
    model_args, data_args, training_args = parser.parse_args_into_dataclasses()

    model, tokenizer = build_model(model_args, training_args)
    data_module = make_supervised_data_module(tokenizer=tokenizer, data_args=data_args)
    trainer = Trainer(model=model, tokenizer=tokenizer, args=training_args, **data_module)

    if model_args.lora:
        # Monkey-patch state_dict so that checkpoints contain only the LoRA
        # adapter weights rather than the full base model.
        old_state_dict = model.state_dict
        model.state_dict = (
            lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())
        ).__get__(model, type(model))

    # Lexicographic version check is a common shorthand for "PyTorch 2.x or
    # later", where torch.compile became available.
    if torch.__version__ >= "2":
        model = torch.compile(model)

    # Resume from the latest checkpoint if one exists in the output directory.
    if list(pathlib.Path(training_args.output_dir).glob("checkpoint-*")):
        trainer.train(resume_from_checkpoint=True)
    else:
        trainer.train()
    trainer.save_state()

    if model_args.lora:
        # Save the LoRA adapter and tokenizer into a separate subdirectory.
        model.save_pretrained(os.path.join(training_args.output_dir, "lora"))
        tokenizer.save_pretrained(os.path.join(training_args.output_dir, "lora"))
    safe_save_model_for_hf_trainer(trainer=trainer, output_dir=training_args.output_dir)


if __name__ == "__main__":
    train()
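
# Example invocation (a sketch, not taken from the repository; the data path and
# output directory below are hypothetical values). --model_name_or_path,
# --data_path, --lora, and --model_max_length map onto the dataclass fields
# above; the remaining flags are standard transformers.TrainingArguments fields:
#
#   python train.py \
#       --model_name_or_path facebook/opt-125m \
#       --data_path data/train.json \
#       --output_dir output/ \
#       --lora True \
#       --model_max_length 512 \
#       --num_train_epochs 3 \
#       --per_device_train_batch_size 4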