Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add GPT2-based Llama2 #529

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions projects/Llama/configs/llama_config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from omegaconf import DictConfig, OmegaConf

from libai.config import LazyCall
from projects.Llama.llama import LlamaForCausalLM
from projects.Llama.llama_gpt import LlamaForCausalLM

# from projects.Llama.llama import LlamaForCausalLM
from projects.Llama.tokenizer import LlamaTokenizer
from configs.common.train import train

Expand All @@ -21,7 +23,7 @@
tie_word_embeddings=False,
vocab_size=32000,
use_scaled_init_for_output_weights=False,
scale_mask_softmax_fusion=False,
scale_mask_softmax_fusion=True,
amp_enabled=True,
# Inference
is_encoder_decoder=False,
Expand All @@ -48,7 +50,7 @@
eos_token_id=2,
pad_token_id=0,
# train
pretrained_model_path="meta-llama/Llama-2-7b-hf",
pretrained_model_path="Llama-2-7b-hf",
)

cfg = DictConfig(cfg)
Expand All @@ -57,5 +59,5 @@
tokenization = OmegaConf.create()
tokenization.make_vocab_size_divisible_by = 1
tokenization.tokenizer = LazyCall(LlamaTokenizer)(
pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model"
pretrained_model_path="Llama-2-7b-hf/tokenizer.model"
)
10 changes: 5 additions & 5 deletions projects/Llama/configs/llama_sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
from projects.Llama.configs.llama_config import cfg
from projects.Llama.dataset import AlpacaDataset
from projects.Llama.tokenizer import LlamaTokenizer
from projects.Llama.llama import LlamaForCausalLM
from projects.Llama.llama_gpt import LlamaForCausalLM


# Hyperparameters
weight_decay = 0.1
learning_rate = 5e-5
dataset_path = "alpaca_data"
pretrained_model_path = "meta-llama/Llama-2-7b-hf"
dataset_path = "/home/lixin/Data/alpaca"
pretrained_model_path = "/data/hf_models/Llama-2-7b-hf"

# graph & optim
graph["enabled"] = False
Expand Down Expand Up @@ -68,7 +68,7 @@
train_iter=1,
log_period=10,
warmup_ratio=1 / 3,
num_accumulation_steps=8,
num_accumulation_steps=1,
rdma_enabled=False,
amp=dict(enabled=True),
activation_checkpoint=dict(enabled=True),
Expand All @@ -79,7 +79,7 @@
dist=dict(
data_parallel_size=1,
tensor_parallel_size=1,
pipeline_parallel_size=8,
pipeline_parallel_size=4,
pipeline_num_layers=cfg.hidden_layers,
),
evaluation=dict(
Expand Down
1 change: 1 addition & 0 deletions projects/Llama/images/LLamaLayer.drawio
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<mxfile host="Electron" modified="2024-01-22T04:36:07.699Z" agent="5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/19.0.0 Chrome/100.0.4896.160 Electron/18.3.2 Safari/537.36" etag="yhvSpiKs_DXS7XiJI6U_" version="19.0.0" type="device"><diagram id="acZnmB1dkhpwk139XDq4" name="第 1 页">7V1bc5s4FP41nn2qxwIE+LG5tH1IZzubndntvmTUoNg0YHmx3MT76xdiCWMkYtEgkHDHk6mRAZvznfOdi3TUiXuZPn/M0Hr5mUQ4mTiz6HniXk0cB8zcMP+nGNntR3xvvh9YZHHETjoM3Mb/YX4lG93GEd4cnUgJSWi8Ph68J6sVvqdHYyjLyNPxaQ8kOf7WNVpgYeD2HiXi6F9xRJf70dAJDuOfcLxY8m8GPnu+FPGT2ZNsligiT5Uh93riXmaE0P279PkSJ4XwuFz2131o+LT8YRleUZUL/nl8ysL1jQcpvfwdzf6Y/Y3cdz67zQ+UbNkTs19Ld1wEGdmuIlzcBUzci6dlTPHtGt0Xnz7loOdjS5om7OMIbZbluRuakcdSbvkTX4g/mv8AnFH8XBliD/ERkxTTbJefwj+FTKA7rilwf/x0wCeYsXOWFWxcPoiYTizKex/Elr9hkmsjxdNCXORSXKs/fanQ6Bu/w+xVqfg1oYQzQSi+RCaONpl4gkyWcRTh1cTxk/zLL75l+bsFfdESRHMTb1a72Wm1EyX7KlDKyiaqFuxVilDULHiRP+3ln/nf1QReCWLLH4wey2ZvhZckIVk+siKr/MyLhzhJakMoiRer/PA+lyHOxy8KMcU5Fb5nH6Q5gMXXSME4huuBrOgt+1ESpnojOFzJnWDqBJVXqKTz0JtCr/KCnibwguFpofQ+XGSuGi8EuhQ6FGTy7xaz++q2/qBBqm+0fm2ymjcZ/ydOAL+NiwHUAWLq7NVNWUm5wXw6m1dfriYAeRBrEgOUx0MxABBDzkfcDwGUeNjCAMA5NwpoAZElHCAGwoNzgAuG5gAxrt2/7YUFvAa5GssCkvxy5CygDpElLBAI6OBogblcSEaXZEFWKLk+jNbkeDjnhpA1A/Q7pnTHqmZoS8kx3LmQs93f7PqXg6/FwRTyw6vn6odXO3bUSEUbss3u8SvPyXCjKFvg1wBmaUAhg1fRzXCCaPzjuBgnQ+fl0vdZhnaVE9YkXtFN5c5fioGDXYN5LTZiavNB8Xzgw5pm7H/BQU/KR3mD6oQ2qQ5+jmlxGZjOIDvcX+gGITs+XFkcVC/8grM4F1fBOD3pIQ9Gu1PEt9HE3Case4AHjoInXK8HnnDEZDOJVxhlhsYF6vFu81SAV8NFsbrl6orUJLH9KO03VLRfR4/9Cgbn1ac/mGMub7H/peyqGshd2J4r2F5G8u/c3a3J5g6n33J4R2uFrlsTvje0FRqQcnv1WcqhC++OmHL3V3l3bEu5nbNLuVtAZEfK7Rgw/VanxsGL7444/9ZX8d2xbfrNObv5txYQ2cEBLhDQGWU8zssYpwPyzgtvb8PHMY+jB58cccVkor/JkRIRW1jalQT7B5a+HSFLt4DIEpaGZ8LSUJGlOQOYwtJiMrRGG3onixzNMCt1d9HsFkL/yC0MXlt0xXzmBQW5bxgNDk7gHbtnxRRKHw5iCtV7zFKvLg2eV7pirtRXXlniYUvEwsujZxSxqENkR8TinUle6ahO9HD7NyRi8SR55TWczGeT8HJy7U0uwkn43lAD68RnQrNiF09MaXv3mbA+I6PYIqXPD4iZK6J01YvTLAGxxmk2dkLxYuztuHxmC4Qs8Zn+ID6Tr0ebVBej8aVpDUvR+vezqpUBzhmm+Nlh1iZagSkPiWzDFErSk759tW+ar
4Zi01JvvroExBZfDSXR96h9dQuE7PDV0P3F66fo+iSvc84whdcNWA8D6w0UkrrlXKL4UBtVicXce7TdoOLCFG0e+yH41itj3sGpyBgh7FNwjYtjwEg5vjVI4ZGqu2AagsrLUWN8d+q71VeoCU/+5WOvaELViqZh7O0bUEELvJ+LyvVtj+OLJTSypest7YW3fU1FNI3yOllFG9vUUwuMmnYVUovNhV2FdEEoRiwb8kBT9CzurJUimm4TQ9FUJ6rmMkGt7cfx1XaA0jbPwTf/+5U3NXP1Sc8bOF17XnZprWksrHd1ht7Un0MfOiBw58Ct3VBzCxmvxlRd2d06I9/Ha75BOD8GAPrDmm+gsBum7iArDH4yyALapCKW83oMsoLWajV0kBVIQvXRbtrYAp6yE7Ke06rFV/K9HrtHzxvEhfefCKu2jPidT0+9XNp2D4agvo8deH0PBuF8xqNa92AIhlnJbkX8xxe126pwtU08DFE4cWn+521C4yVG0XtKc5BiIs5Smuhu2PFex4HfTYQJasGUL1ZdQSApm+uLMMVJGBRFhgLUAQD+rM7aA69EDBRW7xu34f3xak5PFjHNpVqsTYqS2Z++E6Va7QcCtThSX9wfihm8AVvel1BZkz+Fkix8vPmTOjyW7HoPJL1ANzcoRTdol0vYBug0xSI1GpesfQBAQuPaUl3gGVDvqu9NPDyPA0n3iAFEfkDLGiYHsr6P0VJ5G4Bs4XLPqmqYMmuf3uCQSf30DqWcLAxZJwIkjTLJ3vHOViRLTbY3hpymtAmGEuPqM/cHngErMN0wMM7diim5Ee5W035lOiXZuChzlO629W5lxrtbPikz3vmD7lx06XkVfLRZu54BSYNMyqv2xRfKOosMMtRu/bRTW4YD56JHklU3HW00CocJepkVHizva+WTU1Y4dWDVEEELM3wVx1GboTh5Zu5kjA7L44s3mOUFkliwZ8szIEKGvPnNnAhZ0qVkQoTcviVm8Ai5uW1pjBFyC4BsiZD9YSLkDqLdt7pZ1calki1McbN80cXZV6Q8P5iaVpMyoq/MvCkgSWOZCR7Xvo4z0NxyNkaP20G7mXEe16r/urJDj6vcKgy6X7H6RsjEfCFN1ibbWbeO1q81KhvgZgdtDJxNg5+qKQ1WUrLW8iRdfGdVUvJ8UAtxhy8qmdjVZ0CIK2nrMyHEta/fD5xVw18bgGwJccffttWhc1bu4wfdN/I3wJwfZoTQamtVru7LzyTCxRn/Aw==</diagram></mxfile>
4 changes: 4 additions & 0 deletions projects/Llama/images/LLamaLayer.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading