From 9b77d17ee5cf7c2129701f46150d1dfb1c52f4f0 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 3 Sep 2024 08:30:00 +0000 Subject: [PATCH 01/15] update llama for multi devices --- projects/Llama/pipeline.py | 59 +++++++++++++--------------- projects/Llama/tokenizer.py | 4 +- projects/Llama/utils/llama_loader.py | 4 ++ 3 files changed, 34 insertions(+), 33 deletions(-) diff --git a/projects/Llama/pipeline.py b/projects/Llama/pipeline.py index bea4a2f56..99c5153e8 100644 --- a/projects/Llama/pipeline.py +++ b/projects/Llama/pipeline.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import click from libai.inference.basic import BasePipeline from libai.utils import distributed as dist @@ -30,11 +31,7 @@ def load_pretrain_weight(self, libai_cfg_model, model_path, mode="huggingface"): if mode == "huggingface": from projects.Llama.utils.llama_loader import LlamaLoaderHuggerFace - model_loader = LlamaLoaderHuggerFace( - libai_cfg_model, - libai_cfg_model.cfg, - model_path, - ) + model_loader = LlamaLoaderHuggerFace(libai_cfg_model, libai_cfg_model.cfg, model_path,) model = model_loader.load() model.eval() return model @@ -42,11 +39,7 @@ def load_pretrain_weight(self, libai_cfg_model, model_path, mode="huggingface"): elif mode == "libai": from projects.Llama.utils.llama_loader import LlamaLoaderLiBai - model_loader = LlamaLoaderLiBai( - libai_cfg_model, - libai_cfg_model.cfg, - model_path, - ) + model_loader = LlamaLoaderLiBai(libai_cfg_model, libai_cfg_model.cfg, model_path,) model = model_loader.load() model.eval() return model @@ -67,7 +60,7 @@ def _parse_parameters(self, **pipeline_parameters): def preprocess(self, inputs, **kwargs) -> dict: # tokenizer encoderW - inputs = self.tokenizer.tokenize(inputs, add_bos=True, padding=True) + inputs = self.tokenizer.tokenize(inputs, add_bos=True, padding=True, device=self.device) inputs = { "input_ids": inputs, } @@ -87,31 +80,31 @@ def postprocess(self, model_output_dict, **kwargs) -> dict: return records -if __name__ == "__main__": - # ----- load huggingface checkpoint ----- - # pipeline = TextGenerationPipeline( - # "projects/Llama/configs/llama_config.py", - # data_parallel=1, - # tensor_parallel=1, - # pipeline_parallel=1, - # pipeline_num_layers=32, - # model_path="", - # mode="huggingface", - # ) - - # output = pipeline(inputs=text) - # if dist.is_main_process(): - # print(output) - - # ----- load libai checkpoint ----- +@click.command() +@click.option( + "--config_file", + default="projects/Llama/configs/llama_config.py", + help="Path to the configuration file.", +) +@click.option("--model_path", default="", help="Path to the model checkpoint.") +@click.option( + "--mode", + default="libai", + help="Mode for the dataloader pipeline, e.g., 'libai' or 'huggingface'.", +) +@click.option( + "--device", default="cuda", help="Device to run the model on, e.g., 'cuda', 'xpu', 'npu'." 
+) +def main(config_file, model_path, mode, device): pipeline = TextGenerationPipeline( - "projects/Llama/configs/llama_config.py", + config_file, data_parallel=1, tensor_parallel=1, pipeline_parallel=1, pipeline_num_layers=32, - model_path="", - mode="libai", + model_path=model_path, + mode=mode, + device=device, ) text = [ @@ -120,3 +113,7 @@ def postprocess(self, model_output_dict, **kwargs) -> dict: output = pipeline(inputs=text) if dist.is_main_process(): print(output) + + +if __name__ == "__main__": + main() diff --git a/projects/Llama/tokenizer.py b/projects/Llama/tokenizer.py index 56aca8336..1598a1dbe 100644 --- a/projects/Llama/tokenizer.py +++ b/projects/Llama/tokenizer.py @@ -75,9 +75,9 @@ def tokenize( if add_eos: tokens = [token + [self.eos_token_id] for token in tokens] - if device == "cuda": + if device: sbp = kwargs.get("sbp", dist.get_nd_sbp([flow.sbp.broadcast, flow.sbp.broadcast])) - placement = kwargs.get("placement", flow.placement("cuda", [0])) + placement = kwargs.get("placement", flow.placement(device, [0])) return_token_ids = flow.tensor(tokens, sbp=sbp, placement=placement, dtype=flow.long) else: return_token_ids = flow.tensor(tokens, dtype=flow.long) diff --git a/projects/Llama/utils/llama_loader.py b/projects/Llama/utils/llama_loader.py index 20b9ba258..c46cb480a 100644 --- a/projects/Llama/utils/llama_loader.py +++ b/projects/Llama/utils/llama_loader.py @@ -26,6 +26,8 @@ def __init__(self, model, libai_cfg, pretrained_model_path, **kwargs): self.base_model_prefix_1 = "model" self.base_model_prefix_2 = "model" + if not pretrained_model_path: + self.pretrained_model_path = libai_cfg.pretrained_model_path def _convert_state_dict(self, flow_state_dict, cfg): """Convert state_dict's keys to match model. @@ -104,3 +106,5 @@ class LlamaLoaderLiBai(ModelLoaderLiBai): def __init__(self, model, libai_cfg, pretrained_model_path, **kwargs): super().__init__(model, libai_cfg, pretrained_model_path, **kwargs) self.base_model_prefix_2 = "model" + if not pretrained_model_path: + self.pretrained_model_path = libai_cfg.pretrained_model_path From 87b2c4182aa32650baa88adfcbd33c00cda62031 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 3 Sep 2024 08:31:53 +0000 Subject: [PATCH 02/15] xpu and npu config files --- projects/Llama/configs/llama_config_npu.py | 64 ++++++++++++++++++++++ projects/Llama/configs/llama_config_xpu.py | 64 ++++++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 projects/Llama/configs/llama_config_npu.py create mode 100644 projects/Llama/configs/llama_config_xpu.py diff --git a/projects/Llama/configs/llama_config_npu.py b/projects/Llama/configs/llama_config_npu.py new file mode 100644 index 000000000..1d959cc2e --- /dev/null +++ b/projects/Llama/configs/llama_config_npu.py @@ -0,0 +1,64 @@ +from omegaconf import DictConfig, OmegaConf + +from libai.config import LazyCall +from projects.Llama.llama import LlamaForCausalLM +from projects.Llama.tokenizer import LlamaTokenizer +from configs.common.train import train + +import oneflow_npu + +cfg = dict( + # Model + hidden_act="silu", + hidden_size=4096, + initializer_range=0.02, + intermediate_size=11008, + max_position_embeddings=2048, + num_attention_heads=32, + hidden_layers=32, + pretraining_tp=1, + rms_norm_eps=1e-05, + rope_scaling=None, + tie_word_embeddings=False, + vocab_size=32000, + use_scaled_init_for_output_weights=False, + scale_mask_softmax_fusion=False, + amp_enabled=True, + # Inference + is_encoder_decoder=False, + max_length=256, + min_length=0, + do_sample=False, + 
early_stopping=False, + num_beams=1, + num_beam_groups=1, + diversity_penalty=0.0, + temperature=0.9, + top_k=50, + top_p=0.6, + typical_p=1.0, + repetition_penalty=1.0, + length_penalty=1.0, + no_repeat_ngram_size=0, + encoder_no_repeat_ngram_size=0, + num_return_sequences=1, + chunk_size_feed_forward=0, + output_scores=False, + use_cache=True, + bos_token_id=1, + eos_token_id=2, + pad_token_id=0, + # train + #pretrained_model_path="meta-llama/Llama-2-7b-hf", + pretrained_model_path="/root/models/Llama-2-7b-chat-hf", +) + +cfg = DictConfig(cfg) + +model = LazyCall(LlamaForCausalLM)(cfg=cfg) +tokenization = OmegaConf.create() +tokenization.make_vocab_size_divisible_by = 1 +tokenization.tokenizer = LazyCall(LlamaTokenizer)( + #pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model" + pretrained_model_path="/root/models/Llama-2-7b-chat-hf/tokenizer.model" +) diff --git a/projects/Llama/configs/llama_config_xpu.py b/projects/Llama/configs/llama_config_xpu.py new file mode 100644 index 000000000..48f103293 --- /dev/null +++ b/projects/Llama/configs/llama_config_xpu.py @@ -0,0 +1,64 @@ +from omegaconf import DictConfig, OmegaConf + +from libai.config import LazyCall +from projects.Llama.llama import LlamaForCausalLM +from projects.Llama.tokenizer import LlamaTokenizer +from configs.common.train import train + +import oneflow_xpu + +cfg = dict( + # Model + hidden_act="silu", + hidden_size=4096, + initializer_range=0.02, + intermediate_size=11008, + max_position_embeddings=2048, + num_attention_heads=32, + hidden_layers=32, + pretraining_tp=1, + rms_norm_eps=1e-05, + rope_scaling=None, + tie_word_embeddings=False, + vocab_size=32000, + use_scaled_init_for_output_weights=False, + scale_mask_softmax_fusion=False, + amp_enabled=True, + # Inference + is_encoder_decoder=False, + max_length=256, + min_length=0, + do_sample=False, + early_stopping=False, + num_beams=1, + num_beam_groups=1, + diversity_penalty=0.0, + temperature=0.9, + top_k=50, + top_p=0.6, + typical_p=1.0, + repetition_penalty=1.0, + length_penalty=1.0, + no_repeat_ngram_size=0, + encoder_no_repeat_ngram_size=0, + num_return_sequences=1, + chunk_size_feed_forward=0, + output_scores=False, + use_cache=True, + bos_token_id=1, + eos_token_id=2, + pad_token_id=0, + # train + #pretrained_model_path="meta-llama/Llama-2-7b-hf", + pretrained_model_path="/root/models/Llama-2-7b-chat-hf", +) + +cfg = DictConfig(cfg) + +model = LazyCall(LlamaForCausalLM)(cfg=cfg) +tokenization = OmegaConf.create() +tokenization.make_vocab_size_divisible_by = 1 +tokenization.tokenizer = LazyCall(LlamaTokenizer)( + #pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model" + pretrained_model_path="/root/models/Llama-2-7b-chat-hf/tokenizer.model" +) From f87b71331ed339f50277e9123583954433a54d17 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 3 Sep 2024 08:35:22 +0000 Subject: [PATCH 03/15] update device for inference --- libai/inference/basic.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/libai/inference/basic.py b/libai/inference/basic.py index 94d3f1781..53933e094 100644 --- a/libai/inference/basic.py +++ b/libai/inference/basic.py @@ -43,6 +43,7 @@ def __init__( pipeline_num_layers=None, model_path=None, mode="libai", + device="cuda", **kwargs, ): # init cfg @@ -60,6 +61,7 @@ def __init__( pipeline_stage_id, pipeline_num_layers, ) + self.device = device dist.setup_dist_util(self.cfg.train.dist) logger.info(self.cfg.train.dist) @@ -104,10 +106,7 @@ def update_cfg( ), "cfg.train.dist.pipeline_num_layers must be 
set when run pipeline parallel" def load_pretrain_weight( - self, - libai_cfg_model, - model_path, - mode="libai", + self, libai_cfg_model, model_path, mode="libai", ): """load pretrained model. @@ -167,7 +166,9 @@ def to_local(self, model_outputs_dict): for key, value in model_outputs_dict.items(): if isinstance(value, flow.Tensor) and value.is_global: model_outputs_dict[key] = dist.ttol( - value, ranks=[0] if value.placement.ranks.ndim == 1 else [[0]] + value, + device=self.device, + ranks=[0] if value.placement.ranks.ndim == 1 else [[0]], ) if flow.cuda.is_available(): dist.synchronize() From 9ba5a652a6983d7ec3cac81cb5a556c205d15a04 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 3 Sep 2024 09:22:39 +0000 Subject: [PATCH 04/15] update --- libai/inference/basic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libai/inference/basic.py b/libai/inference/basic.py index 53933e094..e923bda99 100644 --- a/libai/inference/basic.py +++ b/libai/inference/basic.py @@ -62,6 +62,8 @@ def __init__( pipeline_num_layers, ) self.device = device + if device: + self.cfg.train.dist.device_type = device dist.setup_dist_util(self.cfg.train.dist) logger.info(self.cfg.train.dist) From 336c48122b7f4e6953d4c63e345a692068345f03 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 3 Sep 2024 10:07:42 +0000 Subject: [PATCH 05/15] update --- projects/Llama/configs/llama_config_npu.py | 4 ++-- projects/Llama/pipeline.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/projects/Llama/configs/llama_config_npu.py b/projects/Llama/configs/llama_config_npu.py index 1d959cc2e..5bdb059a9 100644 --- a/projects/Llama/configs/llama_config_npu.py +++ b/projects/Llama/configs/llama_config_npu.py @@ -50,7 +50,7 @@ pad_token_id=0, # train #pretrained_model_path="meta-llama/Llama-2-7b-hf", - pretrained_model_path="/root/models/Llama-2-7b-chat-hf", + pretrained_model_path="/data0/hf_models/Llama-2-7b-chat-hf", ) cfg = DictConfig(cfg) @@ -60,5 +60,5 @@ tokenization.make_vocab_size_divisible_by = 1 tokenization.tokenizer = LazyCall(LlamaTokenizer)( #pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model" - pretrained_model_path="/root/models/Llama-2-7b-chat-hf/tokenizer.model" + pretrained_model_path="/data0/hf_models/Llama-2-7b-chat-hf/tokenizer.model" ) diff --git a/projects/Llama/pipeline.py b/projects/Llama/pipeline.py index 99c5153e8..5c5acc357 100644 --- a/projects/Llama/pipeline.py +++ b/projects/Llama/pipeline.py @@ -96,6 +96,8 @@ def postprocess(self, model_output_dict, **kwargs) -> dict: "--device", default="cuda", help="Device to run the model on, e.g., 'cuda', 'xpu', 'npu'." ) def main(config_file, model_path, mode, device): + if model_path: + print("Note: The '--model_path' option is for the model checkpoint only. 
Please configure 'tokenization.tokenizer.pretrained_model_path' directly in the config file.") pipeline = TextGenerationPipeline( config_file, data_parallel=1, From 24e9c1af0a45caaa946e43aa383d8b3538435429 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 3 Sep 2024 10:16:00 +0000 Subject: [PATCH 06/15] update README --- projects/Llama/{readme.md => README.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename projects/Llama/{readme.md => README.md} (100%) diff --git a/projects/Llama/readme.md b/projects/Llama/README.md similarity index 100% rename from projects/Llama/readme.md rename to projects/Llama/README.md From 032664a65ac76b945e922f603695c9cf59ddb307 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Tue, 3 Sep 2024 10:25:34 +0000 Subject: [PATCH 07/15] update --- projects/Llama/README.md | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/projects/Llama/README.md b/projects/Llama/README.md index 9adb3d925..8c6dd862d 100644 --- a/projects/Llama/README.md +++ b/projects/Llama/README.md @@ -44,4 +44,17 @@ python projects/Llama/utils/eval_adapter.py - Adjust the parameters in the `projects/Llama/pipeline.py`, and running: ```bash bash tools/infer.sh projects/Llama/pipeline.py 8 -``` \ No newline at end of file +``` + +## npu/xpu example + +- npu +```bash +python projects/Llama/pipeline.py --device=npu --mode=huggingface --config_file=projects/Llama/configs/llama_config_npu.py +``` + +- xpu +```bash +python projects/Llama/pipeline.py --device=xpu --mode=huggingface --config_file=projects/Llama/configs/llama_config_xpu.py +``` + From 6f921cb899f4638e767be5cf3450c28d33f5bfc6 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 4 Sep 2024 06:59:02 +0000 Subject: [PATCH 08/15] format --- libai/inference/basic.py | 5 ++++- projects/Llama/configs/llama_config_npu.py | 4 ++-- projects/Llama/configs/llama_config_xpu.py | 4 ++-- projects/Llama/pipeline.py | 16 +++++++++++++--- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/libai/inference/basic.py b/libai/inference/basic.py index e923bda99..752f4578a 100644 --- a/libai/inference/basic.py +++ b/libai/inference/basic.py @@ -108,7 +108,10 @@ def update_cfg( ), "cfg.train.dist.pipeline_num_layers must be set when run pipeline parallel" def load_pretrain_weight( - self, libai_cfg_model, model_path, mode="libai", + self, + libai_cfg_model, + model_path, + mode="libai", ): """load pretrained model. 
diff --git a/projects/Llama/configs/llama_config_npu.py b/projects/Llama/configs/llama_config_npu.py index 5bdb059a9..e06ca1bee 100644 --- a/projects/Llama/configs/llama_config_npu.py +++ b/projects/Llama/configs/llama_config_npu.py @@ -49,7 +49,7 @@ eos_token_id=2, pad_token_id=0, # train - #pretrained_model_path="meta-llama/Llama-2-7b-hf", + # pretrained_model_path="meta-llama/Llama-2-7b-hf", pretrained_model_path="/data0/hf_models/Llama-2-7b-chat-hf", ) @@ -59,6 +59,6 @@ tokenization = OmegaConf.create() tokenization.make_vocab_size_divisible_by = 1 tokenization.tokenizer = LazyCall(LlamaTokenizer)( - #pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model" + # pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model" pretrained_model_path="/data0/hf_models/Llama-2-7b-chat-hf/tokenizer.model" ) diff --git a/projects/Llama/configs/llama_config_xpu.py b/projects/Llama/configs/llama_config_xpu.py index 48f103293..0f9fa66c2 100644 --- a/projects/Llama/configs/llama_config_xpu.py +++ b/projects/Llama/configs/llama_config_xpu.py @@ -49,7 +49,7 @@ eos_token_id=2, pad_token_id=0, # train - #pretrained_model_path="meta-llama/Llama-2-7b-hf", + # pretrained_model_path="meta-llama/Llama-2-7b-hf", pretrained_model_path="/root/models/Llama-2-7b-chat-hf", ) @@ -59,6 +59,6 @@ tokenization = OmegaConf.create() tokenization.make_vocab_size_divisible_by = 1 tokenization.tokenizer = LazyCall(LlamaTokenizer)( - #pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model" + # pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model" pretrained_model_path="/root/models/Llama-2-7b-chat-hf/tokenizer.model" ) diff --git a/projects/Llama/pipeline.py b/projects/Llama/pipeline.py index 5c5acc357..f0cd302de 100644 --- a/projects/Llama/pipeline.py +++ b/projects/Llama/pipeline.py @@ -31,7 +31,11 @@ def load_pretrain_weight(self, libai_cfg_model, model_path, mode="huggingface"): if mode == "huggingface": from projects.Llama.utils.llama_loader import LlamaLoaderHuggerFace - model_loader = LlamaLoaderHuggerFace(libai_cfg_model, libai_cfg_model.cfg, model_path,) + model_loader = LlamaLoaderHuggerFace( + libai_cfg_model, + libai_cfg_model.cfg, + model_path, + ) model = model_loader.load() model.eval() return model @@ -39,7 +43,11 @@ def load_pretrain_weight(self, libai_cfg_model, model_path, mode="huggingface"): elif mode == "libai": from projects.Llama.utils.llama_loader import LlamaLoaderLiBai - model_loader = LlamaLoaderLiBai(libai_cfg_model, libai_cfg_model.cfg, model_path,) + model_loader = LlamaLoaderLiBai( + libai_cfg_model, + libai_cfg_model.cfg, + model_path, + ) model = model_loader.load() model.eval() return model @@ -97,7 +105,9 @@ def postprocess(self, model_output_dict, **kwargs) -> dict: ) def main(config_file, model_path, mode, device): if model_path: - print("Note: The '--model_path' option is for the model checkpoint only. Please configure 'tokenization.tokenizer.pretrained_model_path' directly in the config file.") + print( + "Note: The '--model_path' option is for the model checkpoint only. Please configure 'tokenization.tokenizer.pretrained_model_path' directly in the config file." 
+ ) pipeline = TextGenerationPipeline( config_file, data_parallel=1, From a238a4b78063bee3e0e9929060f93113740d1243 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 4 Sep 2024 07:07:40 +0000 Subject: [PATCH 09/15] format --- projects/Llama/pipeline.py | 1 + 1 file changed, 1 insertion(+) diff --git a/projects/Llama/pipeline.py b/projects/Llama/pipeline.py index f0cd302de..2374b0ee4 100644 --- a/projects/Llama/pipeline.py +++ b/projects/Llama/pipeline.py @@ -14,6 +14,7 @@ # limitations under the License. import click + from libai.inference.basic import BasePipeline from libai.utils import distributed as dist From d4bd6dbcfd7632ea4933ede833ab802d60f7961d Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 4 Sep 2024 07:17:41 +0000 Subject: [PATCH 10/15] fix --- projects/Llama/pipeline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/projects/Llama/pipeline.py b/projects/Llama/pipeline.py index 2374b0ee4..3014f6d40 100644 --- a/projects/Llama/pipeline.py +++ b/projects/Llama/pipeline.py @@ -107,7 +107,9 @@ def postprocess(self, model_output_dict, **kwargs) -> dict: def main(config_file, model_path, mode, device): if model_path: print( - "Note: The '--model_path' option is for the model checkpoint only. Please configure 'tokenization.tokenizer.pretrained_model_path' directly in the config file." + "Note: The '--model_path' option is for the model checkpoint only. " + "Please configure 'tokenization.tokenizer.pretrained_model_path' " + "directly in the config file." ) pipeline = TextGenerationPipeline( config_file, From 593937f553e0181c6ac5c65d57450c116c2e6901 Mon Sep 17 00:00:00 2001 From: Qunhong Zeng <871206929@qq.com> Date: Wed, 4 Sep 2024 16:41:18 +0800 Subject: [PATCH 11/15] feat: support third-party oneflow device extension (#549) * feat: support third-party device oneflow extentions also, refactor the build process of model and tokenizer using pretrained_model_path cofnig * refactor: remove unnecessary config and warnings * docs: update readme for commands to run llama on npu and xpu --- libai/inference/basic.py | 20 ++++++- libai/utils/distributed.py | 16 ++++++ projects/Llama/README.md | 4 +- projects/Llama/configs/llama_config.py | 2 +- projects/Llama/configs/llama_config_npu.py | 64 ---------------------- projects/Llama/configs/llama_config_xpu.py | 64 ---------------------- projects/Llama/pipeline.py | 8 +-- 7 files changed, 38 insertions(+), 140 deletions(-) delete mode 100644 projects/Llama/configs/llama_config_npu.py delete mode 100644 projects/Llama/configs/llama_config_xpu.py diff --git a/libai/inference/basic.py b/libai/inference/basic.py index 752f4578a..4de70d010 100644 --- a/libai/inference/basic.py +++ b/libai/inference/basic.py @@ -16,6 +16,7 @@ import logging from abc import ABCMeta, abstractmethod from typing import Any, Dict +from pathlib import Path import oneflow as flow @@ -62,12 +63,20 @@ def __init__( pipeline_num_layers, ) self.device = device - if device: - self.cfg.train.dist.device_type = device + self.cfg.train.dist.device_type = device dist.setup_dist_util(self.cfg.train.dist) logger.info(self.cfg.train.dist) # initial and load model + self.model_path = model_path + if self.model_path is not None: + # If a model_path is provided in BasePipeline, + # we use it with priority, overwrite the pretrained_model_path in config + self.cfg.model.cfg.pretrained_model_path = self.model_path + else: + # If the model_path in BasePipeline is None, then use the one from the config + assert "pretrained_model_path" in self.cfg.model.cfg + 
self.model_path = self.cfg.model.cfg.pretrained_model_path self.model = self.load_pretrain_weight(self.cfg.model, model_path, mode=mode) self.model._apply(dist.convert_to_distributed_default_setting) @@ -138,6 +147,13 @@ def load_pretrain_weight( def build_tokenizer(self, cfg): tokenizer = None if try_get_key(cfg, "tokenization") is not None: + tokenizer_cfg = cfg.tokenization.tokenizer + if "pretrained_model_path" not in tokenizer_cfg: + # If "pretrained_model_path" does not exist in the tokenizer's config, + # set it to default as f"{model_path}/tokenizer.model" + tokenizer_cfg.pretrained_model_path = str( + Path(self.model_path).joinpath("tokenizer.model") + ) tokenizer = DefaultTrainer.build_tokenizer(cfg) return tokenizer diff --git a/libai/utils/distributed.py b/libai/utils/distributed.py index f84313fd7..5fab501de 100644 --- a/libai/utils/distributed.py +++ b/libai/utils/distributed.py @@ -72,6 +72,22 @@ def _init_distributed_env(self, cfg): # Add set device type self._device_type = try_get_key(cfg, "device_type", default="cuda") + if self._device_type == "npu": + try: + import oneflow_npu + except ImportError: + raise ImportError( + "The module 'oneflow_npu' is not installed. Please install it to use NPU devices." + ) + elif self._device_type == "xpu": + try: + import oneflow_xpu + except ImportError: + raise ImportError( + "The module 'oneflow_xpu' is not installed. Please install it to use XPU devices." + ) + elif self._device_type not in ("cuda", "npu", "xpu", "cpu"): + raise NotImplementedError(f"Unsupported device {self._device_type}") def _init_parallel_size(self, cfg): diff --git a/projects/Llama/README.md b/projects/Llama/README.md index 8c6dd862d..f58e416c1 100644 --- a/projects/Llama/README.md +++ b/projects/Llama/README.md @@ -50,11 +50,11 @@ bash tools/infer.sh projects/Llama/pipeline.py 8 - npu ```bash -python projects/Llama/pipeline.py --device=npu --mode=huggingface --config_file=projects/Llama/configs/llama_config_npu.py +python projects/Llama/pipeline.py --device=npu --mode=huggingface --model_path /your/model/path ``` - xpu ```bash -python projects/Llama/pipeline.py --device=xpu --mode=huggingface --config_file=projects/Llama/configs/llama_config_xpu.py +python projects/Llama/pipeline.py --device=xpu --mode=huggingface --model_path /your/model/path ``` diff --git a/projects/Llama/configs/llama_config.py b/projects/Llama/configs/llama_config.py index 01d208016..36f95d126 100644 --- a/projects/Llama/configs/llama_config.py +++ b/projects/Llama/configs/llama_config.py @@ -57,5 +57,5 @@ tokenization = OmegaConf.create() tokenization.make_vocab_size_divisible_by = 1 tokenization.tokenizer = LazyCall(LlamaTokenizer)( - pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model" + # pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model" ) diff --git a/projects/Llama/configs/llama_config_npu.py b/projects/Llama/configs/llama_config_npu.py deleted file mode 100644 index e06ca1bee..000000000 --- a/projects/Llama/configs/llama_config_npu.py +++ /dev/null @@ -1,64 +0,0 @@ -from omegaconf import DictConfig, OmegaConf - -from libai.config import LazyCall -from projects.Llama.llama import LlamaForCausalLM -from projects.Llama.tokenizer import LlamaTokenizer -from configs.common.train import train - -import oneflow_npu - -cfg = dict( - # Model - hidden_act="silu", - hidden_size=4096, - initializer_range=0.02, - intermediate_size=11008, - max_position_embeddings=2048, - num_attention_heads=32, - hidden_layers=32, - pretraining_tp=1, - rms_norm_eps=1e-05, - 
rope_scaling=None, - tie_word_embeddings=False, - vocab_size=32000, - use_scaled_init_for_output_weights=False, - scale_mask_softmax_fusion=False, - amp_enabled=True, - # Inference - is_encoder_decoder=False, - max_length=256, - min_length=0, - do_sample=False, - early_stopping=False, - num_beams=1, - num_beam_groups=1, - diversity_penalty=0.0, - temperature=0.9, - top_k=50, - top_p=0.6, - typical_p=1.0, - repetition_penalty=1.0, - length_penalty=1.0, - no_repeat_ngram_size=0, - encoder_no_repeat_ngram_size=0, - num_return_sequences=1, - chunk_size_feed_forward=0, - output_scores=False, - use_cache=True, - bos_token_id=1, - eos_token_id=2, - pad_token_id=0, - # train - # pretrained_model_path="meta-llama/Llama-2-7b-hf", - pretrained_model_path="/data0/hf_models/Llama-2-7b-chat-hf", -) - -cfg = DictConfig(cfg) - -model = LazyCall(LlamaForCausalLM)(cfg=cfg) -tokenization = OmegaConf.create() -tokenization.make_vocab_size_divisible_by = 1 -tokenization.tokenizer = LazyCall(LlamaTokenizer)( - # pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model" - pretrained_model_path="/data0/hf_models/Llama-2-7b-chat-hf/tokenizer.model" -) diff --git a/projects/Llama/configs/llama_config_xpu.py b/projects/Llama/configs/llama_config_xpu.py deleted file mode 100644 index 0f9fa66c2..000000000 --- a/projects/Llama/configs/llama_config_xpu.py +++ /dev/null @@ -1,64 +0,0 @@ -from omegaconf import DictConfig, OmegaConf - -from libai.config import LazyCall -from projects.Llama.llama import LlamaForCausalLM -from projects.Llama.tokenizer import LlamaTokenizer -from configs.common.train import train - -import oneflow_xpu - -cfg = dict( - # Model - hidden_act="silu", - hidden_size=4096, - initializer_range=0.02, - intermediate_size=11008, - max_position_embeddings=2048, - num_attention_heads=32, - hidden_layers=32, - pretraining_tp=1, - rms_norm_eps=1e-05, - rope_scaling=None, - tie_word_embeddings=False, - vocab_size=32000, - use_scaled_init_for_output_weights=False, - scale_mask_softmax_fusion=False, - amp_enabled=True, - # Inference - is_encoder_decoder=False, - max_length=256, - min_length=0, - do_sample=False, - early_stopping=False, - num_beams=1, - num_beam_groups=1, - diversity_penalty=0.0, - temperature=0.9, - top_k=50, - top_p=0.6, - typical_p=1.0, - repetition_penalty=1.0, - length_penalty=1.0, - no_repeat_ngram_size=0, - encoder_no_repeat_ngram_size=0, - num_return_sequences=1, - chunk_size_feed_forward=0, - output_scores=False, - use_cache=True, - bos_token_id=1, - eos_token_id=2, - pad_token_id=0, - # train - # pretrained_model_path="meta-llama/Llama-2-7b-hf", - pretrained_model_path="/root/models/Llama-2-7b-chat-hf", -) - -cfg = DictConfig(cfg) - -model = LazyCall(LlamaForCausalLM)(cfg=cfg) -tokenization = OmegaConf.create() -tokenization.make_vocab_size_divisible_by = 1 -tokenization.tokenizer = LazyCall(LlamaTokenizer)( - # pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model" - pretrained_model_path="/root/models/Llama-2-7b-chat-hf/tokenizer.model" -) diff --git a/projects/Llama/pipeline.py b/projects/Llama/pipeline.py index 3014f6d40..4b65d2895 100644 --- a/projects/Llama/pipeline.py +++ b/projects/Llama/pipeline.py @@ -95,7 +95,7 @@ def postprocess(self, model_output_dict, **kwargs) -> dict: default="projects/Llama/configs/llama_config.py", help="Path to the configuration file.", ) -@click.option("--model_path", default="", help="Path to the model checkpoint.") +@click.option("--model_path", default=None, help="Path to the model checkpoint.") @click.option( "--mode", 
default="libai", @@ -105,12 +105,6 @@ def postprocess(self, model_output_dict, **kwargs) -> dict: "--device", default="cuda", help="Device to run the model on, e.g., 'cuda', 'xpu', 'npu'." ) def main(config_file, model_path, mode, device): - if model_path: - print( - "Note: The '--model_path' option is for the model checkpoint only. " - "Please configure 'tokenization.tokenizer.pretrained_model_path' " - "directly in the config file." - ) pipeline = TextGenerationPipeline( config_file, data_parallel=1, From a030a1b0cc8c12827edf458a090bc0c710960859 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 4 Sep 2024 08:45:56 +0000 Subject: [PATCH 12/15] fix import order --- libai/inference/basic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libai/inference/basic.py b/libai/inference/basic.py index 4de70d010..b869e56cc 100644 --- a/libai/inference/basic.py +++ b/libai/inference/basic.py @@ -15,8 +15,8 @@ import logging from abc import ABCMeta, abstractmethod -from typing import Any, Dict from pathlib import Path +from typing import Any, Dict import oneflow as flow From 942556f157360116797afc6b80998abf5f1e7fc8 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 4 Sep 2024 08:51:51 +0000 Subject: [PATCH 13/15] update --- libai/utils/distributed.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libai/utils/distributed.py b/libai/utils/distributed.py index 5fab501de..7fd9996d0 100644 --- a/libai/utils/distributed.py +++ b/libai/utils/distributed.py @@ -77,14 +77,14 @@ def _init_distributed_env(self, cfg): import oneflow_npu except ImportError: raise ImportError( - "The module 'oneflow_npu' is not installed. Please install it to use NPU devices." + "'oneflow_npu' is missing. Install it to use NPU devices." ) elif self._device_type == "xpu": try: import oneflow_xpu except ImportError: raise ImportError( - "The module 'oneflow_xpu' is not installed. Please install it to use XPU devices." + "'oneflow_xpu' is missing. Install it to use NPU devices." ) elif self._device_type not in ("cuda", "npu", "xpu", "cpu"): raise NotImplementedError(f"Unsupported device {self._device_type}") From 8cfd032816a446b3b2283c43597df7d1417fbae5 Mon Sep 17 00:00:00 2001 From: ShawnXuan Date: Wed, 4 Sep 2024 08:54:37 +0000 Subject: [PATCH 14/15] update --- libai/utils/distributed.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/libai/utils/distributed.py b/libai/utils/distributed.py index 7fd9996d0..8b22c8a4c 100644 --- a/libai/utils/distributed.py +++ b/libai/utils/distributed.py @@ -76,16 +76,12 @@ def _init_distributed_env(self, cfg): try: import oneflow_npu except ImportError: - raise ImportError( - "'oneflow_npu' is missing. Install it to use NPU devices." - ) + raise ImportError("'oneflow_npu' is missing. Install it to use NPU devices.") elif self._device_type == "xpu": try: import oneflow_xpu except ImportError: - raise ImportError( - "'oneflow_xpu' is missing. Install it to use NPU devices." - ) + raise ImportError("'oneflow_xpu' is missing. 
Install it to use XPU devices.")
         elif self._device_type not in ("cuda", "npu", "xpu", "cpu"):
             raise NotImplementedError(f"Unsupported device {self._device_type}")

From 9aa8b065e2000c15ce1e1919dda7cf0de1da4acc Mon Sep 17 00:00:00 2001
From: 0x404 <871206929@qq.com>
Date: Wed, 4 Sep 2024 09:20:11 +0000
Subject: [PATCH 15/15] fix: skip lint on oneflow third-party imports

---
 libai/utils/distributed.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libai/utils/distributed.py b/libai/utils/distributed.py
index 8b22c8a4c..f64479210 100644
--- a/libai/utils/distributed.py
+++ b/libai/utils/distributed.py
@@ -74,12 +74,12 @@ def _init_distributed_env(self, cfg):
         self._device_type = try_get_key(cfg, "device_type", default="cuda")
         if self._device_type == "npu":
             try:
-                import oneflow_npu
+                import oneflow_npu  # noqa: F401
             except ImportError:
                 raise ImportError("'oneflow_npu' is missing. Install it to use NPU devices.")
         elif self._device_type == "xpu":
             try:
-                import oneflow_xpu
+                import oneflow_xpu  # noqa: F401
             except ImportError:
                 raise ImportError("'oneflow_xpu' is missing. Install it to use XPU devices.")
         elif self._device_type not in ("cuda", "npu", "xpu", "cpu"):
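
Usage reference for the series: besides the click CLI shown in the README ("python projects/Llama/pipeline.py --device=npu ..."), the updated `TextGenerationPipeline` can be driven programmatically. The sketch below mirrors the `main()` added to `projects/Llama/pipeline.py`; it is not part of the patches above, and the checkpoint path and prompt are placeholders. `device` may be "cuda", "npu", or "xpu", provided the matching extension (`oneflow_npu` / `oneflow_xpu`) is installed.

```python
# Minimal sketch, assuming a local Llama-2 checkpoint directory (placeholder path).
from libai.utils import distributed as dist
from projects.Llama.pipeline import TextGenerationPipeline

pipeline = TextGenerationPipeline(
    "projects/Llama/configs/llama_config.py",
    data_parallel=1,
    tensor_parallel=1,
    pipeline_parallel=1,
    pipeline_num_layers=32,
    model_path="/path/to/Llama-2-7b-chat-hf",  # placeholder checkpoint directory
    mode="huggingface",
    device="xpu",  # or "cuda" / "npu", matching the installed oneflow extension
)

text = ["Give three tips for staying healthy."]  # placeholder prompt
output = pipeline(inputs=text)
if dist.is_main_process():
    print(output)
```

With `device` set, `BasePipeline` overrides `cfg.train.dist.device_type` before `dist.setup_dist_util` runs, so the same config file works across backends; if `model_path` is omitted, the `pretrained_model_path` from the config is used instead.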