
Fine-tuning Qwen2___5-Coder-7B-Instruct with the parameters below produces the following error, please help!!!! #293

Open
ychuest opened this issue Sep 30, 2024 · 9 comments

Comments


ychuest commented Sep 30, 2024

CUDA_VISIBLE_DEVICES=0 python /home/ubuntu/TextToSQL/DB-GPT-Hub/src/dbgpt-hub-sql/dbgpt_hub_sql/train/sft_train.py \
    --model_name_or_path /home/ubuntu/.cache/modelscope/hub/qwen/Qwen2___5-Coder-7B-Instruct \
    --do_train \
    --dataset example_text2sql_train \
    --max_source_length 2048 \
    --max_target_length 512 \
    --template qwen2_5 \
    --finetuning_type lora \
    --lora_rank 64 \
    --lora_alpha 32 \
    --lora_target q_proj,v_proj \
    --output_dir /home/ubuntu/TextToSQL/DB-GPT-Hub/src/dbgpt-hub-sql/dbgpt_hub_sql/output/adapter/Qwen25-7B-Instruct-lora \
    --overwrite_cache \
    --overwrite_output_dir \
    --per_device_train_batch_size 1 \
    --gradient_accumulation_steps 16 \
    --lr_scheduler_type cosine_with_restarts \
    --logging_steps 500 \
    --save_steps 2000 \
    --learning_rate 2e-4 \
    --num_train_epochs 4 \
    --plot_loss \
    --bf16
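As a sanity check on the flags above, the effective batch size seen by the optimizer is per_device_train_batch_size times gradient_accumulation_steps times the number of visible GPUs:

```python
# Effective batch size implied by the training flags above
per_device_train_batch_size = 1
gradient_accumulation_steps = 16
num_gpus = 1  # CUDA_VISIBLE_DEVICES=0 exposes a single GPU

effective_batch_size = (per_device_train_batch_size
                        * gradient_accumulation_steps
                        * num_gpus)
print(effective_batch_size)  # 16
```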

(screenshot of the error traceback)


xhh315 commented Oct 10, 2024

Comment out line 895 of data_utils.py in data_process:

dataset = load_dataset(
    data_path,
    data_files=data_files,
    split=data_args.split,
    cache_dir=model_args.cache_dir,
    streaming=data_args.streaming,
    # use_auth_token=True if model_args.use_auth_token else None,  # comment out this line
)
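For context: newer versions of the Hugging Face `datasets` library deprecated the `use_auth_token` argument of `load_dataset` in favor of `token`, which is the likely reason this line now raises an error. Instead of deleting the line, a small compatibility shim can keep authentication working; this is a hedged sketch (the helper name and its parameters are illustrative, not from the repo):

```python
def build_load_dataset_kwargs(data_path, data_files, split, cache_dir,
                              streaming, use_auth_token=None):
    """Assemble kwargs for datasets.load_dataset without the removed
    use_auth_token argument; newer datasets versions accept `token`."""
    kwargs = {
        "path": data_path,
        "data_files": data_files,
        "split": split,
        "cache_dir": cache_dir,
        "streaming": streaming,
    }
    if use_auth_token:
        # Pass authentication through the newer parameter name instead.
        kwargs["token"] = use_auth_token
    return kwargs

# usage: dataset = load_dataset(**build_load_dataset_kwargs(...))
```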


xiangzhangpang commented Oct 14, 2024

(quoting the training command and error screenshot from the original post)

Hi, I see that config.py only supports the models listed below. Did you modify the code yourself to adapt Qwen2___5-Coder-7B-Instruct?

SUPPORTED_MODELS = {
    "LLaMA-7B": "huggyllama/llama-7b",
    "LLaMA-13B": "huggyllama/llama-13b",
    "LLaMA-30B": "huggyllama/llama-30b",
    "LLaMA-65B": "huggyllama/llama-65b",
    "LLaMA2-7B": "meta-llama/Llama-2-7b-hf",
    "LLaMA2-13B": "meta-llama/Llama-2-13b-hf",
    "LLaMA2-70B": "meta-llama/Llama-2-70b-hf",
    "LLaMA2-7B-Chat": "meta-llama/Llama-2-7b-chat-hf",
    "LLaMA2-13B-Chat": "meta-llama/Llama-2-13b-chat-hf",
    "LLaMA2-70B-Chat": "meta-llama/Llama-2-70b-chat-hf",
    "ChineseLLaMA2-7B": "ziqingyang/chinese-llama-2-7b",
    "ChineseLLaMA2-13B": "ziqingyang/chinese-llama-2-13b",
    "ChineseLLaMA2-7B-Chat": "ziqingyang/chinese-alpaca-2-7b",
    "ChineseLLaMA2-13B-Chat": "ziqingyang/chinese-alpaca-2-13b",
    "BLOOM-560M": "bigscience/bloom-560m",
    "BLOOM-3B": "bigscience/bloom-3b",
    "BLOOM-7B1": "bigscience/bloom-7b1",
    "BLOOMZ-560M": "bigscience/bloomz-560m",
    "BLOOMZ-3B": "bigscience/bloomz-3b",
    "BLOOMZ-7B1-mt": "bigscience/bloomz-7b1-mt",
    "Falcon-7B": "tiiuae/falcon-7b",
    "Falcon-7B-Chat": "tiiuae/falcon-7b-instruct",
    "Falcon-40B": "tiiuae/falcon-40b",
    "Falcon-40B-Chat": "tiiuae/falcon-40b-instruct",
    "Baichuan-7B": "baichuan-inc/Baichuan-7B",
    "Baichuan-13B": "baichuan-inc/Baichuan-13B-Base",
    "Baichuan-13B-Chat": "baichuan-inc/Baichuan-13B-Chat",
    "Baichuan2-7B": "baichuan-inc/Baichuan2-7B-Base",
    "Baichuan2-13B": "baichuan-inc/Baichuan2-13B-Base",
    "Baichuan2-7B-Chat": "baichuan-inc/Baichuan2-7B-Chat",
    "Baichuan2-13B-Chat": "baichuan-inc/Baichuan2-13B-Chat",
    "InternLM-7B": "internlm/internlm-7b",
    "InternLM-7B-Chat": "internlm/internlm-chat-7b",
    "Qwen-7B": "Qwen/Qwen-7B",
    "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
    "XVERSE-13B": "xverse/XVERSE-13B",
    "ChatGLM2-6B-Chat": "THUDM/chatglm2-6b",
    "ChatGLM3-6B-Base": "THUDM/chatglm3-6b-base",
    "ChatGLM3-6B-Chat": "THUDM/chatglm3-6b",
}


ychuest commented Oct 18, 2024

(quoting xiangzhangpang's question and the SUPPORTED_MODELS list above)

Yes, it can be adapted, but you need to update DB-GPT-Hub to the latest version; the latest version supports Qwen2.5.
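If you are on an older checkout where config.py still lacks the entry, registering the model amounts to adding one more key to SUPPORTED_MODELS. A hedged sketch only; whether the current codebase still uses this exact dict is an assumption, and updating DB-GPT-Hub as suggested above is the safer route:

```python
# Illustrative extension of the SUPPORTED_MODELS mapping from config.py
SUPPORTED_MODELS = {
    # ... existing entries from config.py, e.g.:
    "Qwen-7B": "Qwen/Qwen-7B",
    "Qwen-7B-Chat": "Qwen/Qwen-7B-Chat",
    # Hypothetical new entry; the Hugging Face repo id for this model
    # is Qwen/Qwen2.5-Coder-7B-Instruct.
    "Qwen2.5-Coder-7B-Instruct": "Qwen/Qwen2.5-Coder-7B-Instruct",
}
```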


xiangzhangpang commented Oct 25, 2024

Hi, where can I find the full list of supported models and their corresponding templates? I searched the latest version (main branch) for a long time, and the supported models I found are all quite old. Many thanks for any reply.


ychuest commented Oct 25, 2024

(quoting xiangzhangpang's question above)
(screenshot of the fine-tuning settings)
This is my fine-tuning setup; you can use it as a reference.


ychuest commented Oct 25, 2024

(quoting the previous comment)
First download the Qwen2.5 model to the local cache; downloading it via swift is faster.

@xiangzhangpang

(quoting the two replies above)

Thanks a lot for your reply, I will give it a try. In your screenshot the model is Qwen2.5 but the template is llama2; is it still fine when the model and template do not match?
Looking at the earlier command:

CUDA_VISIBLE_DEVICES=0 python /home/ubuntu/TextToSQL/DB-GPT-Hub/src/dbgpt-hub-sql/dbgpt_hub_sql/train/sft_train.py \
    --model_name_or_path /home/ubuntu/.cache/modelscope/hub/qwen/Qwen2___5-Coder-7B-Instruct \
    --do_train \
    --dataset example_text2sql_train \
    --max_source_length 2048 \
    --max_target_length 512 \
    --template qwen2_5

I assumed the project had officially added a qwen2_5 template for Qwen2.5.


ychuest commented Oct 25, 2024

(quoting xiangzhangpang's question about the template mismatch)

It works in actual testing; I have run it.

@xiangzhangpang

(quoting the reply above)

Thanks again!
