预训练中文GPT2
train_tokenizer.py # 用于训练BPE tokenizer
train_sentencePiece_tokenizer.py # 用于训练sentencepiece tokenizer
tmp.py # 测试tokenizer
run_clm.py # 训练GPT2
process_data.py # 整合数据
inference.py # 推理
相关资源下载(CSDN):
https://download.csdn.net/download/u014403221/88755559
https://download.csdn.net/download/u014403221/88761912