Skip to content

Commit

Permalink
update benchmark scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
tianleiwu committed Oct 25, 2024
1 parent 5aa867a commit fdf7d76
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 156 deletions.
4 changes: 2 additions & 2 deletions onnxruntime/python/tools/transformers/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ def run_pytorch(
else:
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

max_input_size = tokenizer.max_model_input_sizes.get(model_name, 1024)
max_input_size = tokenizer.model_max_length

logger.debug(f"Model {model}")
logger.debug(f"Number of parameters {model.num_parameters()}")
Expand Down Expand Up @@ -500,7 +500,7 @@ def run_tensorflow(

tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

max_input_size = tokenizer.max_model_input_sizes.get(model_name, 1024)
max_input_size = tokenizer.model_max_length

for batch_size in batch_sizes:
if batch_size <= 0:
Expand Down
18 changes: 7 additions & 11 deletions onnxruntime/python/tools/transformers/dev_benchmark.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
REM Run benchmark in Windows for developing purpose. For official benchmark, please use run_benchmark.sh.
REM Settings are different from run_benchmark.sh: no cli, batch and sequence, input counts, average over 100, no fp16, fewer models etc.

REM Please install PyTorch (see https://pytorch.org/) before running this benchmark. Like the following:
REM GPU: conda install pytorch torchvision cudatoolkit=10.1 -c pytorch
REM CPU: conda install pytorch torchvision cpuonly -c pytorch
REM Please install PyTorch (see https://pytorch.org/) before running this benchmark.

REM When use_package=true, you need not copy other files to run benchmarks except this cmd file.
REM Otherwise, it will use python script (*.py) files in this directory.
Expand All @@ -21,12 +19,12 @@ set run_torchscript=false

REM Devices to test.
REM Attention: You cannot run both CPU and GPU at the same time: GPU needs onnxruntime-gpu, and CPU needs onnxruntime.
set run_gpu_fp32=false
set run_gpu_fp16=false
set run_cpu_fp32=true
set run_cpu_int8=true
set run_gpu_fp32=true
set run_gpu_fp16=true
set run_cpu_fp32=false
set run_cpu_int8=false

set average_over=100
set average_over=1000

REM Enable optimizer (use script instead of OnnxRuntime for graph optimization)
set use_optimizer=true
Expand All @@ -36,7 +34,7 @@ set sequence_length=8 128

REM Number of inputs (input_ids, token_type_ids, attention_mask) for ONNX model.
REM Note that different input count might lead to different performance
set input_counts=1
set input_counts=3

REM Pretrained transformers models can be a subset of: bert-base-cased roberta-base gpt2 distilgpt2 distilbert-base-uncased
set models_to_test=bert-base-cased
Expand All @@ -57,7 +55,6 @@ if %run_cpu_int8% == true if %run_gpu_fp32% == true echo cannot test cpu and gpu
if %run_cpu_int8% == true if %run_gpu_fp16% == true echo cannot test cpu and gpu at same time & goto :EOF

if %run_install% == true (
pip uninstall --yes ort_nightly
pip uninstall --yes onnxruntime
pip uninstall --yes onnxruntime-gpu
if %run_cpu_fp32% == true (
Expand All @@ -70,7 +67,6 @@ if %run_install% == true (
)
)

pip install --upgrade onnxconverter_common
pip install --upgrade transformers
)

Expand Down
177 changes: 42 additions & 135 deletions onnxruntime/python/tools/transformers/huggingface_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,155 +13,62 @@
"AutoModelForCausalLM",
]

# List of pretrained models: https://huggingface.co/transformers/pretrained_models.html
# Pretrained model name to a tuple of input names, opset_version, use_external_data_format, optimization model type
# Some models like GPT, T5, Bart etc. have their own convert_to_onnx.py in models sub-directory, and they are excluded here.
MODELS = {
# BERT
"bert-base-uncased": (
["input_ids", "attention_mask", "token_type_ids"],
12,
False,
"bert",
),
"bert-large-uncased": (
["input_ids", "attention_mask", "token_type_ids"],
12,
False,
"bert",
),
"bert-base-cased": (
["input_ids", "attention_mask", "token_type_ids"],
12,
False,
"bert",
),
# "bert-large-cased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
# "bert-base-multilingual-uncased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
# "bert-base-multilingual-cased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
# "bert-base-chinese": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
# "bert-base-german-cased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
# "bert-large-uncased-whole-word-masking": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
# "bert-large-cased-whole-word-masking": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
# "bert-large-uncased-whole-word-masking-finetuned-squad": (["input_ids", "attention_mask",
# "token_type_ids"], 12, False, "bert"),
# "bert-large-cased-whole-word-masking-finetuned-squad": (["input_ids", "attention_mask",
# "token_type_ids"], 12, False, "bert"),
# "bert-base-cased-finetuned-mrpc": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
# "bert-base-german-dbmdz-cased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
# "bert-base-german-dbmdz-uncased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
# todo: more models to add
# GPT (no past state)
"openai-gpt": (["input_ids"], 11, False, "gpt2"),
# GPT-2 (no past state, use benchmark_gpt2.py for past_key_values)
"gpt2": (["input_ids"], 11, False, "gpt2"),
"gpt2-medium": (["input_ids"], 11, False, "gpt2"),
"gpt2-large": (["input_ids"], 11, True, "gpt2"),
"gpt2-xl": (["input_ids"], 11, True, "gpt2"),
"distilgpt2": (["input_ids"], 11, False, "gpt2"),
# Transformer-XL (Models uses Einsum, which need opset version 12 or later.)
"transfo-xl-wt103": (["input_ids", "mems"], 12, False, "bert"),
"bert-base-cased": (["input_ids", "attention_mask", "token_type_ids"], 16, False, "bert"),
"bert-large-cased": (["input_ids", "attention_mask", "token_type_ids"], 16, False, "bert"),
# Transformer-XL (Models use Einsum, which needs opset version 16 or later.)
"transfo-xl-wt103": (["input_ids", "mems"], 16, False, "bert"),
# XLNet
"xlnet-base-cased": (["input_ids"], 12, False, "bert"),
"xlnet-large-cased": (["input_ids"], 12, False, "bert"),
"xlnet-base-cased": (["input_ids"], 16, False, "bert"),
"xlnet-large-cased": (["input_ids"], 16, False, "bert"),
# XLM
"xlm-mlm-en-2048": (["input_ids"], 11, True, "bert"),
"xlm-mlm-ende-1024": (["input_ids"], 11, False, "bert"),
"xlm-mlm-enfr-1024": (["input_ids"], 11, False, "bert"),
"xlm-mlm-en-2048": (["input_ids"], 16, True, "bert"),
"xlm-mlm-ende-1024": (["input_ids"], 16, False, "bert"),
"xlm-mlm-enfr-1024": (["input_ids"], 16, False, "bert"),
# RoBERTa
"roberta-base": (["input_ids", "attention_mask"], 12, False, "bert"),
"roberta-large": (["input_ids", "attention_mask"], 12, False, "bert"),
"roberta-large-mnli": (["input_ids", "attention_mask"], 12, False, "bert"),
"deepset/roberta-base-squad2": (["input_ids", "attention_mask"], 11, False, "bert"),
"distilroberta-base": (["input_ids", "attention_mask"], 12, False, "bert"),
"roberta-base": (["input_ids", "attention_mask"], 16, False, "bert"),
"roberta-large": (["input_ids", "attention_mask"], 16, False, "bert"),
"roberta-large-mnli": (["input_ids", "attention_mask"], 16, False, "bert"),
"deepset/roberta-base-squad2": (["input_ids", "attention_mask"], 16, False, "bert"),
"distilroberta-base": (["input_ids", "attention_mask"], 16, False, "bert"),
# DistilBERT
"distilbert-base-uncased": (["input_ids", "attention_mask"], 11, False, "bert"),
"distilbert-base-uncased-distilled-squad": (
["input_ids", "attention_mask"],
11,
False,
"bert",
),
"distilbert-base-uncased": (["input_ids", "attention_mask"], 16, False, "bert"),
"distilbert-base-uncased-distilled-squad": (["input_ids", "attention_mask"], 16, False, "bert"),
# CTRL
"ctrl": (["input_ids"], 11, True, "bert"),
"ctrl": (["input_ids"], 16, True, "bert"),
# CamemBERT
"camembert-base": (["input_ids"], 11, False, "bert"),
"camembert-base": (["input_ids"], 16, False, "bert"),
# ALBERT
"albert-base-v1": (["input_ids"], 12, False, "bert"),
"albert-large-v1": (["input_ids"], 12, False, "bert"),
"albert-xlarge-v1": (["input_ids"], 12, True, "bert"),
# "albert-xxlarge-v1": (["input_ids"], 12, True, "bert"),
"albert-base-v2": (["input_ids"], 12, False, "bert"),
"albert-large-v2": (["input_ids"], 12, False, "bert"),
"albert-xlarge-v2": (["input_ids"], 12, True, "bert"),
# "albert-xxlarge-v2": (["input_ids"], 12, True, "bert"),
# T5 (use benchmark_t5.py instead)
# "t5-small": (["input_ids", "decoder_input_ids"], 12, False, "bert"),
# "t5-base": (["input_ids", "decoder_input_ids"], 12, False, "bert"),
# "t5-large": (["input_ids", "decoder_input_ids"], 12, True, "bert"),
# "t5-3b": (["input_ids", "decoder_input_ids"], 12, True, "bert"),
# "t5-11b": (["input_ids", "decoder_input_ids"], 12, True, "bert"),
# "valhalla/t5-small-qa-qg-hl": (["input_ids"], 12, True, "bert"),
"albert-base-v1": (["input_ids"], 16, False, "bert"),
"albert-large-v1": (["input_ids"], 16, False, "bert"),
"albert-xlarge-v1": (["input_ids"], 16, True, "bert"),
# "albert-xxlarge-v1": (["input_ids"], 16, True, "bert"),
"albert-base-v2": (["input_ids"], 16, False, "bert"),
"albert-large-v2": (["input_ids"], 16, False, "bert"),
"albert-xlarge-v2": (["input_ids"], 16, True, "bert"),
# "albert-xxlarge-v2": (["input_ids"], 16, True, "bert"),
# XLM-RoBERTa
"xlm-roberta-base": (["input_ids"], 11, False, "bert"),
"xlm-roberta-large": (["input_ids"], 11, True, "bert"),
"xlm-roberta-base": (["input_ids"], 16, False, "bert"),
"xlm-roberta-large": (["input_ids"], 16, True, "bert"),
# FlauBERT
"flaubert/flaubert_small_cased": (["input_ids"], 11, False, "bert"),
# "flaubert/flaubert_base_uncased": (["input_ids"], 11, False, "bert"),
"flaubert/flaubert_base_cased": (["input_ids"], 11, False, "bert"),
# "flaubert/flaubert_large_cased": (["input_ids"], 11, False, "bert"),
# Bart
"facebook/bart-large": (["input_ids", "attention_mask"], 11, False, "bart"),
"facebook/bart-base": (["input_ids", "attention_mask"], 11, False, "bart"),
"facebook/bart-large-mnli": (["input_ids", "attention_mask"], 11, False, "bart"),
"facebook/bart-large-cnn": (["input_ids", "attention_mask"], 11, False, "bart"),
# DialoGPT
"microsoft/DialoGPT-small": (["input_ids"], 11, False, "gpt2"),
"microsoft/DialoGPT-medium": (["input_ids"], 11, False, "gpt2"),
# "microsoft/DialoGPT-large": (["input_ids"], 11, True, "gpt2"),
# Reformer
# "google/reformer-enwik8": (["input_ids"], 11, False, "bert"),
# "google/reformer-crime-and-punishment": (["input_ids"], 11, False, "bert"),
# MarianMT
# "Helsinki-NLP/opus-mt-ROMANCE-en": (["input_ids"], 12, False, "bert"),
# Longformer (use benchmark_longformer.py instead)
# "allenai/longformer-base-4096": (["input_ids"], 12, False, "bert"),
# "allenai/longformer-large-4096": (["input_ids"], 12, False, "bert"),
# MBart
"facebook/mbart-large-cc25": (["input_ids"], 11, True, "bert"),
"facebook/mbart-large-en-ro": (["input_ids"], 11, True, "bert"),
# "Helsinki-NLP/opus-mt-ROMANCE-en": (["input_ids"], 12, False, "bert"),
# # Longformer
# "allenai/longformer-base-4096": (["input_ids"], 12, False, "bert"),
# "allenai/longformer-large-4096": (["input_ids"], 12, True, "bert"),
# "funnel-transformer/small": (["input_ids"], 12, False, "bert"),
# "funnel-transformer/small-base": (["input_ids"], 12, False, "bert"),
# "funnel-transformer/medium": (["input_ids"], 12, False, "bert"),
# "funnel-transformer/medium-base": (["input_ids"], 12, False, "bert"),
# "funnel-transformer/intermediate": (["input_ids"], 12, False, "bert"),
# "funnel-transformer/intermediate-base": (["input_ids"], 12, False, "bert"),
# "funnel-transformer/large": (["input_ids"], 12, True, "bert"),
# "funnel-transformer/large-base": (["input_ids"], 12, True, "bert"),
# "funnel-transformer/xlarge": (["input_ids"], 12, True, "bert"),
# "funnel-transformer/xlarge-base": (["input_ids"], 12, True, "bert"),
"flaubert/flaubert_small_cased": (["input_ids"], 16, False, "bert"),
"flaubert/flaubert_base_cased": (["input_ids"], 16, False, "bert"),
# "flaubert/flaubert_large_cased": (["input_ids"], 16, False, "bert"),
# Layoutlm
"microsoft/layoutlm-base-uncased": (["input_ids"], 11, False, "bert"),
"microsoft/layoutlm-large-uncased": (["input_ids"], 11, False, "bert"),
"microsoft/layoutlm-base-uncased": (["input_ids"], 16, False, "bert"),
"microsoft/layoutlm-large-uncased": (["input_ids"], 16, False, "bert"),
# Squeezebert
"squeezebert/squeezebert-uncased": (["input_ids"], 11, False, "bert"),
"squeezebert/squeezebert-mnli": (["input_ids"], 11, False, "bert"),
"squeezebert/squeezebert-mnli-headless": (["input_ids"], 11, False, "bert"),
"unc-nlp/lxmert-base-uncased": (
["input_ids", "visual_feats", "visual_pos"],
11,
False,
"bert",
),
# "google/pegasus-xsum": (["input_ids"], 11, False, "bert"),
# "google/pegasus-large": (["input_ids"], 11, False, "bert"),
"squeezebert/squeezebert-uncased": (["input_ids"], 16, False, "bert"),
"squeezebert/squeezebert-mnli": (["input_ids"], 16, False, "bert"),
"squeezebert/squeezebert-mnli-headless": (["input_ids"], 16, False, "bert"),
"unc-nlp/lxmert-base-uncased": (["input_ids", "visual_feats", "visual_pos"], 16, False, "bert"),
# ViT
"google/vit-base-patch16-224": (["pixel_values"], 12, False, "vit"),
"google/vit-base-patch16-224": (["pixel_values"], 16, False, "vit"),
# Swin
"microsoft/swin-base-patch4-window7-224": (["pixel_values"], 12, False, "swin"),
"microsoft/swin-small-patch4-window7-224": (["pixel_values"], 12, False, "swin"),
"microsoft/swin-tiny-patch4-window7-224": (["pixel_values"], 12, False, "swin"),
"microsoft/swin-base-patch4-window7-224": (["pixel_values"], 16, False, "swin"),
"microsoft/swin-small-patch4-window7-224": (["pixel_values"], 16, False, "swin"),
"microsoft/swin-tiny-patch4-window7-224": (["pixel_values"], 16, False, "swin"),
}
4 changes: 2 additions & 2 deletions onnxruntime/python/tools/transformers/onnx_exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,7 @@ def export_onnx_model_from_pt(
example_inputs = image_processor(data, return_tensors="pt")
else:
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
max_input_size = tokenizer.max_model_input_sizes.get(model_name, 1024)
max_input_size = tokenizer.model_max_length
example_inputs = tokenizer.encode_plus("This is a sample input", return_tensors="pt")

example_inputs = filter_inputs(example_inputs, input_names)
Expand Down Expand Up @@ -596,7 +596,7 @@ def export_onnx_model_from_tf(
# Fix "Using pad_token, but it is not set yet" error.
if tokenizer.pad_token is None:
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
max_input_size = tokenizer.max_model_input_sizes.get(model_name, 1024)
max_input_size = tokenizer.model_max_length

config, model = load_tf_model(model_name, model_class, cache_dir, config_modifier)
model.resize_token_embeddings(len(tokenizer))
Expand Down
8 changes: 2 additions & 6 deletions onnxruntime/python/tools/transformers/run_benchmark.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
# license information.
# --------------------------------------------------------------------------
# This measures the performance of OnnxRuntime, PyTorch and TorchScript on transformer models.
# Please install PyTorch (see https://pytorch.org/) before running this benchmark. Like the following:
# GPU: conda install pytorch torchvision cudatoolkit=11.0 -c pytorch
# CPU: conda install pytorch torchvision cpuonly -c pytorch
# To use torch2, please install the nightly PyTorch by replacing pytorch with pytorch-nightly.
# Please install PyTorch (see https://pytorch.org/) before running this benchmark.

# When use_package=true, you need not copy other files to run benchmarks except this sh file.
# Otherwise, it will use python script (*.py) files in this directory.
Expand Down Expand Up @@ -60,7 +57,6 @@ sequence_lengths="8 16 32 64 128 256 512 1024"
# Here we only test one input (input_ids) for fair comparison with PyTorch.
input_counts=1

# Pretrained transformers models can be a subset of: bert-base-cased roberta-base gpt2 distilgpt2 distilbert-base-uncased
models_to_test="bert-base-cased roberta-base distilbert-base-uncased"

# If you have multiple GPUs, you can choose one GPU for test. Here is an example to use the second GPU:
Expand Down Expand Up @@ -99,7 +95,7 @@ if [ "$run_install" = true ] ; then
else
pip install onnxruntime-gpu
fi
pip install --upgrade onnx coloredlogs packaging psutil py3nvml onnxconverter_common numpy transformers sympy
pip install --upgrade onnx coloredlogs packaging psutil py3nvml numpy transformers sympy
fi

if [ "$use_package" = true ] ; then
Expand Down

0 comments on commit fdf7d76

Please sign in to comment.