Skip to content

Commit

Permalink
remove unnecessary change
Browse files (browse the repository at this point in the history)
  • Loading branch information
yangw1234 committed Oct 25, 2024
1 parent bdca9a8 commit 6d7e1fa
Showing 1 changed file with 20 additions and 27 deletions.
47 changes: 20 additions & 27 deletions benchmarks/benchmark_throughput.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -133,40 +133,33 @@ def run_vllm(
         sampling_params.append(
             SamplingParams(
                 n=n,
-                temperature=0.0,
+                temperature=1.0,
                 top_p=1.0,
                 ignore_eos=True,
                 max_tokens=output_len,
             ))
-    # from vllm.utils import Device
-    # for i in range(5):
-    # start = time.perf_counter()
-    # llm.generate(prompts, sampling_params, use_tqdm=True)
-    # end = time.perf_counter()
-    # print(llm.llm_engine.scheduler[0].block_manager.block_allocator._allocators[Device.GPU]._free_block_indices)

     use_beam_search = False

-    for i in range(3):
-        if not use_beam_search:
-            start = time.perf_counter()
-            llm.generate(prompts, sampling_params, use_tqdm=True)
-            end = time.perf_counter()
-        else:
-            prompts = [prompt for prompt, _, _ in requests]
-            # output_len should be the same for all requests.
-            output_len = requests[0][2]
-            for prompt, input_len, _output_len in requests:
-                assert _output_len == output_len
-            start = time.perf_counter()
-            llm.beam_search(
-                prompts,
-                BeamSearchParams(
-                    beam_width=n,
-                    max_tokens=output_len,
-                    ignore_eos=True,
-                ))
-            end = time.perf_counter()
+    if not use_beam_search:
+        start = time.perf_counter()
+        llm.generate(prompts, sampling_params, use_tqdm=True)
+        end = time.perf_counter()
+    else:
+        prompts = [prompt for prompt, _, _ in requests]
+        # output_len should be the same for all requests.
+        output_len = requests[0][2]
+        for prompt, input_len, _output_len in requests:
+            assert _output_len == output_len
+        start = time.perf_counter()
+        llm.beam_search(
+            prompts,
+            BeamSearchParams(
+                beam_width=n,
+                max_tokens=output_len,
+                ignore_eos=True,
+            ))
+        end = time.perf_counter()
     return end - start


Expand Down

0 comments on commit 6d7e1fa

Please sign in to comment.