Skip to content

Commit

Permalink
ENH: [benchmark] Add api-key support (xorbitsai#1961)
Browse files Browse the repository at this point in the history
  • Loading branch information
frostyplanet authored Jul 30, 2024
1 parent 38eeae4 commit aafd36e
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 6 deletions.
7 changes: 6 additions & 1 deletion benchmark/benchmark_latency.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import logging
import random
import time
from typing import List, Tuple
from typing import List, Tuple, Optional

import numpy as np
from utils import get_tokenizer, sample_requests, send_request
Expand All @@ -32,6 +32,7 @@ async def benchmark(
api_url: str,
model_uid: str,
input_requests: List[Tuple[str, int, int]],
api_key: Optional[str] = None,
) -> None:
for request in input_requests:
prompt, prompt_len, output_len = request
Expand Down Expand Up @@ -60,6 +61,7 @@ def main(args: argparse.Namespace):
api_url,
model_uid,
input_requests,
api_key=args.api_key,
)
)

Expand Down Expand Up @@ -106,6 +108,9 @@ def main(args: argparse.Namespace):
help="Trust remote code from huggingface.",
)
parser.add_argument("--model-uid", type=str, help="Xinference model UID.")
parser.add_argument(
"--api-key", type=str, default=None, help="Authorization api key",
)

args = parser.parse_args()
main(args)
9 changes: 8 additions & 1 deletion benchmark/benchmark_long.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import logging
import random
import time
from typing import List, Tuple
from typing import List, Tuple, Optional

import numpy as np

Expand All @@ -38,6 +38,7 @@ def __init__(
model_uid: str,
input_requests: List[Tuple[str, int, int]],
concurrency: int,
api_key: Optional[str]=None,
):

self.api_url = api_url
Expand All @@ -46,6 +47,7 @@ def __init__(
self.concurrency = concurrency
self.sent = 0
self.left = len(input_requests)
self.api_key = api_key

async def run(self):
tasks = []
Expand All @@ -68,6 +70,7 @@ async def worker(self, i: int):
prompt_len,
output_len,
REQUEST_LATENCY,
api_key=self.api_key,
)
self.left -= 1
# print extra trailing spaces to overwrite the previous output when `left` decreases
Expand Down Expand Up @@ -101,6 +104,7 @@ def main(args: argparse.Namespace):
model_uid,
input_requests,
concurrency=args.concurrency,
api_key=args.api_key,
)
asyncio.run(benchmark.run())
benchmark_end_time = time.time()
Expand Down Expand Up @@ -160,5 +164,8 @@ def main(args: argparse.Namespace):
help="Trust remote code from huggingface.",
)
parser.add_argument("--model-uid", type=str, help="Xinference model UID.")
parser.add_argument(
"--api-key", type=str, default=None, help="Authorization api key",
)
args = parser.parse_args()
main(args)
13 changes: 11 additions & 2 deletions benchmark/benchmark_rerank.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import random
import time
import aiohttp
from typing import List, Dict
from typing import List, Dict, Optional
from datasets import load_dataset


Expand All @@ -36,6 +36,7 @@ def __init__(
input_requests: List[Dict],
top_n: int,
concurrency: int,
api_key: Optional[str]=None,
):
self.api_url = api_url
self.model_uid = model_uid
Expand All @@ -44,6 +45,7 @@ def __init__(
self.concurrency = concurrency
self.sent = 0
self.left = len(input_requests)
self.api_key = api_key

async def run(self):
tasks = []
Expand Down Expand Up @@ -73,7 +75,8 @@ async def worker(self, i: int):
print("")

async def send_request(
self, api_url: str, model_uid: str, prompt: str, documents: List[str]
self, api_url: str, model_uid: str, prompt: str, documents: List[str],
api_key: Optional[str]=None,
):
request_start_time = time.time()

Expand All @@ -85,6 +88,8 @@ async def send_request(
}

headers = {"User-Agent": "Benchmark Client"}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"

timeout = aiohttp.ClientTimeout(total=3 * 3600)
async with aiohttp.ClientSession(timeout=timeout) as session:
Expand Down Expand Up @@ -121,6 +126,7 @@ def main(args: argparse.Namespace):
input_requests,
top_n=args.top_n,
concurrency=args.concurrency,
api_key=args.api_key,
)
asyncio.run(benchmark.run())
benchmark_end_time = time.time()
Expand Down Expand Up @@ -161,5 +167,8 @@ def main(args: argparse.Namespace):
help="Trust remote code from huggingface.",
)
parser.add_argument("--model-uid", type=str, help="Xinference model UID.")
parser.add_argument(
"--api-key", type=str, default=None, help="Authorization api key",
)
args = parser.parse_args()
main(args)
9 changes: 8 additions & 1 deletion benchmark/benchmark_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import logging
import random
import time
from typing import List, Tuple
from typing import List, Tuple, Optional

import numpy as np

Expand All @@ -39,6 +39,7 @@ def __init__(
input_requests: List[Tuple[str, int, int]],
request_rate: float,
concurrency: int,
api_key: Optional[str] = None,
):

self.api_url = api_url
Expand All @@ -48,6 +49,7 @@ def __init__(
self.request_rate = request_rate
self.queue = asyncio.Queue(concurrency or 100)
self.left = len(input_requests)
self.api_key = api_key

async def run(self):
tasks = []
Expand Down Expand Up @@ -78,6 +80,7 @@ async def worker(self):
prompt_len,
output_len,
REQUEST_LATENCY,
api_key=self.api_key,
)
self.left -= 1
# print extra trailing spaces to overwrite the previous output when `left` decreases
Expand Down Expand Up @@ -111,6 +114,7 @@ def main(args: argparse.Namespace):
input_requests,
request_rate=args.request_rate,
concurrency=args.concurrency,
api_key=args.api_key,
)
asyncio.run(benchmark.run())
benchmark_end_time = time.time()
Expand Down Expand Up @@ -156,6 +160,9 @@ def main(args: argparse.Namespace):
parser.add_argument(
"--prompt-len-limit", type=int, default=1024, help="Prompt length limitation."
)
parser.add_argument(
"--api-key", type=str, default=None, help="Authorization api key",
)
parser.add_argument(
"--concurrency",
"-c",
Expand Down
5 changes: 4 additions & 1 deletion benchmark/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import logging
import random
import time
from typing import TYPE_CHECKING, List, Tuple
from typing import TYPE_CHECKING, List, Tuple, Optional

import openai
from transformers import AutoTokenizer, PreTrainedTokenizerFast
Expand Down Expand Up @@ -173,6 +173,7 @@ async def send_request(
prompt_len: int,
output_len: int,
stats: List[Tuple[int, int, float]], # output.
api_key: Optional[str]=None,
) -> None:
request_start_time = time.time()

Expand All @@ -187,6 +188,8 @@ async def send_request(
}

headers = {"User-Agent": "Benchmark Client"}
if api_key:
headers["Authorization"] = f"Bearer {api_key}"

timeout = aiohttp.ClientTimeout(total=3 * 3600)
async with aiohttp.ClientSession(timeout=timeout) as session:
Expand Down

0 comments on commit aafd36e

Please sign in to comment.