diff --git a/benchmark/benchmark_latency.py b/benchmark/benchmark_latency.py
index 7b607cef51..20c96ae04e 100644
--- a/benchmark/benchmark_latency.py
+++ b/benchmark/benchmark_latency.py
@@ -17,7 +17,7 @@
 import logging
 import random
 import time
-from typing import List, Tuple
+from typing import List, Tuple, Optional
 
 import numpy as np
 from utils import get_tokenizer, sample_requests, send_request
@@ -32,6 +32,7 @@ async def benchmark(
     api_url: str,
     model_uid: str,
     input_requests: List[Tuple[str, int, int]],
+    api_key: Optional[str] = None,  # NOTE(review): confirm this reaches send_request()
 ) -> None:
     for request in input_requests:
         prompt, prompt_len, output_len = request
@@ -60,6 +61,7 @@ def main(args: argparse.Namespace):
             api_url,
             model_uid,
             input_requests,
+            api_key=args.api_key,
         )
     )
 
@@ -106,6 +108,9 @@ def main(args: argparse.Namespace):
         help="Trust remote code from huggingface.",
     )
     parser.add_argument("--model-uid", type=str, help="Xinference model UID.")
+    parser.add_argument(
+        "--api-key", type=str, default=None, help="Authorization api key",
+    )
 
     args = parser.parse_args()
     main(args)
diff --git a/benchmark/benchmark_long.py b/benchmark/benchmark_long.py
index 13a899dea2..531a8c6858 100644
--- a/benchmark/benchmark_long.py
+++ b/benchmark/benchmark_long.py
@@ -17,7 +17,7 @@
 import logging
 import random
 import time
-from typing import List, Tuple
+from typing import List, Tuple, Optional
 
 import numpy as np
 
@@ -38,6 +38,7 @@ def __init__(
         model_uid: str,
         input_requests: List[Tuple[str, int, int]],
         concurrency: int,
+        api_key: Optional[str] = None,
     ):
 
         self.api_url = api_url
@@ -46,6 +47,7 @@ def __init__(
         self.concurrency = concurrency
         self.sent = 0
         self.left = len(input_requests)
+        self.api_key = api_key
 
     async def run(self):
         tasks = []
@@ -68,6 +70,7 @@ async def worker(self, i: int):
                 prompt_len,
                 output_len,
                 REQUEST_LATENCY,
+                api_key=self.api_key,
             )
             self.left -= 1
             # pring longer space to overwrite the previous when left decrease
@@ -101,6 +104,7 @@ def main(args: argparse.Namespace):
         model_uid,
         input_requests,
         concurrency=args.concurrency,
+        api_key=args.api_key,
     )
     asyncio.run(benchmark.run())
     benchmark_end_time = time.time()
@@ -160,5 +164,8 @@ def main(args: argparse.Namespace):
         help="Trust remote code from huggingface.",
     )
     parser.add_argument("--model-uid", type=str, help="Xinference model UID.")
+    parser.add_argument(
+        "--api-key", type=str, default=None, help="Authorization api key",
+    )
     args = parser.parse_args()
     main(args)
diff --git a/benchmark/benchmark_rerank.py b/benchmark/benchmark_rerank.py
index 0b7a2438e6..8e6e416fac 100644
--- a/benchmark/benchmark_rerank.py
+++ b/benchmark/benchmark_rerank.py
@@ -18,7 +18,7 @@
 import random
 import time
 import aiohttp
-from typing import List, Dict
+from typing import List, Dict, Optional
 from datasets import load_dataset
 
 
@@ -36,6 +36,7 @@ def __init__(
         input_requests: List[Dict],
         top_n: int,
         concurrency: int,
+        api_key: Optional[str] = None,
     ):
         self.api_url = api_url
         self.model_uid = model_uid
@@ -44,6 +45,7 @@ def __init__(
         self.concurrency = concurrency
         self.sent = 0
         self.left = len(input_requests)
+        self.api_key = api_key
 
     async def run(self):
         tasks = []
@@ -73,7 +75,8 @@ async def worker(self, i: int):
         print("")
 
     async def send_request(
-        self, api_url: str, model_uid: str, prompt: str, documents: List[str]
+        self, api_url: str, model_uid: str, prompt: str, documents: List[str],
+        api_key: Optional[str] = None,
     ):
         request_start_time = time.time()
 
@@ -85,6 +88,8 @@ async def send_request(
         }
 
         headers = {"User-Agent": "Benchmark Client"}
+        if api_key or self.api_key:  # fall back to the key given at construction
+            headers["Authorization"] = f"Bearer {api_key or self.api_key}"
 
         timeout = aiohttp.ClientTimeout(total=3 * 3600)
         async with aiohttp.ClientSession(timeout=timeout) as session:
@@ -121,6 +126,7 @@ def main(args: argparse.Namespace):
         input_requests,
         top_n=args.top_n,
         concurrency=args.concurrency,
+        api_key=args.api_key,
     )
     asyncio.run(benchmark.run())
     benchmark_end_time = time.time()
@@ -161,5 +167,8 @@ def main(args: argparse.Namespace):
         help="Trust remote code from huggingface.",
     )
     parser.add_argument("--model-uid", type=str, help="Xinference model UID.")
+    parser.add_argument(
+        "--api-key", type=str, default=None, help="Authorization api key",
+    )
     args = parser.parse_args()
     main(args)
diff --git a/benchmark/benchmark_serving.py b/benchmark/benchmark_serving.py
index 0440e0a722..d60b6da7bb 100644
--- a/benchmark/benchmark_serving.py
+++ b/benchmark/benchmark_serving.py
@@ -17,7 +17,7 @@
 import logging
 import random
 import time
-from typing import List, Tuple
+from typing import List, Tuple, Optional
 
 import numpy as np
 
@@ -39,6 +39,7 @@ def __init__(
         input_requests: List[Tuple[str, int, int]],
         request_rate: float,
         concurrency: int,
+        api_key: Optional[str] = None,
     ):
 
         self.api_url = api_url
@@ -48,6 +49,7 @@ def __init__(
         self.request_rate = request_rate
         self.queue = asyncio.Queue(concurrency or 100)
         self.left = len(input_requests)
+        self.api_key = api_key
 
     async def run(self):
         tasks = []
@@ -78,6 +80,7 @@ async def worker(self):
                 prompt_len,
                 output_len,
                 REQUEST_LATENCY,
+                api_key=self.api_key,
             )
             self.left -= 1
             # pring longer space to overwrite the previous when left decrease
@@ -111,6 +114,7 @@ def main(args: argparse.Namespace):
         input_requests,
         request_rate=args.request_rate,
         concurrency=args.concurrency,
+        api_key=args.api_key,
     )
     asyncio.run(benchmark.run())
     benchmark_end_time = time.time()
@@ -156,6 +160,9 @@ def main(args: argparse.Namespace):
     parser.add_argument(
         "--prompt-len-limit", type=int, default=1024, help="Prompt length limitation."
     )
+    parser.add_argument(
+        "--api-key", type=str, default=None, help="Authorization api key",
+    )
     parser.add_argument(
         "--concurrency",
         "-c",
diff --git a/benchmark/utils.py b/benchmark/utils.py
index 3a393adc62..345dcaa437 100644
--- a/benchmark/utils.py
+++ b/benchmark/utils.py
@@ -17,7 +17,7 @@
 import logging
 import random
 import time
-from typing import TYPE_CHECKING, List, Tuple
+from typing import TYPE_CHECKING, List, Tuple, Optional
 
 import openai
 from transformers import AutoTokenizer, PreTrainedTokenizerFast
@@ -173,6 +173,7 @@ async def send_request(
     prompt_len: int,
     output_len: int,
     stats: List[Tuple[int, int, float]],  # output.
+    api_key: Optional[str] = None,
 ) -> None:
     request_start_time = time.time()
 
@@ -187,6 +188,8 @@ async def send_request(
     }
 
     headers = {"User-Agent": "Benchmark Client"}
+    if api_key:
+        headers["Authorization"] = f"Bearer {api_key}"
 
     timeout = aiohttp.ClientTimeout(total=3 * 3600)
     async with aiohttp.ClientSession(timeout=timeout) as session: