diff --git a/examples/inference-deployments/mpt/mpt_30b.yaml b/examples/inference-deployments/mpt/mpt_30b.yaml
index 3ba43376c..8cfa6c52f 100644
--- a/examples/inference-deployments/mpt/mpt_30b.yaml
+++ b/examples/inference-deployments/mpt/mpt_30b.yaml
@@ -2,7 +2,7 @@ name: mpt-30b
 compute:
   gpus: 2
   gpu_type: a100_40gb
-image: mosaicml/inference:0.1.37
+image: mosaicml/inference:0.1.40
 replicas: 1
 default_model:
   model_type: mpt-30b
diff --git a/examples/inference-deployments/mpt/mpt_30b_chat.yaml b/examples/inference-deployments/mpt/mpt_30b_chat.yaml
index 771eed921..abd8e4735 100644
--- a/examples/inference-deployments/mpt/mpt_30b_chat.yaml
+++ b/examples/inference-deployments/mpt/mpt_30b_chat.yaml
@@ -2,7 +2,7 @@ name: mpt-30b-chat
 compute:
   gpus: 2
   gpu_type: a100_40gb
-image: mosaicml/inference:0.1.37
+image: mosaicml/inference:0.1.40
 replicas: 1
 default_model:
   model_type: mpt-30b-chat
diff --git a/examples/inference-deployments/mpt/mpt_30b_instruct.yaml b/examples/inference-deployments/mpt/mpt_30b_instruct.yaml
index ad0e0b37a..43bea739f 100644
--- a/examples/inference-deployments/mpt/mpt_30b_instruct.yaml
+++ b/examples/inference-deployments/mpt/mpt_30b_instruct.yaml
@@ -2,7 +2,7 @@ name: mpt-30b-instruct
 compute:
   gpus: 2
   gpu_type: a100_40gb
-image: mosaicml/inference:0.1.37
+image: mosaicml/inference:0.1.40
 replicas: 1
 default_model:
   model_type: mpt-30b-instruct
diff --git a/examples/inference-deployments/mpt/mpt_7b.yaml b/examples/inference-deployments/mpt/mpt_7b.yaml
index ecb195de5..06d8e8446 100644
--- a/examples/inference-deployments/mpt/mpt_7b.yaml
+++ b/examples/inference-deployments/mpt/mpt_7b.yaml
@@ -2,7 +2,7 @@ name: mpt-7b
 compute:
   gpus: 1
   instance: oci.vm.gpu.a10.1
-image: mosaicml/inference:0.1.37
+image: mosaicml/inference:0.1.40
 replicas: 1
 default_model:
   model_type: mpt-7b
diff --git a/examples/inference-deployments/mpt/mpt_7b_instruct.yaml b/examples/inference-deployments/mpt/mpt_7b_instruct.yaml
index 01f93a777..886d342b1 100644
--- a/examples/inference-deployments/mpt/mpt_7b_instruct.yaml
+++ b/examples/inference-deployments/mpt/mpt_7b_instruct.yaml
@@ -2,7 +2,7 @@ name: mpt-7b-instruct
 compute:
   gpus: 1
   instance: oci.vm.gpu.a10.1
-image: mosaicml/inference:0.1.37
+image: mosaicml/inference:0.1.40
 replicas: 1
 default_model:
   model_type: mpt-7b-instruct
diff --git a/examples/inference-deployments/mpt/mpt_7b_storywriter.yaml b/examples/inference-deployments/mpt/mpt_7b_storywriter.yaml
index 9c235c1bf..11a012adb 100644
--- a/examples/inference-deployments/mpt/mpt_7b_storywriter.yaml
+++ b/examples/inference-deployments/mpt/mpt_7b_storywriter.yaml
@@ -2,7 +2,7 @@ name: mpt-7b-storywriter
 compute:
   gpus: 1
   instance: oci.vm.gpu.a10.1
-image: mosaicml/inference:0.1.37
+image: mosaicml/inference:0.1.40
 replicas: 1
 default_model:
   model_type: mpt-7b-storywriter