From cae8a5e8236c265f4a212ee848b523a642b7462d Mon Sep 17 00:00:00 2001
From: Jing Fang
Date: Fri, 25 Oct 2024 23:26:14 +0000
Subject: [PATCH] fixed fp16 alloc bug

---
 onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc
index 0ca98751e5b32..45d3a6177bd44 100644
--- a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc
+++ b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc
@@ -615,9 +615,10 @@ Status MatMulNBits<T1>::ComputeBUnpacked(const Tensor* a,
   const size_t ldb = helper.Ldb(true);
 
   float* scales_ptr = nullptr;
+  IAllocatorUniquePtr<float> temp_scales;
   if (!scales_fp32_) {
     auto scales_size = static_cast<size_t>(scales->Shape().Size());
-    auto temp_scales = IAllocator::MakeUniquePtr<float>(allocator, scales_size, true);
+    temp_scales = IAllocator::MakeUniquePtr<float>(allocator, scales_size, true);
     MlasConvertHalfToFloatBuffer(scales_data, temp_scales.get(), scales_size);
     scales_ptr = temp_scales.get();
   } else {
@@ -698,8 +699,9 @@ Status MatMulNBits<T1>::ComputeBUnpacked(const Tensor* a,
   if (bias) {
     float* bias_ptr = nullptr;
     const size_t bias_size = static_cast<size_t>(bias->Shape().Size());
+    IAllocatorUniquePtr<float> bias_temp;
     if (!bias_fp32_) {
-      auto bias_temp = IAllocator::MakeUniquePtr<float>(allocator, bias_size, true);
+      bias_temp = IAllocator::MakeUniquePtr<float>(allocator, bias_size, true);
       MlasConvertHalfToFloatBuffer(bias->Data<MLFloat16>(), bias_temp.get(), bias_size);
       bias_ptr = bias_temp.get();
     } else {
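
Note on the bug this patch fixes: temp_scales and bias_temp were previously
declared with "auto" inside the if blocks, so the fp32 buffers holding the
converted half-precision scales/bias were freed at the closing brace while
scales_ptr / bias_ptr still pointed into them, and the later GEMM read freed
memory. Hoisting the IAllocatorUniquePtr<float> declarations to the enclosing
scope keeps the buffers alive for the rest of the function. Below is a minimal
standalone sketch of the lifetime issue, using std::unique_ptr as a stand-in
for ORT's IAllocatorUniquePtr; all names in it are illustrative, not the
kernel's actual code.

    #include <cstdio>
    #include <memory>

    int main() {
      float* scales_ptr = nullptr;
      std::unique_ptr<float[]> temp_scales;  // hoisted, as in the patched code
      const bool scales_fp32 = false;        // pretend the scales arrived as fp16
      if (!scales_fp32) {
        // Pre-fix, the owning pointer was a block-local 'auto' variable here,
        // so the buffer died at the closing brace and scales_ptr dangled.
        temp_scales = std::make_unique<float[]>(4);
        for (int i = 0; i < 4; ++i) {
          temp_scales[i] = 0.5f * i;  // stand-in for the half->float conversion
        }
        scales_ptr = temp_scales.get();
      }
      // Safe only because temp_scales now owns the buffer at function scope.
      std::printf("scales[3] = %f\n", scales_ptr[3]);
      return 0;
    }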