Skip to content

Commit

Permalink
Fixed fp16 allocation lifetime bug: the temporary fp32 buffers (`temp_scales`, `bias_temp`) were declared inside their `if` blocks, so the `IAllocatorUniquePtr` freed the converted buffer at end of scope while `scales_ptr`/`bias_ptr` still pointed into it (use-after-free). The declarations are now hoisted to the enclosing scope so the buffers outlive their use.
Browse files Browse the repository at this point in the history
  • Loading branch information
fajin-corp committed Oct 25, 2024
1 parent 0768ad5 commit cae8a5e
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc
Original file line number Diff line number Diff line change
Expand Up @@ -615,9 +615,10 @@ Status MatMulNBits<MLFloat16>::ComputeBUnpacked(const Tensor* a,
const size_t ldb = helper.Ldb(true);

float* scales_ptr = nullptr;
IAllocatorUniquePtr<float> temp_scales;
if (!scales_fp32_) {
auto scales_size = static_cast<size_t>(scales->Shape().Size());
auto temp_scales = IAllocator::MakeUniquePtr<float>(allocator, scales_size, true);
temp_scales = IAllocator::MakeUniquePtr<float>(allocator, scales_size, true);
MlasConvertHalfToFloatBuffer(scales_data, temp_scales.get(), scales_size);
scales_ptr = temp_scales.get();
} else {
Expand Down Expand Up @@ -698,8 +699,9 @@ Status MatMulNBits<MLFloat16>::ComputeBUnpacked(const Tensor* a,
if (bias) {
float* bias_ptr = nullptr;
const size_t bias_size = static_cast<size_t>(bias->Shape().Size());
IAllocatorUniquePtr<float> bias_temp;
if (!bias_fp32_) {
auto bias_temp = IAllocator::MakeUniquePtr<float>(allocator, bias_size, true);
bias_temp = IAllocator::MakeUniquePtr<float>(allocator, bias_size, true);
MlasConvertHalfToFloatBuffer(bias->Data<MLFloat16>(), bias_temp.get(), bias_size);
bias_ptr = bias_temp.get();
} else {
Expand Down

0 comments on commit cae8a5e

Please sign in to comment.