Merge branch 'main' into kylesayrs/fix-use-cache
kylesayrs authored Oct 20, 2024
2 parents 34e2150 + 11af4b2 commit 42c192f
Showing 1 changed file with 3 additions and 5 deletions.
8 changes: 3 additions & 5 deletions src/llmcompressor/modifiers/obcq/utils/sgpt_wrapper.py
@@ -1,8 +1,6 @@
 import time
 
-from compressed_tensors.quantization.lifecycle.forward import (
-    maybe_calibrate_or_quantize,
-)
+from compressed_tensors.quantization.lifecycle.forward import forward_quantize
 
 from llmcompressor.modifiers.utils.compression_wrapper import ModuleCompressionWrapper
 from llmcompressor.utils import getattr_chain
@@ -95,7 +93,7 @@ def compress(
         args_loc = "quantization_scheme.weights"
         weight_quant_args = getattr_chain(self.layer, args_loc, None)
         if weight_quant_args is not None:
-            W = maybe_calibrate_or_quantize(self.layer, W, "weight", weight_quant_args)
+            W = forward_quantize(self.layer, W, "weight", weight_quant_args)
 
         if isinstance(self.layer, nn.Conv2d):
             W = W.flatten(1)
@@ -213,7 +211,7 @@ def compress(
             W = W.t()
         W = W.reshape(final_shape).to(final_dtype)
         if weight_quant_args is not None:
-            W = maybe_calibrate_or_quantize(self.layer, W, "weight", weight_quant_args)
+            W = forward_quantize(self.layer, W, "weight", weight_quant_args)
 
         # This is a bit hacky, but FSDP updates only work if we change the weight in
         # place, clone() or direct assignment won't work
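
For context, a minimal sketch of the call pattern introduced by this commit is shown below. It assumes only what is visible in this diff: the import path, the forward_quantize(module, value, base_name, quant_args) call, and getattr_chain(obj, attr_path, default). The helper name quantize_weight_if_configured is hypothetical and not part of the source.

# Minimal sketch, not part of the commit. Assumes the import path and call
# signatures shown in the diff above; quantize_weight_if_configured is a
# hypothetical helper name.
import torch

from compressed_tensors.quantization.lifecycle.forward import forward_quantize
from llmcompressor.utils import getattr_chain


def quantize_weight_if_configured(layer: torch.nn.Module, W: torch.Tensor) -> torch.Tensor:
    # Resolve layer.quantization_scheme.weights, falling back to None if any
    # attribute along the chain is missing.
    weight_quant_args = getattr_chain(layer, "quantization_scheme.weights", None)
    if weight_quant_args is not None:
        # Fake-quantize the weight tensor using the layer's weight settings,
        # mirroring the two call sites updated in this commit.
        W = forward_quantize(layer, W, "weight", weight_quant_args)
    return W

Both updated call sites in compress() follow this pattern; only the helper's name changed, from maybe_calibrate_or_quantize to forward_quantize.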