Commit 5e21639: fix comment
dsikka committed Oct 18, 2024
1 parent 841780d commit 5e21639
Showing 2 changed files with 1 addition and 2 deletions.
1 change: 0 additions & 1 deletion src/llmcompressor/modifiers/calibration.py
@@ -69,7 +69,6 @@ def call_observer(module: Module, base_name: str, value: torch.Tensor):
     :param value: torch.Tensor to be passed to the observer
     """
     observer = getattr(module, f"{base_name}_observer")
-    # TODO: what cases require the g_idx?
     g_idx = getattr(module, "weight_g_idx", None)

     updated_scale, updated_zero_point = observer(value, g_idx=g_idx)
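For context, call_observer resolves an observer registered on the module under the name f"{base_name}_observer" and asks it for updated quantization parameters; g_idx, when present, maps weight columns to quantization groups. Below is a minimal sketch of what such an observer might compute, assuming a simple asymmetric min-max scheme over uint8; MinMaxObserverSketch and the wiring around it are illustrative only, not the library's Observer API.

import torch
from torch import Tensor

class MinMaxObserverSketch(torch.nn.Module):
    # Illustrative only: computes (scale, zero_point) for asymmetric uint8
    # quantization over the whole tensor. A real observer would track running
    # statistics and use g_idx for per-group (activation-ordered) schemes.
    def forward(self, value: Tensor, g_idx: Tensor = None):
        min_val = value.min().clamp(max=0.0)  # include zero in the range
        max_val = value.max().clamp(min=0.0)
        scale = (max_val - min_val).clamp(min=1e-8) / 255.0
        zero_point = torch.clamp((-min_val / scale).round(), 0, 255).to(torch.int64)
        return scale, zero_point

# Usage mirroring the diff above: the observer lives on the module under
# f"{base_name}_observer" and is called with the tensor being calibrated.
linear = torch.nn.Linear(16, 16)
linear.weight_observer = MinMaxObserverSketch()
observer = getattr(linear, "weight_observer")
updated_scale, updated_zero_point = observer(linear.weight, g_idx=None)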
@@ -230,7 +230,7 @@ def _calibrate_if_possible(self, module: Module):
         register_calibration_hooks():
             if input activation (used to call observers before input QDQ):
                 - pre_hook_handle = module.register_forward_pre_hook(calibrate_input_hook())
-            if output activation (used to call observers before output QDQ and fake_quantize):
+            if output activation (used to call observers before output QDQ):
                 - post_hook_handle = module.register_forward_hook(calibrate_kv_cache_output_hook())
             if kv_cache quantization (used to set kv_cache to QuantizedKVParameterCache and update k_scale/v_scale)
                 - pre_hook_handle = module.register_forward_pre_hook(calibrate_kv_cache_input_hook(), with_kwargs=True)
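The docstring pseudocode above maps directly onto PyTorch's standard hook API. A minimal sketch of that wiring follows; only register_forward_pre_hook and register_forward_hook are real PyTorch calls, while the calibrate_* bodies are placeholders, not the library's implementations (the pseudocode's calibrate_input_hook() suggests a factory returning the hook; this sketch passes plain functions for brevity).

import torch
from torch.nn import Module

def calibrate_input_hook(module: Module, args):
    # Runs before forward: observe the raw input activation before input QDQ.
    # A real hook would pass args[0] to the module's input observer here.
    pass

def calibrate_output_hook(module: Module, args, output):
    # Runs after forward: observe the output activation before output QDQ.
    return output

def register_calibration_hooks_sketch(module: Module, input_act: bool, output_act: bool):
    # Keep the handles so the hooks can be removed once calibration is done.
    handles = []
    if input_act:
        handles.append(module.register_forward_pre_hook(calibrate_input_hook))
    if output_act:
        handles.append(module.register_forward_hook(calibrate_output_hook))
    return handles

handles = register_calibration_hooks_sketch(torch.nn.Linear(8, 8), True, True)
for handle in handles:
    handle.remove()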
