diff --git a/tests/tlm/test_properties.py b/tests/tlm/test_properties.py
index 814b1e7a..4a660f63 100644
--- a/tests/tlm/test_properties.py
+++ b/tests/tlm/test_properties.py
@@ -15,7 +15,6 @@
 valid_tlm_models = [model for model in _VALID_TLM_MODELS if model not in excluded_tlm_models]
 models_with_no_perplexity_score = ["claude-3-haiku", "claude-3-sonnet", "claude-3.5-sonnet"]
 
-valid_tlm_models = ["gpt-4o"]
 
 def _test_log(response: Dict[str, Any], options: Dict[str, Any]) -> None:
     """Tests the log dictionary in the response based on the options dictionary."""
@@ -131,7 +130,6 @@ def test_prompt(tlm_dict: Dict[str, Any], model: str, quality_preset: str) -> No
         options,
         allow_null_trustworthiness_score=allow_null_trustworthiness_score,
     )
-    return False
 
 
 @pytest.mark.parametrize("model", valid_tlm_models)