From c400ce21c9edfd0ffc3b7914bd2c859aa4ae9cb3 Mon Sep 17 00:00:00 2001
From: Billy Hu
Date: Thu, 24 Oct 2024 16:16:48 -0700
Subject: [PATCH] [Evaluation] Adding initial TSG and error message enhancement
 for remote tracking failure (#38062)

* Handle storage access error
* add tsg link
* update
* add initial tsg
* fix lint issues
* Fix the link issues and broken tests
* fix the link issue again
* address comments
* update
---
 .../azure-ai-evaluation/CHANGELOG.md          |  5 +-
 .../azure-ai-evaluation/TROUBLESHOOTING.md    | 50 +++++++++++++++++++
 .../ai/evaluation/_evaluate/_eval_run.py      | 25 ++++++++--
 .../ai/evaluation/_evaluate/_evaluate.py      | 48 ++++++++++++++----
 .../azure/ai/evaluation/_exceptions.py        | 14 ++++++
 5 files changed, 125 insertions(+), 17 deletions(-)
 create mode 100644 sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md

diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
index 14e10073054e..be694e330f8f 100644
--- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
+++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md
@@ -9,11 +9,11 @@
 - Renamed environment variable `PF_EVALS_BATCH_USE_ASYNC` to `AI_EVALS_BATCH_USE_ASYNC`.
 - AdversarialScenario enum does not include `ADVERSARIAL_INDIRECT_JAILBREAK`, invoking IndirectJailbreak or XPIA should be done with `IndirectAttackSimulator`
 - Outputs of `Simulator` and `AdversarialSimulator` previously had `to_eval_qa_json_lines` and now has `to_eval_qr_json_lines`. Where `to_eval_qa_json_lines` had:
-```json
+```json
 {"question": <user_message>, "answer": <assistant_message>}
 ```
 `to_eval_qr_json_lines` now has:
-```json
+```json
 {"query": <user_message>, "response": assistant_message}
 ```
@@ -32,6 +32,7 @@
 - `GroundednessEvaluator`
 - `SimilarityEvaluator`
 - `RetrievalEvaluator`
+- Improved the error message for storage access permission issues to provide clearer guidance for users.

 ## 1.0.0b4 (2024-10-16)
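For context (not part of the patch): a minimal sketch of what the query/response JSON Lines format described in the changelog entry above looks like on disk. The file name `simulator_output.jsonl` and the example strings are hypothetical; only the `query`/`response` keys come from the changelog.

```python
import json

# Hypothetical records in the new query/response format.
records = [
    {"query": "What is the capital of France?", "response": "Paris."},
    {"query": "And of Italy?", "response": "Rome."},
]

# Write one JSON object per line, the JSONL shape evaluators consume.
with open("simulator_output.jsonl", "w", encoding="utf-8") as f:
    for record in records:
        f.write(json.dumps(record) + "\n")

# Reading the file back yields one {"query": ..., "response": ...} dict per line.
with open("simulator_output.jsonl", encoding="utf-8") as f:
    parsed = [json.loads(line) for line in f if line.strip()]
print(parsed[0]["query"], "->", parsed[0]["response"])
```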
diff --git a/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md b/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md
new file mode 100644
index 000000000000..fe0ba312624b
--- /dev/null
+++ b/sdk/evaluation/azure-ai-evaluation/TROUBLESHOOTING.md
@@ -0,0 +1,50 @@
+# Troubleshoot AI Evaluation SDK Issues
+
+This guide walks you through how to investigate failures and common errors in the `azure-ai-evaluation` SDK, and the steps to mitigate these issues.
+
+## Table of Contents
+
+- [Handle Evaluate API Errors](#handle-evaluate-api-errors)
+  - [Troubleshoot Remote Tracking Issues](#troubleshoot-remote-tracking-issues)
+  - [Safety Metric Supported Regions](#safety-metric-supported-regions)
+- [Handle Simulation Errors](#handle-simulation-errors)
+  - [Adversarial Simulation Supported Regions](#adversarial-simulation-supported-regions)
+- [Logging](#logging)
+- [Get Additional Help](#get-additional-help)
+
+## Handle Evaluate API Errors
+
+### Troubleshoot Remote Tracking Issues
+
+- Before running `evaluate()`, make sure you are logged in by running `az login` so that logging and tracing to your Azure AI project can be enabled.
+- Then install the following sub-package:
+
+  ```Shell
+  pip install azure-ai-evaluation[remote]
+  ```
+
+- Ensure that you assign the proper permissions to the storage account linked to your Azure AI Studio hub. This can be done with the following command. More information can be found [here](https://learn.microsoft.com/azure/ai-studio/how-to/disable-local-auth).
+
+  ```Shell
+  az role assignment create --role "Storage Blob Data Contributor" --scope /subscriptions/<subscription-id>/resourceGroups/<resource-group> --assignee-principal-type User --assignee-object-id "<user-object-id>"
+  ```
+
+- Additionally, if you're using a virtual network or private link and your evaluation run upload fails because of it, check out this [guide](https://docs.microsoft.com/azure/machine-learning/how-to-enable-studio-virtual-network#access-data-using-the-studio).
+
+### Safety Metric Supported Regions
+
+Risk and safety evaluators depend on the Azure AI Studio safety evaluation backend service. For a list of supported regions, please refer to the documentation [here](https://aka.ms/azureaisafetyeval-regionsupport).
+
+## Handle Simulation Errors
+
+### Adversarial Simulation Supported Regions
+
+Adversarial simulators use the Azure AI Studio safety evaluation backend service to generate an adversarial dataset against your application. For a list of supported regions, please refer to the documentation [here](https://aka.ms/azureaiadvsimulator-regionsupport).
+
+## Logging
+
+You can set the logging level via the environment variable `PF_LOGGING_LEVEL`. Valid values are `CRITICAL`, `ERROR`, `WARNING`, `INFO`, and `DEBUG`; the default is `INFO`.
+
+## Get Additional Help
+
+Additional information on ways to reach out for support can be found in the [SUPPORT.md](https://github.com/Azure/azure-sdk-for-python/blob/main/SUPPORT.md) at the root of the repo.
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_eval_run.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_eval_run.py
index 8b6c4cf8c339..b52c285b34c7 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_eval_run.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_eval_run.py
@@ -21,6 +21,7 @@
 from azure.ai.evaluation._version import VERSION
 from azure.core.pipeline.policies import RetryPolicy
 from azure.core.rest import HttpResponse
+from azure.core.exceptions import HttpResponseError

 LOGGER = logging.getLogger(__name__)

@@ -443,10 +444,26 @@ def log_artifact(self, artifact_folder: str, artifact_name: str = EVALUATION_ART
         datastore = self._ml_client.datastores.get_default(include_secrets=True)
         account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
         svc_client = BlobServiceClient(account_url=account_url, credential=self._get_datastore_credential(datastore))
-        for local, remote in zip(local_paths, remote_paths["paths"]):
-            blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
-            with open(local, "rb") as fp:
-                blob_client.upload_blob(fp, overwrite=True)
+        try:
+            for local, remote in zip(local_paths, remote_paths["paths"]):
+                blob_client = svc_client.get_blob_client(container=datastore.container_name, blob=remote["path"])
+                with open(local, "rb") as fp:
+                    blob_client.upload_blob(fp, overwrite=True)
+        except HttpResponseError as ex:
+            if ex.status_code == 403:
+                msg = (
+                    "Failed to upload evaluation run to the cloud due to insufficient permission to access the storage."
+                    " Please ensure that the necessary access rights are granted."
+                )
+                raise EvaluationException(
+                    message=msg,
+                    target=ErrorTarget.EVAL_RUN,
+                    category=ErrorCategory.FAILED_REMOTE_TRACKING,
+                    blame=ErrorBlame.USER_ERROR,
+                    tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
+                ) from ex
+
+            raise ex

         # To show artifact in UI we will need to register it. If it is a promptflow run,
         # we are rewriting already registered artifact and need to skip this step.
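For reviewers (not part of the patch): a minimal sketch of how a user could check, outside the SDK, whether their identity can write to the workspace's default blob datastore, the upload path that the new 403 handling above guards. It mirrors the calls used in `log_artifact`; the subscription, resource group, workspace placeholders, and the `permission_check.txt` blob name are hypothetical.

```python
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
from azure.storage.blob import BlobServiceClient
from azure.core.exceptions import HttpResponseError

# Hypothetical workspace coordinates; replace with your own values.
credential = DefaultAzureCredential()
ml_client = MLClient(credential, "<subscription-id>", "<resource-group>", "<workspace-name>")

# Resolve the default datastore the same way log_artifact does above.
datastore = ml_client.datastores.get_default()
account_url = f"{datastore.account_name}.blob.{datastore.endpoint}"
svc_client = BlobServiceClient(account_url=account_url, credential=credential)
blob_client = svc_client.get_blob_client(container=datastore.container_name, blob="permission_check.txt")

try:
    blob_client.upload_blob(b"permission check", overwrite=True)
    print("Write access to the default datastore looks fine.")
except HttpResponseError as ex:
    if ex.status_code == 403:
        # Same condition the patched code maps to FAILED_REMOTE_TRACKING.
        print("403: assign 'Storage Blob Data Contributor' on the linked storage account (see TROUBLESHOOTING.md).")
    else:
        raise
```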
diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
index efc434148182..b5aa3fcc0a59 100644
--- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
+++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py
@@ -9,7 +9,7 @@
 import pandas as pd

 from promptflow._sdk._constants import LINE_NUMBER
-from promptflow._sdk._errors import MissingAzurePackage
+from promptflow._sdk._errors import MissingAzurePackage, UserAuthenticationError, UploadInternalError
 from promptflow.client import PFClient
 from promptflow.entities import Run

@@ -416,15 +416,31 @@ def _apply_target_to_data(
     _run_name = kwargs.get("_run_name")
     upload_target_snaphot = kwargs.get("_upload_target_snapshot", False)

-    with TargetRunContext(upload_target_snaphot):
-        run: Run = pf_client.run(
-            flow=target,
-            display_name=evaluation_name,
-            data=data,
-            properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
-            stream=True,
-            name=_run_name,
-        )
+    try:
+        with TargetRunContext(upload_target_snaphot):
+            run: Run = pf_client.run(
+                flow=target,
+                display_name=evaluation_name,
+                data=data,
+                properties={EvaluationRunProperties.RUN_TYPE: "eval_run", "isEvaluatorRun": "true"},
+                stream=True,
+                name=_run_name,
+            )
+    except (UserAuthenticationError, UploadInternalError) as ex:
+        if "Failed to upload run" in ex.message:
+            msg = (
+                "Failed to upload the target run to the cloud. "
+                "This may be caused by insufficient permission to access storage or other errors."
+            )
+            raise EvaluationException(
+                message=msg,
+                target=ErrorTarget.EVALUATE,
+                category=ErrorCategory.FAILED_REMOTE_TRACKING,
+                blame=ErrorBlame.USER_ERROR,
+                tsg_link="https://aka.ms/azsdk/python/evaluation/remotetracking/troubleshoot",
+            ) from ex
+
+        raise ex

     target_output: pd.DataFrame = pf_client.runs.get_details(run, all_results=True)
     # Remove input and output prefix
@@ -620,7 +636,17 @@ def evaluate(
                 internal_message=error_message,
                 target=ErrorTarget.EVALUATE,
                 category=ErrorCategory.FAILED_EXECUTION,
-                blame=ErrorBlame.UNKNOWN,
+                blame=ErrorBlame.USER_ERROR,
             ) from e
+
+        # Ensure a consistent user experience when encountering errors by converting
+        # all other exceptions to EvaluationException.
+        if not isinstance(e, EvaluationException):
+            raise EvaluationException(
+                message=str(e),
+                target=ErrorTarget.EVALUATE,
+                category=ErrorCategory.FAILED_EXECUTION,
+                blame=ErrorBlame.SYSTEM_ERROR,
+            ) from e

         raise e
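For context (not part of the patch): with the changes above, remote tracking failures reach the caller as an `EvaluationException`. A minimal sketch of handling that on the user side follows. The project coordinates, the `data.jsonl` file, and the choice of `F1ScoreEvaluator` are hypothetical; `EvaluationException` and `ErrorCategory` live in a private module, and the attribute names used below come from this patch.

```python
from azure.ai.evaluation import evaluate, F1ScoreEvaluator
from azure.ai.evaluation._exceptions import EvaluationException, ErrorCategory

# Hypothetical Azure AI project coordinates and data file.
azure_ai_project = {
    "subscription_id": "<subscription-id>",
    "resource_group_name": "<resource-group>",
    "project_name": "<project-name>",
}

try:
    evaluate(
        data="data.jsonl",
        evaluators={"f1": F1ScoreEvaluator()},
        azure_ai_project=azure_ai_project,
    )
except EvaluationException as ex:
    # With this patch, storage permission problems surface as FAILED_REMOTE_TRACKING
    # and the exception carries a troubleshooting link.
    if ex.category == ErrorCategory.FAILED_REMOTE_TRACKING:
        print(f"Remote tracking failed: {ex}")  # __str__ already appends the TSG link
    else:
        raise
```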
""" @@ -33,6 +34,7 @@ class ErrorCategory(Enum): FAILED_EXECUTION = "FAILED_EXECUTION" SERVICE_UNAVAILABLE = "SERVICE UNAVAILABLE" MISSING_PACKAGE = "MISSING PACKAGE" + FAILED_REMOTE_TRACKING = "FAILED REMOTE TRACKING" UNKNOWN = "UNKNOWN" @@ -90,6 +92,8 @@ class EvaluationException(AzureError): :type category: ~azure.ai.evaluation._exceptions.ErrorCategory :param blame: The source of blame for the error, defaults to Unknown. :type balance: ~azure.ai.evaluation._exceptions.ErrorBlame + :param tsg_link: A link to the TSG page for troubleshooting the error. + :type tsg_link: str """ def __init__( @@ -100,10 +104,20 @@ def __init__( target: ErrorTarget = ErrorTarget.UNKNOWN, category: ErrorCategory = ErrorCategory.UNKNOWN, blame: ErrorBlame = ErrorBlame.UNKNOWN, + tsg_link: Optional[str] = None, **kwargs, ) -> None: self.category = category self.target = target self.blame = blame self.internal_message = internal_message + self.tsg_link = tsg_link super().__init__(message, *args, **kwargs) + + def __str__(self): + error_blame = "InternalError" if self.blame != ErrorBlame.USER_ERROR else "UserError" + msg = f"({error_blame}) {super().__str__()}" + if self.tsg_link: + msg += f"\nVisit {self.tsg_link} to troubleshoot this issue." + + return msg