-
Notifications
You must be signed in to change notification settings - Fork 31
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Using a test structure to execute the scenarios
- Loading branch information
1 parent
e8d1b35
commit ddd8142
Showing
17 changed files
with
6,777 additions
and
2,082 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,10 @@ | |
.idea/ | ||
redbox-core/models/ | ||
|
||
# AI Test Data | ||
|
||
ai-tests/data/ | ||
|
||
# data files | ||
elk/elasticsearch/data | ||
data/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
"editor.formatOnSave": true, | ||
"[python]": { | ||
"editor.codeActionsOnSave": { | ||
"source.organizeImports": "explicit" | ||
}, | ||
"editor.defaultFormatter": "ms-python.black-formatter" | ||
}, | ||
"python.analysis.autoImportCompletions": true, | ||
"python.testing.unittestEnabled": false, | ||
"python.testing.pytestEnabled": true, | ||
"python.testing.pytestArgs": [ | ||
".", | ||
"-v", | ||
], | ||
"python.testing.pytestPath": "venv/bin/python -m pytest" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# AI Tests | ||
|
||
This workspace runs Redbox over a set of prompts and documents to allow investigating decision making and responses. | ||
|
||
The setup is: | ||
|
||
* Create a CSV file at data/cases.csv with the columns ID, Prompts and Documents | ||
* Drop all your test documents in data/documents | ||
* Fill out the CSV with one test case per row: an ID, the prompts, and the names of the documents to use (separate multiple prompts or document names with |) | ||
* Run the test to produce traces in Langfuse and logs in data/output | ||
|
||
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
[tool.ruff] | ||
line-length = 120 | ||
target-version = 'py312' | ||
|
||
[tool.poetry] | ||
name = "redbox-ai-tests" | ||
version = "0.1.0" | ||
description = "AI Tests for Redbox" | ||
authors = ["i.AI <[email protected]>"] | ||
license = "MIT" | ||
readme = "../README.md" | ||
|
||
[tool.poetry.dependencies] | ||
redbox = {path="../redbox-core", develop=true} | ||
python = ">=3.12,<3.13" | ||
|
||
|
||
[tool.poetry.group.dev.dependencies] | ||
pytest = "^8.3.2" | ||
moto = "^5.0.12" | ||
pytest-cov = "^5.0.0" | ||
pytest-asyncio = "^0.23.6" | ||
jsonlines = "^4.0.0" | ||
deepeval = "^1.0.3" | ||
pytest-mock = "^3.14.0" | ||
boto3-stubs = {extras = ["essential"], version = "^1.35.28"} | ||
requests-mock = "^1.12.1" | ||
langfuse = "^2.53.9" | ||
|
||
[build-system] | ||
requires = ["poetry-core"] | ||
build-backend = "poetry.core.masonry.api" | ||
|
||
[tool.pytest.ini_options] | ||
addopts = "--import-mode=importlib" | ||
env_override_existing_values = 1 | ||
env_files = [ | ||
"tests/.env.test", | ||
".env" | ||
] | ||
markers = [ | ||
] |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
|
||
|
||
from pydantic import BaseModel | ||
|
||
|
||
class AITestCase(BaseModel):
    """One AI test scenario: an identifier, its prompts and its documents."""

    # Identifier of the case; it has to be valid as a file path.
    id: str
    # Prompts that make up the scenario.
    prompts: list[str]
    # Documents the scenario refers to.
    documents: list[str]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
|
||
from logging import getLogger | ||
from typing_extensions import Generator | ||
from pathlib import Path | ||
import pytest | ||
import csv | ||
|
||
from .cases import AITestCase | ||
|
||
# CSV file describing the test cases (path is relative to the working directory).
TEST_CASES_FILE = Path("data") / "cases.csv"
# Directory containing the documents referenced by the test cases.
DOCUMENTS_DIR = Path("data") / "documents"
# Prefix placed in front of every document name to form its key.
DOCUMENT_UPLOAD_USER = "ai_tests"

# Root logger, used for collection-time warnings.
logger = getLogger()
|
||
def test_cases() -> list[AITestCase]:
    """Load the AI test cases from ``TEST_CASES_FILE``.

    Every CSV row must provide the columns ``ID``, ``Prompts`` and
    ``Documents``. Prompts and document names are ``|``-separated. Each
    document name is prefixed with ``DOCUMENT_UPLOAD_USER`` to form its key;
    an empty ``Documents`` cell yields a case without documents.

    A warning is logged listing every referenced document that has no
    matching file in ``DOCUMENTS_DIR``.

    Returns:
        All test cases, in file order.

    Raises:
        FileNotFoundError: If ``TEST_CASES_FILE`` does not exist.
        KeyError: If a required column is missing from the CSV.
    """
    # newline="" is what the csv module requires so that quoted fields with
    # embedded line breaks parse correctly; utf-8-sig also accepts files that
    # start with a byte-order mark (which would otherwise corrupt the "ID" header).
    with TEST_CASES_FILE.open(newline="", encoding="utf-8-sig") as cases_file:
        all_cases = [
            AITestCase(
                id=row["ID"],
                prompts=row["Prompts"].split("|"),
                documents=[
                    f"{DOCUMENT_UPLOAD_USER}/{doc_name}"
                    for doc_name in row["Documents"].split("|")
                    if doc_name  # "".split("|") is [""], which is not a real document
                ],
            )
            for row in csv.DictReader(cases_file)
        ]

    referenced = {doc for case in all_cases for doc in case.documents}
    # Case documents carry the upload-user prefix, so the files on disk must be
    # given the same prefix before comparing; otherwise everything looks missing.
    if DOCUMENTS_DIR.is_dir():
        available = {f"{DOCUMENT_UPLOAD_USER}/{path.name}" for path in DOCUMENTS_DIR.iterdir()}
    else:
        available = set()

    missing_documents = referenced - available
    if missing_documents:
        logger.warning(
            "Missing %d documents - %s",
            len(missing_documents),
            ",".join(sorted(missing_documents)),
        )
    return all_cases
|
||
|
||
|
||
def pytest_generate_tests(metafunc):
    """Parametrize tests that request ``test_case`` with the loaded test cases.

    Tests that do not use ``test_case`` are left alone. Each generated test is
    identified by the ``id`` of its case.
    """
    if "test_case" not in metafunc.fixturenames:
        return
    metafunc.parametrize("test_case", test_cases(), ids=lambda case: case.id)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
|
||
import csv | ||
from pathlib import Path | ||
import io | ||
import sys | ||
from typing import Generator | ||
from uuid import uuid4 | ||
|
||
from langfuse.callback import CallbackHandler | ||
import pytest | ||
|
||
from redbox.models.settings import Settings, get_settings | ||
from redbox.models.chain import RedboxQuery, RedboxState, AISettings | ||
from redbox.app import Redbox | ||
from redbox.loader.ingester import ingest_file | ||
|
||
from .cases import AITestCase | ||
from .conftest import DOCUMENT_UPLOAD_USER | ||
|
||
|
||
|
||
def file_to_s3(file_path: Path, s3_client, env: Settings) -> str:
    """Upload a local file to the bucket named in ``env`` and return its key.

    The key is ``DOCUMENT_UPLOAD_USER/<file name>``. The object is tagged with
    ``file_type`` set to the file's suffix as given by ``Path.suffix``.

    Args:
        file_path: Local file to upload.
        s3_client: Client object exposing ``put_object``.
        env: Settings providing ``bucket_name``.

    Returns:
        The key the file was stored under.
    """
    key = f"{DOCUMENT_UPLOAD_USER}/{file_path.name}"
    tagging = f"file_type={file_path.suffix}"

    # The whole file is read into memory and sent as the object body.
    payload = file_path.read_bytes()
    s3_client.put_object(
        Bucket=env.bucket_name,
        Body=payload,
        Key=key,
        Tagging=tagging,
    )
    return key
|
||
|
||
|
||
def get_state(user_uuid, prompts, documents):
    """Build the initial ``RedboxState`` for one scenario.

    The last prompt becomes the question, prefixed with ``@gadget``; all
    preceding prompts are passed as the chat history. ``documents`` is used
    both as the selected and as the permitted S3 keys, and the request uses a
    default-constructed ``AISettings``.
    """
    latest_prompt = prompts[-1]
    earlier_prompts = prompts[:-1]

    request = RedboxQuery(
        question=f"@gadget {latest_prompt}",
        s3_keys=documents,
        user_uuid=user_uuid,
        chat_history=earlier_prompts,
        ai_settings=AISettings(),
        permitted_s3_keys=documents,
    )
    return RedboxState(request=request)
|
||
def run_app(app, state) -> RedboxState:
    """Invoke ``app.graph`` on ``state`` with a fresh Langfuse callback handler."""
    invoke_config = {"callbacks": [CallbackHandler()]}
    return app.graph.invoke(state, config=invoke_config)
|
||
@pytest.fixture
def settings():
    """Provide the object returned by ``get_settings`` to tests and fixtures."""
    current_settings = get_settings()
    return current_settings
|
||
@pytest.fixture
def all_loaded_doc_uris(settings: Settings) -> set[str]:
    """Return the URIs of documents that already have chunks in Elasticsearch.

    Searches the ``<elastic_root_index>-chunk-current`` index for chunks whose
    ``metadata.chunk_resolution`` is ``"largest"`` and collects the distinct
    ``metadata.uri`` values of the hits.
    """
    es = settings.elasticsearch_client()
    response = es.search(
        index=f"{settings.elastic_root_index}-chunk-current",
        query={"term": {"metadata.chunk_resolution": "largest"}},
        # Elasticsearch returns only 10 hits unless a size is given, which made
        # most already-ingested documents look missing. 10,000 is the default
        # upper bound for a single search page (index.max_result_window).
        # NOTE(review): switch to pagination or an aggregation on the URI field
        # if the index can hold more matching chunks than this.
        size=10_000,
    )
    hits = response.get("hits", {}).get("hits", [])
    return {hit["_source"]["metadata"]["uri"] for hit in hits}
|
||
@pytest.fixture
def loaded_docs(all_loaded_doc_uris: set[str], settings: Settings) -> set[str]:
    """Ensure every file in ``data/documents`` is uploaded and ingested.

    Files whose ``DOCUMENT_UPLOAD_USER/<name>`` URI is not already in
    ``all_loaded_doc_uris`` are uploaded with ``file_to_s3`` and passed to
    ``ingest_file``.

    Returns:
        The URIs known to be loaded after this fixture ran: the ones that were
        already present plus the ones ingested here.
    """
    loaded = set(all_loaded_doc_uris)  # copy so the input fixture value is not mutated
    s3_client = None  # created on first use, so no client is built when nothing is missing

    for doc in Path("data/documents").iterdir():
        if not doc.is_file():
            # Sub-directories cannot be opened for upload.
            continue
        uri = f"{DOCUMENT_UPLOAD_USER}/{doc.name}"
        if uri in loaded:
            continue

        print(f"Loading missing document: {uri}")
        if s3_client is None:
            s3_client = settings.s3_client()
        file_to_s3(doc, s3_client, settings)
        ingest_file(uri)
        loaded.add(uri)

    return loaded
|
||
|
||
|
||
def test_usecases(test_case: AITestCase, loaded_docs: set[str], output_dir: Path = Path("data/output")):
    """Run one scenario through Redbox and save its printed output to a file.

    Everything printed while the app runs is written to
    ``output_dir / test_case.id``. If the run raises, the error is recorded in
    that file and then re-raised so pytest reports the scenario as failed
    instead of silently passing.

    Args:
        test_case: The scenario to run (parametrized from the cases file).
        loaded_docs: Requested only so the documents are ingested beforehand.
        output_dir: Directory for the per-case output files.
    """
    # Local import keeps this function self-contained; redirect_stdout always
    # restores sys.stdout, even when the run raises.
    from contextlib import redirect_stdout

    env = get_settings()
    app = Redbox(debug=True, env=env)

    save_path = output_dir / test_case.id
    # The output directory is not guaranteed to exist on a fresh checkout.
    save_path.parent.mkdir(parents=True, exist_ok=True)

    redbox_state = get_state(user_uuid=uuid4(), prompts=test_case.prompts, documents=test_case.documents)

    with save_path.open("w") as file, redirect_stdout(file):
        try:
            run_app(app, redbox_state)
        except Exception as e:
            # Still inside the redirect, so the message lands in the case's output file.
            print(f"Error in {e}")
            raise
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.