Skip to content

Commit

Permalink
Using a test structure to execute the scenarios
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesrichards4 committed Nov 15, 2024
1 parent e8d1b35 commit ddd8142
Show file tree
Hide file tree
Showing 17 changed files with 6,777 additions and 2,082 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
.idea/
redbox-core/models/

# AI Test Data

ai-tests/data/

# data files
elk/elasticsearch/data
data/
Expand Down
4 changes: 4 additions & 0 deletions .vscode/redbox.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
"name": "Redbox Core",
"path": "../redbox-core"
},
{
"name": "AI Tests",
"path": "../ai-tests"
},
{
"name": "Integration Tests",
"path": "../tests"
Expand Down
17 changes: 17 additions & 0 deletions ai-tests/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"editor.formatOnSave": true,
"[python]": {
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
},
"editor.defaultFormatter": "ms-python.black-formatter"
},
"python.analysis.autoImportCompletions": true,
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.pytestArgs": [
".",
"-v"
],
"python.testing.pytestPath": "venv/bin/python -m pytest"
}
12 changes: 12 additions & 0 deletions ai-tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# AI Tests

This workspace runs Redbox over a set of prompts and documents to allow investigating decision making and responses.

The setup is:

* Create a CSV file at data/cases.csv with ID, Prompts and Documents columns
* Drop all your test documents in data/documents
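* Fill out the CSV with one test case per row: an ID (must be a valid file name, as it names the output log), the prompts and the documents (Prompts and Documents are each a list separated by |; documents are referenced by file name)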
* Run the test to produce traces in Langfuse and logs in data/output


4,770 changes: 4,770 additions & 0 deletions ai-tests/poetry.lock

Large diffs are not rendered by default.

42 changes: 42 additions & 0 deletions ai-tests/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
[tool.ruff]
line-length = 120
target-version = 'py312'

[tool.poetry]
name = "redbox-ai-tests"
version = "0.1.0"
description = "AI Tests for Redbox"
authors = ["i.AI <[email protected]>"]
license = "MIT"
readme = "../README.md"

[tool.poetry.dependencies]
redbox = {path="../redbox-core", develop=true}
python = ">=3.12,<3.13"


[tool.poetry.group.dev.dependencies]
pytest = "^8.3.2"
moto = "^5.0.12"
pytest-cov = "^5.0.0"
pytest-asyncio = "^0.23.6"
jsonlines = "^4.0.0"
deepeval = "^1.0.3"
pytest-mock = "^3.14.0"
boto3-stubs = {extras = ["essential"], version = "^1.35.28"}
requests-mock = "^1.12.1"
langfuse = "^2.53.9"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
addopts = "--import-mode=importlib"
env_override_existing_values = 1
env_files = [
"tests/.env.test",
".env"
]
markers = [
]
Empty file added ai-tests/tests/__init__.py
Empty file.
9 changes: 9 additions & 0 deletions ai-tests/tests/cases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@


from pydantic import BaseModel


class AITestCase(BaseModel):
    """One AI test scenario: an identifier, the prompts to send and the documents to use."""

    # Case identifier. It has to be valid as a file path, because it is used to
    # name files on disk.
    id: str
    # Prompts for the scenario, in order.
    prompts: list[str]
    # Names of the documents the scenario runs against.
    documents: list[str]
36 changes: 36 additions & 0 deletions ai-tests/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@

from logging import getLogger
from typing_extensions import Generator
from pathlib import Path
import pytest
import csv

from .cases import AITestCase

# Locations of the test inputs. Both paths are relative, so they resolve
# against the directory pytest is started from.
TEST_CASES_FILE = Path("data/cases.csv")
DOCUMENTS_DIR = Path("data/documents")
# Prefix put in front of every document name to build its key.
DOCUMENT_UPLOAD_USER = "ai_tests"

# Module-named logger rather than the root logger, so records can be traced
# back to (and filtered by) this module.
logger = getLogger(__name__)

def test_cases() -> list[AITestCase]:
    """Load every AI test case from ``TEST_CASES_FILE``.

    Each CSV row must provide ``ID``, ``Prompts`` and ``Documents`` columns.
    ``Prompts`` and ``Documents`` are ``|``-separated lists; each document name
    is prefixed with ``DOCUMENT_UPLOAD_USER``. An empty ``Documents`` cell
    yields a case with no documents.

    A warning is logged for any referenced document that has no file of the
    same name in ``DOCUMENTS_DIR``.

    Returns:
        The test cases, in file order.

    Raises:
        FileNotFoundError: If the CSV file or the documents directory is missing.
        KeyError: If a required column is absent from the CSV.
    """
    # newline="" is what the csv module expects for correct handling of quoted
    # line breaks; utf-8-sig also accepts files saved with a byte-order mark.
    with TEST_CASES_FILE.open(newline="", encoding="utf-8-sig") as cases_file:
        all_cases = [
            AITestCase(
                id=row["ID"],
                prompts=row["Prompts"].split("|"),
                documents=[
                    f"{DOCUMENT_UPLOAD_USER}/{doc_name}"
                    for doc_name in row["Documents"].split("|")
                    if doc_name  # "".split("|") gives [""], which is not a document
                ],
            )
            for row in csv.DictReader(cases_file)
        ]

    # Case documents carry the upload-user prefix, so the files on disk must be
    # prefixed the same way before the two sets can be compared.
    available = {f"{DOCUMENT_UPLOAD_USER}/{path.name}" for path in DOCUMENTS_DIR.iterdir()}
    referenced = {document for case in all_cases for document in case.documents}
    missing_documents = sorted(referenced - available)
    if missing_documents:
        logger.warning("Missing %d documents - %s", len(missing_documents), ",".join(missing_documents))
    return all_cases



def pytest_generate_tests(metafunc):
    """Pytest hook: parametrize ``test_case`` with the cases from ``test_cases()``.

    Only tests that request ``test_case`` are parametrized; each generated test
    is labelled with its case's ``id``.
    """
    if "test_case" not in metafunc.fixturenames:
        return

    def case_id(case):
        return case.id

    metafunc.parametrize("test_case", test_cases(), ids=case_id)
102 changes: 102 additions & 0 deletions ai-tests/tests/test_ai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@

import csv
from pathlib import Path
import io
import sys
from typing import Generator
from uuid import uuid4

from langfuse.callback import CallbackHandler
import pytest

from redbox.models.settings import Settings, get_settings
from redbox.models.chain import RedboxQuery, RedboxState, AISettings
from redbox.app import Redbox
from redbox.loader.ingester import ingest_file

from .cases import AITestCase
from .conftest import DOCUMENT_UPLOAD_USER



def file_to_s3(file_path: Path, s3_client, env: Settings) -> str:
    """Upload a local file to the bucket named by ``env.bucket_name``.

    The object key is ``DOCUMENT_UPLOAD_USER/<file name>`` and the object is
    tagged with ``file_type=<file suffix>``.

    Returns:
        The key the file was stored under.
    """
    key = f"{DOCUMENT_UPLOAD_USER}/{file_path.name}"
    tagging = f"file_type={file_path.suffix}"
    payload = file_path.read_bytes()

    s3_client.put_object(Bucket=env.bucket_name, Body=payload, Key=key, Tagging=tagging)
    return key



def get_state(user_uuid, prompts, documents):
    """Build the ``RedboxState`` for one scenario.

    The last prompt becomes the question (prefixed with ``@gadget ``) and all
    earlier prompts are passed as the chat history. ``documents`` is used both
    as the selected and as the permitted S3 keys.
    """
    question = f"@gadget {prompts[-1]}"
    # NOTE(review): the history is passed as plain prompt strings - confirm
    # RedboxQuery.chat_history accepts that shape.
    earlier_prompts = prompts[:-1]

    request = RedboxQuery(
        question=question,
        s3_keys=documents,
        user_uuid=user_uuid,
        chat_history=earlier_prompts,
        ai_settings=AISettings(),
        permitted_s3_keys=documents,
    )
    return RedboxState(request=request)

def run_app(app, state) -> RedboxState:
    """Invoke ``app.graph`` on ``state`` with a fresh ``CallbackHandler`` attached.

    Returns:
        Whatever ``app.graph.invoke`` returns for the given state.
    """
    callbacks = [CallbackHandler()]
    run_config = {"callbacks": callbacks}
    return app.graph.invoke(state, config=run_config)

@pytest.fixture
def settings():
    """Fixture exposing the object returned by ``get_settings()``."""
    loaded_settings = get_settings()
    return loaded_settings

@pytest.fixture
def all_loaded_doc_uris(settings: Settings):
    """Return the URIs of documents that already have chunks in Elasticsearch.

    Queries the ``<elastic_root_index>-chunk-current`` index for chunks whose
    ``metadata.chunk_resolution`` is ``largest`` and collects the distinct
    ``metadata.uri`` values.

    Returns:
        A set of document URIs found in the index.
    """
    es = settings.elasticsearch_client()
    response = es.search(
        index=f"{settings.elastic_root_index}-chunk-current",
        query={"term": {"metadata.chunk_resolution": "largest"}},
        # A search returns only 10 hits unless a size is given, which would make
        # most loaded documents look missing. 10,000 is Elasticsearch's default
        # upper bound for a single page; an index with more matching chunks
        # would need paging to be listed completely.
        size=10_000,
    )
    hits = response.get("hits", {}).get("hits", [])
    return {hit["_source"]["metadata"]["uri"] for hit in hits}

@pytest.fixture
def loaded_docs(all_loaded_doc_uris: set[str], settings: Settings):
    """Make sure every file in ``data/documents`` is uploaded and ingested.

    Each file whose URI (``DOCUMENT_UPLOAD_USER/<file name>``) is not in
    ``all_loaded_doc_uris`` is uploaded with ``file_to_s3`` and then passed to
    ``ingest_file``.

    Returns:
        The URIs loaded once the fixture has run: the ones that were already
        present plus the ones ingested here.
    """
    # Work on a copy so the set handed out by the other fixture is not mutated.
    loaded_uris = set(all_loaded_doc_uris)
    for doc in sorted(Path("data/documents").iterdir()):
        # Sub-directories cannot be opened as files for upload; skip them.
        if not doc.is_file():
            continue
        uri = f"{DOCUMENT_UPLOAD_USER}/{doc.name}"
        if uri not in loaded_uris:
            print(f"Loading missing document: {uri}")
            file_to_s3(doc, settings.s3_client(), settings)
            ingest_file(uri)
            # Record the new document so the returned set is not stale.
            loaded_uris.add(uri)
    return loaded_uris



def test_usecases(test_case: AITestCase, loaded_docs: set[str], output_dir: Path = Path("data/output")):
    """Run one scenario through Redbox and save what it prints to a log file.

    Standard output produced while the scenario runs is written to
    ``output_dir / test_case.id``. ``loaded_docs`` is requested only so that the
    documents are ingested before the run; its value is not used.

    Args:
        test_case: The scenario to run (supplied by parametrization).
        loaded_docs: URIs of the ingested documents (unused, see above).
        output_dir: Directory the log file is written to; created if missing.

    Raises:
        Exception: Any error raised while building the state or running the
            app is written to the log file and then re-raised, so that a
            broken scenario fails the test instead of passing silently.
    """
    # Local import: keeps this function self-contained with respect to the
    # module's import list.
    from contextlib import redirect_stdout

    env = get_settings()
    app = Redbox(debug=True, env=env)

    save_path = output_dir / test_case.id
    save_path.parent.mkdir(parents=True, exist_ok=True)

    # redirect_stdout restores sys.stdout on exit, even on error, so later
    # output never lands on a closed file.
    with open(save_path, "w", encoding="utf-8") as file, redirect_stdout(file):
        try:
            # call agent
            redbox_state = get_state(user_uuid=uuid4(), prompts=test_case.prompts, documents=test_case.documents)
            run_app(app, redbox_state)
        except Exception as e:
            # Still inside the redirect, so the message ends up in the log file.
            print(f"Error in {e}")
            raise

2 changes: 1 addition & 1 deletion django_app/.vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.pytestPath": "venv/bin/python -m pytest",
"python.testing.pytestPath": ".venv/bin/python -m pytest",
}
Loading

0 comments on commit ddd8142

Please sign in to comment.