Using a test structure to execute the scenarios

i-dot-ai · Nov 15, 2024 · ddd8142 · ddd8142
1 parent e8d1b35
commit ddd8142
Show file tree

Hide file tree

Showing 17 changed files with 6,777 additions and 2,082 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,6 +2,10 @@
 .idea/
 redbox-core/models/
 
+# AI Test Data
+
+ai-tests/data/
+
 # data files
 elk/elasticsearch/data
 data/

diff --git a/.vscode/redbox.code-workspace b/.vscode/redbox.code-workspace
@@ -12,6 +12,10 @@
 			"name": "Redbox Core",
 			"path": "../redbox-core"
 		},
+		{
+			"name": "AI Tests",
+			"path": "../ai-tests"
+		},
 		{
 			"name": "Integration Tests",
 			"path": "../tests"

diff --git a/ai-tests/.vscode/settings.json b/ai-tests/.vscode/settings.json
@@ -0,0 +1,17 @@
+{
+    "editor.formatOnSave": true,
+    "[python]": {
+        "editor.codeActionsOnSave": {
+            "source.organizeImports": "explicit"
+        },
+        "editor.defaultFormatter": "ms-python.black-formatter"
+    },
+    "python.analysis.autoImportCompletions": true,
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true,
+    "python.testing.pytestArgs": [
+        ".",
+        "-v",
+    ],
+    "python.testing.pytestPath": "venv/bin/python -m pytest"
+}
diff --git a/ai-tests/README.md b/ai-tests/README.md
@@ -0,0 +1,12 @@
+# AI Tests
+
+This workspace runs Redbox over a set of prompts and documents to allow investigating decision making and responses.
+
+The setup is:
+
+* Create a CSV file at data/cases.csv with the columns ID, Prompts and Documents
+* Drop all your test documents in data/documents
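+* Fill out the CSV with one row per test case: an ID (used as the output file name, so it must be a valid file name), the prompts and the document names. Multiple prompts or document names in a cell are separated by |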
+* Run the test to produce traces in Langfuse and logs in data/output
+
+
diff --git a/ai-tests/poetry.lock b/ai-tests/poetry.lock
diff --git a/ai-tests/pyproject.toml b/ai-tests/pyproject.toml
@@ -0,0 +1,42 @@
+[tool.ruff]
+line-length = 120
+target-version = 'py312'
+
+[tool.poetry]
+name = "redbox-ai-tests"
+version = "0.1.0"
+description = "AI Tests for Redbox"
+authors = ["i.AI <[email protected]>"]
+license = "MIT"
+readme = "../README.md"
+
+[tool.poetry.dependencies]
+redbox = {path="../redbox-core", develop=true}
+python = ">=3.12,<3.13"
+
+
+[tool.poetry.group.dev.dependencies]
+pytest = "^8.3.2"
+moto = "^5.0.12"
+pytest-cov = "^5.0.0"
+pytest-asyncio = "^0.23.6"
+jsonlines = "^4.0.0"
+deepeval = "^1.0.3"
+pytest-mock = "^3.14.0"
+boto3-stubs = {extras = ["essential"], version = "^1.35.28"}
+requests-mock = "^1.12.1"
+langfuse = "^2.53.9"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.pytest.ini_options]
+addopts = "--import-mode=importlib"
+# NOTE(review): env_override_existing_values and env_files look like pytest-dotenv options, but
+# pytest-dotenv is not listed in the dev dependency group of this file - confirm the plugin is
+# installed (e.g. transitively), otherwise these two settings will not take effect.
+env_override_existing_values = 1
+env_files = [
+    "tests/.env.test",
+    ".env"
+]
+markers = [
+]
diff --git a/ai-tests/tests/__init__.py b/ai-tests/tests/__init__.py
diff --git a/ai-tests/tests/cases.py b/ai-tests/tests/cases.py
@@ -0,0 +1,9 @@
+
+
+from pydantic import BaseModel
+
+
+class AITestCase(BaseModel):
+    """One scenario to run through Redbox: an identifier, its prompts and its documents."""
+
+    # Identifier of the case. It is used as the output file name, so it has to be valid in a file path.
+    id: str
+    # Prompts of the scenario, in order.
+    prompts: list[str]
+    # Names of the documents the scenario runs against.
+    documents: list[str]
diff --git a/ai-tests/tests/conftest.py b/ai-tests/tests/conftest.py
@@ -0,0 +1,36 @@
+
+from logging import getLogger
+from typing_extensions import Generator
+from pathlib import Path
+import pytest
+import csv
+
+from .cases import AITestCase
+
+TEST_CASES_FILE=Path("data/cases.csv")
+DOCUMENTS_DIR=Path("data/documents")
+DOCUMENT_UPLOAD_USER = "ai_tests"
+
+logger = getLogger()
+
+def test_cases() -> Generator[None, None, AITestCase]:
+    with open(f"{TEST_CASES_FILE}") as cases_file:
+        reader = csv.DictReader(cases_file)
+        all_cases = [
+            AITestCase(
+                id=row["ID"],
+                prompts=row["Prompts"].split("|"),
+                documents=[f"{DOCUMENT_UPLOAD_USER}/{doc_name}" for doc_name in row["Documents"].split("|")]
+            )
+            for row in reader
+        ]
+    missing_documents = set(d for case in all_cases for d in case.documents) - set(d.name for d in DOCUMENTS_DIR.iterdir())
+    if len(missing_documents) > 0:
+        logger.warning(f"Missing {len(missing_documents)} documents - {",".join(missing_documents)}")
+    return all_cases
+
+
+
+def pytest_generate_tests(metafunc):
+    """Parametrize every test that requests ``test_case`` with one run per loaded case."""
+    if "test_case" not in metafunc.fixturenames:
+        return
+    # The case id doubles as the pytest id of the generated test.
+    metafunc.parametrize("test_case", test_cases(), ids=lambda case: case.id)
diff --git a/ai-tests/tests/test_ai.py b/ai-tests/tests/test_ai.py
@@ -0,0 +1,102 @@
+
+import csv
+from pathlib import Path
+import io
+import sys
+from typing import Generator
+from uuid import uuid4
+
+from langfuse.callback import CallbackHandler
+import pytest
+
+from redbox.models.settings import Settings, get_settings
+from redbox.models.chain import RedboxQuery, RedboxState, AISettings
+from redbox.app import Redbox
+from redbox.loader.ingester import ingest_file
+
+from .cases import AITestCase
+from .conftest import DOCUMENT_UPLOAD_USER
+
+
+
+def file_to_s3(file_path: Path, s3_client, env: Settings) -> str:
+    """Upload ``file_path`` to the bucket named in ``env`` and return the object key.
+
+    The key is the file name prefixed with ``DOCUMENT_UPLOAD_USER``; the file suffix is attached
+    to the object as its ``file_type`` tag.
+    """
+    key = f"{DOCUMENT_UPLOAD_USER}/{file_path.name}"
+    s3_client.put_object(
+        Bucket=env.bucket_name,
+        Body=file_path.read_bytes(),
+        Key=key,
+        Tagging=f"file_type={file_path.suffix}",
+    )
+    return key
+
+
+
+def get_state(user_uuid, prompts, documents):
+    """Build the initial ``RedboxState`` for one scenario.
+
+    The last prompt becomes the question (prefixed with ``@gadget``) and the prompts before it
+    are passed as the chat history. ``documents`` is used both as the selected and as the
+    permitted S3 keys.
+    """
+    question = f"@gadget {prompts[-1]}"
+    earlier_prompts = prompts[:-1]
+    request = RedboxQuery(
+        question=question,
+        s3_keys=documents,
+        user_uuid=user_uuid,
+        # NOTE(review): earlier prompts are passed as plain strings - confirm RedboxQuery accepts
+        # that shape for chat_history.
+        chat_history=earlier_prompts,
+        ai_settings=AISettings(),
+        permitted_s3_keys=documents,
+    )
+    return RedboxState(request=request)
+
+def run_app(app, state) -> RedboxState:
+    """Invoke the app's graph on ``state`` with a fresh Langfuse callback handler attached."""
+    run_config = {"callbacks": [CallbackHandler()]}
+    return app.graph.invoke(state, config=run_config)
+
+@pytest.fixture
+def settings():
+    """Provide the object returned by ``get_settings`` to tests and other fixtures."""
+    env = get_settings()
+    return env
+
+@pytest.fixture
+def all_loaded_doc_uris(settings: Settings):
+    """Return the set of document URIs found in the ``<root index>-chunk-current`` index.
+
+    Only chunks whose ``metadata.chunk_resolution`` is ``largest`` are queried; the URI is read
+    from each hit's ``metadata.uri``.
+    """
+    es = settings.elasticsearch_client()
+    response = es.search(
+        index=f"{settings.elastic_root_index}-chunk-current",
+        query={"term": {"metadata.chunk_resolution": "largest"}},
+        # Without an explicit size Elasticsearch returns only the first 10 hits, which makes most
+        # loaded documents look missing. 10,000 is the default index.max_result_window limit.
+        # NOTE(review): if the index can hold more matching chunks than that, page with
+        # search_after or the scan helper instead.
+        size=10_000,
+    )
+    hits = response.get("hits", {}).get("hits", [])
+    return {hit["_source"]["metadata"]["uri"] for hit in hits}
+
+@pytest.fixture
+def loaded_docs(all_loaded_doc_uris: set[str], settings: Settings):
+    """Upload and ingest every file in ``data/documents`` that is not indexed yet.
+
+    Returns:
+        The URIs that were already indexed plus the URIs this fixture uploaded and passed to
+        ``ingest_file`` during this call.
+    """
+    # Work on a copy so the set handed in by the other fixture is left untouched.
+    loaded = set(all_loaded_doc_uris)
+    s3_client = None
+    for doc in Path("data/documents").iterdir():
+        uri = f"{DOCUMENT_UPLOAD_USER}/{doc.name}"
+        if uri in loaded:
+            continue
+        print(f"Loading missing document: {uri}")
+        # Create the client only when something has to be uploaded, and reuse it for every file.
+        if s3_client is None:
+            s3_client = settings.s3_client()
+        file_to_s3(doc, s3_client, settings)
+        ingest_file(uri)
+        # Record the new document so the returned set reflects what this fixture just loaded.
+        loaded.add(uri)
+    return loaded
+
+
+
+def test_usecases(test_case: AITestCase, loaded_docs: set[str], output_dir: Path = Path("data/output")):
+    """Run one scenario through Redbox and capture everything it prints in ``output_dir/<case id>``.
+
+    Args:
+        test_case: the scenario to run (supplied by ``pytest_generate_tests``).
+        loaded_docs: requested only so the documents are uploaded and ingested before the run.
+        output_dir: directory the per-case log file is written to; created if missing.
+
+    Raises:
+        Exception: any error raised while building the state or running the app is written to
+            the log file and then re-raised, so a failing scenario fails the test.
+    """
+    env = get_settings()
+    app = Redbox(debug=True, env=env)
+
+    # The log cannot be opened if the directory is missing (e.g. on a fresh checkout).
+    output_dir.mkdir(parents=True, exist_ok=True)
+    save_path = output_dir / test_case.id
+
+    original_stdout = sys.stdout
+    with open(save_path, "w") as file:
+        # Redirect prints into the per-case log file for the duration of the run.
+        sys.stdout = file
+        try:
+            redbox_state = get_state(user_uuid=uuid4(), prompts=test_case.prompts, documents=test_case.documents)
+            run_app(app, redbox_state)
+        except Exception as e:
+            # Still redirected here, so the error ends up in the case's log before the test fails.
+            print(f"Error in {e}")
+            raise
+        finally:
+            # Always restore stdout before the file closes; otherwise later prints hit a closed file.
+            sys.stdout = original_stdout
+
diff --git a/django_app/.vscode/settings.json b/django_app/.vscode/settings.json
@@ -4,5 +4,5 @@
     ],
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true,
-    "python.testing.pytestPath": "venv/bin/python -m pytest",
+    "python.testing.pytestPath": ".venv/bin/python -m pytest",
 }