diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..e95af47 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,16 @@ +{ + "dockerComposeFile": "../docker-compose.yaml", + "service": "chromegpt", + "workspaceFolder": "/app", + "customizations": { + "vscode": { + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance", + "ms-toolsai.jupyter" + ] + } + }, + "forwardPorts": [3000,4444,7900,5900], + "shutdownAction": "stopCompose" +} diff --git a/.env b/.env new file mode 100644 index 0000000..2eca185 --- /dev/null +++ b/.env @@ -0,0 +1,3 @@ +OPENAI_API_KEY=sk- +REQUEST="Find me a bar that can host a 20 person event near Chelsea, Manhattan evening of Apr 30th. Fill out contact us form if they have one with info: Name Richard, email he@hrichard.com." +TARGET=base \ No newline at end of file diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index c91fdb9..76cfd71 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,13 +1,12 @@ name: lint - +env: + TARGET: 'test' + MAKE: 'lint' on: push: branches: [main] pull_request: -env: - POETRY_VERSION: "1.4.2" - jobs: build: runs-on: ubuntu-latest @@ -16,18 +15,14 @@ jobs: python-version: - "3.10" steps: - - uses: actions/checkout@v3 - - name: Install poetry - run: | - pipx install poetry==$POETRY_VERSION - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - cache: poetry - - name: Install dependencies - run: | - poetry install - - name: Analysing the code with our lint + - name: Checkout + uses: actions/checkout@v3 + - name: Free disk space run: | - make lint + df --human-readable + sudo apt clean + docker 2>/dev/null 1>&2 rmi $(docker image ls --all --quiet) || true + rm --recursive --force "$AGENT_TOOLSDIRECTORY" + df --human-readable + - name: Test with pytest + run: docker-compose up --abort-on-container-exit diff 
--git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cc077d5..fcac1a2 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,13 +1,12 @@ name: tests - +env: + TARGET: 'test' + MAKE: 'tests' on: push: branches: [main] pull_request: -env: - POETRY_VERSION: "1.4.2" - jobs: build: runs-on: ubuntu-latest @@ -16,18 +15,14 @@ jobs: python-version: - "3.10" steps: - - uses: actions/checkout@v3 - - name: Install poetry - run: | - pipx install poetry==$POETRY_VERSION - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - cache: poetry - - name: Install dependencies - run: | - poetry install - - name: Run all tests + - name: Checkout + uses: actions/checkout@v3 + - name: Free disk space run: | - make tests + df --human-readable + sudo apt clean + docker 2>/dev/null 1>&2 rmi $(docker image ls --all --quiet) || true + rm --recursive --force "$AGENT_TOOLSDIRECTORY" + df --human-readable + - name: Test with pytest + run: docker-compose up --abort-on-container-exit diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..3e99ede --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..2dbaed5 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +FROM python:3.8 as base + +WORKDIR /app + +# setup code +COPY . . 
+RUN pip install poetry==1.4.2 + +# Install dependencies using Poetry +RUN poetry config virtualenvs.create false && \ + poetry install --no-interaction --no-ansi + +CMD python -m chromegpt -v -t "${REQUEST}" + +# image to dev +FROM base as dev +CMD sh -c "while sleep 1000; do :; done" + +# image to run tests +FROM base as test +ARG MAKE="tests" +CMD make $MAKE diff --git a/README.md b/README.md index 54dd9bc..ccde9a2 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [![lint](https://github.com/richardyc/chrome-gpt/actions/workflows/lint.yml/badge.svg)](https://github.com/richardyc/chrome-gpt/actions/workflows/lint.yml) [![test](https://github.com/richardyc/chrome-gpt/actions/workflows/tests.yml/badge.svg)](https://github.com/richardyc/chrome-gpt/actions/workflows/tests.yml) [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/RealRichomie.svg?style=social&label=Follow%20%40RealRichomie)](https://twitter.com/RealRichomie) + ⚠️This is an experimental AutoGPT agent that might take incorrect actions and could lead to serious consequences. Please use it at your own discretion⚠️ Chrome-GPT is an AutoGPT experiment that utilizes [Langchain](https://github.com/hwchase17/langchain) and [Selenium](https://github.com/SeleniumHQ/selenium) to enable an AutoGPT agent take control of an entire Chrome session. With the ability to interactively scroll, click, and input text on web pages, the AutoGPT agent can navigate and manipulate web content. @@ -41,6 +42,11 @@ Demo made by [Richard He](https://twitter.com/RealRichomie) 3. Open a poetry shell `poetry shell` 4. Run chromegpt via `python -m chromegpt` + +You can start in your own codespace here: + +[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://github.com/erlichsefi/Chrome-GPT/codespaces) +

🧠 Usage

- GPT-3.5 Usage (Default): `python -m chromegpt -v -t "{your request}"` @@ -63,6 +69,10 @@ Options: --help Show this message and exit. ``` +Or just update `.env` and run: + +`source .env && docker-compose up` +

⭐ Star History

[![Star History Chart](https://api.star-history.com/svg?repos=richardyc/Chrome-GPT&type=Date)](https://star-history.com/#richardyc/Chrome-GPT&Date) diff --git a/chromegpt/tools/driver.py b/chromegpt/tools/driver.py new file mode 100644 index 0000000..f65d539 --- /dev/null +++ b/chromegpt/tools/driver.py @@ -0,0 +1,15 @@ +from typing import Any, Callable + +from chromegpt.tools.selenium import SeleniumWrapper + + +def execute_with_driver(test_function: Callable[[SeleniumWrapper], None]) -> Callable: + def wrapper(*args: Any, **kwargs: Any) -> None: + try: + client = SeleniumWrapper(headless=True) + test_function(client, *args, **kwargs) + finally: + # release the driver + del client + + return wrapper diff --git a/chromegpt/tools/selenium.py b/chromegpt/tools/selenium.py index 2755b40..cf908f4 100644 --- a/chromegpt/tools/selenium.py +++ b/chromegpt/tools/selenium.py @@ -37,19 +37,27 @@ class SeleniumWrapper: selenium = SeleniumWrapper() """ - def __init__(self, headless: bool = False) -> None: + def __init__(self, headless: bool = False, docker: bool = True) -> None: """Initialize Selenium and start interactive session.""" chrome_options = Options() if headless: chrome_options.add_argument("--headless") else: chrome_options.add_argument("--start-maximized") - self.driver = webdriver.Chrome(options=chrome_options) + if docker: + self.driver = webdriver.Remote( + "http://selenium-chrome:4444/wd/hub", + options=chrome_options, + ) + else: + self.driver = webdriver.Chrome(options=chrome_options) self.driver.implicitly_wait(5) # Wait 5 seconds for elements to load def __del__(self) -> None: """Close Selenium session.""" - self.driver.close() + if hasattr(self, "driver") and self.driver is not None: + self.driver.close() + self.driver.quit() def previous_webpage(self) -> str: """Go back in browser history.""" @@ -305,7 +313,7 @@ def fill_out_form(self, form_input: Optional[str] = None, **kwargs: Any) -> str: " website did not change after filling out form." 
) except WebDriverException as e: - print(e) + # print(e) return f"Error filling out form with input {form_input}, message: {e.msg}" def scroll(self, direction: str) -> str: diff --git a/chromegpt/tools/utils.py b/chromegpt/tools/utils.py index d42c3e9..3285058 100644 --- a/chromegpt/tools/utils.py +++ b/chromegpt/tools/utils.py @@ -1,10 +1,11 @@ """Utils for chromegpt tools.""" import re -from typing import List, Optional +from typing import List, Optional, Union from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.common.by import By +from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver from selenium.webdriver.remote.webelement import WebElement from unidecode import unidecode @@ -14,7 +15,7 @@ def is_complete_sentence(text: str) -> bool: return re.search(r"[.!?]\s*$", text) is not None -def get_all_text_elements(driver: WebDriver) -> List[str]: +def get_all_text_elements(driver: Union[WebDriver, RemoteWebDriver]) -> List[str]: xpath = ( "//*[not(self::script or self::style or" " self::noscript)][string-length(normalize-space(text())) > 0]" @@ -30,7 +31,7 @@ def get_all_text_elements(driver: WebDriver) -> List[str]: return texts -def find_interactable_elements(driver: WebDriver) -> List[str]: +def find_interactable_elements(driver: Union[WebDriver, RemoteWebDriver]) -> List[str]: """Find all interactable elements on the page.""" # Extract interactable components (buttons and links) buttons = driver.find_elements(By.XPATH, "//button") @@ -62,7 +63,9 @@ def prettify_text(text: str, limit: Optional[int] = None) -> str: return text -def element_completely_viewable(driver: WebDriver, elem: WebElement) -> bool: +def element_completely_viewable( + driver: Union[WebDriver, RemoteWebDriver], elem: WebElement +) -> bool: """Check if an element is completely viewable in the browser window.""" elem_left_bound = elem.location.get("x") elem_top_bound = elem.location.get("y") diff --git a/docker-compose.yaml 
b/docker-compose.yaml new file mode 100644 index 0000000..d640779 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,35 @@ +version: '3' +services: + chromegpt: + build: + context: . + dockerfile: Dockerfile + target: ${TARGET:-dev} # Default value is "dev" + args: + - MAKE=${MAKE} + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY} + - REQUEST=${REQUEST} + depends_on: + # make sure this service is started only after Selenium is healthy; + # otherwise, tests are run before Selenium is accessible + selenium-chrome: + condition: service_healthy + + + selenium-chrome: + image: selenium/standalone-chrome:latest + ports: + - "4444:4444" + - "7900:7900" + - "5900:5900" + environment: + - SE_NODE_MAX_SESSIONS=10 + - SE_NODE_SESSION_TIMEOUT=30000 + shm_size: "2g" + # mark Selenium as 'healthy' only after it is accessible. + healthcheck: + test: ["CMD", "curl", "-f", "http://selenium-chrome:4444/wd/hub/status"] + interval: 10s # Check every 10 seconds + timeout: 5s # Timeout after 5 seconds + retries: 3 # Retry 3 times before considering the container unhealthy diff --git a/tests/test_selenium.py b/tests/test_selenium.py index 49a91dc..b3d61fb 100644 --- a/tests/test_selenium.py +++ b/tests/test_selenium.py @@ -1,47 +1,51 @@ """Integration test for Selenium API Wrapper.""" -import pytest - -from chromegpt.tools.selenium import SeleniumWrapper - - -@pytest.fixture -def client() -> SeleniumWrapper: - return SeleniumWrapper(headless=True) +from chromegpt.tools.driver import SeleniumWrapper, execute_with_driver +@execute_with_driver def test_describe_website(client: SeleniumWrapper) -> None: """Test that SeleniumWrapper returns correct website""" - + output = None output = client.describe_website("https://example.com") - assert "this domain is for use in illu" in output + assert output is not None and "this domain is for use in illu" in output +@execute_with_driver def test_click(client: SeleniumWrapper) -> None: """Test that SeleniumWrapper click works""" + output = 
None client.describe_website("https://example.com") output = client.click_button_by_text('link with title "More information..."') - assert "Clicked interactable element and the website changed" in output + assert ( + output is not None + and "Clicked interactable element and the website changed" in output + ) +@execute_with_driver def test_google_input(client: SeleniumWrapper) -> None: """Test that SeleniumWrapper can find input form""" - + output = None output = client.find_form_inputs("https://google.com") - assert "q" in output + assert output is not None and "q" in output +@execute_with_driver def test_google_fill(client: SeleniumWrapper) -> None: """Test that SeleniumWrapper can fill input form""" - + output = None client.find_form_inputs("https://google.com") output = client.fill_out_form(q="hello world") - assert "website changed after filling out form" in output + + assert output is not None and "website changed after filling out form" in output +@execute_with_driver def test_google_search(client: SeleniumWrapper) -> None: """Test google search functionality""" - res = client.google_search("hello world") - assert "hello" in res - assert "Which url would you like to goto" in res + output = None + output = client.google_search("hello world") + assert output is not None and "hello" in output + assert output is not None and "Which url would you like to goto" in output