diff --git a/agbenchmark/challenges/verticals/code/1_password_generator/artifacts_out/__init__.py b/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/__init__.py similarity index 100% rename from agbenchmark/challenges/verticals/code/1_password_generator/artifacts_out/__init__.py rename to agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/__init__.py diff --git a/agbenchmark/challenges/verticals/code/d2.1_guided/artifacts_in/sample_code.py b/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/sample_code.py similarity index 100% rename from agbenchmark/challenges/verticals/code/d2.1_guided/artifacts_in/sample_code.py rename to agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/sample_code.py diff --git a/agbenchmark/challenges/verticals/code/d2.1_guided/artifacts_in/test.py b/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/test.py similarity index 100% rename from agbenchmark/challenges/verticals/code/d2.1_guided/artifacts_in/test.py rename to agbenchmark/challenges/deprecated/d2.1_guided/artifacts_in/test.py diff --git a/agbenchmark/challenges/verticals/code/2_file_organizer/artifacts_out/__init__.py b/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/__init__.py similarity index 100% rename from agbenchmark/challenges/verticals/code/2_file_organizer/artifacts_out/__init__.py rename to agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/__init__.py diff --git a/agbenchmark/challenges/verticals/code/d2.1_guided/artifacts_out/sample_code.py b/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/sample_code.py similarity index 100% rename from agbenchmark/challenges/verticals/code/d2.1_guided/artifacts_out/sample_code.py rename to agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/sample_code.py diff --git a/agbenchmark/challenges/verticals/code/d2.1_guided/artifacts_out/test.py b/agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/test.py similarity index 100% rename from agbenchmark/challenges/verticals/code/d2.1_guided/artifacts_out/test.py rename to agbenchmark/challenges/deprecated/d2.1_guided/artifacts_out/test.py diff --git a/agbenchmark/challenges/verticals/code/d2.1_guided/data.json b/agbenchmark/challenges/deprecated/d2.1_guided/data.json similarity index 100% rename from agbenchmark/challenges/verticals/code/d2.1_guided/data.json rename to agbenchmark/challenges/deprecated/d2.1_guided/data.json diff --git a/agbenchmark/challenges/verticals/code/3_url_shortener/artifacts_out/__init__.py b/agbenchmark/challenges/verticals/code/1_three_sum/artifacts_out/__init__.py similarity index 100% rename from agbenchmark/challenges/verticals/code/3_url_shortener/artifacts_out/__init__.py rename to agbenchmark/challenges/verticals/code/1_three_sum/artifacts_out/__init__.py diff --git a/agbenchmark/challenges/verticals/code/d3.1_three_sum/artifacts_out/sample_code.py b/agbenchmark/challenges/verticals/code/1_three_sum/artifacts_out/sample_code.py similarity index 100% rename from agbenchmark/challenges/verticals/code/d3.1_three_sum/artifacts_out/sample_code.py rename to agbenchmark/challenges/verticals/code/1_three_sum/artifacts_out/sample_code.py diff --git a/agbenchmark/challenges/verticals/code/d3.1_three_sum/custom_python/test.py b/agbenchmark/challenges/verticals/code/1_three_sum/custom_python/test.py similarity index 100% rename from agbenchmark/challenges/verticals/code/d3.1_three_sum/custom_python/test.py rename to agbenchmark/challenges/verticals/code/1_three_sum/custom_python/test.py diff --git a/agbenchmark/challenges/verticals/code/d3.1_three_sum/data.json b/agbenchmark/challenges/verticals/code/1_three_sum/data.json similarity index 96% rename from agbenchmark/challenges/verticals/code/d3.1_three_sum/data.json rename to agbenchmark/challenges/verticals/code/1_three_sum/data.json index 7dedf7a4b0e..e3b875738dd 100644 --- a/agbenchmark/challenges/verticals/code/d3.1_three_sum/data.json +++ b/agbenchmark/challenges/verticals/code/1_three_sum/data.json @@ -14,7 +14,7 @@ } }, "info": { - "difficulty": "advanced", + "difficulty": "basic", "description": "Tests ability for the agent to create the three_sum function.", "side_effects": [] } diff --git a/agbenchmark/challenges/verticals/code/4_tic_tac_toe/artifacts_out/__init__.py b/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/__init__.py similarity index 100% rename from agbenchmark/challenges/verticals/code/4_tic_tac_toe/artifacts_out/__init__.py rename to agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/__init__.py diff --git a/agbenchmark/challenges/verticals/code/1_password_generator/artifacts_out/password_generator.py b/agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py similarity index 100% rename from agbenchmark/challenges/verticals/code/1_password_generator/artifacts_out/password_generator.py rename to agbenchmark/challenges/verticals/code/2_password_generator/artifacts_out/password_generator.py diff --git a/agbenchmark/challenges/verticals/code/1_password_generator/custom_python/test.py b/agbenchmark/challenges/verticals/code/2_password_generator/custom_python/test.py similarity index 100% rename from agbenchmark/challenges/verticals/code/1_password_generator/custom_python/test.py rename to agbenchmark/challenges/verticals/code/2_password_generator/custom_python/test.py diff --git a/agbenchmark/challenges/verticals/code/1_password_generator/data.json b/agbenchmark/challenges/verticals/code/2_password_generator/data.json similarity index 100% rename from agbenchmark/challenges/verticals/code/1_password_generator/data.json rename to agbenchmark/challenges/verticals/code/2_password_generator/data.json diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/__init__.py b/agbenchmark/challenges/verticals/code/3_file_organizer/artifacts_out/__init__.py similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/__init__.py rename to agbenchmark/challenges/verticals/code/3_file_organizer/artifacts_out/__init__.py diff --git a/agbenchmark/challenges/verticals/code/2_file_organizer/artifacts_out/organize_files.py b/agbenchmark/challenges/verticals/code/3_file_organizer/artifacts_out/organize_files.py similarity index 100% rename from agbenchmark/challenges/verticals/code/2_file_organizer/artifacts_out/organize_files.py rename to agbenchmark/challenges/verticals/code/3_file_organizer/artifacts_out/organize_files.py diff --git a/agbenchmark/challenges/verticals/code/2_file_organizer/custom_python/test.py b/agbenchmark/challenges/verticals/code/3_file_organizer/custom_python/test.py similarity index 100% rename from agbenchmark/challenges/verticals/code/2_file_organizer/custom_python/test.py rename to agbenchmark/challenges/verticals/code/3_file_organizer/custom_python/test.py diff --git a/agbenchmark/challenges/verticals/code/2_file_organizer/data.json b/agbenchmark/challenges/verticals/code/3_file_organizer/data.json similarity index 100% rename from agbenchmark/challenges/verticals/code/2_file_organizer/data.json rename to agbenchmark/challenges/verticals/code/3_file_organizer/data.json diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_out/__init__.py b/agbenchmark/challenges/verticals/code/4_url_shortener/artifacts_out/__init__.py similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_out/__init__.py rename to agbenchmark/challenges/verticals/code/4_url_shortener/artifacts_out/__init__.py diff --git a/agbenchmark/challenges/verticals/code/3_url_shortener/artifacts_out/test.py b/agbenchmark/challenges/verticals/code/4_url_shortener/artifacts_out/test.py similarity index 100% rename from agbenchmark/challenges/verticals/code/3_url_shortener/artifacts_out/test.py rename to agbenchmark/challenges/verticals/code/4_url_shortener/artifacts_out/test.py diff --git a/agbenchmark/challenges/verticals/code/3_url_shortener/artifacts_out/url_shortener.py b/agbenchmark/challenges/verticals/code/4_url_shortener/artifacts_out/url_shortener.py similarity index 100% rename from agbenchmark/challenges/verticals/code/3_url_shortener/artifacts_out/url_shortener.py rename to agbenchmark/challenges/verticals/code/4_url_shortener/artifacts_out/url_shortener.py diff --git a/agbenchmark/challenges/verticals/code/3_url_shortener/data.json b/agbenchmark/challenges/verticals/code/4_url_shortener/data.json similarity index 100% rename from agbenchmark/challenges/verticals/code/3_url_shortener/data.json rename to agbenchmark/challenges/verticals/code/4_url_shortener/data.json diff --git a/agbenchmark/challenges/verticals/code/d2.1_guided/artifacts_in/__init__.py b/agbenchmark/challenges/verticals/code/5_tic_tac_toe/artifacts_out/__init__.py similarity index 100% rename from agbenchmark/challenges/verticals/code/d2.1_guided/artifacts_in/__init__.py rename to agbenchmark/challenges/verticals/code/5_tic_tac_toe/artifacts_out/__init__.py diff --git a/agbenchmark/challenges/verticals/code/4_tic_tac_toe/artifacts_out/tic_tac_toe.py b/agbenchmark/challenges/verticals/code/5_tic_tac_toe/artifacts_out/tic_tac_toe.py similarity index 100% rename from agbenchmark/challenges/verticals/code/4_tic_tac_toe/artifacts_out/tic_tac_toe.py rename to agbenchmark/challenges/verticals/code/5_tic_tac_toe/artifacts_out/tic_tac_toe.py diff --git a/agbenchmark/challenges/verticals/code/4_tic_tac_toe/custom_python/test.py b/agbenchmark/challenges/verticals/code/5_tic_tac_toe/custom_python/test.py similarity index 100% rename from agbenchmark/challenges/verticals/code/4_tic_tac_toe/custom_python/test.py rename to agbenchmark/challenges/verticals/code/5_tic_tac_toe/custom_python/test.py diff --git a/agbenchmark/challenges/verticals/code/4_tic_tac_toe/data_draft.json b/agbenchmark/challenges/verticals/code/5_tic_tac_toe/data_draft.json similarity index 100% rename from agbenchmark/challenges/verticals/code/4_tic_tac_toe/data_draft.json rename to agbenchmark/challenges/verticals/code/5_tic_tac_toe/data_draft.json diff --git a/agbenchmark/challenges/verticals/code/d2.1_guided/artifacts_out/__init__.py b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/__init__.py similarity index 100% rename from agbenchmark/challenges/verticals/code/d2.1_guided/artifacts_out/__init__.py rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/__init__.py diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/abstract_class.py b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/abstract_class.py similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/abstract_class.py rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/abstract_class.py diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/conftest.py b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/conftest.py similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/conftest.py rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/conftest.py diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/product_requirements.txt b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/product_requirements.txt similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/product_requirements.txt rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/product_requirements.txt diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/test_negative.py b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/test_negative.py similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/test_negative.py rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/test_negative.py diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/test_positive.py b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/test_positive.py similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/test_positive.py rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/test_positive.py diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/user_stories.txt b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/user_stories.txt similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_in/user_stories.txt rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_in/user_stories.txt diff --git a/agbenchmark/challenges/verticals/code/d3.1_three_sum/artifacts_out/__init__.py b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_out/__init__.py similarity index 100% rename from agbenchmark/challenges/verticals/code/d3.1_three_sum/artifacts_out/__init__.py rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_out/__init__.py diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_out/abstract_class.py b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_out/abstract_class.py similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_out/abstract_class.py rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_out/abstract_class.py diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_out/battleship.py b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_out/battleship.py similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_out/battleship.py rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_out/battleship.py diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_out/conftest.py b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_out/conftest.py similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_out/conftest.py rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_out/conftest.py diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_out/test_negative.py b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_out/test_negative.py similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_out/test_negative.py rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_out/test_negative.py diff --git a/agbenchmark/challenges/verticals/code/5_battleship/artifacts_out/test_positive.py b/agbenchmark/challenges/verticals/code/6_battleship/artifacts_out/test_positive.py similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/artifacts_out/test_positive.py rename to agbenchmark/challenges/verticals/code/6_battleship/artifacts_out/test_positive.py diff --git a/agbenchmark/challenges/verticals/code/5_battleship/data_draft.json b/agbenchmark/challenges/verticals/code/6_battleship/data_draft.json similarity index 100% rename from agbenchmark/challenges/verticals/code/5_battleship/data_draft.json rename to agbenchmark/challenges/verticals/code/6_battleship/data_draft.json diff --git a/paper/agent_action_regex.py b/paper/agent_action_regex.py index 6bd55f9d13c..abe4a8fdd2a 100644 --- a/paper/agent_action_regex.py +++ b/paper/agent_action_regex.py @@ -1,5 +1,5 @@ -import re import json +import re def is_action_auto_gpt(log): diff --git a/paper/combined_data.ipynb b/paper/combined_data.ipynb index ca5eddfeb2b..15b3876d4d8 100644 --- a/paper/combined_data.ipynb +++ b/paper/combined_data.ipynb @@ -630,10 +630,10 @@ "evalue": "loads() missing 1 required positional argument: 's'", "output_type": "error", "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[61], line 43\u001b[0m\n\u001b[0;32m 40\u001b[0m total_rows \u001b[39m=\u001b[39m \u001b[39mlen\u001b[39m(group)\n\u001b[0;32m 42\u001b[0m \u001b[39mfor\u001b[39;00m i, (_, row) \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(group\u001b[39m.\u001b[39miterrows()):\n\u001b[1;32m---> 43\u001b[0m response \u001b[39m=\u001b[39m json\u001b[39m.\u001b[39;49mloads()\n\u001b[0;32m 44\u001b[0m response_nested \u001b[39m=\u001b[39m nested_json(row[request_type])\n\u001b[0;32m 46\u001b[0m response_dict[\u001b[39mstr\u001b[39m(total_rows\u001b[39m-\u001b[39mi)] \u001b[39m=\u001b[39m response \u001b[39m# Starting from 1 as you mentioned\u001b[39;00m\n", - "\u001b[1;31mTypeError\u001b[0m: loads() missing 1 required positional argument: 's'" + "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[1;31mTypeError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[1;32mIn[61], line 43\u001B[0m\n\u001B[0;32m 40\u001B[0m total_rows \u001B[39m=\u001B[39m \u001B[39mlen\u001B[39m(group)\n\u001B[0;32m 42\u001B[0m \u001B[39mfor\u001B[39;00m i, (_, row) \u001B[39min\u001B[39;00m \u001B[39menumerate\u001B[39m(group\u001B[39m.\u001B[39miterrows()):\n\u001B[1;32m---> 43\u001B[0m response \u001B[39m=\u001B[39m json\u001B[39m.\u001B[39;49mloads()\n\u001B[0;32m 44\u001B[0m response_nested \u001B[39m=\u001B[39m nested_json(row[request_type])\n\u001B[0;32m 46\u001B[0m response_dict[\u001B[39mstr\u001B[39m(total_rows\u001B[39m-\u001B[39mi)] \u001B[39m=\u001B[39m response \u001B[39m# Starting from 1 as you mentioned\u001B[39;00m\n", + "\u001B[1;31mTypeError\u001B[0m: loads() missing 1 required positional argument: 's'" ] } ],