Merge branch 'main' into lg/3354

vercel · Oct 24, 2024 · 57a7170 · 57a7170
2 parents 7e3af22 + 3b1b69a
commit 57a7170
Show file tree

Hide file tree

Showing 45 changed files with 1,722 additions and 648 deletions.
diff --git a/.changeset/early-ravens-bathe.md b/.changeset/early-ravens-bathe.md
@@ -0,0 +1,5 @@
+---
+'@ai-sdk/anthropic': patch
+---
+
+feat (provider/anthropic): add computer use tools
diff --git a/.changeset/light-moles-give.md b/.changeset/light-moles-give.md
@@ -0,0 +1,14 @@
+---
+'@ai-sdk/amazon-bedrock': patch
+'@ai-sdk/google-vertex': patch
+'@ai-sdk/anthropic': patch
+'@ai-sdk/provider': patch
+'@ai-sdk/mistral': patch
+'@ai-sdk/cohere': patch
+'@ai-sdk/google': patch
+'@ai-sdk/openai': patch
+'@ai-sdk/groq': patch
+'ai': patch
+---
+
+feat: provider-defined tools
diff --git a/content/docs/02-foundations/02-providers-and-models.mdx b/content/docs/02-foundations/02-providers-and-models.mdx
@@ -62,6 +62,7 @@ Here are the capabilities of popular models:
 | [OpenAI](/providers/ai-sdk-providers/openai)                             | `gpt-4`                      | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
 | [OpenAI](/providers/ai-sdk-providers/openai)                             | `o1-preview`                 | <Cross size={18} /> | <Cross size={18} /> | <Cross size={18} /> | <Cross size={18} /> |
 | [OpenAI](/providers/ai-sdk-providers/openai)                             | `o1-mini`                    | <Cross size={18} /> | <Cross size={18} /> | <Cross size={18} /> | <Cross size={18} /> |
+| [Anthropic](/providers/ai-sdk-providers/anthropic)                       | `claude-3-5-sonnet-20241022` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
 | [Anthropic](/providers/ai-sdk-providers/anthropic)                       | `claude-3-5-sonnet-20240620` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
 | [Mistral](/providers/ai-sdk-providers/mistral)                           | `mistral-large-latest`       | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
 | [Mistral](/providers/ai-sdk-providers/mistral)                           | `mistral-small-latest`       | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |

diff --git a/content/docs/03-ai-sdk-core/20-prompt-engineering.mdx b/content/docs/03-ai-sdk-core/20-prompt-engineering.mdx
@@ -53,6 +53,21 @@ const result = await generateObject({
 
 ## Debugging
 
+### Inspecting Warnings
+
+Not all providers support all AI SDK features.
+Providers either throw exceptions or return warnings when they do not support a feature.
+To check if your prompt, tools, and settings are handled correctly by the provider, you can check the call warnings:
+
+```ts
+const result = await generateText({
+  model: openai('gpt-4o'),
+  prompt: 'Hello, world!',
+});
+
+console.log(result.warnings);
+```
+
 ### HTTP Request Bodies
 
 You can inspect the raw HTTP request bodies for models that expose them, e.g. [OpenAI](/providers/ai-sdk-providers/openai).

diff --git a/content/providers/01-ai-sdk-providers/05-anthropic.mdx b/content/providers/01-ai-sdk-providers/05-anthropic.mdx
@@ -82,8 +82,6 @@ The following optional settings are available for Anthropic models:
 
   You can then use provider metadata to set cache control breakpoints ([example](#example-cache-control))
 
-### Example: Generate Text
-
 You can use Anthropic language models to generate text with the `generateText` function:
 
 ```ts
@@ -99,7 +97,7 @@ const { text } = await generateText({
 Anthropic language models can also be used in the `streamText`, `generateObject`, `streamObject`, and `streamUI` functions
 (see [AI SDK Core](/docs/ai-sdk-core) and [AI SDK RSC](/docs/ai-sdk-rsc)).
 
-### Example: Cache Control
+### Cache Control
 
 You can enable the cache control beta by setting the `cacheControl` option to `true` when creating the model instance.
 
@@ -172,16 +170,100 @@ const result = await generateText({
 });
 ```
 
+### Computer Use
+
+Anthropic provides three built-in tools that can be used to interact with external systems:
+
+1. **Bash Tool**: Allows running bash commands.
+2. **Text Editor Tool**: Provides functionality for viewing and editing text files.
+3. **Computer Tool**: Enables control of keyboard and mouse actions on a computer.
+
+They are available via the `tools` property of the provider instance.
+
+#### Bash Tool
+
+The Bash Tool allows running bash commands. Here's how to create and use it:
+
+```ts
+const bashTool = anthropic.tools.bash_20241022({
+  execute: async ({ command, restart }) => {
+    // Implement your bash command execution logic here
+    // Return the result of the command execution
+  },
+});
+```
+
+Parameters:
+
+- `command` (string): The bash command to run. Required unless the tool is being restarted.
+- `restart` (boolean, optional): Specifying true will restart this tool.
+
+#### Text Editor Tool
+
+The Text Editor Tool provides functionality for viewing and editing text files:
+
+```ts
+const textEditorTool = anthropic.tools.textEditor_20241022({
+  execute: async ({
+    command,
+    path,
+    file_text,
+    insert_line,
+    new_str,
+    old_str,
+    view_range,
+  }) => {
+    // Implement your text editing logic here
+    // Return the result of the text editing operation
+  },
+});
+```
+
+Parameters:
+
+- `command` ('view' | 'create' | 'str_replace' | 'insert' | 'undo_edit'): The command to run.
+- `path` (string): Absolute path to file or directory, e.g. `/repo/file.py` or `/repo`.
+- `file_text` (string, optional): Required for `create` command, with the content of the file to be created.
+- `insert_line` (number, optional): Required for `insert` command. The line number after which to insert the new string.
+- `new_str` (string, optional): New string for `str_replace` or `insert` commands.
+- `old_str` (string, optional): Required for `str_replace` command, containing the string to replace.
+- `view_range` (number[], optional): Used with the `view` command to specify the line range to show.
+
+#### Computer Tool
+
+The Computer Tool enables control of keyboard and mouse actions on a computer:
+
+```ts
+const computerTool = anthropic.tools.computer_20241022({
+  displayWidthPx: 1920,
+  displayHeightPx: 1080,
+  displayNumber: 0, // Optional, for X11 environments
+  execute: async ({ action, coordinate, text }) => {
+    // Implement your computer control logic here
+    // Return the result of the action
+  },
+});
+```
+
+Parameters:
+
+- `action` ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position'): The action to perform.
+- `coordinate` (number[], optional): Required for `mouse_move` and `left_click_drag` actions. Specifies the (x, y) coordinates.
+- `text` (string, optional): Required for `type` and `key` actions.
+
+These tools can be used in conjunction with the `claude-3-5-sonnet-20241022` model to enable more complex interactions and tasks.
+
 ### Model Capabilities
 
 See also [Anthropic Model Comparison](https://docs.anthropic.com/en/docs/about-claude/models#model-comparison).
 
-| Model                        | Image Input         | Object Generation   | Tool Usage          | Tool Streaming      |
+| Model                        | Image Input         | Object Generation   | Tool Usage          | Computer Use        |
 | ---------------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
-| `claude-3-5-sonnet-20240620` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
-| `claude-3-opus-20240229`     | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
-| `claude-3-sonnet-20240229`   | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
-| `claude-3-haiku-20240307`    | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| `claude-3-5-sonnet-20241022` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
+| `claude-3-5-sonnet-20240620` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
+| `claude-3-opus-20240229`     | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
+| `claude-3-sonnet-20240229`   | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
+| `claude-3-haiku-20240307`    | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Cross size={18} /> |
 
 <Note>
   The table above lists popular models. You can also pass any available provider

diff --git a/content/providers/01-ai-sdk-providers/index.mdx b/content/providers/01-ai-sdk-providers/index.mdx
@@ -25,6 +25,7 @@ Not all providers support all AI SDK features. Here's a quick comparison of the
 | [OpenAI](/providers/ai-sdk-providers/openai)                             | `gpt-4`                      | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
 | [OpenAI](/providers/ai-sdk-providers/openai)                             | `o1-preview`                 | <Cross size={18} /> | <Cross size={18} /> | <Cross size={18} /> | <Cross size={18} /> |
 | [OpenAI](/providers/ai-sdk-providers/openai)                             | `o1-mini`                    | <Cross size={18} /> | <Cross size={18} /> | <Cross size={18} /> | <Cross size={18} /> |
+| [Anthropic](/providers/ai-sdk-providers/anthropic)                       | `claude-3-5-sonnet-20241022` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
 | [Anthropic](/providers/ai-sdk-providers/anthropic)                       | `claude-3-5-sonnet-20240620` | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
 | [Mistral](/providers/ai-sdk-providers/mistral)                           | `mistral-large-latest`       | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |
 | [Mistral](/providers/ai-sdk-providers/mistral)                           | `mistral-small-latest`       | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> | <Check size={18} /> |

diff --git a/examples/ai-core/src/generate-text/anthropic-computer-use.ts b/examples/ai-core/src/generate-text/anthropic-computer-use.ts
@@ -0,0 +1,50 @@
+import { anthropic } from '@ai-sdk/anthropic';
+import { generateText } from 'ai';
+import 'dotenv/config';
+
+async function main() {
+  const result = await generateText({
+    model: anthropic('claude-3-5-sonnet-20241022'),
+    tools: {
+      computer: anthropic.tools.computer_20241022({
+        displayWidthPx: 100,
+        displayHeightPx: 100,
+        async execute({ action, coordinate, text }) {
+          return '';
+        },
+      }),
+      bash: anthropic.tools.bash_20241022({
+        async execute({ command }) {
+          return `
+          ❯ ls
+          README.md     build         data          node_modules  package.json  src           tsconfig.json
+`;
+        },
+      }),
+      str_replace_editor: anthropic.tools.textEditor_20241022({
+        async execute({ command, path, old_str, new_str }) {
+          return '';
+        },
+      }),
+    },
+    prompt: 'List the files in my home directory.',
+    maxSteps: 2,
+  });
+
+  for (const toolResult of result.toolResults) {
+    switch (toolResult.toolName) {
+      case 'bash': {
+        toolResult.args.command; // string
+        toolResult.result; // string
+        break;
+      }
+    }
+  }
+
+  console.log(result.text);
+  console.log(result.finishReason);
+  console.log(JSON.stringify(result.toolCalls, null, 2));
+  console.log(JSON.stringify(result.steps, null, 2));
+}
+
+main().catch(console.error);
diff --git a/packages/ai/core/prompt/prepare-tools-and-tool-choice.test.ts b/packages/ai/core/prompt/prepare-tools-and-tool-choice.test.ts
@@ -13,6 +13,18 @@ const mockTools: Record<string, CoreTool> = {
   }),
 };
 
+const mockProviderDefinedTool: CoreTool = {
+  type: 'provider-defined',
+  id: 'provider.tool-id',
+  args: { key: 'value' },
+  parameters: z.object({}),
+};
+
+const mockToolsWithProviderDefined: Record<string, CoreTool> = {
+  ...mockTools,
+  providerTool: mockProviderDefinedTool,
+};
+
 it('should return undefined for both tools and toolChoice when tools is not provided', () => {
   const result = prepareToolsAndToolChoice({
     tools: undefined,
@@ -83,3 +95,18 @@ it('should correctly map tool properties', () => {
     },
   });
 });
+
+it('should handle provider-defined tool type', () => {
+  const result = prepareToolsAndToolChoice({
+    tools: mockToolsWithProviderDefined,
+    toolChoice: undefined,
+    activeTools: undefined,
+  });
+  expect(result.tools).toHaveLength(3);
+  expect(result.tools?.[2]).toEqual({
+    type: 'provider-defined',
+    name: 'providerTool',
+    id: 'provider.tool-id',
+    args: { key: 'value' },
+  });
+});
diff --git a/packages/ai/core/prompt/prepare-tools-and-tool-choice.ts b/packages/ai/core/prompt/prepare-tools-and-tool-choice.ts
@@ -1,5 +1,6 @@
 import {
   LanguageModelV1FunctionTool,
+  LanguageModelV1ProviderDefinedTool,
   LanguageModelV1ToolChoice,
 } from '@ai-sdk/provider';
 import { asSchema } from '@ai-sdk/ui-utils';
@@ -18,7 +19,9 @@ export function prepareToolsAndToolChoice<
   toolChoice: CoreToolChoice<TOOLS> | undefined;
   activeTools: Array<keyof TOOLS> | undefined;
 }): {
-  tools: LanguageModelV1FunctionTool[] | undefined;
+  tools:
+    | Array<LanguageModelV1FunctionTool | LanguageModelV1ProviderDefinedTool>
+    | undefined;
   toolChoice: LanguageModelV1ToolChoice | undefined;
 } {
   if (!isNonEmptyObject(tools)) {
@@ -37,12 +40,30 @@ export function prepareToolsAndToolChoice<
       : Object.entries(tools);
 
   return {
-    tools: filteredTools.map(([name, tool]) => ({
-      type: 'function' as const,
-      name,
-      description: tool.description,
-      parameters: asSchema(tool.parameters).jsonSchema,
-    })),
+    tools: filteredTools.map(([name, tool]) => {
+      const toolType = tool.type;
+      switch (toolType) {
+        case undefined:
+        case 'function':
+          return {
+            type: 'function' as const,
+            name,
+            description: tool.description,
+            parameters: asSchema(tool.parameters).jsonSchema,
+          };
+        case 'provider-defined':
+          return {
+            type: 'provider-defined' as const,
+            name,
+            id: tool.id,
+            args: tool.args,
+          };
+        default: {
+          const exhaustiveCheck: never = toolType;
+          throw new Error(`Unsupported tool type: ${exhaustiveCheck}`);
+        }
+      }
+    }),
     toolChoice:
       toolChoice == null
         ? { type: 'auto' }

diff --git a/packages/ai/core/tool/tool.ts b/packages/ai/core/tool/tool.ts
@@ -16,12 +16,7 @@ This enables the language model to generate the input.
 
 The tool can also contain an optional execute function for the actual execution function of the tool.
  */
-export interface CoreTool<PARAMETERS extends Parameters = any, RESULT = any> {
-  /**
-An optional description of what the tool does. Will be used by the language model to decide whether to use the tool.
-   */
-  description?: string;
-
+export type CoreTool<PARAMETERS extends Parameters = any, RESULT = any> = {
   /**
 The schema of the input that the tool expects. The language model will use this to generate the input.
 It is also used to validate the output of the language model.
@@ -40,7 +35,35 @@ If not provided, the tool will not be executed automatically.
     args: inferParameters<PARAMETERS>,
     options: { abortSignal?: AbortSignal },
   ) => PromiseLike<RESULT>;
-}
+} & (
+  | {
+      /**
+Function tool.
+       */
+      type?: undefined | 'function';
+
+      /**
+An optional description of what the tool does. Will be used by the language model to decide whether to use the tool.
+   */
+      description?: string;
+    }
+  | {
+      /**
+Provider-defined tool.
+       */
+      type: 'provider-defined';
+
+      /**
+The ID of the tool. Should follow the format `<provider-name>.<tool-name>`.
+       */
+      id: `${string}.${string}`;
+
+      /**
+The arguments for configuring the tool. Must match the expected arguments defined by the provider for this tool.
+       */
+      args: Record<string, unknown>;
+    }
+);
 
 /**
 Helper function for inferring the execute args of a tool.