From cf1326cd77bfa2271155853c3e4bf421a1056397 Mon Sep 17 00:00:00 2001 From: Gregor Adams <1148334+pixelass@users.noreply.github.com> Date: Thu, 6 Jun 2024 18:31:35 +0200 Subject: [PATCH] feat: add tools to assistant (#329) ## Motivation - adds ComfyUI tool to allow assistants to generate images. - images are stored and multiple images can be generated in one call (e.g. `prompt: "Write a story with two images"` - adds RAG as tool and removes previous RAG implementation ## Issues closed closes #328 --- package-lock.json | 12 +- package.json | 3 +- resources/actions/assistant/readme.md | 26 +- resources/actions/comfy-ui/readme.md | 23 +- resources/actions/dashboard/readme.md | 21 +- resources/actions/explorer/readme.md | 21 +- resources/actions/live-painting/readme.md | 21 +- resources/actions/marketplace/readme.md | 21 +- resources/actions/settings/readme.md | 21 +- resources/actions/story/readme.md | 21 +- resources/actions/text-to-image/readme.md | 25 +- .../apps/assistant/components/chat-list.tsx | 233 ++--- .../apps/assistant/components/messages.tsx | 43 +- .../hooks/{assistant.tsx => assistant.ts} | 39 +- src/client/apps/assistant/index.tsx | 24 +- src/client/apps/shared/markdown.tsx | 24 +- src/client/public/locales/de/labels.json | 4 + src/client/public/locales/en/labels.json | 4 + src/client/public/locales/es/labels.json | 4 + src/client/public/locales/fr/labels.json | 4 + src/client/public/locales/he/labels.json | 4 + src/client/public/locales/it/labels.json | 4 + src/client/public/locales/ja/labels.json | 4 + src/client/public/locales/nl/labels.json | 4 + src/client/public/locales/pl/labels.json | 4 + src/client/public/locales/pt/labels.json | 4 + src/client/public/locales/ru/labels.json | 4 + src/client/public/locales/zh/labels.json | 4 + src/electron/helpers/ipc/global.ts | 110 +-- src/electron/helpers/ipc/sdk/assistant.ts | 855 ++++++++++++++---- src/electron/helpers/png.ts | 105 +++ src/electron/helpers/services/comfyui.ts | 6 +- 32 files changed, 1046 insertions(+), 656 deletions(-) rename src/client/apps/assistant/hooks/{assistant.tsx => assistant.ts} (85%) create mode 100644 src/electron/helpers/png.ts diff --git a/package-lock.json b/package-lock.json index 5bc92a938..9097decf5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "captain", - "version": "1.0.0-alpha.98", + "version": "1.0.0-alpha.99", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "captain", - "version": "1.0.0-alpha.98", + "version": "1.0.0-alpha.99", "hasInstallScript": true, "license": "AGPL 3.0", "dependencies": { @@ -166,7 +166,8 @@ "uuid": "^9.0.1", "webpack": "^5.91.0", "ws": "^8.17.0", - "yaml": "^2.4.2" + "yaml": "^2.4.2", + "zod": "^3.23.8" }, "optionalDependencies": { "@swc/core-darwin-arm64": "^1.5.3", @@ -27803,9 +27804,10 @@ } }, "node_modules/zod": { - "version": "3.22.4", + "version": "3.23.8", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", + "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", "dev": true, - "license": "MIT", "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/package.json b/package.json index 8f3f5d9dc..c47964b55 100644 --- a/package.json +++ b/package.json @@ -204,7 +204,8 @@ "uuid": "^9.0.1", "webpack": "^5.91.0", "ws": "^8.17.0", - "yaml": "^2.4.2" + "yaml": "^2.4.2", + "zod": "^3.23.8" }, "optionalDependencies": { "@swc/core-darwin-arm64": "^1.5.3", diff --git a/resources/actions/assistant/readme.md 
b/resources/actions/assistant/readme.md index 3a9cbdc36..e5be638af 100644 --- a/resources/actions/assistant/readme.md +++ b/resources/actions/assistant/readme.md @@ -18,24 +18,10 @@ icon: Assistant iconColor: "#4D822F" --- -I need AI assistance. -I want to chat with AI. -Open the AI chat. -Launch the AI assistant. -Start a conversation with the AI. -Access AI chat functionality. -Engage in a chat with the AI assistant. -Initiate AI-based chat. -Connect with the AI assistant. -Open the AI-based conversation tool. -Begin a session with the AI assistant. -Request AI chat support. -Activate the AI assistant for help. -Start chatting with the AI now. -Open the AI for interactive chat. -Access the intelligent assistant for conversation. -Begin a dialogue with the AI. -Open the virtual assistant chat. -Start the AI-powered assistant. -Get AI assistance for my queries. +App, application, chat with your personal assistant with RAG and tools like generating images. +The AI Assistant app with Retrieval-Augmented Generation and integrated tools is a solution to enhance productivity and streamline tasks. supported Models include: +OpenAI: gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo. +Anthropic: claude-3-opus, claude-3-sonnet, claude-3-haiku. +Mistral: mistral-large, mistral-small. +Google: gemini-1.5-pro. diff --git a/resources/actions/comfy-ui/readme.md b/resources/actions/comfy-ui/readme.md index 26711fce9..4f3128a0c 100644 --- a/resources/actions/comfy-ui/readme.md +++ b/resources/actions/comfy-ui/readme.md @@ -18,24 +18,5 @@ icon: ResistorNodeIcon iconColor: "#353535" --- -Open the app comfyUI. -I want to generate images with stable diffusion. -Accelerate creative workflows with ComfyUI’s modular stable diffusion GUI. -Unleash image synthesis power using ComfyUI for txt2img and img2img tasks. -Navigate advanced node-based interfaces with ease in ComfyUI. -Optimize image generation with ComfyUI's intuitive stable diffusion tools. -Harness the full potential of stable diffusion models with ComfyUI. -Transform digital art processes with ComfyUI’s robust features. -Empower your image projects with ComfyUI’s dynamic GUI. -Enhance productivity using ComfyUI's streamlined image creation system. -Customize stable diffusion operations efficiently with ComfyUI. -Achieve superior image results through ComfyUI’s innovative technology. -Deploy ComfyUI for quick, effective stable diffusion model setups. -Leverage ComfyUI’s flexible architecture for diverse imaging needs. -Maximize creative output with ComfyUI’s powerful imaging solutions. -Simplify complex workflows in image processing with ComfyUI. -Engage ComfyUI for high-performance image generation. -Utilize ComfyUI for detailed and artistic stable diffusion applications. -versatile image synthesis options within ComfyUI. -stable diffusion pipeline using ComfyUI’s smart interface. -advanced imaging techniques effortlessly with ComfyUI. +App, application, generate images with stable diffusion. +ComfyUI is a powerful and modular app designed for managing Stable Diffusion workflows through a node-based graphical user interface (GUI). This app allows users to create and execute complex image generation pipelines without needing to code, using a flowchart-style interface that is intuitive and highly customizable. ComfyUI supports various versions of Stable Diffusion, including SD1.x, SD2.x, and SDXL, and integrates features such as inpainting, control nets, and multiple upscale models. 
It offers both free online access and enhanced features through the RunComfy service, making it accessible for both casual users and professionals. diff --git a/resources/actions/dashboard/readme.md b/resources/actions/dashboard/readme.md index 9eeb95714..d4def6591 100644 --- a/resources/actions/dashboard/readme.md +++ b/resources/actions/dashboard/readme.md @@ -22,23 +22,4 @@ icon: Dashboard iconColor: "#20827c" --- -Search for applications with AI precision. -Display documents relevant to my current project. -Show images from my latest vacation. -Find tools and apps that boost my productivity. -Activate dark mode for evening use. -Switch to a light theme during the day. -Recommend new tools based on my usage patterns. -Organize my files and documents smartly. -Adjust settings for optimal widget display. -Customize the dashboard to my visual preferences. -Update my preferences for more personalized suggestions. -Guide me through the latest marketplace offerings. -Enhance privacy settings for secure data processing. -Integrate seamlessly with installed applications. -Sort media files by relevance to my interests. -Use natural language queries for intuitive search. -Discover hidden features in commonly used apps. -Tailor the dashboard display to my work habits. -Explore content with AI-generated summaries. -Prioritize dashboard content based on my behavior. +App, application, manage all your creative tools in one place with the Dashboard. This app provides an organized and intuitive interface, showing all installed applications and acting as a central launcher. Users can easily access ComfyUI, Assistant, Generate Images, Live Painting, Explorer, and third-party apps from a single, streamlined dashboard. The Dashboard enhances productivity by allowing quick launching and switching between tools. With its user-friendly design, this app ensures that all your creative and productivity tools are just a click away, simplifying workflow management and boosting efficiency. diff --git a/resources/actions/explorer/readme.md b/resources/actions/explorer/readme.md index 7b42af10b..0310b8d77 100644 --- a/resources/actions/explorer/readme.md +++ b/resources/actions/explorer/readme.md @@ -18,24 +18,5 @@ icon: Folder iconColor: "#6C5EF5" --- -Launch the explorer to browse my documents. -Show me how to access the finder for photos. -Open the file explorer to manage my folders. -Guide me to open the finder for organizing files. -Command to launch the explorer for viewing images. -How do I start the explorer with a keyboard shortcut? -Open my documents directly in the file explorer. -Find steps to access external storage in the finder. -Shortcut to open the downloads folder in the explorer. -Launch the explorer to search for specific file names. -Discover the quickest way to open recent items in the finder. -Open the explorer at a particular directory for quick access. -Use search commands to open specific folders in the explorer. -Navigate to my cloud files directly through the finder. -Tips for opening and managing files with the explorer. -Access shared documents through the explorer. -Open multiple finder windows for efficient file management. -Reveal hidden items on the first launch of the explorer. -Employ search functionalities in the finder for specific queries. -Organize and search for specific content using the explorer. +App, application, explore and manage your digital assets efficiently with Explorer. 
This app features an integrated vector search that allows users to quickly find images, apps, stories, and other documents. Leveraging advanced AI technology, the vector search function provides highly accurate results based on content similarity, making it easy to locate specific items in a large collection. Explorer's intuitive interface ensures seamless navigation and organization of files, enhancing productivity and efficiency. Whether you're searching for creative assets, applications, or documents, this app offers a powerful and user-friendly solution to keep all your resources easily accessible and well-organized. diff --git a/resources/actions/live-painting/readme.md b/resources/actions/live-painting/readme.md index 73dcdd468..e9fe3e5fb 100644 --- a/resources/actions/live-painting/readme.md +++ b/resources/actions/live-painting/readme.md @@ -19,23 +19,4 @@ icon: Brush iconColor: "#D3410C" --- -I want to draw an image with AI. -Make live art from my sketch. -Create an image from my doodle. -Let's live paint a picture together. -Turn my drawing into digital art. -Help me paint a scene with AI. -I wish to create art in real-time. -Transform my sketches into paintings. -Assist me in making digital illustrations. -I want to experiment with AI art creation. -Guide me in drawing with AI technology. -Enable AI to enhance my painting process. -Create vibrant images from my concepts. -I’m looking to design art with AI. -Draw a masterpiece with AI input. -How to make a painting with AI collaboration? -I desire to produce art in fantasy style with AI. -Let me paint digitally with live AI generations. -Craft an image based on my basic doodles. -I aim to live paint with AI enhancements. +App, application, transform your scribbles into stunning AI-generated art with stable diffusion Live-Paint. This innovative app allows users to live-paint by sketching directly on the screen, which is then enhanced and transformed into detailed artwork using advanced AI. Leveraging ComfyUI's robust capabilities, this application supports predefined styles and LoRAs, enabling users to customize their creations effortlessly. The user-friendly interface ensures an intuitive experience, making it accessible for both beginners and professional artists. With real-time transformation, users can see their ideas come to life instantly, blending creativity and technology seamlessly for an engaging artistic journey. diff --git a/resources/actions/marketplace/readme.md b/resources/actions/marketplace/readme.md index 33acc3377..50f110e8c 100644 --- a/resources/actions/marketplace/readme.md +++ b/resources/actions/marketplace/readme.md @@ -19,23 +19,4 @@ icon: ShoppingBag iconColor: "#20827c" --- -Install the latest productivity apps directly from our app store. -Extend your device’s capabilities with specialized apps available in our store. -Open the app store to explore new and exciting applications. -Boost your efficiency: download our top-rated apps today. -Explore new features by installing apps from our digital marketplace. -Enhance your experience with our curated selection of utility apps. -Navigate our app store for the latest in software innovations. -Upgrade your toolkit by installing versatile apps from our store. -Visit our app store to find apps that simplify your daily tasks. -Discover how to extend your app functionality with additions from our store. -Access new tools and features by downloading apps from our marketplace. -Unlock new possibilities—install our featured apps today. 
-Step into our app store to enhance your software suite. -Browse and install the latest apps to keep your workflow smooth. -Expand your app capabilities by exploring our extensive app catalog. -Get the newest apps and updates by visiting our app store now. -Find and install essential apps to maximize your productivity. -Enhance your device’s performance with powerful apps from our marketplace. -Open our app store to start transforming your digital environment. -Experience the latest in app technology—download from our store today. +App, application, explore a comprehensive marketplace with Stable Diffusion models, LoRAs, ComfyUI extensions, and third-party apps. The Marketplace app is designed to be a one-stop solution for all your creative needs. Users can browse and acquire various Stable Diffusion models to enhance their image generation capabilities. The marketplace also offers a wide range of LoRAs (Low Rank Adaptation) for specialized tasks and stylistic modifications. Additionally, ComfyUI extensions and third-party applications are available, allowing users to expand their workflow and integrate new functionalities seamlessly. With a user-friendly interface and diverse offerings, the ComfyUI Marketplace ensures that artists and developers have access to the best tools and resources to fuel their creativity and productivity. diff --git a/resources/actions/settings/readme.md b/resources/actions/settings/readme.md index f0b9ab721..df531d221 100644 --- a/resources/actions/settings/readme.md +++ b/resources/actions/settings/readme.md @@ -19,23 +19,4 @@ icon: Settings iconColor: "#717676" --- -Adjust the language settings. -I want dark mode enabled. -Change API keys for OpenAI integration. -How can I switch the language? -Enable light theme for the app. -Update preferences for visual theme. -Set application to system default color mode. -Configure OpenAI API settings. -Personalize my app experience with preferences. -Modify configuration for better usability. -Show me how to enter OpenAI API keys. -Switch to dark mode for nighttime use. -Select my preferred language. -Access advanced settings options. -Save my custom settings configuration. -Guide to customizing application interface. -Steps for changing the app's visual theme. -Activate dark theme in Captain. -Update user preferences and keys. -Customize interface language. +App, application, customize your Captain experience with the versatile Settings app. This app allows users to change the color theme, toggle between light and dark modes, and select from 11 included locales: German (de), English (en), Spanish (es), French (fr), Italian (it), Japanese (ja), Dutch (nl), Polish (pl), Portuguese (pt), Russian (ru), and Chinese (zh). Additionally, it provides functionality to manage API keys for OpenAI, Anthropic, Google, and Mistral. The Settings app is also the hub for keeping your system up-to-date, allowing you to check for and install the latest updates for Captain. This comprehensive settings management tool ensures a personalized and optimized experience for all users. diff --git a/resources/actions/story/readme.md b/resources/actions/story/readme.md index bc66984a3..adccecd32 100644 --- a/resources/actions/story/readme.md +++ b/resources/actions/story/readme.md @@ -23,23 +23,4 @@ icon: MenuBook iconColor: "#9F6B00" --- -Generate a story from this image. -Tell me a story based on my photo. -Help me write a story for my kids using this picture. -Create narratives from my gallery. -How to turn vacation photos into a storybook. 
-Inspire me with a story from my artwork. -Convert my drawings into digital narratives. -Transform my photo into a fantasy story. -Use my picture to write a mystery story. -Make a story for my classroom presentation. -Turn this series of images into a connected narrative. -Craft a love story from our anniversary photo. -Develop a sci-fi tale from my space-themed image. -How to create a superhero story with my drawing. -Share a fairy tale based on my child’s artwork. -I want a historical narrative for my vintage photo. -How can I get a motivational story from my personal achievements? -Create a bedtime story from my kids' drawings. -Can you generate an adventure story from my travel photos? -Turn my pet's photos into a funny tale. +App, application, create captivating stories with Captain Story. This app utilizes a powerful Language Learning Model (LLM) and previously created images to craft engaging narratives, which can be saved for future use. With a user-friendly interface, Captain Story makes story creation accessible and enjoyable. It offers various presets to help users get started quickly, allowing for easy customization and personalization. Whether you're a writer looking for inspiration or simply want to create a visual story, this app provides the tools you need to bring your ideas to life in a seamless and intuitive manner. diff --git a/resources/actions/text-to-image/readme.md b/resources/actions/text-to-image/readme.md index c151f87ef..9671bc123 100644 --- a/resources/actions/text-to-image/readme.md +++ b/resources/actions/text-to-image/readme.md @@ -18,27 +18,4 @@ icon: Images iconColor: "#D3410C" --- -I want to draw an image. -Generate an image from my prompt. -Perform Text to image conversion. -Open the txt2img app. -Create a landscape from my description. -Turn poetry into visual art. -Generate a painting based on my dream. -Visualize historical events from text. -Convert my short story into an illustration. -Create fantasy world maps from descriptions. -Illustrate characters from my novel. -Generate artwork for my music album cover. -Turn my ideas into comic book scenes. -Visualize scientific concepts through art. -Create abstract art from emotional descriptions. -Design fashion sketches from style descriptions. -Generate portraits from character traits. -Illustrate recipes in a whimsical style. -Visualize architectural designs from descriptions. -Create visual metaphors from poetic phrases. -Generate scenes from movie script excerpts. -Illustrate concepts for educational materials. -Create thematic artwork for my blog posts. -Visualize futuristic technology from sci-fi descriptions. +App, application, generate stunning images with stable diffusion, featuring a user-friendly interface. The Generate Images app leverages the power of ComfyUI to create visually appealing images with ease. Users can utilize predefined styles and LoRAs (Low Rank Adaptation) to enhance and customize their image generation process. This application simplifies complex workflows into an intuitive interface, allowing users to experiment with different styles and elements without needing advanced technical knowledge. Whether you are a beginner or an experienced artist, this app offers a seamless experience in producing high-quality images tailored to your specific needs. 
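The client-side diffs that follow wire a new `assistant:tool` status message into the assistant UI so a progress chip can show which tool is currently running. As a minimal sketch of that contract (assuming the `useSDK` onMessage shape used in `src/client/apps/assistant/hooks/assistant.ts`; the handler and setter names here are illustrative, not part of the patch):

```ts
// Sketch of the "assistant:tool" status message introduced in this PR.
// The main process sends it while a tool runs; the renderer reacts to "loading".
type ToolName = "textToImage" | "vectorSearch";

interface AssistantToolMessage {
	action: "assistant:tool";
	payload: { tool: ToolName; status: "loading" | "done" };
}

function onAssistantToolMessage(
	message: AssistantToolMessage,
	setActiveTool: (tool: ToolName | null) => void
) {
	// The hook below only reacts to "loading"; the active tool is cleared again
	// once the next "assistant:message" or "assistant:error" arrives.
	if (message.payload.status === "loading") {
		setActiveTool(message.payload.tool);
	}
}
```

Clearing the active tool on the next message or error (rather than on a "done" status) keeps the chip visible until streamed output actually replaces it.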
diff --git a/src/client/apps/assistant/components/chat-list.tsx b/src/client/apps/assistant/components/chat-list.tsx index 22b4eb4e5..f42ac659b 100644 --- a/src/client/apps/assistant/components/chat-list.tsx +++ b/src/client/apps/assistant/components/chat-list.tsx @@ -3,6 +3,7 @@ import DeleteForeverIcon from "@mui/icons-material/DeleteForever"; import MoreVertIcon from "@mui/icons-material/MoreVert"; import PushPinIcon from "@mui/icons-material/PushPin"; import StarIcon from "@mui/icons-material/Star"; +import Box from "@mui/joy/Box"; import Dropdown from "@mui/joy/Dropdown"; import IconButton from "@mui/joy/IconButton"; import List from "@mui/joy/List"; @@ -36,137 +37,139 @@ export function ChatList({ const { t } = useTranslation(["labels", "common"]); const { send } = useSDK(APP_ID, {}); return ( - - {chats - .sort((a, b) => { - if (a.pinned && b.pinned) { - return 0; - } + + {chats + .sort((a, b) => { + if (a.pinned && b.pinned) { + return 0; + } - return a.pinned ? -1 : 1; - }) - .map(chat => ( - - ( + + - - - - { - send({ - action: "assistant:update", - payload: { - id: chat.id, - update: { pinned: !chat.pinned }, - }, - }); }} > - ({ - "--Icon-color": chat.pinned - ? theme.vars.palette.primary["500"] - : "currentColor", - })} + + + + { + send({ + action: "assistant:update", + payload: { + id: chat.id, + update: { pinned: !chat.pinned }, + }, + }); + }} > - - {" "} - {t("labels:pin")} - - { - send({ - action: "assistant:update", - payload: { - id: chat.id, - update: { - favorite: !chat.favorite, + ({ + "--Icon-color": chat.pinned + ? theme.vars.palette.primary["500"] + : "currentColor", + })} + > + + {" "} + {t("labels:pin")} + + { + send({ + action: "assistant:update", + payload: { + id: chat.id, + update: { + favorite: !chat.favorite, + }, }, - }, - }); - }} - > - ({ - "--Icon-color": chat.favorite - ? theme.vars.palette.primary["500"] - : "currentColor", - })} + }); + }} > - - {" "} - {t("labels:favorite")} - - { - if (onDelete) { - onDelete(chat.id); - } - }} - > - ({ - "--Icon-color": theme.vars.palette.red["500"], - })} + ({ + "--Icon-color": chat.favorite + ? 
theme.vars.palette.primary["500"] + : "currentColor", + })} + > + + {" "} + {t("labels:favorite")} + + { + if (onDelete) { + onDelete(chat.id); + } + }} > - - {" "} - {t("labels:delete")} - - - - } - > - { - if (onChatSelect) { - onChatSelect(chat.id); - } - }} + ({ + "--Icon-color": theme.vars.palette.red["500"], + })} + > + + {" "} + {t("labels:delete")} + + + + } > - {chat.label} - - - ))} - + { + if (onChatSelect) { + onChatSelect(chat.id); + } + }} + > + {chat.label} + + + ))} + + ); } diff --git a/src/client/apps/assistant/components/messages.tsx b/src/client/apps/assistant/components/messages.tsx index 29c857694..d16304997 100644 --- a/src/client/apps/assistant/components/messages.tsx +++ b/src/client/apps/assistant/components/messages.tsx @@ -2,12 +2,15 @@ import AssistantIcon from "@mui/icons-material/Assistant"; import PersonIcon from "@mui/icons-material/Person"; import Avatar from "@mui/joy/Avatar"; import Box from "@mui/joy/Box"; +import Chip from "@mui/joy/Chip"; +import CircularProgress from "@mui/joy/CircularProgress"; import ListItem from "@mui/joy/ListItem"; import ListItemContent from "@mui/joy/ListItemContent"; import ListItemDecorator from "@mui/joy/ListItemDecorator"; import Sheet from "@mui/joy/Sheet"; import type { ColorPaletteProp } from "@mui/joy/styles"; import Tooltip from "@mui/joy/Tooltip"; +import Typography from "@mui/joy/Typography"; import { useTranslation } from "next-i18next"; import type { CSSProperties, LegacyRef } from "react"; import { useEffect, useRef } from "react"; @@ -22,10 +25,36 @@ import type { MessageModel } from "@/client/apps/assistant/types"; import { Markdown } from "@/client/apps/shared/markdown"; import { useResizeObserver } from "@/client/ions/hooks/resize-observer"; +export function ActiveTool({ tool }: { tool: "textToImage" | "vectorSearch" }) { + const { t } = useTranslation(["labels"]); + return ( + + } + sx={{ + "--Chip-decoratorChildHeight": "16px", + }} + > + {t(`labels:tools.${tool}`)} + + ); +} + export const MessageRenderer = forwardRef< HTMLLIElement, { - message: MessageModel; + message: MessageModel & { activeTool?: null | "textToImage" | "vectorSearch" }; color: ColorPaletteProp; style?: CSSProperties; onResize?(): void; @@ -80,7 +109,7 @@ export const MessageRenderer = forwardRef< sx={{ flexDirection: message.role === "user" ? "row-reverse" : "row", alignItems: "flex-start", - alignSelf: message.role === "user" ? "flex-end" : "flex-start", + alignSelf: message.role === "user" ? "flex-end" : "stretch", gap: 2, }} > @@ -124,7 +153,11 @@ export const MessageRenderer = forwardRef< borderRadius: "sm", }} > - + {message.activeTool ? 
( + + ) : ( + + )} @@ -141,11 +174,12 @@ export const Messages = forwardRef< model: string; modelPrefix: string; message: string; + activeTool: null | "textToImage" | "vectorSearch"; color: ColorPaletteProp; onScroll: ListProps["onScroll"]; scrollToIndex: ListProps["scrollToIndex"]; } ->(({ messages, model, message, color, onScroll, scrollToIndex }, reference) => { +>(({ messages, model, message, color, onScroll, scrollToIndex, activeTool }, reference) => { const cache = useMemo( () => new CellMeasurerCache({ @@ -190,6 +224,7 @@ export const Messages = forwardRef< role: "assistant", content: message, model, + activeTool, }; return ( ([]); - const [message, setMesssage] = useState(""); + const [message, setMessage] = useState(""); const [chatName, setChatName] = useState(""); const [isGenerating, setIsGenerating] = useState(false); const [input, setInput] = useState(""); + const [activeTool, setActiveTool] = useState(null); const [chatId, setChatId] = useState(""); const [model, setModel] = useState(models[0].id); const [error, setError] = useState(""); const [chats, setChats] = useState<(Except & { id: string })[]>([]); const [isWarningOpen, setIsWarningOpen] = useState(false); - const [dataTypes, setDataTypes] = useState(availableDataTypes); const [modelPrefix] = model.split("-"); @@ -28,6 +27,7 @@ export function useAssistant() { const { send } = useSDK(APP_ID, { onMessage(message) { + setError(""); switch (message.action) { case "assistant:nameSuggestion": { const { chatName: chatName_ } = message.payload as { @@ -53,7 +53,7 @@ export function useAssistant() { label?: string; }; if (done) { - setMesssage(""); + setMessage(""); setMessages(previousState => [ ...previousState, { @@ -69,9 +69,11 @@ export function useAssistant() { } } else { setIsGenerating(true); - setMesssage(output); + setMessage(output); } + setActiveTool(null); + break; } @@ -80,15 +82,6 @@ export function useAssistant() { chat: Except & { messages: MessageModel[] }; }; setMessages(chat.messages); - setDataTypes( - availableDataTypes.map(dataType => { - const chatDataTypes = chat.dataTypes ?? []; - const overwrite = chatDataTypes.find( - dataType_ => dataType_.id === dataType.id - ); - return overwrite ?? 
dataType; - }) - ); setModel(chat.model); setChatName(chat.label); @@ -100,6 +93,20 @@ export function useAssistant() { error: string; }; setError(error_); + setActiveTool(null); + + break; + } + + case "assistant:tool": { + const { tool, status } = message.payload as { + tool: "textToImage" | "vectorSearch"; + status: "loading" | "done"; + }; + if (status === "loading") { + setActiveTool(tool); + setMessage(`${tool}: ${status}`); + } break; } @@ -150,12 +157,12 @@ export function useAssistant() { setError, setChatName, setChatId, + activeTool, chatId, chats, model, setModel, - dataTypes, - setDataTypes, + modelPrefix, chatName, isGenerating, diff --git a/src/client/apps/assistant/index.tsx b/src/client/apps/assistant/index.tsx index f7bbac6cd..84160103f 100644 --- a/src/client/apps/assistant/index.tsx +++ b/src/client/apps/assistant/index.tsx @@ -18,7 +18,6 @@ import { APP_ID } from "./constants"; import { ChatList } from "@/client/apps/assistant/components/chat-list"; import { ChatName } from "@/client/apps/assistant/components/chat-name"; -import { DataTypeSelector } from "@/client/apps/assistant/components/data-type-selector"; import { MessageStack } from "@/client/apps/assistant/components/message-stack"; import { Messages } from "@/client/apps/assistant/components/messages"; import { ModelSelector } from "@/client/apps/assistant/components/model-selector"; @@ -34,8 +33,7 @@ export function Assistant({ color }: { color: ColorPaletteProp }) { error, model, setModel, - dataTypes, - setDataTypes, + activeTool, provider, isWarningOpen, setError, @@ -74,7 +72,6 @@ export function Assistant({ color }: { color: ColorPaletteProp }) { id: chatId, model, maxHistory: 10, - dataTypes, }, }); } @@ -100,7 +97,7 @@ export function Assistant({ color }: { color: ColorPaletteProp }) { flexShrink: 0, }} > - + { @@ -108,6 +105,7 @@ export function Assistant({ color }: { color: ColorPaletteProp }) { setChatId(v4()); }} /> + - { - setDataTypes(previousState => - previousState.map(dataType_ => - dataType_.id === dataTypeId - ? 
{ - ...dataType_, - active: isChecked, - } - : dataType_ - ) - ); - }} - /> } messages={messages} message={message} + activeTool={activeTool} model={model} modelPrefix={modelPrefix} color={color} diff --git a/src/client/apps/shared/markdown.tsx b/src/client/apps/shared/markdown.tsx index ef3d5eeec..cc4c592f0 100644 --- a/src/client/apps/shared/markdown.tsx +++ b/src/client/apps/shared/markdown.tsx @@ -225,19 +225,28 @@ export const components: Partial = { sx={{ display: "flex", flexDirection: "column", - height: 700, + p: 0, - ml: 0, - mr: 1, + m: 0, my: 0.5, }} > - + + + {alt} + + + = { src={src} alt={alt} sx={{ - position: "absolute", - inset: 0, + height: { xs: 300, md: 500, lg: 700 }, width: "100%", - height: "100%", objectFit: "contain", objectPosition: "center", }} /> - - {alt} - ); }, diff --git a/src/client/public/locales/de/labels.json b/src/client/public/locales/de/labels.json index 312a3370f..d20aec058 100644 --- a/src/client/public/locales/de/labels.json +++ b/src/client/public/locales/de/labels.json @@ -124,6 +124,10 @@ "text": "Text", "textToImage": "Text zu Bild", "toEnd": "Zum Ende", + "tools": { + "textToImage": "Bild wird generiert", + "vectorSearch": "Dateien werden durchsucht" + }, "undefined": "Unbekannt", "unpacking": "Entpacken", "update": "Aktualisieren", diff --git a/src/client/public/locales/en/labels.json b/src/client/public/locales/en/labels.json index 316baa342..f1b3a0573 100644 --- a/src/client/public/locales/en/labels.json +++ b/src/client/public/locales/en/labels.json @@ -124,6 +124,10 @@ "text": "Text", "textToImage": "Text to Image", "toEnd": "To End", + "tools": { + "textToImage": "Generating Image", + "vectorSearch": "Searching files" + }, "undefined": "Unknown", "unpacking": "Unpacking", "update": "Update", diff --git a/src/client/public/locales/es/labels.json b/src/client/public/locales/es/labels.json index 7aacf4815..aa99ac8e9 100644 --- a/src/client/public/locales/es/labels.json +++ b/src/client/public/locales/es/labels.json @@ -124,6 +124,10 @@ "text": "Texto", "textToImage": "Texto a Imagen", "toEnd": "Al Final", + "tools": { + "textToImage": "Generando imagen", + "vectorSearch": "Buscando archivos" + }, "undefined": "Desconocido", "unpacking": "Desempaquetando", "update": "Actualizar", diff --git a/src/client/public/locales/fr/labels.json b/src/client/public/locales/fr/labels.json index 572bd741b..2dbdd6a92 100644 --- a/src/client/public/locales/fr/labels.json +++ b/src/client/public/locales/fr/labels.json @@ -124,6 +124,10 @@ "text": "Texte", "textToImage": "Texte en Image", "toEnd": "À la Fin", + "tools": { + "textToImage": "Génération de l'image", + "vectorSearch": "Recherche de fichiers" + }, "undefined": "Inconnu", "unpacking": "Déballage", "update": "Mettre à jour", diff --git a/src/client/public/locales/he/labels.json b/src/client/public/locales/he/labels.json index fdb24d6e4..ae50173c0 100644 --- a/src/client/public/locales/he/labels.json +++ b/src/client/public/locales/he/labels.json @@ -124,6 +124,10 @@ "text": "טקסט", "textToImage": "טקסט לתמונה", "toEnd": "לסוף", + "tools": { + "textToImage": "וצר תמונה", + "vectorSearch": "מחפש קבצים" + }, "undefined": "לא ידוע", "unpacking": "פותח", "update": "עדכון", diff --git a/src/client/public/locales/it/labels.json b/src/client/public/locales/it/labels.json index 5568b8646..d7147d439 100644 --- a/src/client/public/locales/it/labels.json +++ b/src/client/public/locales/it/labels.json @@ -124,6 +124,10 @@ "text": "Testo", "textToImage": "Testo in Immagine", "toEnd": "Alla Fine", + "tools": { + 
"textToImage": "Generazione dell'immagine", + "vectorSearch": "Ricerca dei file" + }, "undefined": "Sconosciuto", "unpacking": "Estrazione", "update": "Aggiorna", diff --git a/src/client/public/locales/ja/labels.json b/src/client/public/locales/ja/labels.json index 9a8fe8bac..6b8f8fc3a 100644 --- a/src/client/public/locales/ja/labels.json +++ b/src/client/public/locales/ja/labels.json @@ -124,6 +124,10 @@ "text": "テキスト", "textToImage": "テキストから画像へ", "toEnd": "最後へ", + "tools": { + "textToImage": "画像を生成中", + "vectorSearch": "ファイルを検索中" + }, "undefined": "不明", "unpacking": "解凍中", "update": "更新", diff --git a/src/client/public/locales/nl/labels.json b/src/client/public/locales/nl/labels.json index 5de34dca3..14351b6b8 100644 --- a/src/client/public/locales/nl/labels.json +++ b/src/client/public/locales/nl/labels.json @@ -124,6 +124,10 @@ "text": "Tekst", "textToImage": "Tekst naar Afbeelding", "toEnd": "Naar het Einde", + "tools": { + "textToImage": "Afbeelding genereren", + "vectorSearch": "Bestanden doorzoeken" + }, "undefined": "Onbekend", "unpacking": "Uitpakken", "update": "Bijwerken", diff --git a/src/client/public/locales/pl/labels.json b/src/client/public/locales/pl/labels.json index cfcd203d9..851be6f6a 100644 --- a/src/client/public/locales/pl/labels.json +++ b/src/client/public/locales/pl/labels.json @@ -124,6 +124,10 @@ "text": "Tekst", "textToImage": "Tekst na Obraz", "toEnd": "Na koniec", + "tools": { + "textToImage": "Generowanie obrazu", + "vectorSearch": "Wyszukiwanie plików" + }, "undefined": "Nieznany", "unpacking": "Rozpakowywanie", "update": "Aktualizuj", diff --git a/src/client/public/locales/pt/labels.json b/src/client/public/locales/pt/labels.json index 150912ca2..afe2e8a2f 100644 --- a/src/client/public/locales/pt/labels.json +++ b/src/client/public/locales/pt/labels.json @@ -124,6 +124,10 @@ "text": "Texto", "textToImage": "Texto para Imagem", "toEnd": "Para o Fim", + "tools": { + "textToImage": "Gerando imagem", + "vectorSearch": "Procurando arquivos" + }, "undefined": "Desconhecido", "unpacking": "Desembalando", "update": "Atualizar", diff --git a/src/client/public/locales/ru/labels.json b/src/client/public/locales/ru/labels.json index b1ab7b512..ade6f707c 100644 --- a/src/client/public/locales/ru/labels.json +++ b/src/client/public/locales/ru/labels.json @@ -124,6 +124,10 @@ "text": "Текст", "textToImage": "Текст в Изображение", "toEnd": "В конец", + "tools": { + "textToImage": "Генерация изображения", + "vectorSearch": "Поиск файлов" + }, "undefined": "Неизвестно", "unpacking": "Распаковка", "update": "Обновить", diff --git a/src/client/public/locales/zh/labels.json b/src/client/public/locales/zh/labels.json index 9c5a762e4..98af0dd63 100644 --- a/src/client/public/locales/zh/labels.json +++ b/src/client/public/locales/zh/labels.json @@ -124,6 +124,10 @@ "text": "文本", "textToImage": "文字转图片", "toEnd": "到结尾", + "tools": { + "textToImage": "生成图像", + "vectorSearch": "搜索文件" + }, "undefined": "未知", "unpacking": "解压中", "update": "更新", diff --git a/src/electron/helpers/ipc/global.ts b/src/electron/helpers/ipc/global.ts index 989ba0764..433d6ac79 100644 --- a/src/electron/helpers/ipc/global.ts +++ b/src/electron/helpers/ipc/global.ts @@ -4,34 +4,29 @@ import path from "node:path"; import type { ComfyUIFileMetaData } from "@captn/utils"; import { - WINDOW_CLOSE_KEY, - WINDOW_MAXIMIZE_KEY, - WINDOW_MINIMIZE_KEY, - VECTOR_STORE_SAVED_KEY, DownloadState, ERROR_KEY, USER_THEME_KEY, + VECTOR_STORE_SAVED_KEY, + WINDOW_CLOSE_KEY, + WINDOW_MAXIMIZE_KEY, + WINDOW_MINIMIZE_KEY, } from 
"@captn/utils/constants"; import type { VectorStoreDocument } from "@captn/utils/types"; import { BrowserWindow, dialog, ipcMain } from "electron"; import { download } from "electron-dl"; import type ElectronStore from "electron-store"; -import { execa } from "execa"; import { v4 } from "uuid"; import { apps } from "@/electron/apps"; -import { logError, logger } from "@/electron/services/logger"; +import { copyWithMetaData, writePngMetaData } from "@/electron/png"; +import { logError } from "@/electron/services/logger"; import { VectorStore } from "@/electron/services/vector-store"; -import { inventoryStore, downloadsStore, userStore, appSettingsStore } from "@/electron/stores"; +import { appSettingsStore, downloadsStore, inventoryStore, userStore } from "@/electron/stores"; import { pushToStore } from "@/electron/stores/utils"; import type { DownloadsSettings } from "@/electron/types"; import { createDirectory } from "@/electron/utils/fs"; -import { - getCaptainData, - getCaptainDownloads, - getDirectory, - getPythonEmbedded, -} from "@/electron/utils/path-helpers"; +import { getCaptainData, getCaptainDownloads, getDirectory } from "@/electron/utils/path-helpers"; import { splitDocument } from "@/electron/utils/split-documents"; import { unpack } from "@/electron/utils/unpack"; import { buildKey } from "@/shared/build-key"; @@ -104,38 +99,6 @@ ipcMain.handle(buildKey([ID.FILE], { prefix: "path:", suffix: ":get" }), async ( } }); -async function writePngMetaData(filePath: string, metadata: ComfyUIFileMetaData) { - const pythonBinaryPath = getPythonEmbedded("python.exe"); - const scriptPath = getDirectory("python/png/main.py"); - const scriptArguments = ["--image_path", filePath]; - - if (metadata.positivePrompt) { - scriptArguments.push("--positive_prompt", metadata.positivePrompt); - } - - if (metadata.caption) { - scriptArguments.push("--caption", metadata.caption); - } - - if (metadata.description) { - scriptArguments.push("--description", metadata.description); - } - - if (metadata.negativePrompt) { - scriptArguments.push("--negative_prompt", metadata.negativePrompt); - } - - if (metadata.tags) { - scriptArguments.push("--tags", metadata.tags); - } - - if (metadata.prompt) { - scriptArguments.push("--prompt", metadata.prompt); - } - - return execa(pythonBinaryPath, ["-u", scriptPath, ...scriptArguments]); -} - /** * Handles an IPC event to write content to a file at a specified subpath within the application's data directory. 
* This handler determines the file's directory and type, ensures the directory exists (creating it if necessary), @@ -283,65 +246,14 @@ ipcMain.handle( ipcMain.handle( buildKey([ID.FILE], { suffix: ":copy-image-with-metadata" }), async ( - event, - { - source, - destination, - metadata, - }: { + _event, + data: { source: string; destination: string; metadata?: ComfyUIFileMetaData; } ) => { - const filePath = getCaptainData("files", cleanPath(destination)); - const { dir: directory, ext } = path.parse(filePath); - - try { - createDirectory(directory); - // Copy the file - await fsp.copyFile(source, filePath); - } catch (error) { - logError(error, "createDirectory:copy-image-with-metadata"); - } - - logger.info(`copy image from ${source} to ${filePath}`); - const id = v4(); - - const fileName = `${id}${ext}`; - const destinationPath = path.join(directory, fileName); - await fsp.rename(filePath, destinationPath); - logger.info(`renamed image from ${filePath} to ${destinationPath}`); - if (metadata?.positivePrompt) { - logger.info( - `add metadata to image at ${destinationPath} with description: ${metadata.description}` - ); - try { - await writePngMetaData(destinationPath, metadata); - } catch (error) { - logError(error, "writePngMetaData:copy-image-with-metadata"); - } - - try { - const vectorStore = VectorStore.getInstance; - return await vectorStore.upsert(VECTOR_STORE_COLLECTION, [ - { - content: metadata.positivePrompt, - payload: { - id: v4(), - label: fileName, - type: "image", - fileType: ext.replace(".", ""), - language: "en", - filePath: destinationPath, - content: metadata.positivePrompt, - }, - }, - ]); - } catch (error) { - logError(error, "vectorStore.upsert:copy-image-with-metadata"); - } - } + await copyWithMetaData(data); } ); diff --git a/src/electron/helpers/ipc/sdk/assistant.ts b/src/electron/helpers/ipc/sdk/assistant.ts index 0a90cbfb5..bae88916d 100644 --- a/src/electron/helpers/ipc/sdk/assistant.ts +++ b/src/electron/helpers/ipc/sdk/assistant.ts @@ -9,25 +9,31 @@ import { createOpenAI } from "@ai-sdk/openai"; import type { LanguageModelV1 } from "@ai-sdk/provider"; import type { SDKMessage } from "@captn/react/types"; import { APP_MESSAGE_KEY } from "@captn/utils/constants"; -import type { VectorStoreDocument } from "@captn/utils/types"; +import type { ComfyUIUpdate } from "@captn/utils/types"; import type { Schemas as QdrantSchemas } from "@qdrant/js-client-rest"; -import type { CoreMessage } from "ai"; +import type { CoreMessage, CoreTool, ToolCallPart, ToolResultPart } from "ai"; +import { tool } from "ai"; import { streamText } from "ai"; import { ipcMain, type IpcMainEvent } from "electron"; import Store from "electron-store"; import type { Except } from "type-fest"; import { v4 } from "uuid"; +import { z } from "zod"; +import { copyWithMetaData } from "@/electron/png"; +import { ComfyUI } from "@/electron/services/comfyui"; import { logError, logger } from "@/electron/services/logger"; import { VectorStore } from "@/electron/services/vector-store"; -import { keyStore } from "@/electron/stores"; +import { inventoryStore, keyStore } from "@/electron/stores"; import { normalizePath } from "@/electron/utils/normalize-path"; -import { getCaptainData } from "@/electron/utils/path-helpers"; +import { getCaptainData, getComfyUIPath } from "@/electron/utils/path-helpers"; import { availableDataTypes } from "@/shared/assistant"; import { buildKey } from "@/shared/build-key"; import { VECTOR_STORE_COLLECTION } from "@/shared/constants"; import { KEY } from "@/shared/enums"; 
+import { randomSeed } from "@/shared/number"; import type { ChatModel, DataTypeItem } from "@/shared/types/assistant"; +import type { NodeChain } from "@/shared/types/comfyui"; export const assistantStore = new Store>({ name: buildKey([KEY.STORE, KEY.ASSISTANT]), @@ -41,7 +47,8 @@ interface RAGDocument { label: string; score: number; } -function optimizeContext(context: RAGDocument[]) { + +export function optimizeContext(context: RAGDocument[]) { if (context.length === 0) { return "\n\n"; } @@ -69,6 +76,13 @@ export function extendCaptainData(content: string) { ); } +export function replaceCaptainData(content: string) { + return normalizePath(content).replaceAll( + new RegExp(getCaptainData() + "/", "ig"), + PATH_PLACEHOLDER + ); +} + export function createVectorSearchPrompt({ dataTypes }: { dataTypes: string[] }) { return `Based on the history of user-assistant messages, generate a single, highly relevant search query that includes several tags or keywords for a vector search. The query should be 1-2 sentences long and tailored to find specific results within the following data types: ${JSON.stringify(dataTypes)}. Do not interact with the user or provide any additional comments. The user will not see your query; it is only used for the search process.`; } @@ -77,59 +91,153 @@ export function createChatLabelPrompt() { return `Your task is to generate a short and concise label or title for the chat based on the user's initial question. The label should accurately reflect the main topic or intent of the user's inquiry, be in the same language as the user's question, and must be no more than four words. Ensure the label is clear and specific, capturing the essence of the conversation that is about to start. Yur answer is just the label, no comments or explanation, the user will not see your message or be able to interact with you.`; } -export function createSystemPrompt() { - return `# You are Captain | The AI Platform +export function createSystemPrompt({ model, provider }: { model: string; provider: string }) { + return `# System Guide for "Captain | The AI platform" ## You are: -- An open source app developed by Blibla, a German startup from Mainz. +- Captain +- An app that uses different AI models. - Always nice to the user, encouraging, and a bit quirky but honest. - The user's best friend and a great support for any problem. - Extremely precise when precision is required. +- Currently using the model: ${model} by ${provider}. ## You can: - Store data locally and keep personal data private. -- Retrieve data based on user queries using the integrated local vector store (powered by Qdrant and local embeddings). +- Retrieve data based on user queries using the local vector store. - Access files from the user's computer, but only those files registered in Captain. +- Generate images with ComfyUI. - Link to or display certain files inline if asked to do so. -- Speak 11 languages (German, English, Spanish, French, Italian, Japanese, Dutch, Polish, Portuguese, Russian, Chinese) and respond in the user's language. - -> [!NOTE] -> You always act as Captain. Each user query includes context retrieved by a vector search. To activate the context, the user must select the datatypes in the menu. Only files in the selected category are included. - -## Interaction Guide: - -### User Interaction -- **Scenario: User speaks {language}** - - Respond in {language} using simple, clear language. - -- **Scenario: Context is empty** - - Indicate there may not be related data. 
- -- **Scenario: Context has entries** - - Indicate they might be related to the user's query. -- **Scenario: Context includes images** - - Answer with image tags (e.g., ![Image content](${PATH_PLACEHOLDER}/path/to/image.ext)). -- **Scenario: User requests a story** - - Select 1-3 fitting images from the context. - - Return the story including the image as inline image tags. - -- **Scenario: User requests links or lists** - - Include references to found entries as links (e.g., [Link label](${PATH_PLACEHOLDER}/path/to/file.ext)). +## User Interaction Guide: + +\`\`\`gherkin +Scenario: User speaks {language} + When user speaks {language} + Then you answer in {language} + And your response is simple and clear + +Scenario: User wants to know about Captain + When user asks about Captain and it's usage + Then you briefly introduce yourself + And you tell them what they want to know + +Scenario: A vector search is required + When a vector search is required + Then you select the correct category + And you prepare a perfect query + And your query uses several trigger words in natural language format + +Scenario: Your response includes images + When your response includes images + Then you answer with image tags (e.g., ![{Image prompt or caption}](${PATH_PLACEHOLDER}/path/to/image.ext)) + +Scenario: Your response includes links + When your response includes images + Then you include references to entries as links (e.g., [{Link label}](${PATH_PLACEHOLDER}/path/to/file.ext)) + +Scenario: User wants an image generated + When user asks you to generate an image + Then you create an optimized and contextualized prompt + And you generate the image with ComfyUI + +Scenario: User wants to search existing images + When user asks you to look for an image + Then you perform a vector search for "image" + And your response includes images + +Scenario: User requests links or lists + When the user asks you to list links + Then you perform a vector search for {category} + And your response includes links + +Scenario: User requests info on apps + When the user asks you about the included apps + Then you perform a vector search for "app" + And your response includes links + +Scenario: User wants a story + When the user wants a story + Then you generate images + And you write a story about the images + And each image has a paragraph + And your response includes text with inline images + +Scenario: Miscellaneous user input + When the user submits a miscellaneous input + Then you act accordingly + And you aim to fulfill their request + +Scenario: An error occurs + When an error occurs + Then you inform the user kindly +\`\`\` +`; +} -- **Scenario: User has a question about Captain** - - Explain Captain's features and offer things to try out. +export function createSystemPrompt2({ model, provider }: { model: string; provider: string }) { + return `# System Guide for "Captain | The AI platform" -- **Scenario: Miscellaneous user input** - - Use context to answer questions or perform tasks, and always add references if applicable. +## You are: +- Captain +- An app that uses different AI models. +- Always nice to the user, encouraging, and a bit quirky but honest. +- The user's best friend and a great support for any problem. +- Extremely precise when precision is required. +- Currently using the model: ${model} by ${provider}. -- **Scenario: User wants responses based on the last message** - - Ignore the context and use previous messages as context. +## You can: +- Store data locally and keep personal data private. 
+- Retrieve data based on user queries using the local vector store. +- Access files from the user's computer, but only those files registered in Captain. +- Generate images with ComfyUI. +- Link to or display certain files inline if asked to do so. -### Error Handling -- **Scenario: An error occurs** - - Apologize and provide guidance on how to proceed or troubleshoot. +## User Interaction Guide: + +\`\`\`gherkin +Scenario: User speaks {language} + When user speaks {language} + Then you answer in {language} + And your response is simple and clear + +Scenario: User wants to know about Captain + When user asks about Captain and it's usage + Then you briefly introduce yourself + And you tell them what they want to know + +Scenario: Your response includes images + When your response includes images + Then you answer with image tags (e.g., ![{Image prompt or caption}](${PATH_PLACEHOLDER}/path/to/image.ext)) + +Scenario: Your response includes links + When your response includes images + Then you include references to entries as links (e.g., [{Link label}](${PATH_PLACEHOLDER}/path/to/file.ext)) + +Scenario: User requests links or lists + When the user asks you to list links + Then your response includes links + +Scenario: User requests info on apps + When the user asks you about the included apps + Then your response includes links + +Scenario: User wants a story + When the user wants a story + Then you write a story about the images + And each image has a paragraph + And your response includes text with inline images + +Scenario: Miscellaneous user input + When the user submits a miscellaneous input + Then you act accordingly + And you aim to fulfill their request + +Scenario: An error occurs + When an error occurs + Then you inform the user kindly +\`\`\` `; } @@ -161,6 +269,485 @@ export async function chatCompletion({ return fullResponse; } +export function createWorkflow({ + prompt = "", + negativePrompt = "", + seed = 1234, + steps = 20, + height, + width, + cfg = 7, + loraWeight = 1, + checkpointName = "stabilityai\\stable-diffusion-xl-base-1.0\\sd_xl_base_1.0_0.9vae.safetensors", + loraName, +}: { + prompt?: string; + negativePrompt?: string; + seed?: number; + height: number; + width: number; + steps?: number; + cfg?: number; + checkpointName?: string; + loraName?: string; + loraWeight?: number; +}) { + const hasLora = loraName && loraWeight > 0; + const finalModel = hasLora ? 
"lora" : "model"; + const workflow: NodeChain = { + kSampler: { + inputs: { + seed, + steps, + cfg, + sampler_name: "dpmpp_2m_sde_gpu", + scheduler: "karras", + denoise: 1, + model: [finalModel, 0], + positive: ["prompt", 0], + negative: ["negativePrompt", 0], + latent_image: ["latentImage", 0], + }, + class_type: "KSampler", + }, + model: { + inputs: { + ckpt_name: checkpointName, + }, + class_type: "CheckpointLoaderSimple", + }, + latentImage: { + inputs: { + width, + height, + batch_size: 1, + }, + class_type: "EmptyLatentImage", + }, + prompt: { + inputs: { + text: prompt, + clip: [finalModel, 1], + }, + class_type: "CLIPTextEncode", + }, + negativePrompt: { + inputs: { + text: negativePrompt, + clip: [finalModel, 1], + }, + class_type: "CLIPTextEncode", + }, + vaeDecode: { + inputs: { + samples: ["kSampler", 0], + vae: ["model", 2], + }, + class_type: "VAEDecode", + }, + output: { + inputs: { + filename_prefix: "images/captain", + images: ["vaeDecode", 0], + }, + class_type: "PreviewImage", + }, + }; + if (hasLora) { + workflow.lora = { + inputs: { + lora_name: loraName, + strength_model: loraWeight, + strength_clip: loraWeight, + model: ["model", 0], + clip: ["model", 1], + }, + class_type: "LoraLoader", + }; + } + + return workflow; +} + +export const STORE_CATEGORIES = ["image", "story", "app"] as const; + +export function cropIncompleteImageTag(markdown: string): string { + const imgTagOpen = "!["; + const imgTagClose = "]("; + const imgTagEnd = ")"; + + // Find the last occurrence of an open image tag + const lastOpenTagIndex = markdown.lastIndexOf(imgTagOpen); + if (lastOpenTagIndex === -1) { + return markdown; + } // No image tag found, return as is + + // Find the corresponding closing brackets + const lastCloseTagIndex = markdown.indexOf(imgTagClose, lastOpenTagIndex); + if (lastCloseTagIndex === -1) { + // Incomplete image tag, crop the tail + return markdown.slice(0, Math.max(0, lastOpenTagIndex)); + } + + // Find the end of the image tag + const lastEndTagIndex = markdown.indexOf(imgTagEnd, lastCloseTagIndex); + if (lastEndTagIndex === -1) { + // Incomplete image tag, crop the tail + return markdown.slice(0, Math.max(0, lastOpenTagIndex)); + } + + // If the image tag is complete, return as is + return markdown; +} + +export function tools(appId: string, send?: (action: string, payload: T) => void) { + return { + vectorSearch: tool({ + description: "Search the user's files for matches in the vector store", + parameters: z + .object({ + query: z.string().describe("The search query"), + categories: z + .array(z.enum(STORE_CATEGORIES)) + .describe( + `Categories to search, one or more of: ${STORE_CATEGORIES.join(", ")}` + ), + }) + .required({ query: true }), + async execute({ query, categories }) { + logger.info("Sending Query for RAG:", query); + if (send) { + send("assistant:tool", { tool: "vectorSearch", status: "loading" }); + } + + const vectorStore = VectorStore.getInstance; + + // Prepare array of dataTypes used in the lookup + const must: QdrantSchemas["Filter"]["must"] = [ + { key: "type", match: { any: categories } }, + ]; + + try { + const response = await vectorStore.search(VECTOR_STORE_COLLECTION, query, { + limit: 10, + score_threshold: 0.3, + filter: { + must, + }, + }); + + return { + matches: response.map(entry => ({ + ...entry.payload, + filePath: + entry.payload?.filePath && + cropCaptainData(entry.payload!.filePath as string), + })), + }; + } catch (error) { + logError(error); + const message = `Could not perform the vector search for {query: ${query}, 
categories: ${JSON.stringify(categories)}`; + if (send) { + send("assistant:error", { error: message }); + } + + return { + error: message, + }; + } + }, + }), + textToImage: tool({ + description: "Generate an image for the user (using ComfyUI).", + parameters: z + .object({ + prompt: z + .string() + .describe("The enhanced/optimized prompt for the image to be generated."), + seed: z + .number() + .gt(0) + .lt(2 ** 32) + .default(randomSeed()) + .describe( + `An optional seed to be used for the generation (1 - ${2 ** 32}). You can reuse old seeds too.` + ), + dimensions: z + .object({ + height: z + .number() + .positive() + .gte(640) + .lte(1536) + .int() + .describe("The height of the generated image int(640-1536)."), + width: z + .number() + .positive() + .gte(640) + .lte(1536) + .int() + .describe("The width of the generated image int(640-1536)."), + }) + .required({ height: true, width: true }) + .default({ height: 1024, width: 1024 }) + .describe( + "The dimensions: height and width of the generated image (max area of ~1024*1024)." + ), + }) + .required({ prompt: true }), + + async execute({ prompt, seed, dimensions: { height, width } }) { + const checkpoints = inventoryStore.get( + "stable-diffusion.checkpoints", + [] + ); + const hasSDXL = checkpoints.some( + checkpoint => checkpoint.id === "stabilityai/stable-diffusion-xl-base-1.0" + ); + if (!hasSDXL) { + const error = + "Stable Diffusion XL (SDXL) is not available. Please download it in the Marketplace."; + if (send) { + send("assistant:error", { error }); + } + + return { + error, + }; + } + + const comfyClient = v4(); + if (send) { + send("assistant:tool", { tool: "textToImage", status: "loading" }); + } + + logger.info("Generating Image:", { + height, + width, + seed, + prompt, + }); + + const filePath = await new Promise((resolve, reject) => { + try { + ComfyUI.getInstance.registerClient( + comfyClient, + async (data: ComfyUIUpdate) => { + switch (data.type) { + case "executed": { + const [image] = data.data.output.images; + const filePath = getComfyUIPath( + "temp", + image.subfolder, + image.filename + ); + const destinationPath = await copyWithMetaData({ + source: filePath, + destination: `images/${v4()}.png`, + metadata: { positivePrompt: prompt }, + }); + if (destinationPath) { + resolve(destinationPath); + } else { + reject(new Error("Could not generate the image")); + } + + break; + } + + default: { + break; + } + } + } + ); + ComfyUI.getInstance + .queueWorkflow( + createWorkflow({ + prompt: [ + prompt, + "highres", + "4k resolution", + "best quality", + ].join(", "), + negativePrompt: [ + "worst quality", + "lowres", + "blurry", + "smudge", + "nsfw", + "nude", + ].join(", "), + seed, + height, + width, + }), + comfyClient + ) + .catch(error => { + reject(error); + }); + } catch (error) { + reject(error); + } + }); + ComfyUI.getInstance.unregisterClient(comfyClient); + + return { + prompt, + height, + width, + seed, + filePath: cropCaptainData(filePath), + }; + }, + }), + }; +} + +export function getAssistant(model: string) { + const [modelPrefix] = model.split("-"); + switch (modelPrefix) { + case "gpt": { + return createOpenAI({ apiKey: keyStore.get("openAiApiKey") }); + } + + case "claude": { + return createAnthropic({ + apiKey: keyStore.get("anthropicApiKey"), + }); + } + + case "mistral": { + return createMistral({ + apiKey: keyStore.get("mistralApiKey"), + }); + } + + case "models/gemini": { + return createGoogleGenerativeAI({ + apiKey: keyStore.get("googleGenerativeAiApiKey"), + }); + } + + default: { + return 
null;
+		}
+	}
+}
+
+export function getProvider(model: string) {
+	const [modelPrefix] = model.split("-");
+	switch (modelPrefix) {
+		case "gpt": {
+			return "OpenAI";
+		}
+
+		case "claude": {
+			return "Anthropic";
+		}
+
+		case "mistral": {
+			return "Mistral";
+		}
+
+		case "models/gemini": {
+			return "Google";
+		}
+
+		default: {
+			return "Unknown";
+		}
+	}
+}
+
+export async function streamWithTools<TOOLS extends Record<string, CoreTool>>(
+	assistant: OpenAIProvider | AnthropicProvider | MistralProvider | GoogleGenerativeAIProvider,
+	{
+		history,
+		onStream,
+		tools,
+		model,
+	}: {
+		history: CoreMessage[];
+		onStream(response: string): void;
+		tools: TOOLS;
+		model: string;
+	}
+) {
+	let fullResponse = "";
+
+	try {
+		let result = await streamText({
+			model: assistant(model),
+			system: createSystemPrompt({ model, provider: getProvider(model) }),
+			messages: history,
+			tools,
+		});
+
+		const toolCalls: ToolCallPart[] = [];
+		const toolResults: ToolResultPart[] = [];
+		for await (const delta of result.fullStream) {
+			switch (delta.type) {
+				case "tool-call": {
+					logger.info("toolCall", delta);
+					toolCalls.push(delta);
+
+					break;
+				}
+
+				case "tool-result": {
+					logger.info("toolResult", delta);
+					toolResults.push(delta);
+
+					break;
+				}
+
+				case "text-delta": {
+					fullResponse += delta.textDelta;
+					onStream(fullResponse);
+					break;
+				}
+
+				default: {
+					break;
+				}
+			}
+		}
+
+		logger.info("toolCalls", toolCalls);
+		logger.info("toolResults", toolResults);
+
+		if (toolResults.length > 0) {
+			history.push(
+				{
+					role: "assistant",
+					content: toolCalls,
+				},
+				{
+					role: "tool",
+					content: toolResults,
+				}
+			);
+			result = await streamText({
+				model: assistant(model),
+				system: createSystemPrompt2({ model, provider: getProvider(model) }),
+				messages: history,
+				tools: {} as TOOLS,
+			});
+			for await (const delta of result.textStream) {
+				fullResponse += delta;
+				onStream(cropIncompleteImageTag(fullResponse));
+			}
+		}
+	} catch (error) {
+		logError(error);
+	}
+
+	return fullResponse;
+}
+
 ipcMain.on(
 	APP_MESSAGE_KEY,
 	async (
@@ -293,59 +880,27 @@ ipcMain.on(
 			// Send an input and trigger a chat generation
 			case "assistant:chat": {
 				try {
-					let assistant:
-						| OpenAIProvider
-						| AnthropicProvider
-						| GoogleGenerativeAIProvider
-						| MistralProvider;
-
 					const userInput = message.payload.input!;
 					const model = message.payload.model ?? "gpt-3.5-turbo";
 					const dataTypes = message.payload.dataTypes ?? availableDataTypes;
-					const includedDataTypes = dataTypes
-						.filter(dataType => dataType.active)
-						.map(dataType => dataType.type);
-
-					const [modelPrefix] = model.split("-");
-					switch (modelPrefix) {
-						case "gpt": {
-							assistant = createOpenAI({ apiKey: keyStore.get("openAiApiKey") });
-							break;
-						}
-
-						case "claude": {
-							assistant = createAnthropic({
-								apiKey: keyStore.get("anthropicApiKey"),
-							});
-							break;
-						}
+					const assistant = getAssistant(model);
 
-						case "mistral": {
-							assistant = createMistral({
-								apiKey: keyStore.get("mistralApiKey"),
-							});
-							break;
-						}
-
-						case "models/gemini": {
-							assistant = createGoogleGenerativeAI({
-								apiKey: keyStore.get("googleGenerativeAiApiKey"),
-							});
-							break;
-						}
-
-						default: {
-							return;
-						}
+					if (!assistant) {
+						event.sender.send(channel, {
+							action: "assistant:error",
+							payload: { error: `Failed creating assistant for model: "${model}"` },
+						});
+						return;
 					}
 
-					const lastMessage = { role: "user" as const, content: userInput };
 					if (chat.messages.length === 0) {
 						// LastMessage is first message. 
Let AI create a label / title chat.label = await chatCompletion({ model: assistant(model), system: createChatLabelPrompt(), - messages: [lastMessage], + messages: [ + { role: "user", content: `Create a label for: ${userInput}` }, + ], }); event.sender.send(channel, { action: "assistant:nameSuggestion", @@ -355,125 +910,29 @@ ipcMain.on( }); } - let optimizedContext = "\n\n"; - if (includedDataTypes.length > 0) { - logger.info(`Assistant uses data: ${includedDataTypes.join(", ")}`); - - const minorContent = [ - ...chat.messages.slice(-2), - { - role: "user" as const, - content: userInput + " => ONLY ANSWER WITH A SEARCH QUERY!!!", - }, - ].filter(Boolean); - if (minorContent[0] && minorContent[0].role !== "user") { - minorContent.shift(); - } - - const query = await chatCompletion({ - model: assistant(model), - system: createVectorSearchPrompt({ dataTypes: includedDataTypes }), - messages: minorContent, - }); - - logger.info("Sending Query for RAG:", query); - - const vectorStore = VectorStore.getInstance; - - // Prepare array of dataTypes used in the lookup - const must: QdrantSchemas["Filter"]["must"] = [ - { key: "type", match: { any: includedDataTypes } }, - ]; - - const response = await vectorStore.search(VECTOR_STORE_COLLECTION, query, { - limit: 10, - score_threshold: 0.3, - filter: { - must, - }, - }); - - const context = await Promise.all( - response.map(async entry => { - const entryPayload = (entry as unknown as VectorStoreDocument) - .payload!; - const { filePath } = entryPayload; - // - const cleanFilePath = filePath && cropCaptainData(filePath); - - if (entry.payload!.fileType === "png") { - return { - id: entryPayload.id, - type: entryPayload.type, - score: entry.score, - filePath: cleanFilePath, - content: entryPayload.content, - label: entryPayload.label, - }; - } - - if (entry.payload!.fileType === "md") { - return { - id: entryPayload.id, - type: entryPayload.type, - score: entry.score, - filePath: cleanFilePath, - content: entryPayload.content, - label: entryPayload.label, - }; - } - - return { - id: entryPayload.id, - type: entryPayload.type, - score: entry.score, - filePath: cleanFilePath, - content: entryPayload.content, - label: entryPayload.label, - }; - }) - ); - const files: Record = {}; - for (const entry of context.filter(Boolean)) { - if (files[entry.id]) { - files[entry.id].content += `...\n...${entry.content}`; - } else { - files[entry.id] = entry; - } - } - - const fileArray = Object.values(files); - optimizedContext = optimizeContext(fileArray); - } else { - logger.info("Assistant NOT sing data"); - } - // Only send the last N messages const limit = message.payload.maxHistory ?? 
10; const limitedHistory = chat.messages.slice(-limit); - const messageWithContext = { - role: "user" as const, - content: `${optimizedContext}\n${lastMessage.content}`, - }; - limitedHistory.push(messageWithContext); + const lastMessage = { role: "user" as const, content: userInput }; - const result = await streamText({ - model: assistant(model), - system: createSystemPrompt(), - messages: limitedHistory, - }); + limitedHistory.push(lastMessage); - let fullResponse = ""; - for await (const delta of result.textStream) { - fullResponse += delta; - event.sender.send(channel, { - action: "assistant:response", - payload: { - output: extendCaptainData(fullResponse), - done: false, - }, - }); - } + const fullResponse = await streamWithTools(assistant, { + history: limitedHistory, + model, + tools: tools(appId, (action, payload) => { + event.sender.send(channel, { action, payload }); + }), + onStream(fullResponse: string) { + event.sender.send(channel, { + action: "assistant:response", + payload: { + output: extendCaptainData(fullResponse), + done: false, + }, + }); + }, + }); chat.model = model; chat.dataTypes = dataTypes; diff --git a/src/electron/helpers/png.ts b/src/electron/helpers/png.ts new file mode 100644 index 000000000..fb8ac80ee --- /dev/null +++ b/src/electron/helpers/png.ts @@ -0,0 +1,105 @@ +import fsp from "node:fs/promises"; +import path from "node:path"; + +import type { ComfyUIFileMetaData } from "@captn/utils"; +import { execa } from "execa"; +import { v4 } from "uuid"; + +import { logError, logger } from "@/electron/services/logger"; +import { VectorStore } from "@/electron/services/vector-store"; +import { createDirectory } from "@/electron/utils/fs"; +import { getCaptainData, getDirectory, getPythonEmbedded } from "@/electron/utils/path-helpers"; +import { VECTOR_STORE_COLLECTION } from "@/shared/constants"; +import { cleanPath } from "@/shared/string"; + +export async function writePngMetaData(filePath: string, metadata: ComfyUIFileMetaData) { + const pythonBinaryPath = getPythonEmbedded("python.exe"); + const scriptPath = getDirectory("python/png/main.py"); + const scriptArguments = ["--image_path", filePath]; + + if (metadata.positivePrompt) { + scriptArguments.push("--positive_prompt", metadata.positivePrompt); + } + + if (metadata.caption) { + scriptArguments.push("--caption", metadata.caption); + } + + if (metadata.description) { + scriptArguments.push("--description", metadata.description); + } + + if (metadata.negativePrompt) { + scriptArguments.push("--negative_prompt", metadata.negativePrompt); + } + + if (metadata.tags) { + scriptArguments.push("--tags", metadata.tags); + } + + if (metadata.prompt) { + scriptArguments.push("--prompt", metadata.prompt); + } + + return execa(pythonBinaryPath, ["-u", scriptPath, ...scriptArguments]); +} + +export async function copyWithMetaData({ + source, + destination, + metadata, +}: { + source: string; + destination: string; + metadata?: ComfyUIFileMetaData; +}) { + const filePath = getCaptainData("files", cleanPath(destination)); + const { dir: directory, ext } = path.parse(filePath); + + try { + createDirectory(directory); + // Copy the file + await fsp.copyFile(source, filePath); + } catch (error) { + logError(error, "createDirectory:copy-image-with-metadata"); + } + + logger.info(`copy image from ${source} to ${filePath}`); + const id = v4(); + + const fileName = `${id}${ext}`; + const destinationPath = path.join(directory, fileName); + await fsp.rename(filePath, destinationPath); + logger.info(`renamed image from 
${filePath} to ${destinationPath}`);
+	if (metadata?.positivePrompt) {
+		logger.info(
+			`add metadata to image at ${destinationPath} with positivePrompt: ${metadata.positivePrompt}`
+		);
+		try {
+			await writePngMetaData(destinationPath, metadata);
+		} catch (error) {
+			logError(error, "writePngMetaData:copy-image-with-metadata");
+		}
+
+		try {
+			const vectorStore = VectorStore.getInstance;
+			await vectorStore.upsert(VECTOR_STORE_COLLECTION, [
+				{
+					content: metadata.positivePrompt,
+					payload: {
+						id: v4(),
+						label: fileName,
+						type: "image",
+						fileType: ext.replace(".", ""),
+						language: "en",
+						filePath: destinationPath,
+						content: metadata.positivePrompt,
+					},
+				},
+			]);
+			return destinationPath;
+		} catch (error) {
+			logError(error, "vectorStore.upsert:copy-image-with-metadata");
+		}
+	}
+}
diff --git a/src/electron/helpers/services/comfyui.ts b/src/electron/helpers/services/comfyui.ts
index 7f3d1ce2e..dcde0d7e0 100644
--- a/src/electron/helpers/services/comfyui.ts
+++ b/src/electron/helpers/services/comfyui.ts
@@ -33,7 +33,7 @@ type Device = {
 	torch_vram_free: number;
 };
 
-export type WebSocketMessageHandler = (data: string) => void;
+export type WebSocketMessageHandler<T = string> = (data: T) => void;
 
 export enum ComfyUIStatus {
 	NOT_RUNNING = "NOT_RUNNING",
@@ -227,7 +227,7 @@ export class ComfyUI {
 	 * @param {string} clientId - The unique identifier for the client.
 	 * @param {WebSocketMessageHandler} handleMessage - The function to call when a message is received on the WebSocket.
 	 */
-	public setupWebSocket(clientId: string, handleMessage: WebSocketMessageHandler) {
+	public setupWebSocket<T>(clientId: string, handleMessage: WebSocketMessageHandler<T>) {
 		const webSocket = new WebSocket(`ws://${this.url}/ws?clientId=${clientId}`);
 
 		webSocket.on("open", () => {
@@ -327,7 +327,7 @@ export class ComfyUI {
 	 * @param {string} clientId - The unique identifier for the client.
 	 * @param {WebSocketMessageHandler} handleMessage - The function to handle incoming WebSocket messages.
 	 */
-	public registerClient(clientId: string, handleMessage: WebSocketMessageHandler) {
+	public registerClient<T>(clientId: string, handleMessage: WebSocketMessageHandler<T>) {
 		if (this.clients.has(clientId)) {
 			logger.info(`Client ${clientId} is already registered.`);
 		} else {