Unable to consistently save the conversation threads on messageDone when using AssistantResponse. #3234
Unanswered
gablabelle
asked this question in
Help
Replies: 1 comment
-
OK, so as a solution I had to implement my own modified version of `AssistantResponse`. Basically I'm exposing additional callbacks (`onAnnotation` and `onRunStreamCompleted`) and awaiting them before the stream closes:

import { AssistantMessage, DataMessage, formatStreamPart } from "ai";
import type { AssistantStream, OpenAI, Run } from "@repo/openai";
/**
Identifies the conversation context for a response. You pass the thread and
the latest message into the `AssistantResponse` wrapper below; they are also
forwarded to the client as the first stream part (`assistant_control_data`).
*/
type AssistantResponseSettings = {
  /**
  The thread ID that the response is associated with.
  */
  threadId: string;
  /**
  The ID of the latest message that the response is associated with.
  */
  messageId: string;
};
/**
The `process` parameter is a callback in which you can run the assistant on
threads, and send messages and data messages to the client.
*/
type AssistantResponseCallback = (options: {
  /**
  @deprecated use variable from outer scope instead.
  */
  threadId: string;
  /**
  @deprecated use variable from outer scope instead.
  */
  messageId: string;
  /**
  Forwards an assistant message (non-streaming) to the client.
  */
  sendMessage: (message: AssistantMessage) => void;
  /**
  Send a data message to the client. You can use this to provide information
  for rendering custom UIs while the assistant is processing the thread.
  */
  sendDataMessage: (message: DataMessage) => void;
  /**
  Forwards the assistant response stream to the client. Returns the `Run`
  object after it completes, or when it requires an action.
  */
  forwardStream: (stream: AssistantStream) => Promise<Run | undefined>;
  /**
  Registers a callback invoked once per file-citation annotation. It receives
  a running annotation index and the annotation (filename, file ID, character
  span) and returns the replacement text spliced into the streamed message.
  */
  onAnnotation: (
    fn: (
      index: number,
      annotations: MessageAnnotation
    ) => Promise<{ newText: string }>
  ) => void;
  /**
  Registers a callback that is awaited with the completed message before the
  response stream is closed — e.g. to persist the conversation thread without
  the API call ending first.
  */
  onRunStreamCompleted: (
    fn: (message: OpenAI.Beta.Threads.Messages.Message) => Promise<void>
  ) => void;
}) => Promise<void>;
/**
A file-citation annotation extracted from an assistant message.
*/
export interface MessageAnnotation {
  /** Filename of the cited file (populated from the retrieved file object). */
  text: string;
  /** ID of the cited file. */
  file_id: string;
  /** Start of the citation-marker span in the message text. */
  start_index: number;
  /** End of the citation-marker span in the message text. */
  end_index: number;
}
// Matches OpenAI citation markers such as 【12:0†source】 so they can be
// stripped from streamed text.
const referenceRegex = /【[^】]*】/g;
/**
The `AssistantResponse` allows you to send a stream of assistant update to `useAssistant`.
It is designed to facilitate streaming assistant responses to the `useAssistant` hook.
It receives an assistant thread and a current message, and can send messages and data messages to the client.
*/
export function AssistantResponseWithAnnotations(
{ threadId, messageId }: AssistantResponseSettings,
process: AssistantResponseCallback,
openAiClient: OpenAI
): Response {
let annotationIndex = 0;
let messageCompleted: OpenAI.Beta.Threads.Messages.Message | undefined;
let citedFiles: { [fileId: string]: OpenAI.Files.FileObject } = {};
const processedFiles = new Set<string>();
// Function to process annotations
const processAnnotations = async (
text: string,
annotations: any[],
annotationCallbackFn:
| ((
index: number,
annotation: MessageAnnotation
) => Promise<{
newText: string;
}>)
| undefined
): Promise<string> => {
let processedText = text;
// Sort annotations in reverse order (end to start)
annotations.sort((a, b) => b.start_index - a.start_index);
for (const annotation of annotations) {
if (
annotation.type === "file_citation" &&
annotation.file_citation?.file_id
) {
const fileId = annotation.file_citation.file_id;
// if (processedFiles.has(fileId)) {
// // Remove duplicate annotations
// processedText =
// processedText.slice(0, annotation.start_index) +
// processedText.slice(annotation.end_index);
// continue;
// }
const citedFile = citedFiles[fileId];
if (citedFile && annotationCallbackFn) {
const { newText } = await annotationCallbackFn(annotationIndex, {
text: citedFile.filename,
file_id: citedFile.id,
start_index: annotation.start_index,
end_index: annotation.end_index,
});
// Replace the original annotation marker with the new reference format
processedText =
processedText.slice(0, annotation.start_index) +
newText +
processedText.slice(annotation.end_index);
annotationIndex++;
// Mark this file as processed
processedFiles.add(fileId);
}
}
}
// Remove any remaining annotation markers
processedText = processedText.replace(/【[^】]*】/g, "");
return processedText;
};
const stream = new ReadableStream({
async start(controller) {
const textEncoder = new TextEncoder();
const sendMessage = (message: AssistantMessage) => {
controller.enqueue(
textEncoder.encode(formatStreamPart("assistant_message", message))
);
};
const sendDataMessage = (message: DataMessage) => {
controller.enqueue(
textEncoder.encode(formatStreamPart("data_message", message))
);
};
const sendError = (errorMessage: string) => {
controller.enqueue(
textEncoder.encode(formatStreamPart("error", errorMessage))
);
};
let annotationCallbackFn:
| undefined
| ((
index: number,
annotations: MessageAnnotation
) => Promise<{
newText: string;
}>);
let onRunStreamCallbackFn:
| ((message: OpenAI.Beta.Threads.Messages.Message) => Promise<void>)
| undefined;
const forwardStream = async (stream: AssistantStream) => {
let result: Run | undefined = undefined;
let fullMessage = "";
let lastSentLength = 0;
for await (const value of stream) {
switch (value.event) {
case "thread.message.created": {
controller.enqueue(
textEncoder.encode(
formatStreamPart("assistant_message", {
id: value.data.id,
role: "assistant",
content: [{ type: "text", text: { value: "" } }],
})
)
);
break;
}
case "thread.message.delta": {
const content = value.data.delta.content?.[0];
if (
content &&
content.type == "text" &&
content.text &&
content.text.value
) {
let text = content.text.value;
text = text.replace(referenceRegex, "");
if (
content.text.annotations &&
Array.isArray(content.text.annotations)
) {
// Populate citedFiles object
for (const annotation of content.text.annotations) {
if (
annotation.type === "file_citation" &&
annotation.file_citation?.file_id &&
!citedFiles[annotation.file_citation.file_id]
) {
try {
const file = await openAiClient.files.retrieve(
annotation.file_citation.file_id
);
citedFiles[annotation.file_citation.file_id] = file;
} catch (error) {
console.error("Error retrieving file:", error);
}
}
}
text = await processAnnotations(
text,
content.text.annotations,
annotationCallbackFn
);
}
fullMessage += text;
// Send only the new part of the message
const newContent = fullMessage.slice(lastSentLength);
if (newContent) {
controller.enqueue(
textEncoder.encode(formatStreamPart("text", newContent))
);
lastSentLength = fullMessage.length;
}
}
break;
}
case "thread.message.completed": {
// Store value.data.content so it can be used later
messageCompleted = value.data;
break;
}
case "thread.run.completed":
case "thread.run.requires_action": {
result = value.data;
break;
}
}
}
return result;
};
// send the threadId and messageId as the first message:
controller.enqueue(
textEncoder.encode(
formatStreamPart("assistant_control_data", {
threadId,
messageId,
})
)
);
try {
await process({
threadId,
messageId,
sendMessage,
sendDataMessage,
forwardStream,
onAnnotation: (fn) => {
annotationCallbackFn = fn;
},
onRunStreamCompleted: (fn) => {
onRunStreamCallbackFn = fn;
},
});
} catch (error) {
sendError((error as any).message ?? `${error}`);
} finally {
if (onRunStreamCallbackFn && messageCompleted) {
try {
// Apply annotations to messageCompleted before passing it to onRunStreamCallbackFn
if (messageCompleted.content[0].type === "text") {
let fullMessage = messageCompleted.content[0].text.value;
const annotations =
messageCompleted.content[0].text.annotations || [];
fullMessage = await processAnnotations(
fullMessage,
annotations,
annotationCallbackFn
);
messageCompleted.content[0].text.value = fullMessage;
}
await onRunStreamCallbackFn(messageCompleted);
} catch (callbackError) {
if (callbackError instanceof Error) {
console.error("Error in onRunStreamCallbackFn:", callbackError);
sendError(
`Error in onRunStreamCallbackFn: ${callbackError.message}`
);
} else {
sendError(`Error in onRunStreamCallbackFn: ${callbackError}`);
}
}
}
// Close the stream after ensuring the callback has been invoked
controller.close();
}
},
pull(controller) {},
cancel() {},
});
return new Response(stream, {
status: 200,
headers: {
"Content-Type": "text/plain; charset=utf-8",
},
});
} |
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
Hello,
When using AssistantResponse from a Next.js API route handler, the API call ends before it could properly save the conversation.
Since they are non-blocking operations, whether I trigger `cacheAssistantsThread` in the `messageDone` callback or after `finalMessages` has run, sometimes it has time to finish and sometimes it doesn't. Any suggestions on what I could do to ensure `cacheAssistantsThread` has finished running before the API call ends? I'd like to keep the streaming if possible since it greatly improves the user experience.
And here is what `cacheAssistantsThread` does:

Beta Was this translation helpful? Give feedback.
All reactions