Skip to content

Commit

Permalink
Hotfix protocol for breaking changes (#48)
Browse files Browse the repository at this point in the history
  • Loading branch information
bcherry authored Sep 17, 2024
1 parent 5c320c8 commit 7940a23
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 85 deletions.
84 changes: 43 additions & 41 deletions plugins/openai/src/voice_assistant/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ export class VoiceAssistant {
this.ws.onopen = () => {
this.connected = true;
this.sendClientCommand({
event: proto.ClientEventType.SET_INFERENCE_CONFIG,
event: proto.ClientEventType.UPDATE_SESSION_CONFIG,
...this.options.inferenceConfig,
});
resolve();
Expand All @@ -169,15 +169,17 @@ export class VoiceAssistant {

addUserMessage(text: string, generate: boolean = true): void {
this.sendClientCommand({
event: proto.ClientEventType.ADD_ITEM,
event: proto.ClientEventType.ADD_MESSAGE,
type: 'message',
role: 'user',
content: [
{
type: 'text',
text: text,
},
],
message: {
role: 'user',
content: [
{
type: 'text',
text: text,
},
],
},
});
if (generate) {
this.sendClientCommand({
Expand Down Expand Up @@ -240,17 +242,20 @@ export class VoiceAssistant {
case proto.ServerEventType.START_SESSION:
this.setState(proto.State.LISTENING);
break;
case proto.ServerEventType.ADD_ITEM:
this.handleAddItem(event);
case proto.ServerEventType.ADD_MESSAGE:
this.handleAddMessage(event);
break;
case proto.ServerEventType.ADD_CONTENT:
this.handleAddContent(event);
break;
case proto.ServerEventType.ITEM_ADDED:
this.handleItemAdded(event);
case proto.ServerEventType.MESSAGE_ADDED:
this.handleMessageAdded(event);
break;
case proto.ServerEventType.GENERATION_FINISHED:
this.handleGenerationFinished(event);
break;
case proto.ServerEventType.TURN_FINISHED:
this.handleTurnFinished(event);
case proto.ServerEventType.GENERATION_CANCELED:
this.handleGenerationCanceled();
break;
case proto.ServerEventType.VAD_SPEECH_STARTED:
this.handleVadSpeechStarted(event);
Expand All @@ -260,9 +265,6 @@ export class VoiceAssistant {
case proto.ServerEventType.INPUT_TRANSCRIBED:
this.handleInputTranscribed(event);
break;
case proto.ServerEventType.MODEL_LISTENING:
this.handleModelListening();
break;
default:
this.logger.warn(`Unknown server event: ${JSON.stringify(event)}`);
}
Expand All @@ -282,11 +284,11 @@ export class VoiceAssistant {
this.room as Room,
this.room?.localParticipant?.identity,
trackSid,
event.item_id,
event.message_id,
);

this.setState(proto.State.SPEAKING);
this.playingHandle = this.agentPlayout.play(event.item_id as string, trFwd);
this.playingHandle = this.agentPlayout.play(event.message_id as string, trFwd);
this.playingHandle.on('complete', () => {
this.setState(proto.State.LISTENING);
});
Expand All @@ -304,22 +306,22 @@ export class VoiceAssistant {
}
}

private handleAddItem(event: proto.ServerEvent): void {
if (event.event !== proto.ServerEventType.ADD_ITEM) return;
private handleAddMessage(event: proto.ServerEvent): void {
if (event.event !== proto.ServerEventType.ADD_MESSAGE) return;
if (event.type === 'tool_call') {
this.setState(proto.State.THINKING);
this.thinking = true;
}
}

private handleItemAdded(event: proto.ServerEvent): void {
if (event.event !== proto.ServerEventType.ITEM_ADDED) return;
private handleMessageAdded(event: proto.ServerEvent): void {
if (event.event !== proto.ServerEventType.MESSAGE_ADDED) return;
switch (event.type) {
case 'tool_call': {
this.options.functions[event.name].execute(event.arguments).then((content) => {
this.thinking = false;
this.sendClientCommand({
event: proto.ClientEventType.ADD_ITEM,
event: proto.ClientEventType.ADD_MESSAGE,
type: 'tool_response',
tool_call_id: event.tool_call_id as string,
content: JSON.stringify(content),
Expand All @@ -338,7 +340,7 @@ export class VoiceAssistant {

private handleInputTranscribed(event: proto.ServerEvent): void {
if (event.event !== proto.ServerEventType.INPUT_TRANSCRIBED) return;
const itemId = event.item_id as string;
const itemId = event.message_id as string;
const transcription = event.transcript as string;
if (!itemId || transcription === undefined) {
this.logger.error('Item ID or transcription not set');
Expand All @@ -353,22 +355,9 @@ export class VoiceAssistant {
}
}

/**
 * Handles the server's MODEL_LISTENING event (dispatched from the server-event switch).
 *
 * If a playout is still in progress, it is interrupted and a TRUNCATE_CONTENT command
 * is sent for the message that was playing. When there is no playout handle, or the
 * handle is already done, this does nothing.
 */
private handleModelListening(): void {
// Only act when a playout exists and has not already completed.
if (this.playingHandle && !this.playingHandle.done) {
this.playingHandle.interrupt();
// Report the published text chars and played audio samples read from the handle.
// NOTE(review): presumably lets the server trim the message to what was actually
// delivered before the interruption — confirm against the API docs.
this.sendClientCommand({
event: proto.ClientEventType.TRUNCATE_CONTENT,
message_id: this.playingHandle.messageId,
index: 0, // ignored for now (see OAI docs)
text_chars: this.playingHandle.publishedTextChars(),
audio_samples: this.playingHandle.playedAudioSamples,
});
}
}

private handleVadSpeechStarted(event: proto.ServerEvent): void {
if (event.event !== proto.ServerEventType.VAD_SPEECH_STARTED) return;
const itemId = event.item_id;
const itemId = event.message_id;
const participantIdentity = this.linkedParticipant?.identity;
const trackSid = this.subscribedTrack?.sid;
if (participantIdentity && trackSid && itemId) {
Expand All @@ -378,7 +367,7 @@ export class VoiceAssistant {
}
}

private handleTurnFinished(event: Record<string, unknown>): void {
private handleGenerationFinished(event: Record<string, unknown>): void {
if (event.reason !== 'interrupt' && event.reason !== 'stop') {
log().warn(`assistant turn finished unexpectedly reason ${event.reason}`);
}
Expand All @@ -388,6 +377,19 @@ export class VoiceAssistant {
}
}

/**
 * Handles the server's GENERATION_CANCELED event (dispatched from the server-event switch).
 *
 * If a playout is still in progress, it is interrupted and a TRUNCATE_CONTENT command
 * is sent for the message that was playing. When there is no playout handle, or the
 * handle is already done, this does nothing.
 */
private handleGenerationCanceled(): void {
// Only act when a playout exists and has not already completed.
if (this.playingHandle && !this.playingHandle.done) {
this.playingHandle.interrupt();
// Report the published text chars and played audio samples read from the handle.
// NOTE(review): presumably lets the server trim the message to what was actually
// delivered before the cancellation — confirm against the API docs.
this.sendClientCommand({
event: proto.ClientEventType.TRUNCATE_CONTENT,
message_id: this.playingHandle.messageId,
index: 0, // ignored for now (see OAI docs)
text_chars: this.playingHandle.publishedTextChars(),
audio_samples: this.playingHandle.playedAudioSamples,
});
}
}

private linkParticipant(participantIdentity: string): void {
if (!this.room) {
this.logger.error('Room is not set');
Expand Down
87 changes: 43 additions & 44 deletions plugins/openai/src/voice_assistant/proto.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ export enum AudioFormat {
export enum ServerEventType {
START_SESSION = 'start_session',
ERROR = 'error',
ADD_ITEM = 'add_item',
ADD_MESSAGE = 'add_message',
ADD_CONTENT = 'add_content',
ITEM_ADDED = 'item_added',
TURN_FINISHED = 'turn_finished',
MESSAGE_ADDED = 'message_added',
GENERATION_FINISHED = 'generation_finished',
GENERATION_CANCELED = 'generation_canceled',
VAD_SPEECH_STARTED = 'vad_speech_started',
VAD_SPEECH_STOPPED = 'vad_speech_stopped',
INPUT_TRANSCRIBED = 'input_transcribed',
MODEL_LISTENING = 'model_listening',
}

export type ServerEvent =
Expand All @@ -43,7 +43,7 @@ export type ServerEvent =
error: string;
}
| ({
event: ServerEventType.ADD_ITEM;
event: ServerEventType.ADD_MESSAGE;
id: string;
previous_id: string;
conversation_label: string;
Expand All @@ -55,12 +55,12 @@ export type ServerEvent =
))
| {
event: ServerEventType.ADD_CONTENT;
item_id: string;
message_id: string;
type: 'text' | 'audio' | 'tool_call_arguments';
data: string; // text or base64 audio or JSON stringified object
}
| ({
event: ServerEventType.ITEM_ADDED;
event: ServerEventType.MESSAGE_ADDED;
id: string;
previous_id: string;
conversation_label: string;
Expand All @@ -74,30 +74,29 @@ export type ServerEvent =
}
))
| {
event: ServerEventType.TURN_FINISHED;
reason: 'stop' | 'max_tokens' | 'content_filter' | 'interrupt';
event: ServerEventType.GENERATION_FINISHED;
reason: 'stop' | 'max_tokens' | 'content_filter' | 'interrupt'; // FIXME: not sure these are all right
conversation_label: string;
item_ids: string[];
message_ids: string[];
}
| {
event: ServerEventType.GENERATION_CANCELED;
}
| {
event: ServerEventType.VAD_SPEECH_STARTED | ServerEventType.VAD_SPEECH_STOPPED;
sample_index: number;
item_id: string;
message_id: string;
}
| {
event: ServerEventType.INPUT_TRANSCRIBED;
item_id: string;
message_id: string;
transcript: string;
}
| {
event: ServerEventType.MODEL_LISTENING;
item_id: string;
};

export enum ClientEventType {
SET_INFERENCE_CONFIG = 'set_inference_config',
ADD_ITEM = 'add_item',
DELETE_ITEM = 'delete_item',
UPDATE_SESSION_CONFIG = 'update_session_config',
ADD_MESSAGE = 'add_message',
DELETE_MESSAGE = 'delete_message',
ADD_USER_AUDIO = 'add_user_audio',
COMMIT_PENDING_AUDIO = 'commit_pending_audio',
CLIENT_TURN_FINISHED = 'client_turn_finished',
Expand All @@ -112,38 +111,38 @@ export enum ClientEventType {

export type ClientEvent =
| ({
event: ClientEventType.SET_INFERENCE_CONFIG;
event: ClientEventType.UPDATE_SESSION_CONFIG;
} & InferenceConfig)
| ({
event: ClientEventType.ADD_ITEM;
event: ClientEventType.ADD_MESSAGE;
// id, previous_id, conversation_label are unused by us
} & (
| ({
| {
type: 'message';
} & (
| {
role: 'user' | 'assistant' | 'system';
content: [
| {
message:
| {
role: 'user' | 'assistant' | 'system';
content: [
{
type: 'text';
text: string;
}
| {
type: 'audio';
audio: string; // base64 encoded buffer
},
];
}
| {
role: 'assistant' | 'system';
content: [
{
type: 'text';
text: string;
},
];
}
))
];
}
| {
role: 'user';
content: [
| {
type: 'text';
text: string;
}
| {
type: 'audio';
audio: string; // base64 encoded buffer
},
];
};
}
| {
type: 'tool_response';
tool_call_id: string;
Expand All @@ -156,7 +155,7 @@ export type ClientEvent =
}
))
| {
event: ClientEventType.DELETE_ITEM;
event: ClientEventType.DELETE_MESSAGE;
id: string;
conversation_label?: string; // defaults to 'default'
}
Expand Down

0 comments on commit 7940a23

Please sign in to comment.