Skip to content

Commit

Permalink
Fix assistant startup process (#36)
Browse files — browse the repository at this point in the history
  • Loading branch information
bcherry authored Sep 12, 2024
1 parent 4aeaae9 commit ccff5ce
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 55 deletions.
6 changes: 6 additions & 0 deletions .changeset/dry-rules-relax.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@livekit/agents-plugin-openai": patch
"livekit-agents-examples": patch
---

Fix assistant startup process
2 changes: 1 addition & 1 deletion agents/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"typescript": "^5.0.0"
},
"dependencies": {
"@livekit/rtc-node": "^0.8.0",
"@livekit/rtc-node": "^0.8.1",
"@livekit/protocol": "^1.21.0",
"commander": "^12.0.0",
"livekit-server-sdk": "^2.6.1",
Expand Down
2 changes: 1 addition & 1 deletion examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
"@livekit/agents": "workspace:*",
"@livekit/agents-plugin-elevenlabs": "workspace:*",
"@livekit/agents-plugin-openai": "workspace:*",
"@livekit/rtc-node": "^0.8.0"
"@livekit/rtc-node": "^0.8.1"
}
}
13 changes: 5 additions & 8 deletions examples/src/minimal_assistant.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,11 @@ export default defineAgent({

console.log('starting assistant example agent');

// FIXME: for some reason the remoteParticipants are not being populated at connection time nor calling onParticipantConnected
setTimeout(() => {
const assistant = new VoiceAssistant({
...defaultInferenceConfig,
system_message: 'You talk unprompted.',
});
assistant.start(ctx.room);
}, 500);
const assistant = new VoiceAssistant({
...defaultInferenceConfig,
system_message: 'You talk unprompted.',
});
assistant.start(ctx.room);
},
});

Expand Down
2 changes: 1 addition & 1 deletion plugins/openai/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
},
"dependencies": {
"@livekit/agents": "workspace:*",
"@livekit/rtc-node": "^0.8.0",
"@livekit/rtc-node": "^0.8.1",
"ws": "^8.16.0"
}
}
42 changes: 27 additions & 15 deletions plugins/openai/src/voice_assistant/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@
import { AudioByteStream } from '@livekit/agents';
import { findMicroTrackId } from '@livekit/agents';
import { log } from '@livekit/agents';
import type { AudioFrameEvent, RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node';
import type {
AudioFrameEvent,
LocalTrackPublication,
RemoteAudioTrack,
RemoteParticipant,
Room,
} from '@livekit/rtc-node';
import {
AudioFrame,
AudioSource,
Expand Down Expand Up @@ -59,13 +65,13 @@ export class VoiceAssistant {
private ws: WebSocket | null = null;
private connected: boolean = false;
private participant: RemoteParticipant | string | null = null;
private localTrack: LocalAudioTrack | null = null;
private agentPublication: LocalTrackPublication | null = null;
private localTrackSid: string | null = null;
private localSource: AudioSource | null = null;
private pendingMessages: Map<string, string> = new Map();

start(room: Room, participant: RemoteParticipant | string | null = null): Promise<void> {
return new Promise((resolve, reject) => {
return new Promise(async (resolve, reject) => {
if (this.ws !== null) {
log().warn('VoiceAssistant already started');
resolve();
Expand All @@ -79,6 +85,12 @@ export class VoiceAssistant {

this.linkParticipant(participant.identity);
});
room.on(RoomEvent.TrackPublished, () => {
this.subscribeToMicrophone();
});
room.on(RoomEvent.TrackSubscribed, () => {
this.subscribeToMicrophone();
});

this.room = room;
this.participant = participant;
Expand All @@ -98,10 +110,17 @@ export class VoiceAssistant {
}

this.localSource = new AudioSource(proto.SAMPLE_RATE, proto.NUM_CHANNELS);
this.localTrack = LocalAudioTrack.createAudioTrack('assistant_voice', this.localSource);
const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.localSource);
const options = new TrackPublishOptions();
options.source = TrackSource.SOURCE_MICROPHONE;
room.localParticipant?.publishTrack(this.localTrack, options);
this.agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null;
if (!this.agentPublication) {
log().error('Failed to publish track');
reject(new Error('Failed to publish track'));
return;
}

await this.agentPublication.waitForSubscription();

this.ws = new WebSocket(proto.API_URL, {
headers: {
Expand Down Expand Up @@ -142,10 +161,7 @@ export class VoiceAssistant {
const truncatedDataPartial = command['data']
? { data: (command['data'] as string).slice(0, 30) + '…' }
: {};
log().debug('->', {
...command,
...truncatedDataPartial,
});
log().debug(`-> ${JSON.stringify({ ...command, ...truncatedDataPartial })}`);
}
this.ws.send(JSON.stringify(command));
}
Expand All @@ -154,11 +170,7 @@ export class VoiceAssistant {
const truncatedDataPartial = event['data']
? { data: (event['data'] as string).slice(0, 30) + '…' }
: {};
log().debug('<-', {
...event,
...truncatedDataPartial,
});

log().debug(`<- ${JSON.stringify({ ...event, ...truncatedDataPartial })}`);
switch (event.event) {
case proto.ServerEvent.START_SESSION:
break;
Expand All @@ -182,7 +194,7 @@ export class VoiceAssistant {
this.handleInputTranscribed(event);
break;
default:
log().warn('Unknown server event:', event);
log().warn(`Unknown server event: ${JSON.stringify(event)}`);
}
}

Expand Down
58 changes: 29 additions & 29 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit ccff5ce

Please sign in to comment.