From ccff5ce34d071a0fb449da5ce77938e346679b1b Mon Sep 17 00:00:00 2001 From: Ben Cherry Date: Thu, 12 Sep 2024 15:48:08 -0700 Subject: [PATCH] Fix assistant startup process (#36) --- .changeset/dry-rules-relax.md | 6 +++ agents/package.json | 2 +- examples/package.json | 2 +- examples/src/minimal_assistant.ts | 13 ++--- plugins/openai/package.json | 2 +- plugins/openai/src/voice_assistant/index.ts | 42 +++++++++------ pnpm-lock.yaml | 58 ++++++++++----------- 7 files changed, 70 insertions(+), 55 deletions(-) create mode 100644 .changeset/dry-rules-relax.md diff --git a/.changeset/dry-rules-relax.md b/.changeset/dry-rules-relax.md new file mode 100644 index 0000000..7aeda98 --- /dev/null +++ b/.changeset/dry-rules-relax.md @@ -0,0 +1,6 @@ +--- +"@livekit/agents-plugin-openai": patch +"livekit-agents-examples": patch +--- + +Fix assistant startup process diff --git a/agents/package.json b/agents/package.json index 8c8ad7e..f4a94c7 100644 --- a/agents/package.json +++ b/agents/package.json @@ -18,7 +18,7 @@ "typescript": "^5.0.0" }, "dependencies": { - "@livekit/rtc-node": "^0.8.0", + "@livekit/rtc-node": "^0.8.1", "@livekit/protocol": "^1.21.0", "commander": "^12.0.0", "livekit-server-sdk": "^2.6.1", diff --git a/examples/package.json b/examples/package.json index 0030bef..59a590d 100644 --- a/examples/package.json +++ b/examples/package.json @@ -13,6 +13,6 @@ "@livekit/agents": "workspace:*", "@livekit/agents-plugin-elevenlabs": "workspace:*", "@livekit/agents-plugin-openai": "workspace:*", - "@livekit/rtc-node": "^0.8.0" + "@livekit/rtc-node": "^0.8.1" } } diff --git a/examples/src/minimal_assistant.ts b/examples/src/minimal_assistant.ts index 50847b0..83781d2 100644 --- a/examples/src/minimal_assistant.ts +++ b/examples/src/minimal_assistant.ts @@ -10,14 +10,11 @@ export default defineAgent({ console.log('starting assistant example agent'); - // FIXME: for some reason the remoteParticipants are not being populated at connection time nor calling onParticipantConnected - setTimeout(() => { - const assistant = new VoiceAssistant({ - ...defaultInferenceConfig, - system_message: 'You talk unprompted.', - }); - assistant.start(ctx.room); - }, 500); + const assistant = new VoiceAssistant({ + ...defaultInferenceConfig, + system_message: 'You talk unprompted.', + }); + assistant.start(ctx.room); }, }); diff --git a/plugins/openai/package.json b/plugins/openai/package.json index 9ab8a60..767967e 100644 --- a/plugins/openai/package.json +++ b/plugins/openai/package.json @@ -19,7 +19,7 @@ }, "dependencies": { "@livekit/agents": "workspace:*", - "@livekit/rtc-node": "^0.8.0", + "@livekit/rtc-node": "^0.8.1", "ws": "^8.16.0" } } diff --git a/plugins/openai/src/voice_assistant/index.ts b/plugins/openai/src/voice_assistant/index.ts index 988fbb1..3e4c23f 100644 --- a/plugins/openai/src/voice_assistant/index.ts +++ b/plugins/openai/src/voice_assistant/index.ts @@ -5,7 +5,13 @@ import { AudioByteStream } from '@livekit/agents'; import { findMicroTrackId } from '@livekit/agents'; import { log } from '@livekit/agents'; -import type { AudioFrameEvent, RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node'; +import type { + AudioFrameEvent, + LocalTrackPublication, + RemoteAudioTrack, + RemoteParticipant, + Room, +} from '@livekit/rtc-node'; import { AudioFrame, AudioSource, @@ -59,13 +65,13 @@ export class VoiceAssistant { private ws: WebSocket | null = null; private connected: boolean = false; private participant: RemoteParticipant | string | null = null; - private localTrack: LocalAudioTrack | null = null; + private agentPublication: LocalTrackPublication | null = null; private localTrackSid: string | null = null; private localSource: AudioSource | null = null; private pendingMessages: Map = new Map(); start(room: Room, participant: RemoteParticipant | string | null = null): Promise { - return new Promise((resolve, reject) => { + return new Promise(async (resolve, reject) => { if (this.ws !== null) { log().warn('VoiceAssistant already started'); resolve(); @@ -79,6 +85,12 @@ export class VoiceAssistant { this.linkParticipant(participant.identity); }); + room.on(RoomEvent.TrackPublished, () => { + this.subscribeToMicrophone(); + }); + room.on(RoomEvent.TrackSubscribed, () => { + this.subscribeToMicrophone(); + }); this.room = room; this.participant = participant; @@ -98,10 +110,17 @@ export class VoiceAssistant { } this.localSource = new AudioSource(proto.SAMPLE_RATE, proto.NUM_CHANNELS); - this.localTrack = LocalAudioTrack.createAudioTrack('assistant_voice', this.localSource); + const track = LocalAudioTrack.createAudioTrack('assistant_voice', this.localSource); const options = new TrackPublishOptions(); options.source = TrackSource.SOURCE_MICROPHONE; - room.localParticipant?.publishTrack(this.localTrack, options); + this.agentPublication = (await room.localParticipant?.publishTrack(track, options)) || null; + if (!this.agentPublication) { + log().error('Failed to publish track'); + reject(new Error('Failed to publish track')); + return; + } + + await this.agentPublication.waitForSubscription(); this.ws = new WebSocket(proto.API_URL, { headers: { @@ -142,10 +161,7 @@ export class VoiceAssistant { const truncatedDataPartial = command['data'] ? { data: (command['data'] as string).slice(0, 30) + '…' } : {}; - log().debug('->', { - ...command, - ...truncatedDataPartial, - }); + log().debug(`-> ${JSON.stringify({ ...command, ...truncatedDataPartial })}`); } this.ws.send(JSON.stringify(command)); } @@ -154,11 +170,7 @@ export class VoiceAssistant { const truncatedDataPartial = event['data'] ? { data: (event['data'] as string).slice(0, 30) + '…' } : {}; - log().debug('<-', { - ...event, - ...truncatedDataPartial, - }); - + log().debug(`<- ${JSON.stringify({ ...event, ...truncatedDataPartial })}`); switch (event.event) { case proto.ServerEvent.START_SESSION: break; @@ -182,7 +194,7 @@ export class VoiceAssistant { this.handleInputTranscribed(event); break; default: - log().warn('Unknown server event:', event); + log().warn(`Unknown server event: ${JSON.stringify(event)}`); } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index a1f5790..f8a9bb9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -81,8 +81,8 @@ importers: specifier: ^1.21.0 version: 1.21.0 '@livekit/rtc-node': - specifier: ^0.8.0 - version: 0.8.0 + specifier: ^0.8.1 + version: 0.8.1 commander: specifier: ^12.0.0 version: 12.0.0 @@ -121,8 +121,8 @@ importers: specifier: workspace:* version: link:../plugins/openai '@livekit/rtc-node': - specifier: ^0.8.0 - version: 0.8.0 + specifier: ^0.8.1 + version: 0.8.1 devDependencies: typescript: specifier: ^5.0.0 @@ -156,8 +156,8 @@ importers: specifier: workspace:* version: link:../../agents '@livekit/rtc-node': - specifier: ^0.8.0 - version: 0.8.0 + specifier: ^0.8.1 + version: 0.8.1 ws: specifier: ^8.16.0 version: 8.17.0 @@ -505,8 +505,8 @@ packages: cpu: [arm64] os: [darwin] - '@livekit/rtc-node-darwin-arm64@0.8.0': - resolution: {integrity: sha512-q5Y0XM57C9MBViXtBCPUhit0gEZ3QELlQBp3H3fmiUYB7aZyPkuYqYCXbehbZANmYR/BREus/347XEv+78DQGQ==} + '@livekit/rtc-node-darwin-arm64@0.8.1': + resolution: {integrity: sha512-8gbNHmtmBV2h3HFfva50lxTPDZAA+8M4hztOP5PBftZqeTkovApsVbTZUGoVNRV2bkNlnndNUJHF4OaiZPEmHw==} engines: {node: '>= 10'} cpu: [arm64] os: [darwin] @@ -517,8 +517,8 @@ packages: cpu: [x64] os: [darwin] - '@livekit/rtc-node-darwin-x64@0.8.0': - resolution: {integrity: sha512-tn/LxrLwijJYNqt0KUsXDlodQnFLRXoIeKB68Em4t6f9MdLcAZLHC+BzyR22xA6yMVrxsg2EbWdXah7ku6Z9lQ==} + '@livekit/rtc-node-darwin-x64@0.8.1': + resolution: {integrity: sha512-NPDXfC78uByvlSqmILXp09IRbwIoxuN6tpdtaKCobPYLow/M3NJ2nJESBMnmVhqcDNFYfhza+twmfbTu5lOBXg==} engines: {node: '>= 10'} cpu: [x64] os: [darwin] @@ -529,8 +529,8 @@ packages: cpu: [arm64] os: [linux] - '@livekit/rtc-node-linux-arm64-gnu@0.8.0': - resolution: {integrity: sha512-k34GugBtELR4w+hE5zw9B0SpvQHVDKnXcIAkne5VMVS9YUQDCSiZcuT1Wzz3iHBjaZpZaM1YkC5/nGGd0Kcs7Q==} + '@livekit/rtc-node-linux-arm64-gnu@0.8.1': + resolution: {integrity: sha512-QWgtfsX/uxi4bb9dJVQe3mT6trLRIzRtVOeYlxJll7xNxPvepFqmz8Y695ZxpDNvAkhiwYAemVfd+JJOIqav/A==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] @@ -541,8 +541,8 @@ packages: cpu: [x64] os: [linux] - '@livekit/rtc-node-linux-x64-gnu@0.8.0': - resolution: {integrity: sha512-96ao1Wba6fXXoTXPFLZpOLCWwTVcQX3zYL7UHsf1EjOgD32k3ENusxr0mjsujSlQNgSevamsAixFtCpxAQgNCQ==} + '@livekit/rtc-node-linux-x64-gnu@0.8.1': + resolution: {integrity: sha512-f9Mks0HQxAtYejss2i0rPSQ0Js0lN9TmqKd6X+BgBIRgMi7Xf/z8Xu91aDpknJoRA7Zw6Yowwwnuo6oy/bZhgw==} engines: {node: '>= 10'} cpu: [x64] os: [linux] @@ -553,8 +553,8 @@ packages: cpu: [x64] os: [win32] - '@livekit/rtc-node-win32-x64-msvc@0.8.0': - resolution: {integrity: sha512-abp5bUZyDXb1nWhypirIB+vJc69p8tFBbAZKPiJks/U9Nc0ereWlSCiIkvblzc+zSgKjPFiwG0DXpNmf6A7yjA==} + '@livekit/rtc-node-win32-x64-msvc@0.8.1': + resolution: {integrity: sha512-1sV4/OY7MdlTrZx2y0HIg57c4lGhMURVqxIWYu8LLDnk8DDPcxWVUMCDbUTYzhOcyxZbQaa5vWiv6s+95xM3yA==} engines: {node: '>= 10'} cpu: [x64] os: [win32] @@ -563,8 +563,8 @@ packages: resolution: {integrity: sha512-ITRv6CCfjGC/K7aI3AZnNBD3eQnHPguL0q8SmH52quebwNn1BYW+IZAzoIa4VCNnOlrfCdrGT1DCmZQz61nn0w==} engines: {node: '>= 18'} - '@livekit/rtc-node@0.8.0': - resolution: {integrity: sha512-L7u51GFO1WhM6RWfkiIapCD1MjHL5lgnm43PRNLgrNriQI0/W8o+dR6nCHacYL+dj48cGx0rObnBcvKDPbHQUg==} + '@livekit/rtc-node@0.8.1': + resolution: {integrity: sha512-gY2EkDsQ3IFamq2uOVQuteK0GJi4/rlcXO2/XGAnbPPeaIEpv65gbE4nZw/EvU2NCT0dnlyYLoTKLrqcGBq12g==} engines: {node: '>= 18'} '@manypkg/find-root@1.1.0': @@ -3424,31 +3424,31 @@ snapshots: '@livekit/rtc-node-darwin-arm64@0.7.0': optional: true - '@livekit/rtc-node-darwin-arm64@0.8.0': + '@livekit/rtc-node-darwin-arm64@0.8.1': optional: true '@livekit/rtc-node-darwin-x64@0.7.0': optional: true - '@livekit/rtc-node-darwin-x64@0.8.0': + '@livekit/rtc-node-darwin-x64@0.8.1': optional: true '@livekit/rtc-node-linux-arm64-gnu@0.7.0': optional: true - '@livekit/rtc-node-linux-arm64-gnu@0.8.0': + '@livekit/rtc-node-linux-arm64-gnu@0.8.1': optional: true '@livekit/rtc-node-linux-x64-gnu@0.7.0': optional: true - '@livekit/rtc-node-linux-x64-gnu@0.8.0': + '@livekit/rtc-node-linux-x64-gnu@0.8.1': optional: true '@livekit/rtc-node-win32-x64-msvc@0.7.0': optional: true - '@livekit/rtc-node-win32-x64-msvc@0.8.0': + '@livekit/rtc-node-win32-x64-msvc@0.8.1': optional: true '@livekit/rtc-node@0.7.0': @@ -3462,16 +3462,16 @@ snapshots: '@livekit/rtc-node-linux-x64-gnu': 0.7.0 '@livekit/rtc-node-win32-x64-msvc': 0.7.0 - '@livekit/rtc-node@0.8.0': + '@livekit/rtc-node@0.8.1': dependencies: '@bufbuild/protobuf': 1.9.0 typed-emitter: 2.1.0 optionalDependencies: - '@livekit/rtc-node-darwin-arm64': 0.8.0 - '@livekit/rtc-node-darwin-x64': 0.8.0 - '@livekit/rtc-node-linux-arm64-gnu': 0.8.0 - '@livekit/rtc-node-linux-x64-gnu': 0.8.0 - '@livekit/rtc-node-win32-x64-msvc': 0.8.0 + '@livekit/rtc-node-darwin-arm64': 0.8.1 + '@livekit/rtc-node-darwin-x64': 0.8.1 + '@livekit/rtc-node-linux-arm64-gnu': 0.8.1 + '@livekit/rtc-node-linux-x64-gnu': 0.8.1 + '@livekit/rtc-node-win32-x64-msvc': 0.8.1 '@manypkg/find-root@1.1.0': dependencies: