-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial realtime voice assistant spike (#27)
- Loading branch information
Showing
16 changed files
with
751 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
--- | ||
"@livekit/agents": patch | ||
"@livekit/agents-plugin-openai": patch | ||
--- | ||
|
||
Add transcript support to realtime voice assistant |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
use flake |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
// SPDX-FileCopyrightText: 2024 LiveKit, Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
import { AudioFrame } from '@livekit/rtc-node'; | ||
import { log } from './log.js'; | ||
|
||
/**
 * Re-chunks an arbitrary stream of 16-bit PCM bytes into fixed-size audio frames.
 *
 * Bytes handed to {@link AudioByteStream.write} are buffered until at least one full frame
 * (`numChannels * samplesPerChannel * 2` bytes) is available; any remainder is kept for the
 * next call. {@link AudioByteStream.flush} emits whatever is left as a final, shorter frame.
 */
export class AudioByteStream {
  private sampleRate: number;
  private numChannels: number;
  private bytesPerFrame: number;
  private buf: Int8Array;

  /**
   * @param sampleRate - Sample rate forwarded unchanged to every emitted frame.
   * @param numChannels - Number of interleaved channels in the byte stream.
   * @param samplesPerChannel - Samples per channel in each emitted frame; defaults to
   *   `floor(sampleRate / 50)`, i.e. 20ms of audio.
   * @throws RangeError if `numChannels` or the resolved `samplesPerChannel` is not a positive
   *   integer (a zero-sized frame would otherwise make `write` loop forever).
   */
  constructor(sampleRate: number, numChannels: number, samplesPerChannel: number | null = null) {
    this.sampleRate = sampleRate;
    this.numChannels = numChannels;

    if (samplesPerChannel === null) {
      samplesPerChannel = Math.floor(sampleRate / 50); // 20ms by default
    }

    if (!Number.isInteger(numChannels) || numChannels <= 0) {
      throw new RangeError(`AudioByteStream: numChannels must be a positive integer`);
    }
    if (!Number.isInteger(samplesPerChannel) || samplesPerChannel <= 0) {
      throw new RangeError(`AudioByteStream: samplesPerChannel must be a positive integer`);
    }

    this.bytesPerFrame = numChannels * samplesPerChannel * 2; // 2 bytes per sample (Int16)
    this.buf = new Int8Array();
  }

  /**
   * Appends `data` to the internal buffer and returns every complete frame now available.
   *
   * @param data - Raw bytes of interleaved 16-bit samples.
   * @returns Zero or more full-size frames, in stream order.
   */
  write(data: ArrayBuffer): AudioFrame[] {
    const incoming = new Int8Array(data);

    // Concatenate with a single allocation instead of spreading both arrays element by element.
    const merged = new Int8Array(this.buf.length + incoming.length);
    merged.set(this.buf, 0);
    merged.set(incoming, this.buf.length);

    const frames: AudioFrame[] = [];
    let offset = 0;
    while (merged.length - offset >= this.bytesPerFrame) {
      // slice() copies, so the frame owns a buffer of exactly bytesPerFrame bytes.
      const frameData = merged.slice(offset, offset + this.bytesPerFrame);
      offset += this.bytesPerFrame;

      frames.push(
        new AudioFrame(
          new Int16Array(frameData.buffer),
          this.sampleRate,
          this.numChannels,
          // bytes -> samples (2 bytes each) -> samples per channel
          frameData.length / 2 / this.numChannels,
        ),
      );
    }

    // Keep only the bytes that did not fill a whole frame.
    this.buf = merged.slice(offset);

    return frames;
  }

  /**
   * Emits the buffered remainder as one final frame and empties the buffer.
   *
   * @returns A single frame holding the leftover samples, or an empty array when nothing is
   *   buffered or when the leftover bytes do not form whole samples for every channel (in which
   *   case a warning is logged and the bytes are discarded).
   */
  flush(): AudioFrame[] {
    // Take ownership of the pending bytes so they can never be emitted twice.
    const pending = this.buf;
    this.buf = new Int8Array();

    if (pending.length === 0) {
      return [];
    }

    if (pending.length % (2 * this.numChannels) !== 0) {
      log().warn('AudioByteStream: incomplete frame during flush, dropping');
      return [];
    }

    return [
      new AudioFrame(
        new Int16Array(pending.buffer),
        this.sampleRate,
        this.numChannels,
        pending.length / 2 / this.numChannels,
      ),
    ];
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
// SPDX-FileCopyrightText: 2024 LiveKit, Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
import { type JobContext, WorkerOptions, cli, defineAgent } from '@livekit/agents'; | ||
import { VoiceAssistant, defaultInferenceConfig } from '@livekit/agents-plugin-openai'; | ||
|
||
/**
 * Example agent: connects to the job's room, then starts a realtime voice assistant in it
 * after a short delay.
 */
export default defineAgent({
  entry: async (ctx: JobContext) => {
    await ctx.connect();

    console.log('starting assistant example agent');

    // Builds the assistant from the default inference config plus a custom system message,
    // then attaches it to the connected room.
    const launchAssistant = (): void => {
      const assistant = new VoiceAssistant({
        ...defaultInferenceConfig,
        system_message: 'You talk unprompted.',
      });
      assistant.start(ctx.room);
    };

    // FIXME: remoteParticipants is not populated at connection time and onParticipantConnected
    // is not being called, so the start is deferred by 500ms as a workaround.
    setTimeout(launchAssistant, 500);
  },
});
|
||
// Only launch the worker when this file is the process entry point, not when it is imported.
// Without this guard, our code would start a new Agents process on every job process.
const entryScript = process.argv[1];
if (entryScript === import.meta.filename) {
  cli.runApp(new WorkerOptions({ agent: import.meta.filename }));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/** | ||
* Config file for API Extractor. For more info, please visit: https://api-extractor.com | ||
*/ | ||
{ | ||
"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", | ||
|
||
/** | ||
* Optionally specifies another JSON config file that this file extends from. This provides a way for | ||
* standard settings to be shared across multiple projects. | ||
* | ||
* If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains | ||
* the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be | ||
* resolved using NodeJS require(). | ||
* | ||
* SUPPORTED TOKENS: none | ||
* DEFAULT VALUE: "" | ||
*/ | ||
"extends": "../../api-extractor-shared.json", | ||
"mainEntryPointFilePath": "./dist/index.d.ts" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
"name": "@livekit/agents-plugin-openai", | ||
"version": "0.1.0", | ||
"description": "OpenAI plugin for LiveKit Node Agents", | ||
"main": "dist/index.js", | ||
"types": "dist/index.d.ts", | ||
"author": "LiveKit", | ||
"type": "module", | ||
"scripts": { | ||
"build": "tsc", | ||
"lint": "eslint -f unix \"src/**/*.{ts,js}\"", | ||
"api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript", | ||
"api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose" | ||
}, | ||
"devDependencies": { | ||
"@microsoft/api-extractor": "^7.35.0", | ||
"@types/ws": "^8.5.10", | ||
"typescript": "^5.0.0" | ||
}, | ||
"dependencies": { | ||
"@livekit/agents": "workspace:*", | ||
"@livekit/rtc-node": "^0.8.0", | ||
"ws": "^8.16.0" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
// SPDX-FileCopyrightText: 2024 LiveKit, Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
export * from './voice_assistant/index.js'; |
Oops, something went wrong.