Skip to content

Commit

Permalink
Reintroduce TTS WS
Browse files Browse the repository at this point in the history
  • Loading branch information
dvonthenen committed Sep 9, 2024
1 parent 0d19615 commit c7977f3
Show file tree
Hide file tree
Showing 63 changed files with 4,547 additions and 332 deletions.
6 changes: 6 additions & 0 deletions .golangci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ issues:
- path: pkg/client/listen/v1/websocket/new_using_chan.go
linters:
- gocritic
- path: pkg/client/speak/v1/websocket/client_callback.go
linters:
- dupl
- path: pkg/client/speak/v1/websocket/client_channel.go
linters:
- dupl
- path: pkg/client/listen/v1/websocket/client_callback.go
linters:
- dupl
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ For documentation relating to Speech-to-Text (and Intelligence) from PreRecorded

For documentation relating to Text-to-Speech:

- WebSocket:
- Speak WebSocket Client - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/client/speak/v1/websocket](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/client/speak/v1/websocket)
- Speak WebSocket API - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/websocket](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/websocket)
- Speak API - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/websocket/interfaces](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/websocket/interfaces)

- REST:
- Speak REST Client - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/client/speak/v1/rest](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/client/speak/v1/rest)
- Speak REST API - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/rest](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/rest)
Expand Down Expand Up @@ -207,6 +212,11 @@ Speech-to-Text - Live Audio:
- From a Microphone - [examples/speech-to-text/websocket/microphone](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/speech-to-text/websocket/microphone/main.go)
- From an HTTP Endpoint - [examples/speech-to-text/websocket/http](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/speech-to-text/websocket/http/main.go)

Text-to-Speech - WebSocket

- Websocket Simple Example - [examples/text-to-speech/websocket/simple](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/text-to-speech/websocket/simple/main.go)
- Interactive Websocket - [examples/text-to-speech/websocket/interactive](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/text-to-speech/websocket/interactive/main.go)

Text-to-Speech - REST

- Save audio to a Path - [examples/text-to-speech/rest/file](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/text-to-speech/rest/file/main.go)
Expand Down
2 changes: 1 addition & 1 deletion docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,5 @@ import (
_ "github.com/deepgram/deepgram-go-sdk/pkg/api/listen/v1/websocket"
_ "github.com/deepgram/deepgram-go-sdk/pkg/api/manage/v1"
_ "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/rest"
// _ "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket"
_ "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket"
)
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ func main() {
callback = *NewMyHandler()

// create a Deepgram client
dgClient, err := client.NewWSUsingChan(ctx, "", cOptions, tOptions, &callback)
dgClient, err := client.NewWSUsingChan(ctx, "", cOptions, tOptions, callback)
if err != nil {
fmt.Println("ERROR creating LiveTranscription connection:", err)
return
Expand Down
2 changes: 1 addition & 1 deletion examples/speech-to-text/websocket/replay/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ func main() {
}

// create a Deepgram client
dgClient, err := client.NewWebSocketForDemo(ctx, options)
dgClient, err := client.NewWSUsingChanForDemo(ctx, options)
if err != nil {
log.Println("ERROR creating LiveTranscription connection:", err)
return
Expand Down
2 changes: 1 addition & 1 deletion examples/speech-to-text/websocket/test/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func main() {
}

// create a Deepgram client
dgClient, err := client.NewWebSocket(ctx, "", cOptions, tOptions, nil)
dgClient, err := client.NewWSUsingChan(ctx, "", cOptions, tOptions, nil)
if err != nil {
fmt.Println("ERROR creating LiveTranscription connection:", err)
return
Expand Down
6 changes: 4 additions & 2 deletions examples/text-to-speech/rest/file/hello-world/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (

const (
textToSpeech string = "Hello, World!"
filePath string = "./test.mp3"
filePath string = "./test.wav"
)

func main() {
Expand All @@ -33,7 +33,9 @@ func main() {

// set the Transcription options
options := &interfaces.SpeakOptions{
Model: "aura-asteria-en",
Model: "aura-asteria-en",
Encoding: "linear16",
SampleRate: 48000,
}

// create a Deepgram client
Expand Down
209 changes: 209 additions & 0 deletions examples/text-to-speech/websocket/interactive_callback/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
// Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT

package main

import (
"bufio"
"context"
"fmt"
"os"
"strings"
"time"

msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces"
interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces"
speak "github.com/deepgram/deepgram-go-sdk/pkg/client/speak"
)

// Constants shared by the callback methods and main in this example.
const (
// TTS_TEXT is a sample sentence for synthesis.
// NOTE(review): nothing in this file references it; confirm whether it is still needed.
TTS_TEXT = "Hello, this is a text to speech example using Deepgram."
// AUDIO_FILE is the path of the output file: the Binary callback appends the
// bytes it receives to it, and main removes and recreates it before a flush.
AUDIO_FILE = "output.wav"
)

// MyCallback is this example's event handler; replace it with your own
// implementation. It holds no state and is passed by value to
// speak.NewWSUsingCallback in main. Its methods print the event they receive
// (Binary additionally appends the received bytes to AUDIO_FILE).
type MyCallback struct{}

// Open handles the open notification by printing a one-line notice.
// The response payload is not inspected and the method always returns nil.
func (c MyCallback) Open(or *msginterfaces.OpenResponse) error {
	fmt.Print("\n[Open] Received\n")
	return nil
}

// Metadata handles a metadata message: it prints a notice followed by the
// request ID with surrounding whitespace trimmed. It always returns nil.
func (c MyCallback) Metadata(md *msginterfaces.MetadataResponse) error {
	requestID := strings.TrimSpace(md.RequestID)

	fmt.Print("\n[Metadata] Received\n")
	fmt.Printf("Metadata.RequestID: %s\n", requestID)
	return nil
}

// Binary handles a binary message by appending its bytes to AUDIO_FILE.
//
// The file is opened (and created if missing) in append mode for every
// message and closed again before returning, so no handle is kept between
// calls.
//
// It returns the first error encountered while opening, writing or closing
// the file, and nil otherwise. Each failure is also printed to stdout.
func (c MyCallback) Binary(byMsg []byte) error {
	fmt.Printf("\n[Binary] Received\n")

	file, err := os.OpenFile(AUDIO_FILE, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o666)
	if err != nil {
		fmt.Printf("Error creating file %s: %v\n", AUDIO_FILE, err)
		return err
	}

	// Always close, even when the write failed, so the handle is never leaked.
	_, writeErr := file.Write(byMsg)
	closeErr := file.Close()

	if writeErr != nil {
		fmt.Printf("Error writing audio data to file: %v\n", writeErr)
		return writeErr
	}

	// A failed Close on a file opened for writing can mean the data never
	// reached disk, so report it instead of silently returning success.
	if closeErr != nil {
		fmt.Printf("Error closing file %s: %v\n", AUDIO_FILE, closeErr)
		return closeErr
	}

	return nil
}

// Flush handles a flushed message: it prints a notice and then shows the
// interactive command prompt again. It always returns nil.
func (c MyCallback) Flush(fl *msginterfaces.FlushedResponse) error {
	const prompt = "\n\nPress 'r' and ENTER to reset the buffer, 'f' and ENTER to flush, enter new text to send it, or just ENTER to exit...\n\n> "

	fmt.Print("\n[Flushed] Received\n")
	fmt.Print(prompt)
	return nil
}

// Clear handles a cleared message: it prints a notice and then shows the
// interactive command prompt again. It always returns nil.
func (c MyCallback) Clear(fl *msginterfaces.ClearedResponse) error {
	const prompt = "\n\nPress 'r' and ENTER to reset the buffer, 'f' and ENTER to flush, enter new text to send it, or just ENTER to exit...\n\n> "

	fmt.Print("\n[Cleared] Received\n")
	fmt.Print(prompt)
	return nil
}

// Close handles the close notification by printing a one-line notice.
// The response payload is not inspected and the method always returns nil.
func (c MyCallback) Close(cr *msginterfaces.CloseResponse) error {
	fmt.Print("\n[Close] Received\n")
	return nil
}

// Warning handles a warning message: it prints a notice followed by the
// warning's code and description. It always returns nil.
func (c MyCallback) Warning(wr *msginterfaces.WarningResponse) error {
	code, description := wr.WarnCode, wr.WarnMsg

	fmt.Print("\n[Warning] Received\n")
	fmt.Printf("Warning.Code: %s\n", code)
	fmt.Printf("Warning.Description: %s\n\n", description)
	return nil
}

// Error handles an error message: it prints a notice followed by the error's
// code and description. The error is only reported, not propagated; the
// method always returns nil.
func (c MyCallback) Error(er *msginterfaces.ErrorResponse) error {
	code, description := er.ErrCode, er.ErrMsg

	fmt.Print("\n[Error] Received\n")
	fmt.Printf("Error.Code: %s\n", code)
	fmt.Printf("Error.Description: %s\n\n", description)
	return nil
}

// UnhandledEvent handles a message that has no dedicated callback: it prints
// a notice followed by the raw payload rendered as text. It always returns nil.
func (c MyCallback) UnhandledEvent(byData []byte) error {
	fmt.Print("\n[UnhandledEvent] Received\n")
	// %s renders a byte slice as its string content, so no conversion is needed.
	fmt.Printf("UnhandledEvent: %s\n\n", byData)
	return nil
}

// main runs an interactive text-to-speech session over a Deepgram WebSocket.
//
// It initializes the SDK, creates a callback-based speak client, connects it
// and then reads commands from stdin, one per line:
//
//   - "r" calls Reset on the client,
//   - "f" recreates AUDIO_FILE with a WAV header and calls Flush,
//   - an empty line ends the session,
//   - any other line is sent to the client as text to speak.
//
// When the loop ends (empty line, end of input, or a failure preparing the
// audio file) the client is stopped before the program exits. The process
// exits with status 1 if the connection cannot be established.
func main() {
	// prompt is printed whenever the program is ready for the next command.
	const prompt = "\n\nPress 'r' and ENTER to reset the buffer, 'f' and ENTER to flush, enter new text to send it, or just ENTER to exit...\n\n> "

	// init library
	speak.InitWithDefault()

	// Go context
	ctx := context.Background()

	// print instructions
	fmt.Print("\n\nPress ENTER to exit!\n\n")

	// set the Client options
	cOptions := &interfaces.ClientOptions{
		// AutoFlushSpeakDelta: 1000,
	}

	// set the TTS options
	ttsOptions := &interfaces.WSSpeakOptions{
		Model:      "aura-asteria-en",
		Encoding:   "linear16",
		SampleRate: 48000,
	}

	// create the callback
	callback := MyCallback{}

	// create the callback-based WebSocket speak client
	dgClient, err := speak.NewWSUsingCallback(ctx, "", cOptions, ttsOptions, callback)
	if err != nil {
		fmt.Println("ERROR creating TTS connection:", err)
		return
	}

	// connect the websocket to Deepgram
	if !dgClient.Connect() {
		fmt.Println("Client.Connect failed")
		os.Exit(1)
	}

	// A WAV container header lets media players such as VLC, Media Player or
	// Apple Music play the raw linear16 audio appended after it. The values
	// match ttsOptions: PCM, mono, 48000 Hz, 16 bits per sample. The two size
	// fields stay zero because the final length is unknown while streaming.
	wavHeader := []byte{
		0x52, 0x49, 0x46, 0x46, // "RIFF"
		0x00, 0x00, 0x00, 0x00, // Placeholder for file size
		0x57, 0x41, 0x56, 0x45, // "WAVE"
		0x66, 0x6d, 0x74, 0x20, // "fmt "
		0x10, 0x00, 0x00, 0x00, // Chunk size (16)
		0x01, 0x00, // Audio format (1 for PCM)
		0x01, 0x00, // Number of channels (1)
		0x80, 0xbb, 0x00, 0x00, // Sample rate (48000)
		0x00, 0x77, 0x01, 0x00, // Byte rate (48000 * 1 channel * 2 bytes = 96000)
		0x02, 0x00, // Block align (2)
		0x10, 0x00, // Bits per sample (16)
		0x64, 0x61, 0x74, 0x61, // "data"
		0x00, 0x00, 0x00, 0x00, // Placeholder for data size
	}

	// Short pause before prompting.
	// NOTE(review): presumably gives the connection time to settle - confirm.
	time.Sleep(2 * time.Second)
	fmt.Print(prompt)

	// Read commands from stdin until an empty line or end of input.
	input := bufio.NewScanner(os.Stdin)

inputLoop:
	for input.Scan() {
		switch text := input.Text(); text {
		case "r":
			if err := dgClient.Reset(); err != nil {
				fmt.Printf("Error resetting buffer: %v\n", err)
			} else {
				fmt.Println("Buffer reset successfully.")
			}
		case "f":
			// Start from a fresh file. A missing file is the normal case, so the
			// error from Remove is deliberately ignored.
			_ = os.Remove(AUDIO_FILE)

			file, err := os.OpenFile(AUDIO_FILE, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o666)
			if err != nil {
				fmt.Printf("Failed to open file. Err: %v\n", err)
				// Leave the loop instead of returning so the client is stopped.
				break inputLoop
			}

			// Close unconditionally so the handle is released even if the write
			// failed; the Binary callback reopens the file for each message.
			_, err = file.Write(wavHeader)
			closeErr := file.Close()
			if err != nil {
				fmt.Printf("Failed to write header to file. Err: %v\n", err)
				break inputLoop
			}
			if closeErr != nil {
				fmt.Printf("Failed to close file. Err: %v\n", closeErr)
				break inputLoop
			}

			if err := dgClient.Flush(); err != nil {
				fmt.Printf("Error flushing buffer: %v\n", err)
			} else {
				fmt.Println("Buffer flushed successfully.")
			}
		case "":
			break inputLoop
		default:
			if err := dgClient.SpeakWithText(text); err != nil {
				fmt.Printf("Error sending text input: %v\n", err)
			} else {
				fmt.Println("Text sent successfully.")
			}
			fmt.Print(prompt)
		}
	}

	// Scan returns false on both end of input and read errors; report the latter.
	if err := input.Err(); err != nil {
		fmt.Printf("Error reading input: %v\n", err)
	}

	// close the connection
	dgClient.Stop()

	fmt.Printf("Program exiting...\n")
}
Loading

0 comments on commit c7977f3

Please sign in to comment.