Skip to content

Commit

Permalink
Reintroduce TTS WS
Browse files Browse the repository at this point in the history
  • Loading branch information
dvonthenen committed Jul 15, 2024
1 parent 8287a80 commit d11c946
Show file tree
Hide file tree
Showing 14 changed files with 1,784 additions and 68 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ For documentation relating to Speech-to-Text (and Intelligence) from PreRecorded

For documentation relating to Text-to-Speech:

- WebSocket:
- Speak WebSocket Client - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/client/speak/v1/websocket](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/client/speak/v1/websocket)
- Speak WebSocket API - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/websocket](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/websocket)
- Speak API Interfaces - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/websocket/interfaces](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/websocket/interfaces)

- REST:
- Speak REST Client - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/client/speak/v1/rest](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/client/speak/v1/rest)
- Speak REST API - [https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/rest](https://pkg.go.dev/github.com/deepgram/deepgram-go-sdk@main/pkg/api/speak/v1/rest)
Expand Down Expand Up @@ -153,6 +158,11 @@ Speech-to-Text - Live Audio:
- From a Microphone - [examples/speech-to-text/websocket/microphone](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/speech-to-text/websocket/microphone/main.go)
- From an HTTP Endpoint - [examples/speech-to-text/websocket/http](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/speech-to-text/websocket/http/main.go)

Text-to-Speech - WebSocket

- Websocket Simple Example - [examples/text-to-speech/websocket/simple](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/text-to-speech/websocket/simple/main.go)
- Interactive Websocket - [examples/text-to-speech/websocket/interactive](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/text-to-speech/websocket/interactive/main.go)

Text-to-Speech - REST

- Save audio to a Path - [examples/text-to-speech/rest/file](https://github.com/deepgram/deepgram-go-sdk/blob/main/examples/text-to-speech/rest/file/main.go)
Expand Down
2 changes: 1 addition & 1 deletion docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,5 @@ import (
_ "github.com/deepgram/deepgram-go-sdk/pkg/api/listen/v1/websocket"
_ "github.com/deepgram/deepgram-go-sdk/pkg/api/manage/v1"
_ "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/rest"
// _ "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket"
_ "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket"
)
159 changes: 159 additions & 0 deletions examples/text-to-speech/websocket/interactive/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
// Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT

package main

import (
"bufio"
"context"
"fmt"
"os"
"strings"
"time"

msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces"
interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces"
speak "github.com/deepgram/deepgram-go-sdk/pkg/client/speak"
)

// Settings for the interactive text-to-speech example.
const (
// API_KEY is not referenced by the code in this example; the client is
// created with an empty string instead.
// NOTE(review): presumably the SDK then reads the key from the environment — confirm.
API_KEY = ""
// TTS_TEXT is a sample sentence. It is not referenced by the code in this
// example, which sends whatever the user types on stdin.
TTS_TEXT = "Hello, this is a text to speech example using Deepgram."
// AUDIO_FILE is the path the Binary callback writes received audio to.
AUDIO_FILE = "output.mp3"
)

// MyCallback is this example's message handler. An instance is passed to
// speak.NewWebSocket in main, and its methods (Open, Metadata, Binary, Flush,
// Warning, Error, Close) each handle one kind of message. Replace it with your
// own implementation to process messages differently.
type MyCallback struct{}

// Metadata announces that a metadata message arrived and prints its request
// ID with surrounding whitespace removed. It always returns nil.
func (c MyCallback) Metadata(md *msginterfaces.MetadataResponse) error {
	fmt.Print("\n[Metadata] Received\n")

	requestID := strings.TrimSpace(md.RequestID)
	fmt.Printf("Metadata.RequestID: %s\n", requestID)

	return nil
}

// Binary stores one chunk of received audio by appending it to AUDIO_FILE,
// creating the file if it does not exist yet.
//
// The file is opened in append mode on purpose: Binary runs once per binary
// message, so truncating on every call would throw away everything written by
// earlier calls and leave only the most recent chunk on disk.
//
// Open and write failures are printed and returned to the caller.
func (c MyCallback) Binary(byMsg []byte) error {
	fmt.Printf("\n[Binary] Received\n")

	file, err := os.OpenFile(AUDIO_FILE, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o666)
	if err != nil {
		fmt.Printf("Error creating file %s: %v\n", AUDIO_FILE, err)
		return err
	}
	defer file.Close()

	if _, err = file.Write(byMsg); err != nil {
		fmt.Printf("Error writing audio data to file: %v\n", err)
		return err
	}

	fmt.Printf("Audio data saved to %s\n", AUDIO_FILE)
	return nil
}

// Flush announces that a flushed message arrived and then re-prints the
// interactive command prompt. It always returns nil.
func (c MyCallback) Flush(fl *msginterfaces.FlushedResponse) error {
	fmt.Print("\n[Flushed] Received\n")
	fmt.Print("\n\nPress 'r' and ENTER to reset the buffer, 'f' and ENTER to flush, enter new text to send it, or just ENTER to exit...\n\n> ")

	return nil
}

// Warning prints the code and message carried by a warning response.
// It always returns nil.
func (c MyCallback) Warning(wr *msginterfaces.WarningResponse) error {
	fmt.Print("\n[Warning] Received\n")

	code := wr.WarnCode
	fmt.Printf("Warning.Code: %s\n", code)

	description := wr.WarnMsg
	fmt.Printf("Warning.Description: %s\n\n", description)

	return nil
}

// Error prints the error code and description carried by an error response.
// It always returns nil.
func (c MyCallback) Error(er *msginterfaces.ErrorResponse) error {
	fmt.Print("\n[Error] Received\n")

	code := er.ErrCode
	fmt.Printf("Error.Code: %s\n", code)

	description := er.Description
	fmt.Printf("Error.Description: %s\n\n", description)

	return nil
}

// Close announces that a close message arrived. The response itself is not
// inspected, and the method always returns nil.
func (c MyCallback) Close(cr *msginterfaces.CloseResponse) error {
	fmt.Print("\n[Close] Received\n")

	return nil
}

// Open announces that an open message arrived. The response itself is not
// inspected, and the method always returns nil.
func (c MyCallback) Open(or *msginterfaces.OpenResponse) error {
	fmt.Print("\n[Open] Received\n")

	return nil
}

// main runs an interactive text-to-speech session over a websocket.
//
// It initializes the library, creates a speak websocket client that reports
// messages to MyCallback, connects, and then reads commands from stdin one
// line at a time:
//
//	r          reset the buffer
//	f          flush the buffer
//	(empty)    leave the loop and shut down
//	other text send the line as text to be spoken
//
// The loop also ends when stdin is closed or cannot be read; a read error is
// reported before the connection is stopped.
func main() {
	// Printed whenever the program is ready for the next command.
	const prompt = "\n\nPress 'r' and ENTER to reset the buffer, 'f' and ENTER to flush, enter new text to send it, or just ENTER to exit...\n\n> "

	// init library
	speak.InitWithDefault()

	// Go context
	ctx := context.Background()

	// print instructions
	fmt.Print("\n\nPress ENTER to exit!\n\n")

	// set the TTS options
	ttsOptions := &interfaces.SpeakOptions{
		Model: "aura-asteria-en",
	}

	// set the Client options
	cOptions := &interfaces.ClientOptions{}

	// create the callback
	callback := MyCallback{}

	// create the websocket client
	// NOTE(review): the empty second argument is presumably the API key — confirm against the SDK.
	dgClient, err := speak.NewWebSocket(ctx, "", cOptions, ttsOptions, callback)
	if err != nil {
		fmt.Println("ERROR creating TTS connection:", err)
		return
	}

	// connect the websocket to Deepgram
	if !dgClient.Connect() {
		fmt.Println("Client.Connect failed")
		os.Exit(1)
	}

	// Pause briefly before showing the first prompt.
	// NOTE(review): presumably this lets connection output print first — confirm.
	time.Sleep(2 * time.Second)
	fmt.Print(prompt)

	input := bufio.NewScanner(os.Stdin)

commands:
	for input.Scan() {
		switch line := input.Text(); line {
		case "r":
			if err := dgClient.Reset(); err != nil {
				fmt.Printf("Error resetting buffer: %v\n", err)
			} else {
				fmt.Println("Buffer reset successfully.")
			}
		case "f":
			if err := dgClient.Flush(); err != nil {
				fmt.Printf("Error flushing buffer: %v\n", err)
			} else {
				fmt.Println("Buffer flushed successfully.")
			}
		case "":
			// an empty line is the exit command
			break commands
		default:
			if err := dgClient.SpeakWithText(line); err != nil {
				fmt.Printf("Error sending text input: %v\n", err)
			} else {
				fmt.Println("Text sent successfully.")
			}
			fmt.Print(prompt)
		}
	}

	// Scan returns false on both EOF and failure; only a failure sets Err.
	if err := input.Err(); err != nil {
		fmt.Printf("Error reading input: %v\n", err)
	}

	// close the connection
	dgClient.Stop()

	fmt.Printf("Program exiting...\n")
}
138 changes: 138 additions & 0 deletions examples/text-to-speech/websocket/simple/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT

package main

import (
"context"
"fmt"
"os"
"strings"
"time"

msginterfaces "github.com/deepgram/deepgram-go-sdk/pkg/api/speak/v1/websocket/interfaces"
interfaces "github.com/deepgram/deepgram-go-sdk/pkg/client/interfaces/v1"
speak "github.com/deepgram/deepgram-go-sdk/pkg/client/speak"
)

// Settings for the simple text-to-speech example.
const (
// API_KEY is not referenced by the code in this example; the client is
// created with an empty string instead.
// NOTE(review): presumably the SDK then reads the key from the environment — confirm.
API_KEY = ""
// TTS_TEXT is the sentence main sends to be converted to speech.
TTS_TEXT = "Hello, this is a text to speech example using Deepgram."
// AUDIO_FILE is the path the Binary callback appends received audio to.
AUDIO_FILE = "output.mp3"
)

// MyCallback is this example's message handler. An instance is passed to
// speak.NewWebSocket in main, and its methods (Open, Metadata, Binary, Flush,
// Warning, Error, Close) each handle one kind of message. Replace it with your
// own implementation to process messages differently.
type MyCallback struct{}

// Metadata announces that a metadata message arrived and prints its request
// ID with surrounding whitespace removed. It always returns nil.
func (c MyCallback) Metadata(md *msginterfaces.MetadataResponse) error {
	fmt.Print("\n[Metadata] Received\n")

	requestID := strings.TrimSpace(md.RequestID)
	fmt.Printf("Metadata.RequestID: %s\n", requestID)

	return nil
}

// Binary stores one chunk of received audio by appending it to AUDIO_FILE,
// creating the file if it does not exist yet. Open and write failures are
// printed and returned to the caller.
func (c MyCallback) Binary(byMsg []byte) error {
	fmt.Print("\n[Binary] Received\n")

	const flags = os.O_CREATE | os.O_APPEND | os.O_WRONLY
	out, openErr := os.OpenFile(AUDIO_FILE, flags, 0o666)
	if openErr != nil {
		fmt.Printf("Error creating file %s: %v\n", AUDIO_FILE, openErr)
		return openErr
	}
	defer out.Close()

	if _, writeErr := out.Write(byMsg); writeErr != nil {
		fmt.Printf("Error writing audio data to file: %v\n", writeErr)
		return writeErr
	}

	fmt.Printf("Audio data saved to %s\n", AUDIO_FILE)
	return nil
}

// Flush announces that a flushed message arrived. The response itself is not
// inspected, and the method always returns nil.
func (c MyCallback) Flush(fl *msginterfaces.FlushedResponse) error {
	fmt.Print("\n[Flushed] Received\n")

	return nil
}

// Warning prints the code and message carried by a warning response.
// It always returns nil.
func (c MyCallback) Warning(wr *msginterfaces.WarningResponse) error {
	fmt.Print("\n[Warning] Received\n")

	code := wr.WarnCode
	fmt.Printf("Warning.Code: %s\n", code)

	description := wr.WarnMsg
	fmt.Printf("Warning.Description: %s\n\n", description)

	return nil
}

// Error prints the error code and error message carried by an error
// response. It always returns nil.
func (c MyCallback) Error(er *msginterfaces.ErrorResponse) error {
	fmt.Print("\n[Error] Received\n")

	code := er.ErrCode
	fmt.Printf("Error.Code: %s\n", code)

	message := er.ErrMsg
	fmt.Printf("Error.Description: %s\n\n", message)

	return nil
}

// Close announces that a close message arrived. The response itself is not
// inspected, and the method always returns nil.
func (c MyCallback) Close(cr *msginterfaces.CloseResponse) error {
	fmt.Print("\n[Close] Received\n")

	return nil
}

// Open announces that an open message arrived. The response itself is not
// inspected, and the method always returns nil.
func (c MyCallback) Open(or *msginterfaces.OpenResponse) error {
	fmt.Print("\n[Open] Received\n")

	return nil
}

// main sends one fixed sentence through the text-to-speech websocket.
//
// It initializes the library, creates a speak websocket client that reports
// messages to MyCallback, connects, sends TTS_TEXT, flushes, waits a fixed
// five seconds, and then stops the client. Once the connection is up, every
// exit path stops the client so the connection is not left open when sending
// or flushing fails.
func main() {
	// init library
	speak.Init(speak.InitLib{
		LogLevel: speak.LogLevelDefault, // LogLevelDefault, LogLevelFull, LogLevelDebug, LogLevelTrace
	})

	// Go context
	ctx := context.Background()

	// set the Client options
	cOptions := &interfaces.ClientOptions{}

	// set the TTS options
	ttsOptions := &interfaces.SpeakOptions{
		Model: "aura-asteria-en",
	}

	// create the callback
	callback := MyCallback{}

	// create the websocket client
	// NOTE(review): the empty second argument is presumably the API key — confirm against the SDK.
	dgClient, err := speak.NewWebSocket(ctx, "", cOptions, ttsOptions, callback)
	if err != nil {
		fmt.Println("ERROR creating TTS connection:", err)
		return
	}

	// connect the websocket to Deepgram
	if !dgClient.Connect() {
		fmt.Println("Client.Connect failed")
		os.Exit(1)
	}

	// Send the text input
	if err = dgClient.SpeakWithText(TTS_TEXT); err != nil {
		fmt.Printf("Error sending text input: %v\n", err)
		dgClient.Stop()
		return
	}

	// Flush the text input
	if err = dgClient.Flush(); err != nil {
		fmt.Printf("Error flushing text input: %v\n", err)
		dgClient.Stop()
		return
	}

	// Wait a fixed interval before shutting down.
	// NOTE(review): presumably this gives the callbacks time to receive the audio — confirm.
	time.Sleep(5 * time.Second)

	// close the connection
	dgClient.Stop()

	fmt.Printf("Program exiting...\n")
}
25 changes: 25 additions & 0 deletions pkg/api/speak/v1/websocket/constants.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
// Use of this source code is governed by a MIT license that can be found in the LICENSE file.
// SPDX-License-Identifier: MIT

package websocketv1

import (
"errors"
)

const (
// PackageVersion is the version string of this package.
PackageVersion string = "v1.0"
)

// Sentinel errors exported by this package. Compare against them with errors.Is.
var (
// ErrInvalidInput indicates that a required input was not found.
ErrInvalidInput = errors.New("required input was not found")

// ErrInvalidMessageType indicates that a message had an invalid type.
ErrInvalidMessageType = errors.New("invalid message type")

// ErrUserCallbackNotDefined indicates that no user callback object was defined.
ErrUserCallbackNotDefined = errors.New("user callback object not defined")
)
Loading

0 comments on commit d11c946

Please sign in to comment.