[HOLD] Reintroduce TTS WS

deepgram · Sep 3, 2024 · c35f190 · c35f190
1 parent 4f2ed8f
commit c35f190
Show file tree

Hide file tree

Showing 30 changed files with 2,980 additions and 216 deletions.
diff --git a/deepgram/__init__.py b/deepgram/__init__.py
@@ -96,12 +96,12 @@
 from .client import (
     SpeakOptions,
     SpeakRESTOptions,
-    # SpeakWebSocketOptions,
+    SpeakWSOptions,
     # FileSource,
     SpeakRestSource,
     SpeakSource,
 )
-from .client import SpeakWebSocketEvents
+from .client import SpeakWebSocketEvents, SpeakWebSocketMessage
 
 ## speak REST
 from .client import (
@@ -115,21 +115,22 @@
     SpeakRESTResponse,
 )
 
-# ## speak WebSocket
-# from .client import (
-#     SpeakWebSocketClient,
-#     AsyncSpeakWebSocketClient,
-# )
-# from .client import (
-#     SpeakWebSocketResponse,
-#     # OpenResponse,
-#     # MetadataResponse,
-#     FlushedResponse,
-#     # CloseResponse,
-#     # UnhandledResponse,
-#     WarningResponse,
-#     # ErrorResponse,
-# )
+## speak WebSocket
+from .client import (
+    SpeakWebSocketClient,
+    AsyncSpeakWebSocketClient,
+    SpeakWSClient,
+    AsyncSpeakWSClient,
+)
+from .client import (
+    # OpenResponse,
+    # MetadataResponse,
+    FlushedResponse,
+    # CloseResponse,
+    # UnhandledResponse,
+    WarningResponse,
+    # ErrorResponse,
+)
 
 # manage
 from .client import ManageClient, AsyncManageClient
@@ -173,10 +174,26 @@
 )
 
 # utilities
+# pylint: disable=wrong-import-position
 from .audio import Microphone
 from .audio import (
-    LOGGING,
-    CHANNELS,
-    RATE,
-    CHUNK,
+    INPUT_LOGGING,
+    INPUT_CHANNELS,
+    INPUT_RATE,
+    INPUT_CHUNK,
 )
+
+LOGGING = INPUT_LOGGING
+CHANNELS = INPUT_CHANNELS
+RATE = INPUT_RATE
+CHUNK = INPUT_CHUNK
+
+from .audio import Speaker
+from .audio import (
+    OUTPUT_LOGGING,
+    OUTPUT_CHANNELS,
+    OUTPUT_RATE,
+    OUTPUT_CHUNK,
+)
+
+# pylint: enable=wrong-import-position
diff --git a/deepgram/audio/__init__.py b/deepgram/audio/__init__.py
@@ -3,4 +3,17 @@
 # SPDX-License-Identifier: MIT
 
 from .microphone import Microphone
-from .microphone import LOGGING, CHANNELS, RATE, CHUNK
+from .microphone import (
+    LOGGING as INPUT_LOGGING,
+    CHANNELS as INPUT_CHANNELS,
+    RATE as INPUT_RATE,
+    CHUNK as INPUT_CHUNK,
+)
+
+from .speaker import Speaker
+from .speaker import (
+    LOGGING as OUTPUT_LOGGING,
+    CHANNELS as OUTPUT_CHANNELS,
+    RATE as OUTPUT_RATE,
+    CHUNK as OUTPUT_CHUNK,
+)
diff --git a/deepgram/audio/microphone/constants.py b/deepgram/audio/microphone/constants.py
@@ -5,7 +5,6 @@
 from ...utils import verboselogs
 
 # Constants for microphone
-
 LOGGING = verboselogs.WARNING
 CHANNELS = 1
 RATE = 16000

diff --git a/deepgram/audio/microphone/microphone.py b/deepgram/audio/microphone/microphone.py
@@ -5,7 +5,7 @@
 import inspect
 import asyncio
 import threading
-from typing import Optional, Callable, TYPE_CHECKING
+from typing import Optional, Callable, Union, TYPE_CHECKING
 import logging
 
 from ...utils import verboselogs
@@ -21,10 +21,10 @@ class Microphone:  # pylint: disable=too-many-instance-attributes
     """
 
     _logger: verboselogs.VerboseLogger
-    _exit: threading.Event
 
     _audio: "pyaudio.PyAudio"
     _stream: "pyaudio.Stream"
+
     _chunk: int
     _rate: int
     _format: int
@@ -34,9 +34,10 @@ class Microphone:  # pylint: disable=too-many-instance-attributes
 
     _asyncio_loop: asyncio.AbstractEventLoop
     _asyncio_thread: threading.Thread
+    _exit: threading.Event
 
-    _push_callback_org: object
-    _push_callback: object
+    _push_callback_org: Optional[Callable] = None
+    _push_callback: Optional[Callable] = None
 
     def __init__(
         self,
@@ -53,6 +54,7 @@ def __init__(
         self._logger = verboselogs.VerboseLogger(__name__)
         self._logger.addHandler(logging.StreamHandler())
         self._logger.setLevel(verbose)
+
         self._exit = threading.Event()
 
         self._audio = pyaudio.PyAudio()
@@ -71,9 +73,16 @@ def _start_asyncio_loop(self) -> None:
 
     def is_active(self) -> bool:
         """
-        returns True if the stream is active, False otherwise
+        is_active - returns the state of the stream
+
+        Args:
+            None
+
+        Returns:
+            True if the stream is active, False otherwise
         """
         self._logger.debug("Microphone.is_active ENTER")
+
         if self._stream is None:
             self._logger.error("stream is None")
             self._logger.debug("Microphone.is_active LEAVE")
@@ -87,24 +96,34 @@ def is_active(self) -> bool:
 
     def set_callback(self, push_callback: Callable) -> None:
         """
-        Set the callback function to be called when data is received.
+        set_callback - sets the callback function to be called when data is received.
+
+        Args:
+            push_callback (Callable): The callback function to be called when data is received.
+                                      This should be the websocket send function.
+
+        Returns:
+            None
         """
         self._push_callback_org = push_callback
 
     def start(self) -> bool:
         """
-        starts the microphone stream
+        start - starts the microphone stream
+
+        Returns:
+            bool: True if the stream was started, False otherwise
         """
         self._logger.debug("Microphone.start ENTER")
 
         self._logger.info("format: %s", self._format)
         self._logger.info("channels: %d", self._channels)
         self._logger.info("rate: %d", self._rate)
         self._logger.info("chunk: %d", self._chunk)
-        self._logger.info("input_device_id: %d", self._input_device_index)
+        # self._logger.info("input_device_id: %d", self._input_device_index)
 
         if self._push_callback_org is None:
-            self._logger.error("start() failed. No callback set.")
+            self._logger.error("start failed. No callback set.")
             self._logger.debug("Microphone.start LEAVE")
             return False
 
@@ -114,9 +133,13 @@ def start(self) -> bool:
             self._asyncio_thread = threading.Thread(target=self._start_asyncio_loop)
             self._asyncio_thread.start()
 
-            self._push_callback = lambda data: asyncio.run_coroutine_threadsafe(
-                self._push_callback_org(data), self._asyncio_loop
-            ).result()
+            self._push_callback = lambda data: (
+                asyncio.run_coroutine_threadsafe(
+                    self._push_callback_org(data), self._asyncio_loop
+                ).result()
+                if self._push_callback_org
+                else None
+            )
         else:
             self._logger.verbose("regular threaded callback")
             self._push_callback = self._push_callback_org
@@ -134,7 +157,7 @@ def start(self) -> bool:
         self._exit.clear()
         self._stream.start_stream()
 
-        self._logger.notice("start() succeeded")
+        self._logger.notice("start succeeded")
         self._logger.debug("Microphone.start LEAVE")
         return True
 
@@ -176,41 +199,50 @@ def _callback(
 
     def mute(self) -> bool:
         """
-        Mutes the microphone stream
+        mute - mutes the microphone stream
+
+        Returns:
+            bool: True if the stream was muted, False otherwise
         """
         self._logger.debug("Microphone.mute ENTER")
 
         if self._stream is None:
-            self._logger.error("mute() failed. Library not initialized.")
+            self._logger.error("mute failed. Library not initialized.")
             self._logger.debug("Microphone.mute LEAVE")
             return False
 
         self._is_muted = True
 
-        self._logger.notice("mute() succeeded")
+        self._logger.notice("mute succeeded")
         self._logger.debug("Microphone.mute LEAVE")
         return True
 
     def unmute(self) -> bool:
         """
-        Unmutes the microphone stream
+        unmute - unmutes the microphone stream
+
+        Returns:
+            bool: True if the stream was unmuted, False otherwise
         """
         self._logger.debug("Microphone.unmute ENTER")
 
         if self._stream is None:
-            self._logger.error("unmute() failed. Library not initialized.")
+            self._logger.error("unmute failed. Library not initialized.")
             self._logger.debug("Microphone.unmute LEAVE")
             return False
 
         self._is_muted = False
 
-        self._logger.notice("unmute() succeeded")
+        self._logger.notice("unmute succeeded")
         self._logger.debug("Microphone.unmute LEAVE")
         return True
 
     def finish(self) -> bool:
         """
-        Stops the microphone stream
+        finish - stops the microphone stream
+
+        Returns:
+            bool: True if the stream was stopped, False otherwise
         """
         self._logger.debug("Microphone.finish ENTER")
 
@@ -219,19 +251,24 @@ def finish(self) -> bool:
 
         # Stop the stream.
         if self._stream is not None:
+            self._logger.notice("stopping stream...")
             self._stream.stop_stream()
             self._stream.close()
             self._stream = None  # type: ignore
+            self._logger.notice("stream stopped")
 
         # clean up the thread
         if (
-            inspect.iscoroutinefunction(self._push_callback_org)
-            and self._asyncio_thread is not None
+            # inspect.iscoroutinefunction(self._push_callback_org)
+            # and
+            self._asyncio_thread
+            is not None
         ):
+            self._logger.notice("stopping asyncio loop...")
             self._asyncio_loop.call_soon_threadsafe(self._asyncio_loop.stop)
             self._asyncio_thread.join()
             self._asyncio_thread = None  # type: ignore
-        self._logger.notice("stream/recv thread joined")
+            self._logger.notice("_asyncio_thread joined")
 
         self._logger.notice("finish succeeded")
         self._logger.debug("Microphone.finish LEAVE")

diff --git a/deepgram/audio/speaker/__init__.py b/deepgram/audio/speaker/__init__.py
@@ -0,0 +1,6 @@
+# Copyright 2023-2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+from .speaker import Speaker
+from .constants import LOGGING, CHANNELS, RATE, CHUNK
diff --git a/deepgram/audio/speaker/constants.py b/deepgram/audio/speaker/constants.py
@@ -0,0 +1,12 @@
+# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+from ...utils import verboselogs
+
+# Constants for speaker
+LOGGING = verboselogs.WARNING
+TIMEOUT = 0.050
+CHANNELS = 1
+RATE = 48000
+CHUNK = 8194
diff --git a/deepgram/audio/speaker/errors.py b/deepgram/audio/speaker/errors.py
@@ -0,0 +1,21 @@
+# Copyright 2024 Deepgram SDK contributors. All Rights Reserved.
+# Use of this source code is governed by a MIT license that can be found in the LICENSE file.
+# SPDX-License-Identifier: MIT
+
+
+# exceptions for speaker
+class DeepgramSpeakerError(Exception):
+    """
+    Exception raised for known errors related to Speaker library.
+
+    Attributes:
+        message (str): The error message describing the exception.
+    """
+
+    def __init__(self, message: str):
+        super().__init__(message)
+        self.name = "DeepgramSpeakerError"
+        self.message = message
+
+    def __str__(self):
+        return f"{self.name}: {self.message}"