Agent API Early Access

deepgram · Oct 24, 2024 · 3782edb · 3782edb
1 parent 84fc18b
commit 3782edb
Show file tree

Hide file tree

Showing 36 changed files with 3,085 additions and 313 deletions.
diff --git a/deepgram/__init__.py b/deepgram/__init__.py
@@ -34,7 +34,7 @@
 from .errors import DeepgramApiKeyError
 
 # listen/read client
-from .client import Listen, Read
+from .client import ListenRouter, ReadRouter, SpeakRouter, AgentRouter
 
 # common
 from .client import (
@@ -302,6 +302,56 @@
     AsyncSelfHostedClient,
 )
 
+
+# agent
+from .client import AgentWebSocketEvents
+
+# websocket
+from .client import (
+    AgentWebSocketClient,
+    AsyncAgentWebSocketClient,
+)
+
+from .client import (
+    #### common websocket response
+    # OpenResponse,
+    # CloseResponse,
+    # ErrorResponse,
+    # UnhandledResponse,
+    #### unique
+    WelcomeResponse,
+    SettingsAppliedResponse,
+    ConversationTextResponse,
+    UserStartedSpeakingResponse,
+    AgentThinkingResponse,
+    FunctionCallingResponse,
+    AgentStartedSpeakingResponse,
+    AgentAudioDoneResponse,
+)
+
+from .client import (
+    # top level
+    SettingsConfigurationOptions,
+    UpdateInstructionsOptions,
+    UpdateSpeakOptions,
+    InjectAgentMessageOptions,
+    # sub level
+    Listen,
+    Speak,
+    Header,
+    Item,
+    Properties,
+    Parameters,
+    Function,
+    Provider,
+    Think,
+    Agent,
+    Input,
+    Output,
+    Audio,
+    Context,
+)
+
 # utilities
 # pylint: disable=wrong-import-position
 from .audio import Microphone, DeepgramMicrophoneError

diff --git a/deepgram/audio/microphone/microphone.py b/deepgram/audio/microphone/microphone.py
@@ -9,6 +9,7 @@
 import logging
 
 from ...utils import verboselogs
+
 from .constants import LOGGING, CHANNELS, RATE, CHUNK
 
 if TYPE_CHECKING:
@@ -22,8 +23,8 @@ class Microphone:  # pylint: disable=too-many-instance-attributes
 
     _logger: verboselogs.VerboseLogger
 
-    _audio: "pyaudio.PyAudio"
-    _stream: "pyaudio.Stream"
+    _audio: Optional["pyaudio.PyAudio"] = None
+    _stream: Optional["pyaudio.Stream"] = None
 
     _chunk: int
     _rate: int
@@ -145,77 +146,49 @@ def start(self) -> bool:
             self._asyncio_thread = None
             self._push_callback = self._push_callback_org
 
-        self._stream = self._audio.open(
-            format=self._format,
-            channels=self._channels,
-            rate=self._rate,
-            input=True,
-            frames_per_buffer=self._chunk,
-            input_device_index=self._input_device_index,
-            stream_callback=self._callback,
-        )
+        if self._audio is not None:
+            self._stream = self._audio.open(
+                format=self._format,
+                channels=self._channels,
+                rate=self._rate,
+                input=True,
+                output=False,
+                frames_per_buffer=self._chunk,
+                input_device_index=self._input_device_index,
+                stream_callback=self._callback,
+            )
+
+        if self._stream is None:
+            self._logger.error("start failed. No stream created.")
+            self._logger.debug("Microphone.start LEAVE")
+            return False
 
         self._exit.clear()
-        self._stream.start_stream()
+        if self._stream is not None:
+            self._stream.start_stream()
 
         self._logger.notice("start succeeded")
         self._logger.debug("Microphone.start LEAVE")
         return True
 
-    def _callback(
-        self, input_data, frame_count, time_info, status_flags
-    ):  # pylint: disable=unused-argument
-        """
-        The callback used to process data in callback mode.
-        """
-        # dynamic import of pyaudio as not to force the requirements on the SDK (and users)
-        import pyaudio  # pylint: disable=import-outside-toplevel
-
-        self._logger.debug("Microphone._callback ENTER")
-
-        if self._exit.is_set():
-            self._logger.info("exit is Set")
-            self._logger.notice("_callback stopping...")
-            self._logger.debug("Microphone._callback LEAVE")
-            return None, pyaudio.paAbort
-
-        if input_data is None:
-            self._logger.warning("input_data is None")
-            self._logger.debug("Microphone._callback LEAVE")
-            return None, pyaudio.paContinue
-
-        try:
-            if self._is_muted:
-                size = len(input_data)
-                input_data = b"\x00" * size
-
-            self._push_callback(input_data)
-        except Exception as e:
-            self._logger.error("Error while sending: %s", str(e))
-            self._logger.debug("Microphone._callback LEAVE")
-            raise
-
-        self._logger.debug("Microphone._callback LEAVE")
-        return input_data, pyaudio.paContinue
-
     def mute(self) -> bool:
         """
         mute - mutes the microphone stream
 
         Returns:
             bool: True if the stream was muted, False otherwise
         """
-        self._logger.debug("Microphone.mute ENTER")
+        self._logger.verbose("Microphone.mute ENTER")
 
         if self._stream is None:
             self._logger.error("mute failed. Library not initialized.")
-            self._logger.debug("Microphone.mute LEAVE")
+            self._logger.verbose("Microphone.mute LEAVE")
             return False
 
         self._is_muted = True
 
         self._logger.notice("mute succeeded")
-        self._logger.debug("Microphone.mute LEAVE")
+        self._logger.verbose("Microphone.mute LEAVE")
         return True
 
     def unmute(self) -> bool:
@@ -225,19 +198,42 @@ def unmute(self) -> bool:
         Returns:
             bool: True if the stream was unmuted, False otherwise
         """
-        self._logger.debug("Microphone.unmute ENTER")
+        self._logger.verbose("Microphone.unmute ENTER")
 
         if self._stream is None:
             self._logger.error("unmute failed. Library not initialized.")
-            self._logger.debug("Microphone.unmute LEAVE")
+            self._logger.verbose("Microphone.unmute LEAVE")
             return False
 
         self._is_muted = False
 
         self._logger.notice("unmute succeeded")
-        self._logger.debug("Microphone.unmute LEAVE")
+        self._logger.verbose("Microphone.unmute LEAVE")
         return True
 
+    def is_muted(self) -> bool:
+        """
+        is_muted - returns the state of the stream
+
+        Args:
+            None
+
+        Returns:
+            True if the stream is muted, False otherwise
+        """
+        self._logger.spam("Microphone.is_muted ENTER")
+
+        if self._stream is None:
+            self._logger.spam("is_muted: stream is None")
+            self._logger.spam("Microphone.is_muted LEAVE")
+            return False
+
+        val = self._is_muted
+
+        self._logger.spam("is_muted: %s", val)
+        self._logger.spam("Microphone.is_muted LEAVE")
+        return val
+
     def finish(self) -> bool:
         """
         finish - stops the microphone stream
@@ -255,7 +251,6 @@ def finish(self) -> bool:
             self._logger.notice("stopping stream...")
             self._stream.stop_stream()
             self._stream.close()
-            self._stream = None  # type: ignore
             self._logger.notice("stream stopped")
 
         # clean up the thread
@@ -265,13 +260,44 @@ def finish(self) -> bool:
             self._asyncio_thread
             is not None
         ):
-            self._logger.notice("stopping asyncio loop...")
+            self._logger.notice("stopping _asyncio_loop...")
             self._asyncio_loop.call_soon_threadsafe(self._asyncio_loop.stop)
             self._asyncio_thread.join()
-            self._asyncio_thread = None
             self._logger.notice("_asyncio_thread joined")
 
+        self._stream = None
+        self._asyncio_thread = None
+
         self._logger.notice("finish succeeded")
         self._logger.debug("Microphone.finish LEAVE")
 
         return True
+
+    def _callback(
+        self, input_data, frame_count, time_info, status_flags
+    ):  # pylint: disable=unused-argument
+        """
+        The callback used to process data in callback mode.
+        """
+        # dynamic import of pyaudio as not to force the requirements on the SDK (and users)
+        import pyaudio  # pylint: disable=import-outside-toplevel
+
+        if self._exit.is_set():
+            self._logger.notice("_callback exit is Set. stopping...")
+            return None, pyaudio.paAbort
+
+        if input_data is None:
+            self._logger.warning("input_data is None")
+            return None, pyaudio.paContinue
+
+        try:
+            if self._is_muted:
+                size = len(input_data)
+                input_data = b"\x00" * size
+
+            self._push_callback(input_data)
+        except Exception as e:
+            self._logger.error("Error while sending: %s", str(e))
+            raise
+
+        return input_data, pyaudio.paContinue