From b0228cba6d8a1a9520e9c64c66d86da2f5f36f8b Mon Sep 17 00:00:00 2001
From: JarbasAI <33701864+JarbasAl@users.noreply.github.com>
Date: Sat, 9 Dec 2023 15:05:22 +0000
Subject: [PATCH 1/2] fix single speaker models speaker_id arg

port https://github.com/OpenVoiceOS/ovos-tts-plugin-piper/pull/15/files

Some single-speaker models fail if the inference inputs contain "sid"; this commit adds a check that omits "sid" when the model has at most one speaker.
---
 src/python_run/piper/voice.py | 37 +++++++++++++++++------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/python_run/piper/voice.py b/src/python_run/piper/voice.py
index 1edda099..72347e32 100644
--- a/src/python_run/piper/voice.py
+++ b/src/python_run/piper/voice.py
@@ -128,13 +128,13 @@ def synthesize_stream_raw(
                 noise_w=noise_w,
             ) + silence_bytes
 
-    def synthesize_ids_to_raw(
-        self,
-        phoneme_ids: List[int],
-        speaker_id: Optional[int] = None,
-        length_scale: Optional[float] = None,
-        noise_scale: Optional[float] = None,
-        noise_w: Optional[float] = None,
+        def synthesize_ids_to_raw(
+            self,
+            phoneme_ids: List[int],
+            speaker_id: Optional[int] = None,
+            length_scale: Optional[float] = None,
+            noise_scale: Optional[float] = None,
+            noise_w: Optional[float] = None,
     ) -> bytes:
         """Synthesize raw audio from phoneme ids."""
         if length_scale is None:
@@ -153,25 +153,24 @@ def synthesize_ids_to_raw(
             dtype=np.float32,
         )
 
+        args = {
+            "input": phoneme_ids_array,
+            "input_lengths": phoneme_ids_lengths,
+            "scales": scales
+        }
+
+        if self.config.num_speakers <= 1:
+            speaker_id = None
+
         if (self.config.num_speakers > 1) and (speaker_id is None):
             # Default speaker
             speaker_id = 0
 
-        sid = None
-
         if speaker_id is not None:
             sid = np.array([speaker_id], dtype=np.int64)
+            args["sid"] = sid
 
         # Synthesize through Onnx
-        audio = self.session.run(
-            None,
-            {
-                "input": phoneme_ids_array,
-                "input_lengths": phoneme_ids_lengths,
-                "scales": scales,
-                "sid": sid,
-            },
-        )[0].squeeze((0, 1))
+        audio = self.session.run(None, args, )[0].squeeze((0, 1))
         audio = audio_float_to_int16(audio.squeeze())
-
         return audio.tobytes()

From d43ecbc10963e5dbc6bc42cd9b2f410dddbd7a3e Mon Sep 17 00:00:00 2001
From: JarbasAI <33701864+JarbasAl@users.noreply.github.com>
Date: Sat, 9 Dec 2023 15:06:38 +0000
Subject: [PATCH 2/2] Update voice.py: restore indentation of synthesize_ids_to_raw signature

---
 src/python_run/piper/voice.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/python_run/piper/voice.py b/src/python_run/piper/voice.py
index 72347e32..1b6003e2 100644
--- a/src/python_run/piper/voice.py
+++ b/src/python_run/piper/voice.py
@@ -128,13 +128,13 @@ def synthesize_stream_raw(
                 noise_w=noise_w,
             ) + silence_bytes
 
-        def synthesize_ids_to_raw(
-            self,
-            phoneme_ids: List[int],
-            speaker_id: Optional[int] = None,
-            length_scale: Optional[float] = None,
-            noise_scale: Optional[float] = None,
-            noise_w: Optional[float] = None,
+    def synthesize_ids_to_raw(
+        self,
+        phoneme_ids: List[int],
+        speaker_id: Optional[int] = None,
+        length_scale: Optional[float] = None,
+        noise_scale: Optional[float] = None,
+        noise_w: Optional[float] = None,
     ) -> bytes:
         """Synthesize raw audio from phoneme ids."""
         if length_scale is None: