From b0228cba6d8a1a9520e9c64c66d86da2f5f36f8b Mon Sep 17 00:00:00 2001 From: JarbasAI <33701864+JarbasAl@users.noreply.github.com> Date: Sat, 9 Dec 2023 15:05:22 +0000 Subject: [PATCH 1/2] fix single speaker models speaker_id arg (port of https://github.com/OpenVoiceOS/ovos-tts-plugin-piper/pull/15/files): some models fail if the arguments contain "sid"; this commit adds a check to skip that kwarg --- src/python_run/piper/voice.py | 37 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/python_run/piper/voice.py b/src/python_run/piper/voice.py index 1edda099..72347e32 100644 --- a/src/python_run/piper/voice.py +++ b/src/python_run/piper/voice.py @@ -128,13 +128,13 @@ def synthesize_stream_raw( noise_w=noise_w, ) + silence_bytes - def synthesize_ids_to_raw( - self, - phoneme_ids: List[int], - speaker_id: Optional[int] = None, - length_scale: Optional[float] = None, - noise_scale: Optional[float] = None, - noise_w: Optional[float] = None, + def synthesize_ids_to_raw( + self, + phoneme_ids: List[int], + speaker_id: Optional[int] = None, + length_scale: Optional[float] = None, + noise_scale: Optional[float] = None, + noise_w: Optional[float] = None, ) -> bytes: """Synthesize raw audio from phoneme ids.""" if length_scale is None: @@ -153,25 +153,24 @@ def synthesize_ids_to_raw( dtype=np.float32, ) + args = { + "input": phoneme_ids_array, + "input_lengths": phoneme_ids_lengths, + "scales": scales + } + + if self.config.num_speakers <= 1: + speaker_id = None + if (self.config.num_speakers > 1) and (speaker_id is None): # Default speaker speaker_id = 0 - sid = None - if speaker_id is not None: sid = np.array([speaker_id], dtype=np.int64) + args["sid"] = sid # Synthesize through Onnx - audio = self.session.run( - None, - { - "input": phoneme_ids_array, - "input_lengths": phoneme_ids_lengths, - "scales": scales, - "sid": sid, - }, - )[0].squeeze((0, 1)) + audio = self.session.run(None, args, )[0].squeeze((0, 1)) audio = 
audio_float_to_int16(audio.squeeze()) - return audio.tobytes() From d43ecbc10963e5dbc6bc42cd9b2f410dddbd7a3e Mon Sep 17 00:00:00 2001 From: JarbasAI <33701864+JarbasAl@users.noreply.github.com> Date: Sat, 9 Dec 2023 15:06:38 +0000 Subject: [PATCH 2/2] Update voice.py --- src/python_run/piper/voice.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/python_run/piper/voice.py b/src/python_run/piper/voice.py index 72347e32..1b6003e2 100644 --- a/src/python_run/piper/voice.py +++ b/src/python_run/piper/voice.py @@ -128,13 +128,13 @@ def synthesize_stream_raw( noise_w=noise_w, ) + silence_bytes - def synthesize_ids_to_raw( - self, - phoneme_ids: List[int], - speaker_id: Optional[int] = None, - length_scale: Optional[float] = None, - noise_scale: Optional[float] = None, - noise_w: Optional[float] = None, + def synthesize_ids_to_raw( + self, + phoneme_ids: List[int], + speaker_id: Optional[int] = None, + length_scale: Optional[float] = None, + noise_scale: Optional[float] = None, + noise_w: Optional[float] = None, ) -> bytes: """Synthesize raw audio from phoneme ids.""" if length_scale is None: