# test2.py
# Forked from suno-ai/bark.
from pathlib import Path

import numpy as np
from bark.api import generate_audio
from bark.generation import SAMPLE_RATE
from IPython.display import Audio
from scipy.io.wavfile import write as write_wav
# NOTE: GEN_TEMP and SPEAKER are not referenced anywhere else in the visible
# part of this script; they are kept so code relying on the names still works.
GEN_TEMP = 0.6
SPEAKER = "v2/en_speaker_6"

# Maps each speaker name used in the dialogue to the voice preset that is
# passed to generate_audio() as history_prompt.
speaker_lookup = {"Samantha": "v2/en_speaker_9", "John": "v2/en_speaker_2"}

# Dialogue generated by ChatGPT; one "<speaker>: <text>" entry per line.
script = """
Samantha: Hey, have you heard about this new text-to-audio model called "Bark"?
John: No, I haven't. What's so special about it?
Samantha: Well, apparently it's the most realistic and natural-sounding text-to-audio model out there right now. People are saying it sounds just like a real person speaking.
John: Wow, that sounds amazing. How does it work?
Samantha: I think it uses advanced machine learning algorithms to analyze and understand the nuances of human speech, and then replicates those nuances in its own speech output.
John: That's pretty impressive. Do you think it could be used for things like audiobooks or podcasts?
Samantha: Definitely! In fact, I heard that some publishers are already starting to use Bark to create audiobooks. And I bet it would be great for podcasts too.
John: I can imagine. It would be like having your own personal voiceover artist.
Samantha: Exactly! I think Bark is going to be a game-changer in the world of text-to-audio technology."""

# Reduce the dialogue to a list of stripped, non-empty lines. The emptiness
# check runs on the stripped text so that whitespace-only lines are dropped
# too; otherwise they would survive as "" and break the "speaker: text" split.
script = [entry.strip() for entry in script.splitlines() if entry.strip()]
# Pause appended after every spoken line: half a second of zero samples.
silence = np.zeros(int(0.5 * SAMPLE_RATE))

# Synthesize the dialogue line by line, alternating speech and silence.
pieces = []
for line in script:
    # Split on the first ": " only, so spoken text that itself contains
    # ": " does not break the two-way unpack.
    speaker, text = line.split(": ", 1)
    audio_array = generate_audio(text, history_prompt=speaker_lookup[speaker])
    pieces += [audio_array, silence.copy()]

# IPython's Audio is a notebook display object and has no export() method,
# so the WAV file is written with scipy instead. The output path is unchanged.
out_path = Path("./out/test2")
out_path.parent.mkdir(parents=True, exist_ok=True)

# The silence buffers are float64, so the concatenation is float64 as well;
# cast to float32, a sample format scipy documents for WAV output.
# NOTE(review): assumes generate_audio returns floating-point samples - confirm.
write_wav(out_path, SAMPLE_RATE, np.concatenate(pieces).astype(np.float32))