forked from dgaddy/silent_speech
-
Notifications
You must be signed in to change notification settings - Fork 0
/
vocoder.py
36 lines (31 loc) · 1.16 KB
/
vocoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import os
import json
import sys
import numpy as np
import torch
sys.path.append('./hifi_gan')
from env import AttrDict
from models import Generator
from absl import flags
# Module-level alias for absl's flag container (flags.FLAGS).
FLAGS = flags.FLAGS
# Path to the HiFi-GAN generator checkpoint. It defaults to None; Vocoder
# asserts that it has been set and reads 'config.json' from the same directory.
flags.DEFINE_string('hifigan_checkpoint', None, 'filename of hifi-gan generator checkpoint')
class Vocoder(object):
    """HiFi-GAN generator wrapper that converts mel spectrograms to audio.

    The checkpoint path is taken from the ``hifigan_checkpoint`` flag, and the
    generator hyperparameters are read from a ``config.json`` file located in
    the same directory as that checkpoint.
    """

    def __init__(self, device='cuda'):
        """Build the generator on ``device`` and load its trained weights.

        Args:
            device: device the generator is moved to and the checkpoint
                tensors are mapped onto (default ``'cuda'``).

        Raises:
            AssertionError: if the ``hifigan_checkpoint`` flag is not set.
        """
        assert FLAGS.hifigan_checkpoint is not None, \
            'the hifigan_checkpoint flag must be set to use Vocoder'
        checkpoint_file = FLAGS.hifigan_checkpoint
        # The hyperparameter file is expected to sit next to the checkpoint.
        config_file = os.path.join(os.path.dirname(checkpoint_file), 'config.json')
        with open(config_file) as f:
            hparams = AttrDict(json.load(f))

        self.generator = Generator(hparams).to(device)
        # map_location places the stored tensors directly on the requested
        # device; without it torch.load restores them to the device they were
        # saved from, which fails when that device is unavailable (for
        # example a GPU-trained checkpoint loaded on a CPU-only machine).
        state = torch.load(checkpoint_file, map_location=device)
        self.generator.load_state_dict(state['generator'])
        self.generator.eval()
        self.generator.remove_weight_norm()

    def __call__(self, mel_spectrogram):
        """Synthesize audio from a single mel spectrogram.

        mel_spectrogram should be a tensor of shape (seq_len, 80)
        returns 1d tensor of audio

        The input is passed to the generator as-is apart from reshaping, so it
        is not moved between devices here.
        """
        with torch.no_grad():
            # (seq_len, n_mels) -> (1, n_mels, seq_len): transpose, then add
            # a leading batch dimension of size one.
            batched = mel_spectrogram.T.unsqueeze(0)
            audio = self.generator(batched)
        # squeeze() drops every size-one dimension of the generator output.
        return audio.squeeze()