Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add English G2P and tokenizer. #15

Merged
merged 3 commits into from
Sep 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions NeMoOnnxSharp.Example/PretrainedModelInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,16 @@ private static PretrainedModelInfo[] CreateModelList()
"https://github.com/kaiidams/NeMoOnnxSharp/releases/download/v1.1/commandrecognition_en_matchboxnet3x1x64_v2.onnx",
"a0c5e4d14e83d3b6afdaf239265a390c2ca513bcdedf3d295bc1f9f97f19868a"
),
new PretrainedModelInfo(
"cmudict-0.7b_nv22.10",
"https://github.com/kaiidams/NeMoOnnxSharp/releases/download/v1.2/cmudict-0.7b_nv22.10",
"d330f3a3554d4c7ff8ef7bfc0c338ed74831d5f54109508fb829bdd82173608b"
),
new PretrainedModelInfo(
"heteronyms-052722",
"https://github.com/kaiidams/NeMoOnnxSharp/releases/download/v1.2/heteronyms-052722",
"b701909aedf753172eff223950f8859cd4b9b4c80199cf0a6e9ac4a307c8f8ec"
),
new PretrainedModelInfo(
"tts_en_hifigan",
"https://github.com/kaiidams/NeMoOnnxSharp/releases/download/v1.2/tts_en_hifigan.onnx",
Expand Down
22 changes: 22 additions & 0 deletions NeMoOnnxSharp.Example/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Text;
using System.Threading.Tasks;
using System.Collections.Generic;
using NeMoOnnxSharp.TTSTokenizers;

namespace NeMoOnnxSharp.Example
{
Expand All @@ -19,6 +20,10 @@ static async Task Main(string[] args)
{
await Transcribe();
}
else if (task == "speak")
{
await Speak();
}
else if (task == "vad")
{
await FramePredict(false);
Expand Down Expand Up @@ -54,6 +59,23 @@ static async Task Transcribe()
}
}

/// <summary>
/// Runs the "speak" task: fetches the phoneme dictionary and heteronym list,
/// builds an English G2P converter and phoneme tokenizer from them, and
/// encodes a fixed sample sentence.
/// </summary>
static async Task Speak()
{
    // The values returned by DownloadModelAsync are handed straight to EnglishG2p;
    // presumably they are local file paths -- verify against DownloadModelAsync.
    string phoneDict = await DownloadModelAsync("cmudict-0.7b_nv22.10");
    string heteronyms = await DownloadModelAsync("heteronyms-052722");
    var g2p = new EnglishG2p(phoneDict, heteronyms);
    var tokenizer = new EnglishPhonemesTokenizer(
        g2p,
        punct: true,
        stresses: true,
        chars: true,
        apostrophe: true,
        padWithSpace: true,
        addBlankAt: BaseTokenizer.AddBlankAt.True);

    // The encoded ids are not consumed here; the call only exercises the tokenizer.
    tokenizer.Encode("Hello world!");
}

static async Task FramePredict(bool mbn)
{
string appDirPath = AppDomain.CurrentDomain.BaseDirectory;
Expand Down
21 changes: 21 additions & 0 deletions NeMoOnnxSharp.Tests/Data/cmudict-test
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Comment
YOU'VE Y UW1 V
READ R EH1 D
READ(1) R IY1 D
BOOK B UH1 K
THE DH AH0
THE(1) DH AH1
THE(2) DH IY0
OPERATING AA1 P ER0 EY2 T IH0 NG
OPERATING(1) AO1 P ER0 EY2 T IH0 NG
SYSTEM S IH1 S T AH0 M
DESIGN D IH0 Z AY1 N
AND AH0 N D
AND(1) AE1 N D
IMPLEMENTATION IH2 M P L AH0 M EH0 N T EY1 SH AH0 N
THIRD TH ER1 D
EDITION AH0 D IH1 SH AH0 N
EDITION(1) IH0 D IH1 SH AH0 N
DID D IH1 D
DID(1) D IH0 D
YOU Y UW1
1 change: 1 addition & 0 deletions NeMoOnnxSharp.Tests/Data/heteronyms-test
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
read
6 changes: 6 additions & 0 deletions NeMoOnnxSharp.Tests/NeMoOnnxSharp.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@
</ItemGroup>

<ItemGroup>
<None Update="Data\cmudict-test">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Data\heteronyms-test">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Data\make_test.py">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
Expand Down
89 changes: 89 additions & 0 deletions NeMoOnnxSharp.Tests/TextTokenizersTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
using Microsoft.VisualStudio.TestTools.UnitTesting;
using NeMoOnnxSharp.TTSTokenizers;
using System;
using System.Diagnostics;
using System.IO;

namespace NeMoOnnxSharp.Tests
{
/// <summary>
/// Unit tests for <c>EnglishG2p</c> and <c>EnglishPhonemesTokenizer</c>, run against
/// the small dictionary and heteronym files copied to the test output's Data folder.
/// </summary>
[TestClass]
public class TextTokenizersTest
{
    // Vocabulary the tokenizer is expected to expose with the options used in Initialize().
    private static readonly string[] ExpectedTokens =
    {
        " ", "B", "CH", "D", "DH", "F", "G", "HH", "JH", "K", "L", "M",
        "N", "NG", "P", "R", "S", "SH", "T", "TH", "V", "W", "Y", "Z", "ZH",
        "AA0", "AA1", "AA2", "AE0", "AE1", "AE2", "AH0", "AH1", "AH2", "AO0",
        "AO1", "AO2", "AW0", "AW1", "AW2", "AY0", "AY1", "AY2", "EH0", "EH1",
        "EH2", "ER0", "ER1", "ER2", "EY0", "EY1", "EY2", "IH0", "IH1", "IH2",
        "IY0", "IY1", "IY2", "OW0", "OW1", "OW2", "OY0", "OY1", "OY2", "UH0",
        "UH1", "UH2", "UW0", "UW1", "UW2", "a", "b", "c", "d", "e", "f", "g",
        "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
        "v", "w", "x", "y", "z", "'", ",", ".", "!", "?", "-", ":", ";", "/",
        "\"", "(", ")", "[", "]", "{", "}", "<pad>", "<blank>", "<oov>"
    };

    // Raw sample sentence (contains the digit form "3rd"); not referenced by the tests below.
    private const string SampleText =
        "You've read the book “Operating Systems Design and Implementation, 3rd edition”. Did you?";

    // Same sentence with "3rd" spelled out; this is the input the tests feed to the G2P and tokenizer.
    private const string NormalizedSampleText =
        "You've read the book “Operating Systems Design and Implementation, third edition”. Did you?";

    // Expected G2P output for NormalizedSampleText, with symbols joined by '|'.
    private const string SamplePronText =
        "Y|UW1|V| |r|e|a|d| |t|h|e| |B|UH1|K| |“|o|p|e|r|a|t|i|n|g| |"
        + "S|IH1|S|T|AH0|M|Z| |D|IH0|Z|AY1|N| |a|n|d| |IH2|M|P|L|AH0|"
        + "M|EH0|N|T|EY1|SH|AH0|N|,| |TH|ER1|D| |e|d|i|t|i|o|n|”|.| |"
        + "d|i|d| |Y|UW1|?";

    // Expected token ids for NormalizedSampleText.
    private static readonly int[] SampleParsed =
    {
        0, 22, 68, 20, 0, 87, 74, 70, 73, 0, 89, 77, 74,
        0, 1, 65, 9, 0, 105, 84, 85, 74, 87, 70, 89, 78,
        83, 76, 0, 16, 53, 16, 18, 31, 11, 23, 0, 3, 52,
        23, 41, 12, 0, 70, 83, 73, 0, 54, 11, 14, 10, 31,
        11, 43, 12, 18, 50, 17, 31, 12, 97, 0, 19, 47, 3,
        0, 74, 73, 78, 89, 78, 84, 83, 105, 98, 0, 73, 78,
        73, 0, 22, 68, 100, 0
    };

    // Both fields are assigned in Initialize(), which is marked [TestInitialize].
    // They are declared non-nullable (seeded with null!) so the tests can dereference
    // them without nullable-reference warnings.
    private EnglishG2p _g2p = null!;
    private EnglishPhonemesTokenizer _tokenizer = null!;

    /// <summary>
    /// Builds the G2P converter and tokenizer from the test data files.
    /// </summary>
    [TestInitialize]
    public void Initialize()
    {
        string appDirPath = AppDomain.CurrentDomain.BaseDirectory;
        _g2p = new EnglishG2p(
            phonemeDict: Path.Combine(appDirPath, "Data", "cmudict-test"),
            heteronyms: Path.Combine(appDirPath, "Data", "heteronyms-test"),
            phonemeProbability: 1.0);
        _tokenizer = new EnglishPhonemesTokenizer(
            _g2p,
            punct: true,
            stresses: true,
            chars: true,
            apostrophe: true,
            padWithSpace: true,
            addBlankAt: BaseTokenizer.AddBlankAt.True);
    }

    /// <summary>
    /// The tokenizer vocabulary contains exactly the expected tokens.
    /// </summary>
    [TestMethod]
    public void TestTokenizerVocab()
    {
        // NOTE(review): AreEquivalent ignores order, so this does not pin the
        // token-to-id mapping; consider AreEqual once the vocabulary order is confirmed.
        CollectionAssert.AreEquivalent(ExpectedTokens, _tokenizer.Tokens);
    }

    /// <summary>
    /// The G2P output for the sample sentence matches the expected pronunciation string.
    /// </summary>
    [TestMethod]
    public void TestEnglishG2p()
    {
        var pron = string.Join("|", _g2p.Parse(NormalizedSampleText));

        Assert.AreEqual(SamplePronText, pron);
    }

    /// <summary>
    /// Encoding the sample sentence yields the expected id sequence.
    /// </summary>
    [TestMethod]
    public void TestEnglishEncode()
    {
        var parsed = _tokenizer.Encode(NormalizedSampleText);

        // Order-sensitive comparison: an encoding is a sequence, so the same ids
        // in a different order must fail.
        CollectionAssert.AreEqual(SampleParsed, parsed);
    }
}
}
75 changes: 75 additions & 0 deletions NeMoOnnxSharp/TTSTokenizers/BaseTokenizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright (c) Katsuya Iida. All Rights Reserved.
// See LICENSE in the project root for license information.

// A number of implementation details in this file have been translated from the Python scripts of NVIDIA NeMo,
// largely located in the following file:
//
// https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/tts/torch/tts_tokenizers.py
//
// The origin has the following copyright notice and license:
//
// https://github.com/NVIDIA/NeMo/blob/main/LICENSE
//

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;

namespace NeMoOnnxSharp.TTSTokenizers
{
/// <summary>
/// Base class for text tokenizers. It stores the id-to-token and token-to-id
/// tables plus the ids of the special tokens, and converts id sequences back
/// to text. Derived classes supply <see cref="Encode"/>.
/// </summary>
public abstract class BaseTokenizer
{
    /// <summary>
    /// Blank-token option passed to tokenizers; this class only declares the
    /// values and does not interpret them.
    /// </summary>
    public enum AddBlankAt
    {
        False,
        True,
        Last
    }

    // Literal spellings of the special tokens.
    protected const string Pad = "<pad>";
    protected const string Blank = "<blank>";
    protected const string OOV = "<oov>";

    // Vocabulary tables; they start out empty here.
    protected string[] _id2token = Array.Empty<string>();
    protected IDictionary<string, int> _token2id = new Dictionary<string, int>();

    // Ids that Decode() leaves out of its output.
    protected ISet<int> _utilIds = new HashSet<int>();

    // Ids of the special tokens.
    protected int _space;
    protected int _pad;
    protected int _blank;
    protected int _oov;

    // Separator placed between tokens by Decode().
    protected string _sep = string.Empty;

    protected bool _padWithSpace;

    /// <summary>
    /// Creates a tokenizer with empty tables and an empty separator.
    /// </summary>
    protected BaseTokenizer()
    {
    }

    /// <summary>Token strings indexed by token id.</summary>
    public string[] Tokens => _id2token;

    /// <summary>Id of the pad token.</summary>
    public int PadId => _pad;

    /// <summary>Id of the blank token.</summary>
    public int BlankId => _blank;

    /// <summary>Id of the out-of-vocabulary token.</summary>
    public int OOVId => _oov;

    /// <summary>Separator used when joining decoded tokens.</summary>
    public string Sep => _sep;

    /// <summary>
    /// Converts text into a sequence of token ids.
    /// </summary>
    /// <param name="text">Text to encode.</param>
    /// <returns>The token ids for <paramref name="text"/>.</returns>
    public abstract int[] Encode(string text);

    /// <summary>
    /// Converts a sequence of token ids back into text.
    /// </summary>
    /// <param name="tokens">Token ids to decode.</param>
    /// <returns>
    /// The token strings for every id not contained in the utility-id set,
    /// joined with <see cref="Sep"/>.
    /// </returns>
    public string Decode(int[] tokens)
    {
        IEnumerable<string> visible =
            from id in tokens
            where !_utilIds.Contains(id)
            select _id2token[id];

        return string.Join(_sep, visible);
    }
}
}
Loading