Skip to content

Commit

Permalink
Merge from Voice100Sharp
Browse files Browse the repository at this point in the history
  • Loading branch information
kaiidams committed Aug 5, 2023
1 parent 24b307a commit 20925a1
Show file tree
Hide file tree
Showing 28 changed files with 1,552 additions and 121 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/build-validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,6 @@ jobs:
- name: Restore dependencies
run: dotnet restore
- name: Build
run: dotnet build --configuration Release --no-restore
run: dotnet build --configuration Release --no-restore
- name: Test
run: dotnet test --no-restore --verbosity normal
3 changes: 3 additions & 0 deletions NeMoOnnxSharp.Tests/Data/61-70968-0000-mod.wav
Git LFS file not shown
3 changes: 3 additions & 0 deletions NeMoOnnxSharp.Tests/Data/61-70968-0000.wav
Git LFS file not shown
79 changes: 79 additions & 0 deletions NeMoOnnxSharp.Tests/Data/make_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import librosa
import numpy as np


def readwav(filepath="61-70968-0000.wav", sr=16000):
    """Load an audio file through librosa and print a few summary statistics.

    Args:
        filepath: Path of the audio file handed to ``librosa.load``.
        sr: Value forwarded to ``librosa.load`` as its ``sr`` argument.

    Returns:
        The waveform array produced by ``librosa.load``.
    """
    # The sample rate returned by librosa is not needed by any caller here.
    samples, _ = librosa.load(filepath, sr=sr)

    count = len(samples)
    peak = samples.max()
    trough = samples.min()
    average = samples.mean()

    print(f" - Length {count:6}")
    print(f" - Max {peak:6.3f}")
    print(f" - Min {trough:6.3f}")
    print(f" - Mean {average:6.3f}")

    return samples


def pad_waveform(waveform, n_fft=512, win_length=400):
    """Zero-pad a 1-D waveform symmetrically at both ends.

    ``(n_fft - win_length) // 2`` zeros are added before and after the
    samples. The defaults (512 and 400) are the ``n_fft`` / ``win_length``
    values this script passes to librosa, so calling the function with a
    single argument behaves exactly as before.

    Args:
        waveform: 1-D array of audio samples.
        n_fft: FFT length used by the subsequent transform.
        win_length: Analysis window length; must not exceed ``n_fft``.

    Returns:
        A new 1-D array: padding, the original samples, padding. The padding
        comes from ``np.zeros`` with its default dtype, so the result dtype
        follows NumPy's promotion rules for ``np.concatenate``.

    Raises:
        ValueError: If ``n_fft`` is smaller than ``win_length``.
    """
    if n_fft < win_length:
        raise ValueError(
            f"n_fft ({n_fft}) must be greater than or equal to "
            f"win_length ({win_length})")

    # The same amount goes on both sides; an odd difference is floored.
    pad = np.zeros((n_fft - win_length) // 2)
    return np.concatenate([pad, waveform, pad])


def spectrogram(waveform, log_offset=1e-6):
    """Compute a log power spectrogram of ``waveform``.

    The waveform is first padded with ``pad_waveform`` and then transformed
    by ``librosa.stft`` (n_fft=512, hop_length=160, win_length=400, Hann
    window, ``center=False``).

    Args:
        waveform: 1-D array of audio samples.
        log_offset: Constant added to the power before taking the logarithm.

    Returns:
        The transposed log power spectrogram as a ``float32`` array.
    """
    padded = pad_waveform(waveform)

    stft_options = dict(
        n_fft=512,
        hop_length=160,
        win_length=400,
        window="hann",
        center=False,
    )
    coefficients = librosa.stft(padded, **stft_options)

    power = np.abs(coefficients) ** 2
    log_power = np.log(power + log_offset)

    return log_power.T.astype(np.float32)


def melspectrogram(waveform, sr=16000, log_offset=1e-6):
    """Compute a log mel spectrogram of ``waveform``.

    The waveform is first padded with ``pad_waveform`` and then passed to
    ``librosa.feature.melspectrogram`` (n_fft=512, hop_length=160,
    win_length=400, Hann window, ``center=False``, 64 mel bands, HTK
    formula, no filter normalisation).

    Args:
        waveform: 1-D array of audio samples.
        sr: Sample rate forwarded to librosa.
        log_offset: Constant added to the mel energies before the logarithm.

    Returns:
        The transposed log mel spectrogram as a ``float32`` array.
    """
    padded = pad_waveform(waveform)

    mel_options = dict(
        sr=sr,
        n_fft=512,
        hop_length=160,
        win_length=400,
        window="hann",
        center=False,
        n_mels=64,
        htk=True,
        norm=None,
    )
    mel_energy = librosa.feature.melspectrogram(y=padded, **mel_options)

    log_mel = np.log(mel_energy + log_offset)

    return log_mel.T.astype(np.float32)


def main():
    """Generate the reference feature files from the default sample WAV.

    Reads the waveform with ``readwav`` and, for each feature type, prints a
    heading and the output shape, then writes the raw C-ordered bytes of the
    feature array to ``spectrogram.bin`` / ``melspectrogram.bin`` in the
    current working directory.
    """
    waveform = readwav()

    # (heading, feature function, output file), processed in this order.
    jobs = (
        ("Spectrogram", spectrogram, 'spectrogram.bin'),
        ("Mel-Spectrogram", melspectrogram, 'melspectrogram.bin'),
    )

    for heading, extract, out_path in jobs:
        print(heading)

        X = extract(waveform)
        print(f" - Output {X.shape}")

        with open(out_path, 'wb') as f:
            f.write(X.tobytes("C"))


if __name__ == "__main__":
    main()
Binary file added NeMoOnnxSharp.Tests/Data/melspectrogram.bin
Binary file not shown.
Binary file added NeMoOnnxSharp.Tests/Data/spectrogram.bin
Binary file not shown.
76 changes: 76 additions & 0 deletions NeMoOnnxSharp.Tests/FFTTest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System;
using System.IO;
using System.Runtime.InteropServices;

namespace NeMoOnnxSharp.Tests
{
[TestClass]
public class FFTTest
{
    // Fixed seed so that a failing run can be reproduced exactly.
    private const int RandomSeed = 12345;

    /// <summary>
    /// Naive O(N^2) reference transform used to check <c>FFT.CFFT</c>.
    /// </summary>
    /// <remarks>
    /// Only the real input <paramref name="xr"/> is transformed; whatever
    /// <paramref name="xi"/> contains on entry is ignored. On return
    /// <paramref name="xr"/> holds the cosine sums and <paramref name="xi"/>
    /// the sine sums.
    /// </remarks>
    /// <param name="xr">Real input; overwritten with the real part of the result.</param>
    /// <param name="xi">Overwritten with the imaginary part of the result.</param>
    /// <param name="N">Number of points to transform.</param>
    private static void CFFTRef(double[] xr, double[] xi, int N)
    {
        double[] yr = new double[N];
        double[] yi = new double[N];
        for (int i = 0; i < N; i++)
        {
            double vr = 0.0;
            double vi = 0.0;
            for (int k = 0; k < N; k++)
            {
                // Same expression as before, evaluated once per term.
                double angle = -2 * Math.PI * k * i / N;
                vr += Math.Cos(angle) * xr[k];
                vi += Math.Sin(angle) * xr[k];
            }
            yr[i] = vr;
            yi[i] = vi;
        }

        // Results are buffered first because every output depends on all inputs.
        Array.Copy(yr, xr, N);
        Array.Copy(yi, xi, N);
    }

    /// <summary>
    /// Mean squared error between two arrays of equal length.
    /// </summary>
    /// <exception cref="ArgumentException">The arrays differ in length.</exception>
    private static double MSE(double[] a, double[] b)
    {
        if (a.Length != b.Length)
        {
            throw new ArgumentException("Arrays must have the same length.", nameof(b));
        }

        double err = 0.0;
        for (int i = 0; i < a.Length; i++)
        {
            double diff = a[i] - b[i];
            err += diff * diff;
        }
        return err / a.Length;
    }

    /// <summary>
    /// Compares <c>FFT.CFFT</c> with <see cref="CFFTRef"/> on random real
    /// input for N = 256, 512, 1024 and 2048 (10 trials each).
    /// </summary>
    /// <remarks>
    /// The reference real part is compared with the <c>xi</c> buffer given
    /// to <c>FFT.CFFT</c> and the reference imaginary part with its
    /// <c>xr</c> buffer. NOTE(review): this presumably reflects the output
    /// convention of <c>FFT.CFFT</c>; confirm against its implementation.
    /// </remarks>
    [TestMethod]
    public void TestCFFT()
    {
        var rng = new Random(RandomSeed);
        for (int N = 256; N <= 2048; N *= 2)
        {
            var xr0 = new double[N];
            var xi0 = new double[N];
            var xr1 = new double[N];
            var xi1 = new double[N];
            for (int i = 0; i < 10; i++)
            {
                for (int j = 0; j < N; j++)
                {
                    xr0[j] = rng.NextDouble();
                    xi0[j] = rng.NextDouble();
                    xr1[j] = xr0[j];
                    // Deliberately unrelated to xi0: only the real input is shared.
                    xi1[j] = rng.NextDouble();
                }

                CFFTRef(xr0, xi0, N);
                FFT.CFFT(xr1, xi1, N);

                double error = MSE(xr0, xi1);
                Assert.IsTrue(error < 1e-20, $"N={N}, trial={i}: MSE(xr0, xi1) = {error}");
                error = MSE(xi0, xr1);
                Assert.IsTrue(error < 1e-20, $"N={N}, trial={i}: MSE(xi0, xr1) = {error}");
            }
        }
    }
}
}
40 changes: 40 additions & 0 deletions NeMoOnnxSharp.Tests/NeMoOnnxSharp.Tests.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
<!-- MSTest project for the NeMoOnnxSharp library. -->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>net7.0</TargetFramework>
<Nullable>disable</Nullable>

<!-- Test assembly only; it is not meant to be packed as a NuGet package. -->
<IsPackable>false</IsPackable>
</PropertyGroup>

<!-- Test SDK, MSTest adapter/framework, coverage collector and System.Memory. -->
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.5.0" />
<PackageReference Include="MSTest.TestAdapter" Version="3.0.2" />
<PackageReference Include="MSTest.TestFramework" Version="3.0.2" />
<PackageReference Include="coverlet.collector" Version="3.2.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="System.Memory" Version="4.5.5" />
</ItemGroup>

<!-- The library under test. -->
<ItemGroup>
<ProjectReference Include="..\NeMoOnnxSharp\NeMoOnnxSharp.csproj" />
</ItemGroup>

<!-- Test fixtures are copied next to the test assembly; the tests load them
     from a "Data" folder under AppDomain.CurrentDomain.BaseDirectory. -->
<ItemGroup>
<None Update="Data\61-70968-0000-mod.wav">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Data\61-70968-0000.wav">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Data\melspectrogram.bin">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Data\spectrogram.bin">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
160 changes: 160 additions & 0 deletions NeMoOnnxSharp.Tests/Voice100Test.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System;
using System.Diagnostics;
using System.IO;
using System.Reflection;
using System.Runtime.InteropServices;

namespace NeMoOnnxSharp.Tests
{
[TestClass]
public class NeMoOnnxSharpTest
{
    private const int SampleRate = 16000;
    private const string SampleWAVSpeechFile = "61-70968-0000.wav";

    // Fixed seed so that a failing run can be reproduced exactly.
    private const int RandomSeed = 12345;

    /// <summary>
    /// Reads a file from the "Data" folder next to the test assembly and
    /// reinterprets its raw bytes as an array of <see cref="float"/>.
    /// </summary>
    /// <param name="file">File name relative to the "Data" folder.</param>
    private static float[] ReadData(string file)
    {
        string appDirPath = AppDomain.CurrentDomain.BaseDirectory;
        string path = Path.Combine(appDirPath, "Data", file);
        var bytes = File.ReadAllBytes(path);
        return MemoryMarshal.Cast<byte, float>(bytes).ToArray();
    }

    /// <summary>
    /// Mean squared error between two float arrays of equal length,
    /// accumulated in double precision.
    /// </summary>
    /// <exception cref="ArgumentException">The arrays differ in length.</exception>
    private static double MSE(float[] a, float[] b)
    {
        if (a.Length != b.Length)
        {
            throw new ArgumentException("Arrays must have the same length.", nameof(b));
        }

        double err = 0.0;
        for (int i = 0; i < a.Length; i++)
        {
            double diff = a[i] - b[i];
            err += diff * diff;
        }
        return err / a.Length;
    }

    /// <summary>
    /// Mean squared error between two double arrays of equal length.
    /// </summary>
    /// <exception cref="ArgumentException">The arrays differ in length.</exception>
    private static double MSE(double[] a, double[] b)
    {
        if (a.Length != b.Length)
        {
            throw new ArgumentException("Arrays must have the same length.", nameof(b));
        }

        double err = 0.0;
        for (int i = 0; i < a.Length; i++)
        {
            double diff = a[i] - b[i];
            err += diff * diff;
        }
        return err / a.Length;
    }

    private readonly short[] waveform;
    private readonly AudioProcessor processor;

    /// <summary>
    /// Loads the sample WAV file and creates the processor shared by the
    /// spectrogram tests. The settings mirror those in Data/make_test.py
    /// (Hann window, window 400, hop 160, FFT 512, 64 HTK mel bands,
    /// log offset 1e-6, no centering).
    /// </summary>
    public NeMoOnnxSharpTest()
    {
        string appDirPath = AppDomain.CurrentDomain.BaseDirectory;
        string waveFile = Path.Combine(appDirPath, "Data", SampleWAVSpeechFile);
        waveform = WaveFile.ReadWAV(waveFile, SampleRate);
        processor = new AudioProcessor(
            sampleRate: SampleRate,
            window: "hann",
            windowLength: 400,
            hopLength: 160,
            fftLength: 512,
            //preNormalize: 0.8,
            preemph: 0.0,
            center: false,
            nMelBands: 64,
            melMinHz: 0.0,
            melMaxHz: 0.0,
            htk: true,
            melNormalize: null,
            logOffset: 1e-6,
            postNormalize: false);
    }

    /// <summary>
    /// Checks <c>AudioProcessor.Spectrogram</c> against Data/spectrogram.bin.
    /// </summary>
    [TestMethod]
    public void TestSpectrogram()
    {
        var x = processor.Spectrogram(waveform);
        AssertMSE("spectrogram.bin", x);
    }

    /// <summary>
    /// Checks <c>AudioProcessor.MelSpectrogram</c> against Data/melspectrogram.bin.
    /// </summary>
    [TestMethod]
    public void TestMelSpectrogram()
    {
        var x = processor.MelSpectrogram(waveform);
        AssertMSE("melspectrogram.bin", x);
    }

    /// <summary>
    /// Invokes the private methods <c>ReadFrameCenter</c> and
    /// <c>ReadFrameCenterPreemphasis</c> through reflection with identical
    /// arguments (processor created with <c>preemph: 0.0</c>) and asserts
    /// that both produce exactly the same frame.
    /// </summary>
    [TestMethod]
    public void TestReadFrame()
    {
        int windowLength = 5;
        int fftLength = 9;
        var frameProcessor = new AudioProcessor(
            windowLength: windowLength,
            fftLength: fftLength,
            preemph: 0.0);

        MethodInfo methodInfo1 = typeof(AudioProcessor).GetMethod(
            "ReadFrameCenter", BindingFlags.NonPublic | BindingFlags.Instance);
        MethodInfo methodInfo2 = typeof(AudioProcessor).GetMethod(
            "ReadFrameCenterPreemphasis", BindingFlags.NonPublic | BindingFlags.Instance);

        // Fail with a clear message instead of a NullReferenceException if
        // the private methods are renamed or removed.
        Assert.IsNotNull(methodInfo1, "AudioProcessor.ReadFrameCenter was not found.");
        Assert.IsNotNull(methodInfo2, "AudioProcessor.ReadFrameCenterPreemphasis was not found.");

        var rng = new Random(RandomSeed);
        short[] samples = new short[1200];
        double[] frame1 = new double[fftLength];
        double[] frame2 = new double[fftLength];
        for (int i = 0; i < samples.Length; i++)
        {
            // The upper bound of Random.Next is exclusive; +1 makes
            // short.MaxValue reachable.
            samples[i] = (short)rng.Next(short.MinValue, short.MaxValue + 1);
        }
#if true
        for (int i = 0; i < 100; i++)
        {
            int offset = rng.Next(samples.Length);
            double scale = rng.NextDouble();
            object[] parameters1 = { samples, offset, scale, frame1 };
            methodInfo1.Invoke(frameProcessor, parameters1);
            object[] parameters2 = { samples, offset, scale, frame2 };
            methodInfo2.Invoke(frameProcessor, parameters2);
            double error = MSE(frame1, frame2);
            Assert.IsTrue(error == 0, $"trial={i}, offset={offset}, scale={scale}: MSE = {error}");
        }
#else
        // Manual micro-benchmark of the two frame readers (disabled).
        for (int j = 0; j < 5; j++)
        {
            var stopWatch = new Stopwatch();
            stopWatch.Start();
            for (int i = 0; i < 1000000; i++)
            {
                int offset = rng.Next(samples.Length);
                double scale = rng.NextDouble();
                object[] parameters1 = { samples, offset, scale, frame1 };
                methodInfo1.Invoke(frameProcessor, parameters1);
            }
            stopWatch.Stop();
            Console.WriteLine(stopWatch.Elapsed);

            stopWatch = new Stopwatch();
            stopWatch.Start();
            for (int i = 0; i < 1000000; i++)
            {
                int offset = rng.Next(samples.Length);
                double scale = rng.NextDouble();
                object[] parameters2 = { samples, offset, scale, frame1 };
                methodInfo2.Invoke(frameProcessor, parameters2);
            }
            stopWatch.Stop();
            Console.WriteLine(stopWatch.Elapsed);
        }
#endif
    }

    /// <summary>
    /// Asserts that the mean squared error between <paramref name="x"/> and
    /// the reference data stored in <paramref name="path"/> is below
    /// <paramref name="threshold"/>.
    /// </summary>
    /// <param name="path">Reference file name inside the "Data" folder.</param>
    /// <param name="x">Values produced by the code under test.</param>
    /// <param name="threshold">Exclusive upper bound for the MSE.</param>
    private void AssertMSE(string path, float[] x, double threshold = 1e-3)
    {
        var truth = ReadData(path);
        double mse = MSE(truth, x);
        Console.WriteLine("MSE: {0}", mse);
        Assert.IsTrue(mse < threshold, $"{path}: MSE {mse} is not below {threshold}.");
    }
}
}
Loading

0 comments on commit 20925a1

Please sign in to comment.