Skip to content

Commit

Permalink
Speech Recognition v2 - StartListening/StopListening (#1382)
Browse files Browse the repository at this point in the history
* Speech Recognition v2

* Fix tizen

* Fix tizen

* Fix PR comments

* Add tests

* Fix tests

* Rename `ISpeechToText.State` -> `ISpeechToText.CurrentState`

* Update Layout

* Add `ISpeechToText.StateChanged`

* Add Missing CancellationToken

* Update Sample App

* Update SpeechToTextPage.xaml

* `dotnet format`

* Add tests, update CurrentState

* Add Missing XML

* `dotnet format`

* Update Formatting

* Add Missing Cancellation Usage, Update `SpeechToTextImplementation.getRecognitionTaskCompletionSource` on macios

* Add `ResetSpeechRecognitionTaskCompletionSource()`

* Update SpeechToTextImplementation.tizen.cs

* `dotnet format`

* Dispose of `CancellationTokenRegistration`

* Add StateChanged impl on Tizen

---------

Co-authored-by: Brandon Minnick <[email protected]>
Co-authored-by: Jay Cho <[email protected]>
  • Loading branch information
3 people authored Sep 23, 2023
1 parent 322547e commit 429b205
Show file tree
Hide file tree
Showing 22 changed files with 840 additions and 238 deletions.
12 changes: 9 additions & 3 deletions Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,20 @@
CS1574: XML comment has cref attribute that could not be resolved
CS1580: Invalid type for parameter 'parameter number' in XML comment cref attribute
CS1581: Invalid return type in XML comment cref attribute
CS1584: XML comment has syntactically incorrect cref attribute
CS1584: XML comment has syntactically incorrect cref attribute
CS1587: XML comment is not placed on a valid language element
CS1589: The syntax of a tag which referenced a file was incorrect
CS1590: Invalid XML include element Missing file attribute
CS1591: Missing XML comment for publicly visible type or member
CS1592: Badly formed XML in included comments file
CS1598: XML parser could not be loaded. The XML documentation file will not be generated.
CS1658: Identifier expected; 'true' is a keyword
CS1658: Identifier expected; 'true' is a keyword
CS1710: XML comment on 'type' has a duplicate typeparam tag for 'parameter'
CS1711: XML comment has a typeparam tag, but there is no type parameter by that name
CS1712: Type parameter has no matching typeparam tag in the XML comment
CS1723: XML comment has cref attribute that refers to a type parameter
CS1734: XML comment has a paramref tag, but there is no parameter by that name -->
<WarningsAsErrors>nullable,CS0419,CS1570,CS1571,CS1572,CS1573,CS1574,CS1580,CS1581,CS1584,CS1589,CS1590,CS1592,CS1598,CS1658,CS1734</WarningsAsErrors>
<WarningsAsErrors>nullable,CS0419,CS1570,CS1571,CS1572,CS1573,CS1574,CS1580,CS1581,CS1584,CS1587,CS1589,CS1590,CS1591,CS1592,CS1598,CS1658,CS1710,CS1711,CS1712,CS1723,CS1734</WarningsAsErrors>
</PropertyGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,62 +14,115 @@
</ContentPage.Resources>

<ScrollView>
<Grid
RowDefinitions="64, 24, 64, 24, auto, 60, 60, 60"
<VerticalStackLayout
Spacing="20"
Padding="30,0">

<Label
Grid.Row="0"
Text="SpeechToText allows the user to convert speech to text in real time"/>
Text="SpeechToText allows the user to convert speech to text in real time"
HorizontalTextAlignment="Center"/>

<Label
Grid.Row="1"
Text="Locale"
FontAttributes="Bold"/>

<Picker
Grid.Row="2"
ItemsSource="{Binding Locales}"
SelectedItem="{Binding CurrentLocale}"
ItemDisplayBinding="{Binding ., Converter={StaticResource PickerLocaleDisplayConverter}}"
Margin="0,0,0,20">
</Picker>
ItemDisplayBinding="{Binding ., Converter={StaticResource PickerLocaleDisplayConverter}}"/>

<Label
Grid.Row="3"
Text="Language Output"
FontAttributes="Bold"/>

<Label
Grid.Row="4"
Text="{Binding RecognitionText}"
FontSize="18"
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
MinimumHeightRequest="100"
Margin="0,0,0,20" />
MinimumHeightRequest="100" />

<Button
Grid.Row="5"
Text="Play"
Command="{Binding PlayCommand}"
HorizontalOptions="Center"
Margin="0,0,0,20"/>
HorizontalOptions="Center" />

<Button
Grid.Row="6"
Text="Listen"
Command="{Binding ListenCommand}"
HorizontalOptions="Center"
Margin="0,0,0,20"/>
<Border
StrokeThickness="2"
Stroke="#808080"
StrokeShape="RoundRectangle 8,8,8,8"
Padding="12">
<Border.Content>

<Button
Grid.Row="7"
Text="Stop Listening"
Command="{Binding ListenCancelCommand}"
HorizontalOptions="Center"/>
<Grid RowDefinitions="*,60"
ColumnDefinitions="*,*"
RowSpacing="12"
ColumnSpacing="12">

<Button
Grid.Row="0"
Grid.Column="0"
Text="ListenAsync"
Command="{Binding ListenCommand}"
HorizontalOptions="End" />

<Button
Grid.Row="0"
Grid.Column="1"
Text="Cancel Token"
Command="{Binding ListenCancelCommand}"
HorizontalOptions="Start" />

<Label
Grid.Row="1"
Grid.ColumnSpan="2"
Text="The `ListenAsync` API allows you to await the final speech recognition results using async/await. `ListenAsync` is cancelled via CancellationToken."
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
FontSize="12"/>

</Grid>
</Grid>
</Border.Content>
</Border>

<Border
StrokeThickness="2"
Stroke="#808080"
StrokeShape="RoundRectangle 8,8,8,8"
Padding="12">
<Border.Content>
<Grid RowDefinitions="*,60"
ColumnDefinitions="*,*"
RowSpacing="12"
ColumnSpacing="12">

<Button
Grid.Row="0"
Grid.Column="0"
Text="StartListenAsync"
Command="{Binding StartListenCommand}"
HorizontalOptions="End" />

<Button
Grid.Row="0"
Grid.Column="1"
Text="StopListenAsync"
Command="{Binding StopListenCommand}"
HorizontalOptions="Start" />

<Label
Grid.Row="1"
Grid.ColumnSpan="2"
Text="The `StartListenAsync` API starts the speech-to-text service and shares the results using `RecognitionResultUpdated` event and `RecognitionResultCompleted` event."
HorizontalOptions="Center"
HorizontalTextAlignment="Center"
FontSize="12"/>

</Grid>
</Border.Content>
</Border>

</VerticalStackLayout>
</ScrollView>

</pages:BasePage>
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
using System.Diagnostics;
using CommunityToolkit.Mvvm.ComponentModel;
using CommunityToolkit.Mvvm.Input;
using Microsoft.Maui.Dispatching;

namespace CommunityToolkit.Maui.Sample.ViewModels.Converters;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,23 @@ public partial class SpeechToTextViewModel : BaseViewModel
[ObservableProperty]
string? recognitionText = "Welcome to .NET MAUI Community Toolkit!";

// CanExecute gate for ListenCommand. StartListen sets the generated
// CanListenExecute property to false and StopListen sets it back to true.
[ObservableProperty, NotifyCanExecuteChangedFor(nameof(ListenCommand))]
bool canListenExecute = true;

// CanExecute gate for StartListenCommand. Cleared by both Listen and StartListen
// while they run; restored by Listen's finally block and by StopListen.
[ObservableProperty, NotifyCanExecuteChangedFor(nameof(StartListenCommand))]
bool canStartListenExecute = true;

// CanExecute gate for StopListenCommand. Starts out false; StartListen sets it
// to true and StopListen sets it back to false.
[ObservableProperty, NotifyCanExecuteChangedFor(nameof(StopListenCommand))]
bool canStopListenExecute = false;

// Stores the injected speech services and wires up the handlers for locale
// collection changes, speech-to-text state changes and completed recognition results.
public SpeechToTextViewModel(ITextToSpeech textToSpeech, ISpeechToText speechToText)
{
	(this.textToSpeech, this.speechToText) = (textToSpeech, speechToText);

	Locales.CollectionChanged += HandleLocalesCollectionChanged;

	// Same instance as this.speechToText, which was assigned just above.
	speechToText.StateChanged += HandleSpeechToTextStateChanged;
	speechToText.RecognitionResultCompleted += HandleRecognitionResultCompleted;
}

public ObservableCollection<Locale> Locales { get; } = new();
Expand Down Expand Up @@ -59,9 +70,63 @@ async Task Play(CancellationToken cancellationToken)
}, cancellationToken);
}

[RelayCommand(IncludeCancelCommand = true)]
[RelayCommand(IncludeCancelCommand = true, CanExecute = nameof(CanListenExecute))]
// Runs one awaitable recognition session through ISpeechToText.ListenAsync.
// StartListenCommand is disabled for the duration and re-enabled in the finally
// block, whichever way the method exits.
async Task Listen(CancellationToken cancellationToken)
{
	const string beginSpeakingPrompt = "Begin speaking...";

	CanStartListenExecute = false;

	try
	{
		var hasPermission = await speechToText.RequestPermissions(cancellationToken);
		if (!hasPermission)
		{
			await Toast.Make("Permission not granted").Show(CancellationToken.None);
			return;
		}

		RecognitionText = beginSpeakingPrompt;

		var culture = CultureInfo.GetCultureInfo(CurrentLocale?.Language ?? defaultLanguage);
		var partialResultProgress = new Progress<string>(AppendPartialText);

		var outcome = await speechToText.ListenAsync(culture, partialResultProgress, cancellationToken);

		if (!outcome.IsSuccessful)
		{
			await Toast.Make(outcome.Exception?.Message ?? "Unable to recognize speech").Show(CancellationToken.None);
		}
		else
		{
			RecognitionText = outcome.Text;
		}

		// Nothing replaced the prompt, so clear it instead of leaving it on screen.
		if (RecognitionText is beginSpeakingPrompt)
		{
			RecognitionText = string.Empty;
		}
	}
	finally
	{
		CanStartListenExecute = true;
	}

	// Progress callback: drops the prompt on the first partial result, then
	// appends each partial result followed by a single space.
	void AppendPartialText(string partialText)
	{
		if (RecognitionText is beginSpeakingPrompt)
		{
			RecognitionText = string.Empty;
		}

		RecognitionText += partialText + " ";
	}
}

[RelayCommand(CanExecute = nameof(CanStartListenExecute))]
async Task StartListen(CancellationToken cancellationToken)
{
CanListenExecute = false;
CanStartListenExecute = false;
CanStopListenExecute = true;

var isGranted = await speechToText.RequestPermissions(cancellationToken);
if (!isGranted)
{
Expand All @@ -73,33 +138,43 @@ async Task Listen(CancellationToken cancellationToken)

RecognitionText = beginSpeakingPrompt;

var recognitionResult = await speechToText.ListenAsync(
CultureInfo.GetCultureInfo(CurrentLocale?.Language ?? defaultLanguage),
new Progress<string>(partialText =>
{
if (RecognitionText is beginSpeakingPrompt)
{
RecognitionText = string.Empty;
}
RecognitionText += partialText + " ";
}), cancellationToken);
await speechToText.StartListenAsync(CultureInfo.GetCultureInfo(CurrentLocale?.Language ?? defaultLanguage), cancellationToken);

if (recognitionResult.IsSuccessful)
{
RecognitionText = recognitionResult.Text;
}
else
{
await Toast.Make(recognitionResult.Exception?.Message ?? "Unable to recognize speech").Show(CancellationToken.None);
}
speechToText.RecognitionResultUpdated += HandleRecognitionResultUpdated;

if (RecognitionText is beginSpeakingPrompt)
{
RecognitionText = string.Empty;
}
}

// Resets the command gates to their idle values, detaches the partial-result
// handler and returns the task produced by ISpeechToText.StopListenAsync.
[RelayCommand(CanExecute = nameof(CanStopListenExecute))]
Task StopListen(CancellationToken cancellationToken)
{
// Restore the idle state: both listen commands enabled, stop disabled.
CanListenExecute = true;
CanStartListenExecute = true;
CanStopListenExecute = false;

// Matches the subscription made in StartListen.
speechToText.RecognitionResultUpdated -= HandleRecognitionResultUpdated;

// The task is returned directly rather than awaited here.
return speechToText.StopListenAsync(cancellationToken);
}

// Appends each RecognitionResultUpdated payload to the text currently displayed.
void HandleRecognitionResultUpdated(object? sender, SpeechToTextRecognitionResultUpdatedEventArgs e)
	=> RecognitionText += e.RecognitionResult;

// Replaces the displayed text with the RecognitionResultCompleted payload.
void HandleRecognitionResultCompleted(object? sender, SpeechToTextRecognitionResultCompletedEventArgs e)
	=> RecognitionText = e.RecognitionResult;

// Shows a toast naming the new speech-to-text state whenever StateChanged fires.
// This is an event handler, so it has to be async void.
async void HandleSpeechToTextStateChanged(object? sender, SpeechToTextStateChangedEventArgs e)
{
	var stateMessage = $"State Changed: {e.State}";
	var stateToast = Toast.Make(stateMessage);

	await stateToast.Show(CancellationToken.None);
}

void HandleLocalesCollectionChanged(object? sender, NotifyCollectionChangedEventArgs e)
{
OnPropertyChanged(nameof(CurrentLocale));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
namespace CommunityToolkit.Maui.Media;

/// <summary>
/// Event data raised with <see cref="ISpeechToText.RecognitionResultCompleted"/>.
/// </summary>
public class SpeechToTextRecognitionResultCompletedEventArgs : EventArgs
{
	/// <summary>
	/// Gets the speech recognition result supplied when this instance was created.
	/// </summary>
	public string RecognitionResult { get; }

	/// <summary>
	/// Creates a <see cref="SpeechToTextRecognitionResultCompletedEventArgs"/> that carries the given result.
	/// </summary>
	/// <param name="recognitionResult">Text exposed through <see cref="RecognitionResult"/>.</param>
	public SpeechToTextRecognitionResultCompletedEventArgs(string recognitionResult)
		=> RecognitionResult = recognitionResult;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
namespace CommunityToolkit.Maui.Media;

/// <summary>
/// Event data raised with <see cref="ISpeechToText.RecognitionResultUpdated"/>.
/// </summary>
public class SpeechToTextRecognitionResultUpdatedEventArgs : EventArgs
{
	/// <summary>
	/// Gets the speech recognition result supplied when this instance was created.
	/// </summary>
	public string RecognitionResult { get; }

	/// <summary>
	/// Creates a <see cref="SpeechToTextRecognitionResultUpdatedEventArgs"/> that carries the given result.
	/// </summary>
	/// <param name="recognitionResult">Text exposed through <see cref="RecognitionResult"/>.</param>
	public SpeechToTextRecognitionResultUpdatedEventArgs(string recognitionResult)
		=> RecognitionResult = recognitionResult;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
namespace CommunityToolkit.Maui.Media;

/// <summary>
/// Event data raised with <see cref="ISpeechToText.StateChanged"/>.
/// </summary>
public class SpeechToTextStateChangedEventArgs : EventArgs
{
	/// <summary>
	/// Gets the <see cref="SpeechToTextState"/> supplied when this instance was created.
	/// </summary>
	public SpeechToTextState State { get; }

	/// <summary>
	/// Creates a <see cref="SpeechToTextStateChangedEventArgs"/> that carries the given state.
	/// </summary>
	/// <param name="state">Value exposed through <see cref="State"/>.</param>
	public SpeechToTextStateChangedEventArgs(SpeechToTextState state)
		=> State = state;
}
Loading

0 comments on commit 429b205

Please sign in to comment.