
Commit 42e37a1

chore: wip
1 parent 8a730e7 commit 42e37a1

File tree: 5 files changed (+55, -28 lines)
Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
+# SpeechToTextIntegration Demo
+
+This project demonstrates the integration of the Telerik UI for Blazor `SpeechToTextButton` component with a transcription model, such as OpenAI's `whisper-1`. It provides a simple Blazor UI for recording audio and transcribing speech to text, showcasing how to connect the UI component to a backend speech-to-text service.
+
+## Main Purpose
+- **Showcase**: Illustrates how to use the Telerik `SpeechToTextButton` in a Blazor application.
+- **Integration**: Demonstrates sending recorded audio to a transcription model (e.g., OpenAI Whisper) and displaying the transcribed text in the UI.
+- **Extensibility**: Serves as a starting point for integrating other speech-to-text models or services.
+
+## Configuration Notes
+- **Model Registration**: The setup for registering a transcription model (such as OpenAI Whisper) may vary; refer to the specific model's documentation for registration and authentication steps.
+- **Audio Recording**: The requirements for the recorded audio (file size, type, encoding, etc.) depend on the chosen transcription model. Ensure that the audio format produced by the UI matches the model's expected input.
+- **Customization**: You may need to adjust the audio recording logic or backend integration to support different models or to optimize for accuracy and performance.
+
+---
+For more details, see the source code and comments in the `Home.razor` component.
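The README's Configuration Notes above leave the registration step abstract. As a minimal sketch, this is the wiring the commit itself uses in `Program.cs` (restated here for readers who only skim the README; the environment variable name is a placeholder, and other providers will need a different client type):

```csharp
// Minimal registration sketch: adapt the client type and credentials to your
// transcription provider. The environment variable name is a placeholder.
var key = Environment.GetEnvironmentVariable("YOUR_TRANSCRIPTION_MODEL_API_KEY");

builder.Services.AddSpeechToTextClient(services =>
    new OpenAI.Audio.AudioClient("whisper-1", key).AsISpeechToTextClient());
```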

common/microsoft-extensions-ai-integration/SpeechToTextIntegration/SpeechToTextIntegration/Components/App.razor

Lines changed: 1 addition & 0 deletions

@@ -10,6 +10,7 @@
     <link rel="stylesheet" href="SpeechToTextIntegration.styles.css" />
     <link rel="icon" type="image/png" href="favicon.png" />
     <link href="https://unpkg.com/@@progress/kendo-theme-default@@11.0.1/dist/default-main.css" rel="stylesheet" />
+    <script src="_content/Telerik.UI.for.Blazor/js/telerik-blazor.js" defer></script>
     <HeadOutlet @rendermode="InteractiveServer" />
 </head>
 
common/microsoft-extensions-ai-integration/SpeechToTextIntegration/SpeechToTextIntegration/Components/Pages/Home.razor

Lines changed: 31 additions & 24 deletions

@@ -1,7 +1,8 @@
 @page "/"
+
 @using Microsoft.Extensions.AI
-@inject IJSRuntime JSRuntime
 
+@inject IJSRuntime JSRuntime
 @inject ISpeechToTextClient SpeechToTextClient
 
 <TelerikTextArea @bind-Value="@TextValue"
@@ -17,18 +18,16 @@
     </TextAreaSuffixTemplate>
 </TelerikTextArea>
 
-
-
 @code {
     private string TextValue { get; set; } = string.Empty;
     private DotNetObjectReference<Home>? dotNetObjectReference;
 
-    private async void OnStartHandler()
+    private async Task OnStartHandler()
     {
         await JSRuntime.InvokeVoidAsync("speechRecognitionStarted");
     }
 
-    private async void OnEndHandler()
+    private async Task OnEndHandler()
    {
         await JSRuntime.InvokeVoidAsync("speechRecognitionEnded");
     }
@@ -37,11 +36,18 @@
     {
         if (firstRender)
         {
-            await JSRuntime.InvokeVoidAsync("initializeSpeechToTextButton");
-
-            dotNetObjectReference = DotNetObjectReference.Create(this);
-
-            await JSRuntime.InvokeVoidAsync("setDotNetObjectReference", dotNetObjectReference);
+            try
+            {
+                await JSRuntime.InvokeVoidAsync("initializeSpeechToTextButton");
+
+                dotNetObjectReference = DotNetObjectReference.Create(this);
+
+                await JSRuntime.InvokeVoidAsync("setDotNetObjectReference", dotNetObjectReference);
+            }
+            catch (Exception ex)
+            {
+                Console.Error.WriteLine($"JSInterop failed: {ex.Message}");
+            }
         }
 
         await base.OnAfterRenderAsync(firstRender);
@@ -55,10 +61,10 @@
             return;
         }
 
-        using var stream = new MemoryStream(audioBytes);
-
         try
         {
+            using var stream = new MemoryStream(audioBytes);
+
             await GetSpeechToTextResponse(stream);
         }
         catch (Exception e)
@@ -96,17 +102,18 @@
         this.mediaRecorder.ondataavailable = (e) => this.audioChunks.push(e.data);
         this.mediaRecorder.onstop = async () => {
             if (this.mediaRecorder) {
-                if (!this.recordingAborted) {
-                    const audioBlob = new Blob(this.audioChunks, { type: 'audio/wav' });
-                    const arrayBuffer = await audioBlob.arrayBuffer();
-                    const uint8Array = new Uint8Array(arrayBuffer);
-                    // Call back to Blazor with the recorded audio data
-                    try {
-                        window.dotNetObjectReference.invokeMethodAsync("OnRecordedAudio", uint8Array);
-                        console.log("Successfully called OnRecordedAudio via component reference");
-                    } catch (error) {
-                        console.error("Error calling OnRecordedAudio:", error);
+                const audioBlob = new Blob(this.audioChunks, { type: 'audio/wav' });
+                const arrayBuffer = await audioBlob.arrayBuffer();
+                const uint8Array = new Uint8Array(arrayBuffer);
+                // Call back to Blazor with the recorded audio data
+                try {
+                    if (window.dotNetObjectReference) {
+                        await window.dotNetObjectReference.invokeMethodAsync("OnRecordedAudio", uint8Array);
+                    } else {
+                        console.warn("dotNetObjectReference is not set.");
                     }
+                } catch (error) {
+                    console.error("Error calling OnRecordedAudio:", error);
                 }
                 this.audioChunks = [];
                 this.unbindMediaRecorderEvents();
@@ -152,12 +159,12 @@
         // Event callbacks
         onStart() {
             // add any additional logic here if necessary
-                console.log("Media recorder started");
+            console.log("Media recorder started");
         },
 
         onEnd() {
             // add any additional logic here if necessary
-                console.log("Media recorder ended");
+            console.log("Media recorder ended");
         },
 
         // Public API methods
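The `OnRecordedAudio` hunk above hands the audio stream to a `GetSpeechToTextResponse` helper whose body falls outside this diff. A plausible sketch of that helper, assuming the experimental `ISpeechToTextClient.GetTextAsync(Stream)` API from Microsoft.Extensions.AI; the method body here is illustrative, not part of the commit:

```csharp
// Hypothetical body for the helper invoked from OnRecordedAudio; not shown
// in this diff. Assumes ISpeechToTextClient.GetTextAsync from
// Microsoft.Extensions.AI (experimental at the time of writing).
private async Task GetSpeechToTextResponse(Stream audioStream)
{
    // Send the recorded audio to the transcription model.
    var response = await SpeechToTextClient.GetTextAsync(audioStream);

    // Surface the transcribed text in the bound TelerikTextArea.
    TextValue = response.Text;
    StateHasChanged();
}
```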

common/microsoft-extensions-ai-integration/SpeechToTextIntegration/SpeechToTextIntegration/Program.cs

Lines changed: 6 additions & 4 deletions

@@ -5,16 +5,17 @@
 
 // Add services to the container.
 builder.Services.AddRazorComponents()
-    .AddInteractiveServerComponents();
+    .AddInteractiveServerComponents()
+    .AddHubOptions(o => o.MaximumReceiveMessageSize = 4 * 1024 * 1024);
 
 builder.Services.AddTelerikBlazor();
 
 #region AI Service Registration Start
 
-var modelName = Environment.GetEnvironmentVariable("OPEN_AI_REPL_ASSISTANT_MODEL_NAME");
-var key = Environment.GetEnvironmentVariable("OPEN_AI_REPL_ASSISTANT_MODEL_API_KEY");
+// Get the appropriate environment variables for your model's service.
+var key = Environment.GetEnvironmentVariable("YOUR_TRANSCRIPTION_MODEL_API_KEY");
 
-// OpenAI whisper-1 model registration
+// OpenAI whisper-1 model registration. Refer to your preferred model's documentation for more details.
 builder.Services.AddDistributedMemoryCache();
 builder.Services.AddSpeechToTextClient(services =>
     new OpenAI.Audio.AudioClient("whisper-1", key).AsISpeechToTextClient());
@@ -32,6 +33,7 @@
 }
 
 app.UseHttpsRedirection();
+app.UseDeveloperExceptionPage();
 
 app.UseStaticFiles();
 app.UseAntiforgery();
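The `AddHubOptions` change above is what lets the recorded audio reach the server at all: the JavaScript interop call sends the byte array over the Blazor Server SignalR circuit, and SignalR rejects incoming messages larger than 32 KB by default. A restatement of the setting with the reasoning spelled out (the 4 MB figure is this commit's choice, sized for short recordings):

```csharp
builder.Services.AddRazorComponents()
    .AddInteractiveServerComponents()
    // SignalR's default MaximumReceiveMessageSize is 32 KB; a few seconds of
    // recorded WAV audio far exceeds that, so raise the cap (here to 4 MB).
    .AddHubOptions(o => o.MaximumReceiveMessageSize = 4 * 1024 * 1024);
```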

common/microsoft-extensions-ai-integration/SpeechToTextIntegration/SpeechToTextIntegration/appsettings.json

Lines changed: 1 addition & 0 deletions

@@ -1,4 +1,5 @@
 {
+  "DetailedErrors": true,
   "Logging": {
     "LogLevel": {
       "Default": "Information",
