Commit 31da48b

chore(SpeechToTextButton): add integration example with MEAI
1 parent e8573d5 commit 31da48b

File tree

18 files changed, +594 -0 lines changed

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
# SpeechToTextIntegration Demo

This project demonstrates how to integrate the Telerik UI for Blazor `SpeechToTextButton` component with a transcription model such as OpenAI's `whisper-1`. It provides a simple Blazor UI for recording audio and transcribing speech to text, and shows how to connect the UI component to a backend speech-to-text service.

## Main Purpose

- **Showcase**: Illustrates how to use the Telerik `SpeechToTextButton` in a Blazor application.
- **Integration**: Demonstrates sending recorded audio to a transcription model (e.g., OpenAI Whisper) and displaying the transcribed text in the UI.
- **Extensibility**: Serves as a starting point for integrating other speech-to-text models or services.

## Configuration Notes

- **Model Registration**: The setup for registering a transcription model (such as OpenAI Whisper or others) may vary. Refer to the specific model's documentation for registration and authentication steps; a minimal sketch follows this list.
- **Audio Recording**: The requirements for the recorded audio (file size, type, encoding, etc.) depend on the chosen transcription model. Ensure that the audio format produced by the UI matches the model's expected input.
- **Customization**: You may need to adjust the audio recording logic or backend integration to support different models or to optimize for accuracy and performance.
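As a reference, here is a minimal sketch of registering OpenAI's `whisper-1` as the `ISpeechToTextClient` that the demo injects. It assumes the preview `Microsoft.Extensions.AI.OpenAI` package, which exposes the experimental `AsISpeechToTextClient()` adapter over OpenAI's `AudioClient`, and an API key supplied through the `OPENAI_API_KEY` environment variable; adapt the registration to your own provider and authentication scheme.

```csharp
// Program.cs (sketch) -- assumes the preview Microsoft.Extensions.AI.OpenAI package,
// whose experimental AsISpeechToTextClient() extension adapts OpenAI's AudioClient.
using Microsoft.Extensions.AI;
using OpenAI;
using SpeechToTextIntegration.Components; // adjust if App.razor lives in another namespace

var builder = WebApplication.CreateBuilder(args);

builder.Services.AddRazorComponents()
    .AddInteractiveServerComponents();

// Telerik UI for Blazor services.
builder.Services.AddTelerikBlazor();

// Register the transcription client that Home.razor injects as ISpeechToTextClient.
// The model name and API key source are placeholders -- substitute your own values.
builder.Services.AddSingleton<ISpeechToTextClient>(_ =>
    new OpenAIClient(Environment.GetEnvironmentVariable("OPENAI_API_KEY")!)
        .GetAudioClient("whisper-1")
        .AsISpeechToTextClient());

var app = builder.Build();

app.UseStaticFiles();
app.UseAntiforgery();

app.MapRazorComponents<App>()
    .AddInteractiveServerRenderMode();

app.Run();
```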
---
For more details, see the source code and comments in the `Home.razor` component.
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.14.36109.1 d17.14
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SpeechToTextIntegration", "SpeechToTextIntegration\SpeechToTextIntegration.csproj", "{3F2BEC52-4F23-42C6-8791-3DC6CA813DB1}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Telerik.Blazor", "..\..\..\..\blazor\Telerik.Blazor\Telerik.Blazor.csproj", "{AF9263B3-0FD2-6644-74FE-84A802165E95}"
EndProject
Global
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
		Debug|Any CPU = Debug|Any CPU
		Release|Any CPU = Release|Any CPU
	EndGlobalSection
	GlobalSection(ProjectConfigurationPlatforms) = postSolution
		{3F2BEC52-4F23-42C6-8791-3DC6CA813DB1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{3F2BEC52-4F23-42C6-8791-3DC6CA813DB1}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{3F2BEC52-4F23-42C6-8791-3DC6CA813DB1}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{3F2BEC52-4F23-42C6-8791-3DC6CA813DB1}.Release|Any CPU.Build.0 = Release|Any CPU
		{AF9263B3-0FD2-6644-74FE-84A802165E95}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
		{AF9263B3-0FD2-6644-74FE-84A802165E95}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{AF9263B3-0FD2-6644-74FE-84A802165E95}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{AF9263B3-0FD2-6644-74FE-84A802165E95}.Release|Any CPU.Build.0 = Release|Any CPU
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
	GlobalSection(ExtensibilityGlobals) = postSolution
		SolutionGuid = {1E0CB172-1F2C-4A5B-8DC3-67C1D8A23B53}
	EndGlobalSection
EndGlobal
@@ -0,0 +1,22 @@
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <base href="/" />
    <link rel="stylesheet" href="bootstrap/bootstrap.min.css" />
    <link rel="stylesheet" href="app.css" />
    <link rel="stylesheet" href="SpeechToTextIntegration.styles.css" />
    <link rel="icon" type="image/png" href="favicon.png" />
    <link href="https://unpkg.com/@@progress/kendo-theme-default@@11.0.1/dist/default-main.css" rel="stylesheet" />
    <script src="_content/Telerik.UI.for.Blazor/js/telerik-blazor.js" defer></script>
    <HeadOutlet @rendermode="InteractiveServer" />
</head>

<body>
    <Routes @rendermode="InteractiveServer" />
    <script src="_framework/blazor.web.js"></script>
</body>

</html>
@@ -0,0 +1,15 @@
@inherits LayoutComponentBase

<div class="page">
    <main>
        <article class="content px-4">
            @Body
        </article>
    </main>
</div>

<div id="blazor-error-ui">
    An unhandled error has occurred.
    <a href="" class="reload">Reload</a>
    <a class="dismiss">🗙</a>
</div>
@@ -0,0 +1,96 @@
.page {
    position: relative;
    display: flex;
    flex-direction: column;
}

main {
    flex: 1;
}

.sidebar {
    background-image: linear-gradient(180deg, rgb(5, 39, 103) 0%, #3a0647 70%);
}

.top-row {
    background-color: #f7f7f7;
    border-bottom: 1px solid #d6d5d5;
    justify-content: flex-end;
    height: 3.5rem;
    display: flex;
    align-items: center;
}

.top-row ::deep a, .top-row ::deep .btn-link {
    white-space: nowrap;
    margin-left: 1.5rem;
    text-decoration: none;
}

.top-row ::deep a:hover, .top-row ::deep .btn-link:hover {
    text-decoration: underline;
}

.top-row ::deep a:first-child {
    overflow: hidden;
    text-overflow: ellipsis;
}

@media (max-width: 640.98px) {
    .top-row {
        justify-content: space-between;
    }

    .top-row ::deep a, .top-row ::deep .btn-link {
        margin-left: 0;
    }
}

@media (min-width: 641px) {
    .page {
        flex-direction: row;
    }

    .sidebar {
        width: 250px;
        height: 100vh;
        position: sticky;
        top: 0;
    }

    .top-row {
        position: sticky;
        top: 0;
        z-index: 1;
    }

    .top-row.auth ::deep a:first-child {
        flex: 1;
        text-align: right;
        width: 0;
    }

    .top-row, article {
        padding-left: 2rem !important;
        padding-right: 1.5rem !important;
    }
}

#blazor-error-ui {
    background: lightyellow;
    bottom: 0;
    box-shadow: 0 -1px 2px rgba(0, 0, 0, 0.2);
    display: none;
    left: 0;
    padding: 0.6rem 1.25rem 0.7rem 1.25rem;
    position: fixed;
    width: 100%;
    z-index: 1000;
}

#blazor-error-ui .dismiss {
    cursor: pointer;
    position: absolute;
    right: 0.75rem;
    top: 0.5rem;
}
@@ -0,0 +1,190 @@
@page "/"

@using Microsoft.Extensions.AI

@inject IJSRuntime JSRuntime
@inject ISpeechToTextClient SpeechToTextClient

<TelerikTextArea @bind-Value="@TextValue"
                 Width="300px"
                 ShowSuffixSeparator="false">
    <TextAreaSuffixTemplate>
        <span class="k-spacer"></span>
        <TelerikSpeechToTextButton OnStart="@OnStartHandler"
                                   OnEnd="@OnEndHandler"
                                   FillMode="@ThemeConstants.Button.FillMode.Flat"
                                   IntegrationMode="@SpeechToTextButtonIntegrationMode.None">
        </TelerikSpeechToTextButton>
    </TextAreaSuffixTemplate>
</TelerikTextArea>

@code {
    private string TextValue { get; set; } = string.Empty;
    private DotNetObjectReference<Home>? dotNetObjectReference;

    // The button runs with IntegrationMode.None, so OnStart/OnEnd only notify the
    // custom JavaScript recorder defined in the <script> block below.
    private async Task OnStartHandler()
    {
        await JSRuntime.InvokeVoidAsync("speechRecognitionStarted");
    }

    private async Task OnEndHandler()
    {
        await JSRuntime.InvokeVoidAsync("speechRecognitionEnded");
    }

    protected override async Task OnAfterRenderAsync(bool firstRender)
    {
        if (firstRender)
        {
            try
            {
                // Set up the JS recorder and pass a .NET reference so the script
                // can call back into this component with the recorded audio.
                await JSRuntime.InvokeVoidAsync("initializeSpeechToTextButton");

                dotNetObjectReference = DotNetObjectReference.Create(this);

                await JSRuntime.InvokeVoidAsync("setDotNetObjectReference", dotNetObjectReference);
            }
            catch (Exception ex)
            {
                Console.Error.WriteLine($"JSInterop failed: {ex.Message}");
            }
        }

        await base.OnAfterRenderAsync(firstRender);
    }

    // Invoked from JavaScript with the recorded audio once the MediaRecorder stops.
    [JSInvokable("OnRecordedAudio")]
    public async Task OnRecordedAudio(byte[] audioBytes)
    {
        if (audioBytes == null || audioBytes.Length == 0)
        {
            return;
        }

        try
        {
            using var stream = new MemoryStream(audioBytes);

            await GetSpeechToTextResponse(stream);
        }
        catch (Exception e)
        {
            Console.WriteLine(e.Message);
            return;
        }
    }

    // Sends the recorded audio to the registered ISpeechToTextClient (e.g., whisper-1)
    // and displays the transcribed text in the TextArea.
    private async Task GetSpeechToTextResponse(MemoryStream stream)
    {
        var response = await SpeechToTextClient.GetTextAsync(stream);
        TextValue = response.Text;
        StateHasChanged();
    }
}

<script>
    // Function to initialize the speechToTextButton object
    window.initializeSpeechToTextButton = function() {
        console.log("Initializing speechToTextButton object...");

        // Create a dedicated object for speech-to-text functionality
        window.speechToTextButton = {
            // Properties
            mediaRecorder: null,
            recordingAborted: false,
            audioChunks: [],
            stream: null,

            // Methods
            bindMediaRecorderEvents() {
                console.log("Binding media recorder events...");
                this.mediaRecorder.onstart = () => this.onStart();
                this.mediaRecorder.ondataavailable = (e) => this.audioChunks.push(e.data);
                this.mediaRecorder.onstop = async () => {
                    if (this.mediaRecorder) {
                        const audioBlob = new Blob(this.audioChunks, { type: 'audio/wav' });
                        const arrayBuffer = await audioBlob.arrayBuffer();
                        const uint8Array = new Uint8Array(arrayBuffer);
                        // Call back to Blazor with the recorded audio data
                        try {
                            if (window.dotNetObjectReference) {
                                await window.dotNetObjectReference.invokeMethodAsync("OnRecordedAudio", uint8Array);
                            } else {
                                console.warn("dotNetObjectReference is not set.");
                            }
                        } catch (error) {
                            console.error("Error calling OnRecordedAudio:", error);
                        }
                        this.audioChunks = [];
                        this.unbindMediaRecorderEvents();
                        this.onEnd();
                    }
                };
            },

            unbindMediaRecorderEvents() {
                console.log("Unbinding media recorder events...");
                if (this.stream) {
                    this.stream.getTracks().forEach(track => track.stop());
                    this.stream = null;
                }
                if (this.mediaRecorder) {
                    this.mediaRecorder.onstart = null;
                    this.mediaRecorder.ondataavailable = null;
                    this.mediaRecorder.onstop = null;
                    this.mediaRecorder.onerror = null;
                    if (this.mediaRecorder.stream) {
                        this.mediaRecorder.stream.getTracks().forEach(track => track.stop());
                    }
                    this.mediaRecorder = null;
                }
            },

            async startMediaRecorder() {
                console.log("Starting media recorder...");
                this.recordingAborted = false;
                this.stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                this.mediaRecorder = new MediaRecorder(this.stream);
                this.bindMediaRecorderEvents();
                this.mediaRecorder.start();
            },

            async stopMediaRecorder() {
                console.log("Stopping media recorder...");
                if (this.mediaRecorder && this.mediaRecorder.state !== 'inactive') {
                    this.mediaRecorder.stop();
                }
            },

            // Event callbacks
            onStart() {
                // add any additional logic here if necessary
                console.log("Media recorder started");
            },

            onEnd() {
                // add any additional logic here if necessary
                console.log("Media recorder ended");
            },

            // Public API methods
            async speechRecognitionStarted() {
                console.log("Speech recognition started - called from Blazor");
                await this.startMediaRecorder();
            },

            async speechRecognitionEnded() {
                console.log("Speech recognition ended - called from Blazor");
                await this.stopMediaRecorder();
            },
        };

        // Expose the API methods to window for Blazor interop
        window.speechRecognitionStarted = () => window.speechToTextButton.speechRecognitionStarted();
        window.speechRecognitionEnded = () => window.speechToTextButton.speechRecognitionEnded();
        window.setDotNetObjectReference = (value) => window.dotNetObjectReference = value;

        console.log("speechToTextButton object initialized successfully");
    };

</script>
@@ -0,0 +1,6 @@
<Router AppAssembly="typeof(Program).Assembly">
    <Found Context="routeData">
        <RouteView RouteData="routeData" DefaultLayout="typeof(Layout.MainLayout)" />
        <FocusOnNavigate RouteData="routeData" Selector="h1" />
    </Found>
</Router>
@@ -0,0 +1,13 @@
@using System.Net.Http
@using System.Net.Http.Json
@using Microsoft.AspNetCore.Components.Forms
@using Microsoft.AspNetCore.Components.Routing
@using Microsoft.AspNetCore.Components.Web
@using static Microsoft.AspNetCore.Components.Web.RenderMode
@using Microsoft.AspNetCore.Components.Web.Virtualization
@using Microsoft.JSInterop
@using SpeechToTextIntegration
@using SpeechToTextIntegration.Components

@using Telerik.Blazor
@using Telerik.Blazor.Components
