Skip to content

Commit 4c72e43

Browse files
committed
first implementation
1 parent c09f416 commit 4c72e43

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
git+https://github.com/Uberi/speech_recognition.git@010382b
2+
PyAudio
3+
openai-whisper
4+
soundfile
5+
ffmpeg

extensions/whisper_stt/script.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import gradio as gr
2+
import speech_recognition as sr
3+
import modules.shared as shared
4+
5+
# Module-level hand-off slot written by do_stt(): 'state' is flipped to True
# once a transcription has been stored, and 'value' holds a two-item list
# (do_stt stores the same transcribed text in both positions).
# NOTE(review): the consumer of this dict is not in this file; presumably the
# host application reads it to replace the user's typed input. Confirm.
input_hijack = dict(
    state=False,
    value=["", ""],
)
9+
10+
11+
def input_modifier(string):
    """Return *string* unchanged.

    This is a pass-through: the value handed in is handed straight back
    without inspection or modification.
    """
    return string
13+
14+
15+
def do_stt():
    """Record one utterance from the microphone and transcribe it with Whisper.

    Opens ``sr.Microphone()``, calibrates the recognizer for ambient noise,
    listens for a phrase, and passes the recording to
    ``Recognizer.recognize_whisper`` (language ``"english"``, model
    ``"tiny.en"``). The resulting text is stored in the module-level
    ``input_hijack`` dict and also returned.

    Returns:
        The transcribed text, or an empty string if recognition failed.
    """
    transcription = ""
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Say something!")
        r.adjust_for_ambient_noise(source)
        audio = r.listen(source)

    # Recognize speech using whisper. Only the recognizer call sits in the
    # try body, so the handlers cannot mask an unrelated failure.
    try:
        transcription = r.recognize_whisper(audio, language="english", model="tiny.en")
    except sr.UnknownValueError:
        print("Whisper could not understand audio")
    except sr.RequestError as e:
        # Include the underlying cause; without it the failure is undiagnosable.
        print(f"Could not request results from Whisper; {e}")
    else:
        print("Whisper thinks you said " + transcription)

    # The text is handed over through input_hijack rather than input_modifier.
    # NOTE(review): this also marks the hijack as active when recognition
    # failed (transcription == ""); confirm the consumer tolerates that.
    input_hijack.update({"state": True, "value": [transcription, transcription]})
    return transcription
35+
36+
37+
def ui():
    """Create the extension's widgets and wire them together.

    Builds a button labelled "STT" and a textbox labelled "Speech Preview",
    then registers ``do_stt`` as the button's click handler with the textbox
    as its output.
    """
    stt_trigger = gr.Button(value="STT")
    preview_box = gr.Textbox(label="Speech Preview")
    stt_trigger.click(fn=do_stt, outputs=[preview_box])

0 commit comments

Comments
 (0)