Spaces:
Sleeping
Sleeping
File size: 2,020 Bytes
0f0361e 0da2c4e 0f0361e 0da2c4e 0f0361e 0da2c4e 0f0361e 0da2c4e 0f0361e 0da2c4e 0f0361e 8668b5d 0f0361e 0da2c4e 0f0361e 0da2c4e 0f0361e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import os
import tempfile

import gradio as gr
import speech_recognition as sr
import torch
from pydub import AudioSegment
# Constants
# Longest stretch of audio that gets transcribed; transcribe_audio_in_chunks
# cuts anything beyond this off before splitting the audio into chunks.
MAX_AUDIO_DURATION = 600 # in seconds
# --- Helper: Convert audio to wav ---
def convert_audio_to_wav(file_path):
    """Convert an audio file to WAV and return the path of the new file.

    The WAV file is written next to the input, using the input's path with
    its trailing extension (if any) swapped for ``.wav``.

    Args:
        file_path: Path of the audio file, loaded with
            ``AudioSegment.from_file``.

    Returns:
        Path of the exported WAV file.
    """
    audio = AudioSegment.from_file(file_path)
    # Swap only the trailing extension. A plain str.replace on the extension
    # text also rewrites matching text earlier in the path
    # ("/tmp/mp3/a.mp3" -> "/tmp/wav/a.wav") and, for a path without any
    # dot, replaces the entire path with "wav".
    base, _ext = os.path.splitext(file_path)
    wav_path = base + ".wav"
    audio.export(wav_path, format="wav")
    return wav_path
# --- Helper: Transcribe audio in chunks ---
def transcribe_audio_in_chunks(audio_path, chunk_duration=30):
    """Transcribe a WAV file chunk by chunk and return the joined text.

    The audio is cut to at most ``MAX_AUDIO_DURATION`` seconds, split into
    consecutive chunks of ``chunk_duration`` seconds, and each chunk is passed
    to ``Recognizer.recognize_google`` with language ``"en-IN"``.

    Args:
        audio_path: Path of the WAV file to transcribe.
        chunk_duration: Chunk length in seconds; must be positive. Fractional
            values are accepted and rounded down to whole milliseconds.

    Returns:
        The per-chunk results joined with single spaces. A chunk the
        recognizer cannot understand contributes ``"[Unrecognized Audio]"``;
        a failed recognition request contributes ``"[Speech Error: ...]"``.

    Raises:
        ValueError: If ``chunk_duration`` is zero or negative.
    """
    # range() needs an integer, non-zero step; validate up front so a bad
    # value fails loudly instead of silently producing an empty transcript.
    chunk_ms = int(chunk_duration * 1000)
    if chunk_ms <= 0:
        raise ValueError("chunk_duration must be positive")

    recognizer = sr.Recognizer()
    # Slice bounds are in milliseconds; slicing past the end is a no-op, so
    # shorter recordings pass through unchanged.
    audio = AudioSegment.from_wav(audio_path)[:MAX_AUDIO_DURATION * 1000]

    full_text = []
    # A private temporary directory keeps concurrent calls from overwriting
    # each other's chunk file, and it is removed when the block exits.
    with tempfile.TemporaryDirectory() as tmp_dir:
        chunk_path = os.path.join(tmp_dir, "chunk.wav")
        for start in range(0, len(audio), chunk_ms):
            chunk = audio[start:start + chunk_ms]
            # export() hands back the open output file; close it right away
            # so the handle is not left dangling before the file is re-read.
            chunk.export(chunk_path, format="wav").close()
            with sr.AudioFile(chunk_path) as source:
                audio_data = recognizer.record(source)
            try:
                text = recognizer.recognize_google(audio_data, language="en-IN")
            except sr.UnknownValueError:
                full_text.append("[Unrecognized Audio]")
            except sr.RequestError as e:
                full_text.append(f"[Speech Error: {e}]")
            else:
                full_text.append(text)
    return " ".join(full_text)
# --- Main Function ---
def transcribe_audio(audio):
    """Transcribe the audio file at ``audio`` and return the text.

    Files whose path does not end in ``.wav`` are first converted with
    ``convert_audio_to_wav``; the WAV file is then transcribed with
    ``transcribe_audio_in_chunks``.

    Args:
        audio: Filesystem path of the recording. May be ``None`` or empty
            when nothing was supplied (presumably what the UI passes when the
            form is submitted without audio -- confirm against Gradio docs).

    Returns:
        The transcription, or a short notice when no audio was given.
    """
    # Without this guard a missing input reaches .endswith() and raises
    # AttributeError instead of giving the user a readable message.
    if not audio:
        return "No audio provided. Please record or upload an audio file."
    if not audio.endswith(".wav"):
        audio = convert_audio_to_wav(audio)
    return transcribe_audio_in_chunks(audio)
# --- Gradio UI ---
# Build the two components first, then wire them into the interface.
audio_input = gr.Audio(
    label="Input English Audio",
    type="filepath",
    sources=["microphone", "upload"],
)
text_output = gr.Textbox(label="Transcribed Text")

# One audio input, one text output, with transcribe_audio as the handler.
iface = gr.Interface(
    title="English Speech Recognition",
    description="Upload or record English audio → Transcribe to text.",
    fn=transcribe_audio,
    inputs=[audio_input],
    outputs=[text_output],
    allow_flagging="never",
)

# Start the app with both the debug and share options switched on.
iface.launch(share=True, debug=True)
|