Spaces:

nineninesix
/

Kyrgyz-Speech-To-Text

Running on Zero

App Files Files Community

Kyrgyz-Speech-To-Text / app.py

Simonlob

Update app.py

752db35 verified 23 days ago

raw

history blame contribute delete

2.46 kB

	import gradio as gr
	import spaces
	from utils import InitModels, ModelConfigs


	class KyrgyzSTTApp:
	    """Gradio application for Kyrgyz speech-to-text transcription.

	    Owns an ``InitModels`` manager, asks it to initialize every model when
	    the app is constructed, and exposes a Gradio ``Interface`` that maps an
	    audio file plus a model name to the transcribed text.
	    """

	    def __init__(self):
	        """Create the model manager and initialize all models up front."""
	        self.model_manager = InitModels()
	        self._initialize_models()

	    def _initialize_models(self) -> None:
	        """Ask the model manager to initialize every available model."""
	        self.model_manager.initialize_all_models()

	    @spaces.GPU
	    def transcribe(self, audio, model_name: str) -> str:
	        """Transcribe audio using the selected model.

	        Args:
	            audio: Audio file path (the interface built by
	                ``create_interface`` uses ``gr.Audio(type="filepath")``).
	                ``None`` when nothing was recorded or uploaded.
	            model_name: Name of the model to use; passed to the model
	                manager to look up the model, pipeline and tokenizer.

	        Returns:
	            The transcribed text (the ``"text"`` entry of the pipeline
	            output).

	        Raises:
	            gr.Error: If ``audio`` is ``None``.
	        """
	        # Fail early with a readable message instead of letting the
	        # pipeline choke on a missing input.
	        if audio is None:
	            raise gr.Error(
	                "No audio provided. Please record or upload an audio clip."
	            )

	        import torch

	        # Get model and move it to the GPU when one is visible (the original
	        # author notes the GPU is activated by the @spaces.GPU decorator);
	        # otherwise stay on the CPU.
	        model = self.model_manager.get_model(model_name)
	        device = "cuda" if torch.cuda.is_available() else "cpu"
	        model.to(device)

	        # Get pipeline and tokenizer for the same model name.
	        pipe = self.model_manager.get_pipeline(model_name)
	        tokenizer = self.model_manager.get_tokenizer(model_name)

	        # Point the pipeline at the model and device selected above.
	        pipe.model = model
	        pipe.device = torch.device(device)

	        text = pipe(audio)["text"]
	        # Round-trips the transcript through the tokenizer and prints it;
	        # presumably a debugging aid -- the returned value is unaffected.
	        print(tokenizer.decode(tokenizer(text).input_ids))

	        return text

	    def create_interface(self) -> gr.Interface:
	        """Create and configure the Gradio interface.

	        Returns:
	            A ``gr.Interface`` wired to ``transcribe`` with an audio input,
	            a model dropdown and a text output.
	        """
	        model_choices = list(ModelConfigs.get_all_configs().keys())

	        # Prefer the second entry (the Medium model, per the original
	        # author's note), but do not crash with an IndexError when fewer
	        # than two models are configured.
	        if len(model_choices) > 1:
	            default_model = model_choices[1]
	        elif model_choices:
	            default_model = model_choices[0]
	        else:
	            default_model = None

	        iface = gr.Interface(
	            fn=self.transcribe,
	            inputs=[
	                gr.Audio(type="filepath", label="Audio Input"),
	                gr.Dropdown(
	                    choices=model_choices,
	                    value=default_model,
	                    label="Select Model",
	                ),
	            ],
	            outputs=gr.Textbox(
	                label="Transcript",
	                lines=5,
	                show_copy_button=True,
	            ),
	            title="Kyrgyz Speech-to-Text",
	            description="Multi-language speech recognition for Kyrgyz, English, and Russian.\nAudio must be up to 30 seconds long!",
	            theme=gr.themes.Ocean(),
	        )

	        return iface

	    def launch(self) -> None:
	        """Build the Gradio interface and launch it."""
	        iface = self.create_interface()
	        iface.launch()


	if __name__ == "__main__":
	    # Script entry point: constructing the app initializes the models,
	    # then the Gradio interface is launched.
	    app = KyrgyzSTTApp()
	    app.launch()