# Hugging Face Spaces status: Running on Zero
| import gradio as gr | |
| import spaces | |
| from utils import InitModels, ModelConfigs | |
class KyrgyzSTTApp:
    """Kyrgyz Speech-to-Text Application.

    Holds an ``InitModels`` manager, which supplies the model, pipeline and
    tokenizer for each selectable model name, and exposes transcription
    through a Gradio interface.
    """

    def __init__(self):
        """Create the model manager and initialize every model up front."""
        self.model_manager = InitModels()
        self._initialize_models()

    def _initialize_models(self) -> None:
        """Initialize all available models."""
        self.model_manager.initialize_all_models()

    def transcribe(self, audio, model_name: str) -> str:
        """Transcribe audio using the selected model.

        Args:
            audio: Audio file path (the Gradio input uses ``type="filepath"``).
            model_name: Name of the model to use.

        Returns:
            Transcribed text.

        Raises:
            gr.Error: If no audio was supplied.
        """
        # Fail early with a readable UI message instead of letting the
        # pipeline choke on a missing file path.
        if not audio:
            raise gr.Error("Please record or upload an audio clip first.")

        import torch

        # Move the selected model to the GPU when one is available.
        # NOTE(review): the original comment says the GPU is "activated by
        # @spaces.GPU decorator", but no decorator is visible on this method;
        # confirm where ZeroGPU allocation is actually requested.
        model = self.model_manager.get_model(model_name)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model.to(device)

        # Get pipeline and tokenizer
        pipe = self.model_manager.get_pipeline(model_name)
        tokenizer = self.model_manager.get_tokenizer(model_name)

        # Point the pipeline at the model and the device chosen above.
        pipe.model = model
        pipe.device = torch.device(device)

        text = pipe(audio)["text"]
        # Debug output: round-trip the transcript through the tokenizer and
        # print the decoded result.
        print(tokenizer.decode(tokenizer(text).input_ids))
        return text

    @staticmethod
    def _default_model_choice(model_choices):
        """Return the initial dropdown value for *model_choices*.

        Prefers the second entry (the original hard-coded default, labelled
        "Medium" in the original comment), falls back to the first entry when
        only one model is configured, and returns ``None`` for an empty list
        so that building the interface never raises ``IndexError``.
        """
        if len(model_choices) > 1:
            return model_choices[1]
        return model_choices[0] if model_choices else None

    def create_interface(self) -> "gr.Interface":
        """Create and configure the Gradio interface.

        Returns:
            A ``gr.Interface`` wired to :meth:`transcribe`, with an audio
            input, a model dropdown and a transcript textbox.
        """
        model_choices = list(ModelConfigs.get_all_configs().keys())

        iface = gr.Interface(
            fn=self.transcribe,
            inputs=[
                gr.Audio(type="filepath", label="Audio Input"),
                gr.Dropdown(
                    choices=model_choices,
                    value=self._default_model_choice(model_choices),
                    label="Select Model",
                ),
            ],
            outputs=gr.Textbox(
                label="Transcript",
                lines=5,
                show_copy_button=True,
            ),
            title="Kyrgyz Speech-to-Text",
            description="Multi-language speech recognition for Kyrgyz, English, and Russian.\nAudio must be up to 30 seconds long!",
            theme=gr.themes.Ocean(),
        )
        return iface

    def launch(self) -> None:
        """Build the Gradio interface and launch it."""
        iface = self.create_interface()
        iface.launch()
if __name__ == "__main__":
    # Script entry point: construct the app (its constructor initializes the
    # models) and start serving the Gradio UI.
    KyrgyzSTTApp().launch()