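"""Gradio app: speech-to-text transcription with Ultravox.

Users upload an audio file or record from the microphone; the audio is
downmixed to mono, resampled to 16 kHz, and passed to
fixie-ai/ultravox-v0_4_1-llama-3_1-8b, which returns the transcription.
"""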
import warnings
warnings.filterwarnings("ignore")

import gradio as gr
import torch
import torchaudio
import transformers
# Audio-to-text model checkpoint
model_name = "fixie-ai/ultravox-v0_4_1-llama-3_1-8b"

# Initialize the model. Ultravox is a multimodal (audio + text) model and,
# per its model card, is loaded through transformers.pipeline with
# trust_remote_code=True; it is not an AutoModelForSpeechSeq2Seq architecture.
try:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = transformers.pipeline(
        model=model_name,
        trust_remote_code=True,
        device=device,
    )
    print(f"Model loaded successfully on {device}")
except Exception as e:
    print(f"Error while loading the model: {e}")
    pipe = None
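# Note: the Llama-3.1-8B backbone needs on the order of 16 GB of memory in
# bf16/fp16 (more in fp32), so on a CPU-only Space this load may fail or be
# very slow; the try/except above degrades gracefully by disabling the
# transcription function instead of crashing the app.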
def transcribe_audio(audio):
    """
    Transcribe an audio file to text.
    """
    if pipe is None:
        return "Error: the model could not be loaded."
    try:
        # Load the audio file (Gradio passes a filepath)
        waveform, sample_rate = torchaudio.load(audio)

        # Downmix multi-channel recordings to mono
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Ultravox's Whisper-based audio encoder expects 16 kHz input
        if sample_rate != 16000:
            waveform = torchaudio.functional.resample(waveform, sample_rate, 16000)
            sample_rate = 16000

        # Steer the chat model toward verbatim transcription via the system
        # turn; the Ultravox pipeline appends the audio as the user turn
        turns = [
            {"role": "system", "content": "Transcribe the user's speech verbatim."},
        ]

        # Generate the transcription (the pipeline returns the generated text)
        transcription = pipe(
            {"audio": waveform.squeeze().numpy(), "turns": turns, "sampling_rate": sample_rate},
            max_new_tokens=256,
        )
        return transcription
    except Exception as e:
        return f"Error during transcription: {str(e)}"
# Gradio interface
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs=gr.Textbox(label="Transcription"),
    title="Audio to Text with Ultravox",
    description="Upload an audio file or record your voice to get a text transcription.",
)
if __name__ == "__main__":
    interface.launch(ssr_mode=False)
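# To run locally (a sketch; exact package versions are assumptions):
#   pip install gradio torch torchaudio transformers peft librosa
#   python app.py
# peft and librosa are dependencies of Ultravox's custom pipeline code,
# as listed on its model card.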