# interfaceV3.py
"""Gradio app: record voice -> Whisper transcription -> OpenAI reply -> TTS -> Wav2Lip video."""
import os
import shutil
import subprocess
import sys

import gradio as gr

# Prepare the Wav2Lip environment before importing anything that depends on it.
# Use sys.executable (not the bare "python" name) so the same interpreter/venv
# that runs this script also runs the setup step.
setup = subprocess.run([sys.executable, "src/setup_wav2lip.py"], check=False)
if setup.returncode != 0:
    print(f"Advertencia: setup_wav2lip.py terminó con código {setup.returncode}")

# AÑADIR RUTA DEL MÓDULO CUSTOM
sys.path.append(os.path.abspath("./src"))

from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
from call_openai_api import moni as rtff

# Rutas
AUDIO_RECORD_PATH = os.path.abspath("./assets/audio/grabacion_gradio.wav")
VIDEO_PATH = os.path.abspath("./assets/video/data_video_sun.mp4")
TRANSCRIPTION_TEXT_PATH = os.path.abspath("./results/transcripcion.txt")
RESULT_AUDIO_TEMP_PATH = os.path.abspath("./results/audiov2.wav")
RESULT_AUDIO_FINAL_PATH = os.path.abspath("./assets/audio/audio.wav")
RESULT_VIDEO_PATH = os.path.abspath("./results/result_voice.mp4")
TEXT_TO_SPEECH_PATH = os.path.abspath("./src/text_to_speech.py")
RUN_INFERENCE_PATH = os.path.abspath("./src/run_inference.py")


def transcribir_con_progreso(audio_path):
    """Transcribe *audio_path* with Whisper, reporting progress to the Gradio UI.

    Saves the transcription to TRANSCRIPTION_TEXT_PATH and returns the text.
    """
    progreso = gr.Progress()
    progreso(0, "Iniciando transcripción...")
    model_name = "openai/whisper-large"
    progreso(25, "Cargando modelo Whisper...")
    transcripcion = transcribe_audio(audio_path, model_name)
    progreso(75, "Guardando transcripción...")
    guardar_transcripcion(transcripcion, filename=TRANSCRIPTION_TEXT_PATH)
    progreso(100, "Transcripción completada.")
    return transcripcion


def generar_audio_desde_texto():
    """Run the text_to_speech.py script and publish its output audio.

    Returns the final audio path on success, or None when the script produced
    no output file. Raises RuntimeError if the script exits non-zero.
    """
    print("Ejecutando text_to_speech...")
    result = subprocess.run(
        [sys.executable, TEXT_TO_SPEECH_PATH],
        capture_output=True, text=True
    )
    print("stdout:", result.stdout)
    print("stderr:", result.stderr)
    if result.returncode != 0:
        raise RuntimeError(f"Error ejecutando text_to_speech.py: {result.stderr}")
    if os.path.exists(RESULT_AUDIO_TEMP_PATH):
        os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
        # Copy to the fixed location that run_inference.py expects.
        shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
        print(f"Audio copiado a: {RESULT_AUDIO_FINAL_PATH}")
        return RESULT_AUDIO_FINAL_PATH
    print("Audio temporal no encontrado")
    return None


def procesar_video_audio():
    """Run Wav2Lip inference to lip-sync VIDEO_PATH with the generated audio.

    Returns the resulting video path, or None when inference failed.
    """
    print("Iniciando procesamiento de video...")
    print("Audio de entrada:", RESULT_AUDIO_FINAL_PATH)
    print("Video de entrada:", VIDEO_PATH)

    # Remove any result left over from a previous run: otherwise the
    # existence check below would report success even if inference failed.
    if os.path.exists(RESULT_VIDEO_PATH):
        os.remove(RESULT_VIDEO_PATH)

    result = subprocess.run(
        [sys.executable, RUN_INFERENCE_PATH,
         "--audio", RESULT_AUDIO_FINAL_PATH,
         "--video", VIDEO_PATH],
        capture_output=True, text=True
    )
    print("stdout:", result.stdout)
    print("stderr:", result.stderr)
    if result.returncode != 0:
        print(f"run_inference.py terminó con código {result.returncode}")

    if os.path.exists(RESULT_VIDEO_PATH):
        print("Video generado:", RESULT_VIDEO_PATH)
        return RESULT_VIDEO_PATH
    print("No se generó el video")
    return None


def flujo_completo(audio_file_path):
    """Full pipeline: copy recording -> transcribe -> OpenAI -> TTS -> video.

    Returns a 5-tuple matching the Gradio outputs:
    (status, recorded audio path, transcription, TTS audio path, video path).
    """
    # Gradio fires `change` with None when the recording is cleared; skip it
    # instead of crashing inside shutil.copy.
    if audio_file_path is None:
        return "Esperando grabación...", None, "", None, None
    try:
        os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
        shutil.copy(audio_file_path, AUDIO_RECORD_PATH)
        print("Audio grabado copiado a:", AUDIO_RECORD_PATH)

        transcripcion = transcribir_con_progreso(AUDIO_RECORD_PATH)
        print("Texto transcrito:", transcripcion)

        respuesta_openai = rtff(TRANSCRIPTION_TEXT_PATH)
        print("Respuesta de OpenAI:", respuesta_openai)

        audio_generado = generar_audio_desde_texto()
        video_path = procesar_video_audio()

        return "Grabación recibida", AUDIO_RECORD_PATH, transcripcion, audio_generado, video_path
    except Exception as e:
        # Boundary handler: surface the error in the UI instead of crashing the app.
        return (
            f"Error durante el flujo completo: {str(e)}",
            None,
            f"Error: {str(e)}",
            None,
            None
        )


def interfaz():
    """Build and return the Gradio Blocks UI."""
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
                audio_input = gr.Audio(label="Graba tu voz", type="filepath", format="wav")
                estado_grabacion = gr.Textbox(label="Estado", interactive=False)
            with gr.Column():
                output_audio = gr.Audio(label="Audio grabado", interactive=False)
                output_audio_speech = gr.Audio(label="Audio TTS", interactive=False)
                video_resultado = gr.Video(label="Video procesado", interactive=False)
        texto_transcripcion = gr.Textbox(label="Texto transcrito")

        audio_input.change(
            flujo_completo,
            inputs=audio_input,
            outputs=[estado_grabacion, output_audio, texto_transcripcion,
                     output_audio_speech, video_resultado]
        )
    return demo


if __name__ == "__main__":
    demo = interfaz()
    demo.launch()