# EKNA_V1 / app.py
# (Hugging Face Space file header — author: fireedman, commit 561a65c)
# interfaceV3.py
import gradio as gr
import shutil
import os
import subprocess
import sys

# One-time environment preparation (downloads/sets up the Wav2Lip assets).
# Use the current interpreter (sys.executable) rather than whatever "python"
# happens to be on PATH — in a venv/Space these can differ.
subprocess.run([sys.executable, "src/setup_wav2lip.py"])

# ADD THE CUSTOM MODULE PATH so the local helpers below are importable.
sys.path.append(os.path.abspath("./src"))
from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
from call_openai_api import moni as rtff

# Filesystem layout. Absolute paths so the helper subprocesses launched
# below agree with this process on file locations.
AUDIO_RECORD_PATH = os.path.abspath("./assets/audio/grabacion_gradio.wav")   # copy of the user's recording
VIDEO_PATH = os.path.abspath("./assets/video/data_video_sun.mp4")            # base video for lip-sync
TRANSCRIPTION_TEXT_PATH = os.path.abspath("./results/transcripcion.txt")     # Whisper transcript output
RESULT_AUDIO_TEMP_PATH = os.path.abspath("./results/audiov2.wav")            # TTS script output
RESULT_AUDIO_FINAL_PATH = os.path.abspath("./assets/audio/audio.wav")        # TTS audio fed to inference
RESULT_VIDEO_PATH = os.path.abspath("./results/result_voice.mp4")            # final lip-synced video
TEXT_TO_SPEECH_PATH = os.path.abspath("./src/text_to_speech.py")             # TTS helper script
RUN_INFERENCE_PATH = os.path.abspath("./src/run_inference.py")               # Wav2Lip inference script
def transcribir_con_progreso(audio_path, progress=gr.Progress()):
    """Transcribe *audio_path* with Whisper, reporting progress to the UI.

    Parameters:
        audio_path: path to a WAV file to transcribe.
        progress: Gradio progress tracker. It must be a default-valued
            parameter (not instantiated inside the body) for Gradio to
            detect and drive it; adding it is backward-compatible with
            existing callers.

    Returns:
        The transcription text (also persisted to TRANSCRIPTION_TEXT_PATH).
    """
    # gr.Progress expects fractions in [0, 1], not percentages.
    progress(0.0, "Iniciando transcripción...")
    model_name = "openai/whisper-large"
    progress(0.25, "Cargando modelo Whisper...")
    transcripcion = transcribe_audio(audio_path, model_name)
    progress(0.75, "Guardando transcripción...")
    guardar_transcripcion(transcripcion, filename=TRANSCRIPTION_TEXT_PATH)
    progress(1.0, "Transcripción completada.")
    return transcripcion
def generar_audio_desde_texto():
    """Run the TTS helper script and publish its output audio.

    Executes text_to_speech.py in a subprocess, then copies the temporary
    WAV it produced (RESULT_AUDIO_TEMP_PATH) to the location the inference
    step reads from (RESULT_AUDIO_FINAL_PATH).

    Returns:
        The final audio path, or None if the script produced no file.

    Raises:
        RuntimeError: if the TTS script exits with a non-zero status.
    """
    print("Ejecutando text_to_speech...")
    result = subprocess.run(
        [sys.executable, TEXT_TO_SPEECH_PATH],
        capture_output=True,
        text=True,
    )
    # Echo the child's streams so failures are diagnosable from the logs.
    print("stdout:", result.stdout)
    print("stderr:", result.stderr)
    if result.returncode != 0:
        raise RuntimeError(f"Error ejecutando text_to_speech.py: {result.stderr}")

    # Guard clause: nothing to publish if the script wrote no audio.
    if not os.path.exists(RESULT_AUDIO_TEMP_PATH):
        print("Audio temporal no encontrado")
        return None

    os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
    shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
    print(f"Audio copiado a: {RESULT_AUDIO_FINAL_PATH}")
    return RESULT_AUDIO_FINAL_PATH
def procesar_video_audio():
    """Run Wav2Lip inference to lip-sync VIDEO_PATH with the generated audio.

    Launches run_inference.py in a subprocess with the final TTS audio
    (RESULT_AUDIO_FINAL_PATH) and the base video (VIDEO_PATH).

    Returns:
        The path of the generated video, or None if no output file exists.

    Raises:
        RuntimeError: if the inference script exits with a non-zero status.
    """
    print("Iniciando procesamiento de video...")
    print("Audio de entrada:", RESULT_AUDIO_FINAL_PATH)
    print("Video de entrada:", VIDEO_PATH)
    result = subprocess.run(
        [sys.executable, RUN_INFERENCE_PATH, "--audio", RESULT_AUDIO_FINAL_PATH, "--video", VIDEO_PATH],
        capture_output=True,
        text=True
    )
    print("stdout:", result.stdout)
    print("stderr:", result.stderr)
    # Surface inference failures explicitly (consistent with
    # generar_audio_desde_texto) instead of silently returning None.
    if result.returncode != 0:
        raise RuntimeError(f"Error ejecutando run_inference.py: {result.stderr}")
    if os.path.exists(RESULT_VIDEO_PATH):
        print("Video generado:", RESULT_VIDEO_PATH)
        return RESULT_VIDEO_PATH
    else:
        print("No se generó el video")
        return None
def flujo_completo(audio_file_path):
    """End-to-end pipeline: recording -> transcription -> OpenAI -> TTS -> lip-sync.

    Parameters:
        audio_file_path: filepath of the recorded audio from the Gradio
            Audio component, or None when the recording was cleared.

    Returns:
        A 5-tuple matching the event outputs:
        (status message, recorded-audio path, transcription,
         generated-audio path, generated-video path).
        On error, the status and transcription fields carry the error text
        and the remaining fields are None.
    """
    # Gradio fires the change event with None when the recording is cleared;
    # bail out early instead of letting shutil.copy raise a TypeError.
    if audio_file_path is None:
        return "No se recibió audio", None, "", None, None
    try:
        os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
        shutil.copy(audio_file_path, AUDIO_RECORD_PATH)
        print("Audio grabado copiado a:", AUDIO_RECORD_PATH)
        transcripcion = transcribir_con_progreso(AUDIO_RECORD_PATH)
        print("Texto transcrito:", transcripcion)
        # The OpenAI helper reads the transcript from disk, not from memory.
        respuesta_openai = rtff(TRANSCRIPTION_TEXT_PATH)
        print("Respuesta de OpenAI:", respuesta_openai)
        audio_generado = generar_audio_desde_texto()
        video_path = procesar_video_audio()
        return "Grabación recibida", AUDIO_RECORD_PATH, transcripcion, audio_generado, video_path
    except Exception as e:
        # Top-level boundary for the UI: report the error in the status and
        # transcription boxes rather than crashing the event handler.
        return (
            f"Error durante el flujo completo: {str(e)}",
            None,
            f"Error: {str(e)}",
            None,
            None
        )
def interfaz():
    """Build and return the Gradio Blocks UI.

    Left column: looping base video, microphone input, and a status box.
    Right column: the recorded audio, the TTS audio, the processed video,
    and the transcription text. Recording a new clip triggers the full
    pipeline via flujo_completo.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            # Input side: reference video plus the user's recording.
            with gr.Column():
                gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
                entrada_audio = gr.Audio(label="Graba tu voz", type="filepath", format="wav")
                estado = gr.Textbox(label="Estado", interactive=False)
            # Output side: every artifact produced by the pipeline.
            with gr.Column():
                audio_grabado = gr.Audio(label="Audio grabado", interactive=False)
                audio_tts = gr.Audio(label="Audio TTS", interactive=False)
                video_final = gr.Video(label="Video procesado", interactive=False)
                transcripcion = gr.Textbox(label="Texto transcrito")

        # A new recording (change event) kicks off the whole pipeline.
        entrada_audio.change(
            flujo_completo,
            inputs=entrada_audio,
            outputs=[estado, audio_grabado, transcripcion, audio_tts, video_final]
        )
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server when run as a script.
    interfaz().launch()