# NOTE: extraction-artifact metadata (file size / commit hash / line-number index) removed.
# interfaceV2.py
import gradio as gr
import sounddevice as sd
from scipy.io.wavfile import write
import tempfile
import shutil
import os
import subprocess
import sys
from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
from call_openai_api import moni as rtff # Asegúrate de que el archivo call_openai_api.py esté en el mismo directorio
# Paths to files (adjusted as per your specified structure).
# Single project root: relocating the project now means editing one line
# instead of eight duplicated hard-coded prefixes.
BASE_DIR = "C:/programacionEjercicios/miwav2lipv6"

# WAV clip recorded through the Gradio UI.
AUDIO_RECORD_PATH = os.path.abspath(os.path.join(BASE_DIR, "assets/audio/grabacion_gradio.wav"))
# Base avatar video shown in the UI and used as the lip-sync target.
VIDEO_PATH = os.path.abspath(os.path.join(BASE_DIR, "assets/video/data_video_sun.mp4"))
# Whisper transcription output file.
TRANSCRIPTION_TEXT_PATH = os.path.abspath(os.path.join(BASE_DIR, "results/transcripcion.txt"))
# TTS output: first written to the temp location, then copied to the final
# location that the Wav2Lip inference step reads.
RESULT_AUDIO_TEMP_PATH = os.path.abspath(os.path.join(BASE_DIR, "results/audiov2.wav"))
RESULT_AUDIO_FINAL_PATH = os.path.abspath(os.path.join(BASE_DIR, "assets/audio/audio.wav"))
# Lip-synced video produced by run_inference.py.
RESULT_VIDEO_PATH = os.path.abspath(os.path.join(BASE_DIR, "results/result_voice.mp4"))
# Helper script that converts the OpenAI reply text to speech.
TEXT_TO_SPEECH_PATH = os.path.abspath(os.path.join(BASE_DIR, "src/text_to_speech.py"))
# Function to record 8-second audio
def grabar_audio(duration=8, sample_rate=44100):
    """Record mono audio from the default input device and save it.

    The clip is first written to a temporary WAV file and then copied to
    AUDIO_RECORD_PATH so the Gradio widgets always reference a stable path.

    Args:
        duration: Recording length in seconds.
        sample_rate: Sampling rate in Hz.

    Returns:
        Tuple of (path to the saved recording, status message).
    """
    print("Starting recording...")
    audio_data = sd.rec(int(duration * sample_rate), samplerate=sample_rate, channels=1)
    print(f"Recording in progress for {duration} seconds...")
    sd.wait()  # block until the recording buffer is full
    print("Recording completed.")
    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    try:
        write(temp_audio.name, sample_rate, audio_data)
        print("Audio temporarily saved at:", temp_audio.name)
        temp_audio.close()  # close before copying so Windows allows the read
        os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
        shutil.copy(temp_audio.name, AUDIO_RECORD_PATH)
        print(f"Recording copied to: {AUDIO_RECORD_PATH}")
    finally:
        # Bug fix: the delete=False temp file was never removed, leaking one
        # WAV per recording in the temp directory.
        temp_audio.close()
        if os.path.exists(temp_audio.name):
            os.remove(temp_audio.name)
    return AUDIO_RECORD_PATH, "Recording completed."
# Function to transcribe audio with Whisper
def transcribir_con_progreso(audio_path):
    """Transcribe the audio file at ``audio_path`` with Whisper.

    Progress is reported through a ``gr.Progress`` tracker at fixed
    milestones. The transcription is persisted to TRANSCRIPTION_TEXT_PATH.

    Returns:
        The transcription string.

    Raises:
        FileNotFoundError: if the transcription file was not written.
    """
    progress = gr.Progress()
    progress(0, "Starting transcription...")
    progress(25, "Loading Whisper model...")
    texto = transcribe_audio(audio_path, "openai/whisper-large")
    progress(75, "Saving transcription...")
    guardar_transcripcion(texto, filename=TRANSCRIPTION_TEXT_PATH)
    progress(100, "Transcription completed.")
    # Sanity check: make sure the helper actually produced the file.
    if not os.path.exists(TRANSCRIPTION_TEXT_PATH):
        raise FileNotFoundError(f"El archivo {TRANSCRIPTION_TEXT_PATH} no se generó.")
    return texto
# Function to convert text to audio using text_to_speech.py
def generar_audio_desde_texto():
    """Run text_to_speech.py to synthesize speech from the saved reply text.

    Launches the script with the current interpreter, then copies the
    generated WAV from RESULT_AUDIO_TEMP_PATH to RESULT_AUDIO_FINAL_PATH
    (the location the lip-sync step reads).

    Returns:
        RESULT_AUDIO_FINAL_PATH on success, or None when the TTS script did
        not produce the expected file.

    Raises:
        RuntimeError: if text_to_speech.py exits with a non-zero status.
    """
    print("Generating audio from text...")
    result = subprocess.run(
        [sys.executable, TEXT_TO_SPEECH_PATH],
        capture_output=True,
        text=True
    )
    if result.returncode != 0:
        raise RuntimeError(f"Error ejecutando text_to_speech.py: {result.stderr}")
    if result.stdout:
        print("Output:", result.stdout)
    if result.stderr:
        print("Errors:", result.stderr)
    if not os.path.exists(RESULT_AUDIO_TEMP_PATH):
        # Bug fix: the old message reported RESULT_AUDIO_FINAL_PATH even
        # though the file being checked is the temporary one.
        print(f"Error: Audio file was not generated in {RESULT_AUDIO_TEMP_PATH}")
        return None
    print(f"Temporary audio generated at: {RESULT_AUDIO_TEMP_PATH}")
    os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
    shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
    print(f"Final audio copied to: {RESULT_AUDIO_FINAL_PATH}")
    return RESULT_AUDIO_FINAL_PATH
# Function to process video and audio using run_inference.py with the generated audio file
def procesar_video_audio():
    """Invoke run_inference.py to lip-sync VIDEO_PATH with the TTS audio.

    Returns:
        The path of the generated video, or None when the subprocess did
        not produce RESULT_VIDEO_PATH.
    """
    print("Starting video and audio processing...")
    inference_script = os.path.abspath("C:/programacionEjercicios/miwav2lipv6/src/run_inference.py")
    cmd = [sys.executable, inference_script, "--audio", RESULT_AUDIO_FINAL_PATH, "--video", VIDEO_PATH]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.stdout:
        print("Output:", result.stdout)
    if result.stderr:
        print("Errors:", result.stderr)
    # The script signals success by writing the output video in place.
    if not os.path.exists(RESULT_VIDEO_PATH):
        print("Error: Video file was not generated at 'results/result_voice.mp4'")
        return None
    print(f"Processed video saved at: {RESULT_VIDEO_PATH}")
    return RESULT_VIDEO_PATH
# Gradio Interface Configuration
def interfaz():
    """Build the Gradio Blocks UI.

    Wires one button to the full pipeline:
    record -> Whisper transcription -> OpenAI reply -> TTS -> lip-sync video.

    Returns:
        The configured gr.Blocks application.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                # Idle avatar video looping while the user interacts.
                gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
                grabar_button = gr.Button("Comenzando la grabacion de audio")
                estado_grabacion = gr.Textbox(label="Recording Status", interactive=False)
            with gr.Column():
                output_audio = gr.Audio(AUDIO_RECORD_PATH, label="Audio Grabado", interactive=False)
                output_audio_speech = gr.Audio(RESULT_AUDIO_FINAL_PATH, label="Audio TTS", interactive=False)
                video_resultado = gr.Video(RESULT_VIDEO_PATH, label="Video procesado", interactive=False)
                texto_transcripcion = gr.Textbox(label="Texto transcrito")
                progreso_transcripcion = gr.Textbox(label="Transcription Status", interactive=False)

        # Full flow: recording, transcription, OpenAI reply, text-to-speech,
        # and video processing.
        def flujo_completo():
            """Run the whole pipeline; always yields five outputs for Gradio."""
            try:
                print("Inicio del flujo completo...")
                # Record audio
                audio_path, mensaje_grabacion = grabar_audio()
                print("Audio grabado en:", audio_path)
                # Transcribe audio
                transcripcion = transcribir_con_progreso(audio_path)
                print("Transcripción completada:", transcripcion)
                # Ask OpenAI for a reply; rtff reads the transcription file.
                # NOTE(review): the return value was unused, so it is not bound.
                rtff(TRANSCRIPTION_TEXT_PATH)
                print("Respuesta generada por OpenAI")
                # Generate speech audio from the reply text
                audio_generado = generar_audio_desde_texto()
                print("Audio generado:", audio_generado)
                # Lip-sync the avatar video with the generated audio
                video_path = procesar_video_audio()
                print("Video procesado en:", video_path)
                return mensaje_grabacion, audio_path, transcripcion, audio_generado, video_path
            except Exception as e:
                # Log the error and return placeholder values so Gradio still
                # receives the five outputs it expects.
                print("Error detectado en flujo completo:", str(e))
                return (
                    "Error durante el flujo completo",
                    None,  # recorded audio
                    f"Error: {str(e)}",  # transcription
                    None,  # generated audio
                    None   # processed video
                )

        grabar_button.click(
            flujo_completo,
            outputs=[estado_grabacion, output_audio, texto_transcripcion, output_audio_speech, video_resultado]
        )
    return demo
# Script entry point: build the UI and serve it. allowed_paths whitelists the
# asset/result directories so Gradio may serve files outside its temp dir.
if __name__ == "__main__":
    demo = interfaz()
    demo.launch(allowed_paths=["C:/programacionEjercicios/miwav2lipv6/assets", "C:/programacionEjercicios/miwav2lipv6/results"])