# news3 / app.py — Gradio news-video generator
# Source: salomonsky's Hugging Face Space, commit fa691f6 (verified)
import gradio as gr
from gtts import gTTS
import os
from PIL import Image
from pydub import AudioSegment
import subprocess
import shutil
import math
def text_to_speech(text: str, output_filename: str = "audio.mp3") -> str:
    """Synthesize Spanish speech for *text* with gTTS and save it as MP3.

    Args:
        text: Text to convert to speech (Spanish voice, ``lang='es'``).
        output_filename: Destination path for the generated MP3 file.

    Returns:
        The path of the saved audio file.

    Raises:
        RuntimeError: If gTTS fails (e.g. no network access); the original
            exception is chained as the cause.
    """
    try:
        tts = gTTS(text=text, lang='es')
        tts.save(output_filename)
        return output_filename
    except Exception as e:
        # Chain the cause so the underlying gTTS failure is not lost.
        raise RuntimeError(f"Error al generar el audio con gTTS: {e}") from e
def get_audio_duration(audio_path: str) -> float:
    """Return the duration in seconds of the audio file at *audio_path*.

    Returns 0 when the file is missing or empty so callers can treat
    "no audio" uniformly instead of handling decode errors.

    Raises:
        RuntimeError: If pydub/ffmpeg cannot decode the file; the original
            exception is chained as the cause.
    """
    if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
        return 0
    try:
        return AudioSegment.from_file(audio_path).duration_seconds
    except Exception as e:
        raise RuntimeError(f"Error al obtener la duración del audio: {e}") from e
def process_image(img_path, target_width, target_height, output_folder, index):
    """Center-crop and resize one image to exactly target_width x target_height.

    The image is first cropped (keeping the center) to the target aspect
    ratio, then resized with LANCZOS resampling and saved as a PNG.

    Args:
        img_path: Path of the source image.
        target_width: Output width in pixels.
        target_height: Output height in pixels.
        output_folder: Directory where the processed PNG is written.
        index: Sequence number embedded in the output filename.

    Returns:
        The path of the processed PNG, or ``None`` if processing failed
        (callers filter out ``None`` entries — best-effort by design).
    """
    try:
        img = Image.open(img_path).convert("RGB")
        original_width, original_height = img.size
        target_ratio = target_width / target_height
        image_ratio = original_width / original_height
        if image_ratio > target_ratio:
            # Too wide: trim equal amounts from left and right.
            new_width = int(original_height * target_ratio)
            left = (original_width - new_width) // 2  # integer box, not float
            img = img.crop((left, 0, left + new_width, original_height))
        elif image_ratio < target_ratio:
            # Too tall: trim equal amounts from top and bottom.
            new_height = int(original_width / target_ratio)
            top = (original_height - new_height) // 2
            img = img.crop((0, top, original_width, top + new_height))
        img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
        output_path = os.path.join(output_folder, f"processed_image_{index:03d}.png")
        img.save(output_path)
        return output_path
    except Exception:
        # Deliberate best-effort: a single unreadable image is skipped
        # rather than aborting the whole video build.
        return None
def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
    """Build a silent video from stills with a Ken Burns (slow-zoom) effect.

    Each image is shown for ``IMAGE_DURATION`` seconds with a gentle zoom
    plus a 1-second fade in/out. The image list is looped enough times to
    cover *audio_duration*; the encoded output is trimmed to that duration
    via ``-t``.

    Args:
        processed_images: Paths of pre-sized images (all target resolution).
        audio_duration: Target video length in seconds.
        fps: Frames per second for the zoompan effect.
        video_size: ``(width, height)`` of the output video.
        output_filename: Path of the H.264 MP4 to write.

    Returns:
        ``output_filename`` on success.

    Raises:
        ValueError: If *processed_images* is empty.
        RuntimeError: If ffmpeg fails; the message includes ffmpeg's stderr.
    """
    if not processed_images:
        raise ValueError("No hay imágenes procesadas para crear el video.")
    IMAGE_DURATION = 3  # seconds each image stays on screen
    num_images = len(processed_images)
    width, height = video_size
    cycle_seconds = num_images * IMAGE_DURATION
    num_loops = math.ceil(audio_duration / cycle_seconds) if cycle_seconds > 0 else 1
    total_clips = num_images * num_loops

    # One "-i" input per clip (images repeated num_loops times).
    input_commands = []
    for img_path in processed_images * num_loops:
        input_commands.extend(["-i", img_path])

    # Pre-scale each frame 20% larger than the target so zoompan has room
    # to pan; ffmpeg's scale filter needs integer dimensions, so truncate
    # the float product instead of embedding e.g. "864.0".
    zoom = 1.2
    pre_w, pre_h = int(width * zoom), int(height * zoom)

    filter_complex_chains = []
    video_clips = []
    for i in range(total_clips):
        filter_complex_chains.append(
            f"[{i}:v]scale={pre_w}:{pre_h},"
            f"zoompan=z='min(zoom+0.0015,1.5)':d={fps*IMAGE_DURATION}:"
            f"x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height},"
            f"fade=t=in:st=0:d=1,fade=t=out:st={IMAGE_DURATION-1}:d=1[v{i}]"
        )
        video_clips.append(f"[v{i}]")
    concat_filter = f"{''.join(video_clips)}concat=n={total_clips}:v=1:a=0,format=yuv420p[v]"
    filter_complex = ";".join(filter_complex_chains) + ";" + concat_filter

    command = ["ffmpeg", "-y"]
    command.extend(input_commands)
    command.extend([
        "-filter_complex", filter_complex,
        "-map", "[v]",
        "-t", str(audio_duration),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        output_filename
    ])
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        # Surface ffmpeg's stderr — the exception alone carries no detail.
        raise RuntimeError(f"Error al crear video con efecto Ken Burns: {e.stderr}") from e
    return output_filename
def combine_video_and_audio(video_path, audio_path, output_path):
    """Mux *video_path* and *audio_path* into *output_path* with ffmpeg.

    The video stream is copied as-is (no re-encode); audio is encoded to
    AAC. ``-shortest`` trims the result to the shorter of the two inputs.

    Raises:
        RuntimeError: If ffmpeg fails; the message includes ffmpeg's stderr.
    """
    command = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-i", audio_path,
        "-c:v", "copy",
        "-c:a", "aac",
        "-map", "0:v:0",
        "-map", "1:a:0",
        "-shortest",
        output_path,
    ]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Error al combinar video y audio: {e.stderr}") from e
def generate_tts_only(news_text_input):
    """Generate a standalone preview MP3 for the given news text.

    Returns:
        A ``(status_message, audio_path_or_None)`` tuple for the Gradio UI.
    """
    if not news_text_input:
        return "Por favor, escribe una noticia para generar el audio.", None
    try:
        preview_path = text_to_speech(news_text_input, "audio_temp_preview.mp3")
    except Exception as e:
        return f"Ocurrió un error al generar solo el audio: {e}", None
    return "Audio generado con éxito.", preview_path
def create_news_video_app(news_text_input, image_files, video_ratio, input_audio_file):
    """Full pipeline: text/audio + images -> final news video with sound.

    Args:
        news_text_input: News text, synthesized to speech when no valid
            preview audio is supplied.
        image_files: Uploaded images — path strings, or objects exposing
            ``.name`` (older Gradio file wrappers).
        video_ratio: ``"9:16"`` for 720x1280 portrait; anything else yields
            1280x720 landscape.
        input_audio_file: Optional path to an existing preview audio file.

    Returns:
        A ``(status_message, final_video_path_or_None)`` tuple for the UI.
    """
    processed_image_folder = "temp_processed_images"
    final_output_video_path = "video_noticia_final.mp4"
    temp_video_no_audio_path = "video_sin_audio.mp4"
    temp_audio_file = "audio_para_video.mp3"
    # Start from a clean scratch folder for the processed stills.
    if os.path.exists(processed_image_folder):
        shutil.rmtree(processed_image_folder)
    os.makedirs(processed_image_folder)
    try:
        if not image_files:
            raise ValueError("Por favor, sube al menos una imagen.")
        # Reuse the preview audio only if it is a valid non-empty file;
        # otherwise synthesize fresh speech from the news text.
        if isinstance(input_audio_file, str) and os.path.exists(input_audio_file) and os.path.getsize(input_audio_file) > 0:
            shutil.copy(input_audio_file, temp_audio_file)
        else:
            if not news_text_input:
                raise ValueError("Escribe una noticia para generar el audio, ya que no se proporcionó una vista previa válida.")
            text_to_speech(news_text_input, temp_audio_file)
        audio_duration = get_audio_duration(temp_audio_file)
        if audio_duration == 0:
            raise ValueError("La duración del audio es cero.")
        target_width, target_height = (720, 1280) if video_ratio == "9:16" else (1280, 720)
        # gr.File(type="filepath") yields plain path strings; older Gradio
        # versions yield tempfile objects exposing .name — accept both.
        image_paths = [f if isinstance(f, str) else f.name for f in image_files]
        processed_images_paths = [
            process_image(path, target_width, target_height, processed_image_folder, i)
            for i, path in enumerate(image_paths)
        ]
        processed_images_paths = [p for p in processed_images_paths if p]
        if not processed_images_paths:
            raise ValueError("No se pudieron procesar las imágenes.")
        create_video_with_ken_burns(processed_images_paths, audio_duration, 30, (target_width, target_height), temp_video_no_audio_path)
        combine_video_and_audio(temp_video_no_audio_path, temp_audio_file, final_output_video_path)
        return "Video generado con éxito.", final_output_video_path
    except Exception as e:
        # The UI shows errors as a status message rather than crashing.
        return f"Ocurrió un error: {e}", None
    finally:
        # Always remove intermediates, on success or failure alike.
        if os.path.exists(processed_image_folder):
            shutil.rmtree(processed_image_folder)
        if os.path.exists(temp_video_no_audio_path):
            os.remove(temp_video_no_audio_path)
        if os.path.exists(temp_audio_file):
            os.remove(temp_audio_file)
        if os.path.exists("audio_temp_preview.mp3"):
            os.remove("audio_temp_preview.mp3")
# --- Gradio UI: layout and event wiring ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Title: replaces a mojibake character (�) with the intended emoji.
    gr.Markdown("# 📰 Creador de Videos de Noticias")
    with gr.Row():
        # Left column: inputs (text, images, format, optional audio preview).
        with gr.Column(scale=2):
            news_input = gr.Textbox(label="1. Escribe tu noticia aquí", lines=5)
            image_upload = gr.File(label="2. Sube tus imágenes", file_count="multiple", type="filepath", file_types=[".jpg", ".jpeg", ".png"])
            video_ratio_dropdown = gr.Dropdown(label="3. Elige el Formato del Video", choices=["16:9", "9:16"], value="9:16", interactive=True)
            with gr.Accordion("Opciones de Audio (Opcional)", open=False):
                generate_audio_button = gr.Button("Generar Solo Audio (Vista Previa)")
                audio_status_message = gr.Textbox(label="Estado del Audio", interactive=False)
                audio_output_preview = gr.Audio(label="Audio de Noticia (Vista Previa)", interactive=False)
            generate_video_button = gr.Button("🎬 Generar Video Completo", variant="primary")
        # Right column: outputs (status text and the rendered video).
        with gr.Column(scale=3):
            output_message = gr.Textbox(label="Estado del Proceso", interactive=False)
            video_output = gr.Video(label="Video de la Noticia Generado")
    # Preview button: text -> MP3 only.
    generate_audio_button.click(
        fn=generate_tts_only,
        inputs=[news_input],
        outputs=[audio_status_message, audio_output_preview]
    )
    # Main button: full pipeline; the preview audio (if any) is reused.
    generate_video_button.click(
        fn=create_news_video_app,
        inputs=[news_input, image_upload, video_ratio_dropdown, audio_output_preview],
        outputs=[output_message, video_output]
    )
demo.launch()