Spaces:

salomonsky
/

news3

Running

File size: 8,043 Bytes

edb354a
ffd7703
edb354a
80d9746
0510ab1
 
 
80d9746
ffd7703
 
edb354a
ffd7703
 
d42851a
 
ffd7703
60edbd8
fc310b8
80d9746
 
fc310b8
 
 
 
80d9746
fc310b8
cea3c60
 
 
 
 
 
 
 
 
 
80d9746
 
cea3c60
 
80d9746
cc42f3e
cea3c60
 
 
 
 
 
6273d7d
80d9746
 
c9448ed
6273d7d
80d9746
 
 
 
 
 
 
 
 
 
 
 
ffd7703
80d9746
 
ffd7703
80d9746
 
 
 
ffd7703
80d9746
 
 
 
 
 
78756c2
 
80d9746
 
78756c2
80d9746
78756c2
80d9746
6273d7d
80d9746
 
fc310b8
80d9746
fc310b8
80d9746
fc310b8
ffd7703
80d9746
bebc0f4
d42851a
ffd7703
80d9746
b85e3c3
 
 
ffd7703
80d9746
 
 
 
b85e3c3
80d9746
 
cc42f3e
b85e3c3
80d9746
fa691f6
 
80d9746
 
fa691f6
 
ffd7703
 
cea3c60
ffd7703
edb354a
cc42f3e
80d9746
 
 
6273d7d
80d9746
 
cc42f3e
80d9746
c92ef96
d42851a
80d9746
cea3c60
80d9746
 
 
 
 
 
fa691f6
d42851a
80d9746
 
ffd7703
 
80d9746
 
 
 
 
 
 
 
d42851a
b85e3c3
 
ffd7703
b85e3c3
 
 
cea3c60
ffd7703
d42851a
edb354a
 
fa691f6

import logging
import math
import os
import shutil
import subprocess

import gradio as gr
from gtts import gTTS
from PIL import Image
from pydub import AudioSegment

def text_to_speech(text: str, output_filename="audio.mp3"):
    """Synthesize Spanish speech for ``text`` with gTTS and save it to a file.

    Args:
        text: Text to read aloud. Must contain at least one non-whitespace
            character.
        output_filename: Path the generated audio is written to.

    Returns:
        ``output_filename``, unchanged.

    Raises:
        ValueError: If ``text`` is empty, ``None`` or whitespace-only.
        RuntimeError: If gTTS fails to synthesize or save the audio. The
            original exception is chained as ``__cause__``.
    """
    # Reject blank input up front so the caller gets a clear message instead
    # of whatever the TTS backend reports when it has nothing to speak.
    if not text or not text.strip():
        raise ValueError("El texto para generar el audio está vacío.")
    try:
        tts = gTTS(text=text, lang='es')
        tts.save(output_filename)
    except Exception as e:
        # RuntimeError is still caught by callers using ``except Exception``.
        raise RuntimeError(f"Error al generar el audio con gTTS: {e}") from e
    return output_filename

def get_audio_duration(audio_path):
    """Return the duration of an audio file in seconds.

    A path that does not exist, or that points to a zero-byte file, yields
    ``0`` without attempting to decode anything. Otherwise the file is loaded
    with pydub and its ``duration_seconds`` is returned.

    Raises:
        Exception: If pydub cannot load the file.
    """
    file_is_usable = os.path.exists(audio_path) and os.path.getsize(audio_path) != 0
    if not file_is_usable:
        return 0

    try:
        return AudioSegment.from_file(audio_path).duration_seconds
    except Exception as err:
        raise Exception(f"Error al obtener la duración del audio: {err}")

def process_image(img_path, target_width, target_height, output_folder, index):
    """Center-crop an image to the target aspect ratio, resize it and save it as PNG.

    Args:
        img_path: Path of the source image.
        target_width: Output width in pixels.
        target_height: Output height in pixels.
        output_folder: Existing directory the processed PNG is written to.
        index: Number used in the output file name
            (``processed_image_<index:03d>.png``).

    Returns:
        The path of the saved PNG, or ``None`` if the image could not be
        processed. Failures are logged as warnings rather than raised, so
        callers can skip bad images and continue with the rest.
    """
    try:
        # convert() returns a new image, so the source file can be closed
        # as soon as the RGB copy exists.
        with Image.open(img_path) as source:
            img = source.convert("RGB")
        original_width, original_height = img.size
        target_ratio = target_width / target_height
        image_ratio = original_width / original_height

        if image_ratio > target_ratio:
            # Too wide: trim equal amounts from the left and right edges.
            new_width = int(original_height * target_ratio)
            # Integer offset keeps the crop box exactly new_width wide.
            left = (original_width - new_width) // 2
            img = img.crop((left, 0, left + new_width, original_height))
        elif image_ratio < target_ratio:
            # Too tall: trim equal amounts from the top and bottom edges.
            new_height = int(original_width / target_ratio)
            top = (original_height - new_height) // 2
            img = img.crop((0, top, original_width, top + new_height))

        img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
        output_path = os.path.join(output_folder, f"processed_image_{index:03d}.png")
        img.save(output_path)
        return output_path
    except Exception:
        # Best-effort by design (callers filter out None), but leave a trace
        # so a skipped image can be diagnosed.
        logging.getLogger(__name__).warning(
            "Could not process image %r; skipping it", img_path, exc_info=True
        )
        return None

def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename, image_duration=3):
    """Render a silent slideshow with a slow zoom ("Ken Burns") effect via ffmpeg.

    Each image becomes one clip of ``image_duration`` seconds with a one-second
    fade-in and fade-out. The image list is repeated as many times as needed to
    cover ``audio_duration``, and the output is trimmed to that length with
    ``-t``.

    Args:
        processed_images: Paths of the image files to show, in order.
        audio_duration: Target video length in seconds; must be positive.
        fps: Frame rate of the generated clips.
        video_size: ``(width, height)`` of the output video in pixels.
        output_filename: Path of the video file to write (``-y`` overwrites).
        image_duration: Seconds each image stays on screen. Defaults to 3.

    Raises:
        ValueError: If there are no images, or if ``audio_duration`` or
            ``image_duration`` is not positive.
        RuntimeError: If ffmpeg exits with a non-zero status; its stderr is
            included in the message.
    """
    if not processed_images:
        raise ValueError("No hay imágenes procesadas para crear el video.")
    # A non-positive duration would yield zero clips and an invalid
    # ``concat=n=0`` filter, so fail early with a clear message.
    if audio_duration <= 0:
        raise ValueError("La duración del audio debe ser mayor que cero.")
    if image_duration <= 0:
        raise ValueError("La duración por imagen debe ser mayor que cero.")

    width, height = video_size
    num_images = len(processed_images)
    num_loops = math.ceil(audio_duration / (num_images * image_duration))
    clip_paths = list(processed_images) * num_loops
    total_clips = len(clip_paths)

    # Upscale before zoompan so the zoomed crop is taken from a larger frame.
    # Rounded to integers so ffmpeg never sees sizes like "863.9999".
    pre_zoom_scale = 1.2
    scaled_width = round(width * pre_zoom_scale)
    scaled_height = round(height * pre_zoom_scale)
    frames_per_image = max(1, round(fps * image_duration))
    fade_out_start = max(image_duration - 1, 0)

    input_commands = []
    for img_path in clip_paths:
        input_commands.extend(["-i", img_path])

    # zoompan emits 25 fps unless told otherwise; passing ``fps`` makes
    # ``frames_per_image`` frames last exactly ``image_duration`` seconds, so
    # the fade-out (timed in seconds) ends where the clip ends.
    filter_complex_chains = [
        f"[{i}:v]scale={scaled_width}:{scaled_height},"
        f"zoompan=z='min(zoom+0.0015,1.5)':d={frames_per_image}"
        f":x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height}:fps={fps},"
        f"fade=t=in:st=0:d=1,fade=t=out:st={fade_out_start}:d=1[v{i}]"
        for i in range(total_clips)
    ]
    clip_labels = "".join(f"[v{i}]" for i in range(total_clips))
    concat_filter = f"{clip_labels}concat=n={total_clips}:v=1:a=0,format=yuv420p[v]"
    filter_complex = ";".join(filter_complex_chains + [concat_filter])

    command = ["ffmpeg", "-y"]
    command.extend(input_commands)
    command.extend([
        "-filter_complex", filter_complex,
        "-map", "[v]",
        "-t", str(audio_duration),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        output_filename
    ])
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Error al crear video con efecto Ken Burns: {e.stderr}") from e

def combine_video_and_audio(video_path, audio_path, output_path):
    """Mux a video file and an audio file into ``output_path`` using ffmpeg.

    The video stream of the first input is copied as-is, the audio stream of
    the second input is encoded as AAC, and ``-shortest`` ends the output when
    the shorter of the two streams ends. An existing output file is
    overwritten (``-y``).

    Raises:
        Exception: If ffmpeg exits with a non-zero status; the message
            carries ffmpeg's stderr.
    """
    ffmpeg_args = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-i", audio_path,
        "-c:v", "copy",
        "-c:a", "aac",
        "-map", "0:v:0",
        "-map", "1:a:0",
        "-shortest",
        output_path,
    ]
    try:
        subprocess.run(ffmpeg_args, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as failure:
        raise Exception(f"Error al combinar video y audio: {failure.stderr}")

def generate_tts_only(news_text_input):
    """Gradio callback: generate only the preview audio for the news text.

    Returns:
        A ``(status_message, audio_path)`` tuple. ``audio_path`` is ``None``
        when no text was given or when speech generation failed; errors are
        reported through the status message instead of being raised.
    """
    # Guard clause: nothing to synthesize.
    if not news_text_input:
        return "Por favor, escribe una noticia para generar el audio.", None

    try:
        preview_path = text_to_speech(news_text_input, "audio_temp_preview.mp3")
    except Exception as err:
        return f"Ocurrió un error al generar solo el audio: {err}", None
    return "Audio generado con éxito.", preview_path

def create_news_video_app(news_text_input, image_files, video_ratio, input_audio_file):
    """Gradio callback: build the narrated news video from text and images.

    Steps: obtain the narration audio (reuse ``input_audio_file`` if it is a
    path to a non-empty file, otherwise synthesize it from the text), crop and
    resize the images to the chosen format, render the slideshow, and mux
    video and audio.

    Args:
        news_text_input: News text; only required when no usable audio file
            is supplied.
        image_files: Uploaded images, as path strings or as file-like objects
            exposing ``.name``. A single upload (not wrapped in a list) is
            accepted too.
        video_ratio: ``"9:16"`` selects 720x1280; any other value selects
            1280x720.
        input_audio_file: Optional path of an already generated narration.

    Returns:
        A ``(status_message, video_path)`` tuple; ``video_path`` is ``None``
        on failure. Errors are reported via the message, never raised.
    """
    # NOTE: fixed working file names in the current directory mean that
    # concurrent requests would overwrite each other's files.
    processed_image_folder = "temp_processed_images"
    final_output_video_path = "video_noticia_final.mp4"
    temp_video_no_audio_path = "video_sin_audio.mp4"
    temp_audio_file = "audio_para_video.mp3"

    try:
        # Inside the try so filesystem errors are reported through the
        # returned message like every other failure.
        if os.path.exists(processed_image_folder):
            shutil.rmtree(processed_image_folder)
        os.makedirs(processed_image_folder)

        if not image_files:
            raise ValueError("Por favor, sube al menos una imagen.")

        # Normalize uploads to a list of path strings. Path strings are used
        # as-is; file-like objects contribute their ``.name``.
        if isinstance(image_files, (str, os.PathLike)) or hasattr(image_files, "name"):
            image_files = [image_files]
        image_paths = [
            f if isinstance(f, (str, os.PathLike)) else f.name
            for f in image_files
        ]

        if isinstance(input_audio_file, str) and os.path.exists(input_audio_file) and os.path.getsize(input_audio_file) > 0:
            shutil.copy(input_audio_file, temp_audio_file)
        else:
            if not news_text_input:
                raise ValueError("Escribe una noticia para generar el audio, ya que no se proporcionó una vista previa válida.")
            text_to_speech(news_text_input, temp_audio_file)

        audio_duration = get_audio_duration(temp_audio_file)
        if audio_duration == 0:
            raise ValueError("La duración del audio es cero.")

        target_width, target_height = (720, 1280) if video_ratio == "9:16" else (1280, 720)
        processed_images_paths = [
            process_image(path, target_width, target_height, processed_image_folder, i)
            for i, path in enumerate(image_paths)
        ]
        # process_image returns None for images it could not handle.
        processed_images_paths = [p for p in processed_images_paths if p]
        if not processed_images_paths:
            raise ValueError("No se pudieron procesar las imágenes.")

        create_video_with_ken_burns(processed_images_paths, audio_duration, 30, (target_width, target_height), temp_video_no_audio_path)
        combine_video_and_audio(temp_video_no_audio_path, temp_audio_file, final_output_video_path)

        return "Video generado con éxito.", final_output_video_path

    except Exception as e:
        return f"Ocurrió un error: {e}", None
    finally:
        # Remove every intermediate artifact; only the final video is kept.
        if os.path.exists(processed_image_folder):
            shutil.rmtree(processed_image_folder)
        if os.path.exists(temp_video_no_audio_path):
            os.remove(temp_video_no_audio_path)
        if os.path.exists(temp_audio_file):
            os.remove(temp_audio_file)
        if os.path.exists("audio_temp_preview.mp3"):
            os.remove("audio_temp_preview.mp3")

# Gradio UI: inputs and audio preview in the left column, status and the
# generated video in the right column.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # The title previously contained a U+FFFD replacement character where an
    # emoji had been lost; a newspaper emoji is used in its place.
    gr.Markdown("# 📰 Creador de Videos de Noticias")
    with gr.Row():
        with gr.Column(scale=2):
            news_input = gr.Textbox(label="1. Escribe tu noticia aquí", lines=5)
            image_upload = gr.File(label="2. Sube tus imágenes", file_count="multiple", type="filepath", file_types=[".jpg", ".jpeg", ".png"])
            video_ratio_dropdown = gr.Dropdown(label="3. Elige el Formato del Video", choices=["16:9", "9:16"], value="9:16", interactive=True)
            with gr.Accordion("Opciones de Audio (Opcional)", open=False):
                generate_audio_button = gr.Button("Generar Solo Audio (Vista Previa)")
                audio_status_message = gr.Textbox(label="Estado del Audio", interactive=False)
                # type="filepath": this component is also an *input* of the
                # video callback, which only reuses the preview when it gets a
                # path string. With the default type the callback would get
                # decoded audio data instead and always re-synthesize.
                audio_output_preview = gr.Audio(label="Audio de Noticia (Vista Previa)", interactive=False, type="filepath")
            generate_video_button = gr.Button("🎬 Generar Video Completo", variant="primary")
        with gr.Column(scale=3):
            output_message = gr.Textbox(label="Estado del Proceso", interactive=False)
            video_output = gr.Video(label="Video de la Noticia Generado")

    # Preview button: synthesize the narration only.
    generate_audio_button.click(
        fn=generate_tts_only,
        inputs=[news_input],
        outputs=[audio_status_message, audio_output_preview]
    )
    # Main button: build the full video, reusing the preview audio if present.
    generate_video_button.click(
        fn=create_news_video_app,
        inputs=[news_input, image_upload, video_ratio_dropdown, audio_output_preview],
        outputs=[output_message, video_output]
    )

demo.launch()