"""Gradio app that turns a news text plus a set of images into a narrated video.

Pipeline: gTTS narration -> PIL crop/resize of the images -> FFmpeg
Ken-Burns slideshow -> FFmpeg mux of the slideshow with the narration.
"""

import math
import os
import shutil
import subprocess

import gradio as gr
from gtts import gTTS
from PIL import Image
from pydub import AudioSegment


def text_to_speech(text: str, output_filename: str = "audio.mp3", lang: str = "es") -> str:
    """Synthesize *text* with gTTS and save it as an MP3 file.

    Args:
        text: Text to narrate.
        output_filename: Destination MP3 path.
        lang: gTTS language code (defaults to Spanish, as before).

    Returns:
        The output path.

    Raises:
        Exception: wrapping any gTTS failure with context.
    """
    try:
        tts = gTTS(text=text, lang=lang)
        tts.save(output_filename)
        return output_filename
    except Exception as e:
        raise Exception(f"Error al generar el audio con gTTS: {e}")


def get_audio_duration(audio_path: str) -> float:
    """Return the duration of *audio_path* in seconds (0 if missing or empty)."""
    if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
        return 0
    try:
        audio = AudioSegment.from_file(audio_path)
        return audio.duration_seconds
    except Exception as e:
        raise Exception(f"Error al obtener la duración del audio: {e}")


def process_image(img_path, target_width, target_height, output_folder, index):
    """Center-crop an image to the target aspect ratio, resize, and save as PNG.

    Returns the saved path, or None on any failure — best-effort by design:
    the caller filters out failed images instead of aborting the batch.
    """
    try:
        img = Image.open(img_path).convert("RGB")
        original_width, original_height = img.size
        target_ratio = target_width / target_height
        image_ratio = original_width / original_height
        if image_ratio > target_ratio:
            # Image too wide for the target ratio: crop the sides.
            new_width = int(original_height * target_ratio)
            left = (original_width - new_width) / 2
            img = img.crop((left, 0, left + new_width, original_height))
        elif image_ratio < target_ratio:
            # Image too tall: crop top and bottom.
            new_height = int(original_width / target_ratio)
            top = (original_height - new_height) / 2
            img = img.crop((0, top, original_width, top + new_height))
        img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
        output_path = os.path.join(output_folder, f"processed_image_{index:03d}.png")
        img.save(output_path)
        return output_path
    except Exception:
        return None


def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
    """Build a silent slideshow video with a Ken-Burns (slow zoom + fade) effect.

    The image list is looped as many times as needed to cover
    *audio_duration*; FFmpeg's ``-t`` then trims the result to that length.

    Raises:
        ValueError: if *processed_images* is empty.
        Exception: wrapping FFmpeg's stderr on failure.
    """
    if not processed_images:
        raise ValueError("No hay imágenes procesadas para crear el video.")

    IMAGE_DURATION = 3  # seconds each image stays on screen
    num_images = len(processed_images)
    width, height = video_size
    slideshow_len = num_images * IMAGE_DURATION
    num_loops = math.ceil(audio_duration / slideshow_len) if slideshow_len > 0 else 1

    total_clips = num_images * num_loops
    input_commands = []
    for img_path in processed_images * num_loops:
        input_commands.extend(["-i", img_path])

    filter_complex_chains = []
    video_clips = []
    for i in range(total_clips):
        # Pre-scale above the target size so zoompan has room to move, then
        # zoom in slowly and fade each clip in/out over one second.
        # int(): the original interpolated floats ("864.0") into the scale
        # filter; FFmpeg expects whole pixel dimensions.
        zoom = 1.2
        filter_complex_chains.append(
            f"[{i}:v]scale={int(width * zoom)}:{int(height * zoom)},"
            f"zoompan=z='min(zoom+0.0015,1.5)':d={fps * IMAGE_DURATION}"
            f":x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height},"
            f"fade=t=in:st=0:d=1,fade=t=out:st={IMAGE_DURATION - 1}:d=1[v{i}]"
        )
        video_clips.append(f"[v{i}]")

    concat_filter = f"{''.join(video_clips)}concat=n={total_clips}:v=1:a=0,format=yuv420p[v]"
    filter_complex = ";".join(filter_complex_chains) + ";" + concat_filter

    command = ["ffmpeg", "-y"]
    command.extend(input_commands)
    command.extend([
        "-filter_complex", filter_complex,
        "-map", "[v]",
        "-t", str(audio_duration),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        output_filename,
    ])
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"Error al crear video con efecto Ken Burns: {e.stderr}")
    return output_filename


def combine_video_and_audio(video_path, audio_path, output_path):
    """Mux *video_path* and *audio_path* into *output_path*.

    Video is stream-copied; audio is re-encoded to AAC. ``-shortest`` stops
    at the shorter stream so the video never outlasts the narration.
    """
    command = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-i", audio_path,
        "-c:v", "copy",
        "-c:a", "aac",
        "-map", "0:v:0",
        "-map", "1:a:0",
        "-shortest",
        output_path,
    ]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"Error al combinar video y audio: {e.stderr}")


def generate_tts_only(news_text_input):
    """Gradio callback: generate a standalone audio preview from the news text."""
    if not news_text_input:
        return "Por favor, escribe una noticia para generar el audio.", None
    try:
        audio_file = text_to_speech(news_text_input, "audio_temp_preview.mp3")
        return "Audio generado con éxito.", audio_file
    except Exception as e:
        return f"Ocurrió un error al generar solo el audio: {e}", None


def create_news_video_app(news_text_input, image_files, video_ratio, input_audio_file):
    """Gradio callback: build the full narrated news video.

    Reuses *input_audio_file* when it is a valid on-disk path (the audio
    preview); otherwise synthesizes the narration from the text. All
    temporary artifacts are removed in the ``finally`` block.

    Returns:
        (status message, video path or None).
    """
    processed_image_folder = "temp_processed_images"
    final_output_video_path = "video_noticia_final.mp4"
    temp_video_no_audio_path = "video_sin_audio.mp4"
    temp_audio_file = "audio_para_video.mp3"

    if os.path.exists(processed_image_folder):
        shutil.rmtree(processed_image_folder)
    os.makedirs(processed_image_folder)

    try:
        if not image_files:
            raise ValueError("Por favor, sube al menos una imagen.")

        if (
            isinstance(input_audio_file, str)
            and os.path.exists(input_audio_file)
            and os.path.getsize(input_audio_file) > 0
        ):
            shutil.copy(input_audio_file, temp_audio_file)
        else:
            if not news_text_input:
                raise ValueError("Escribe una noticia para generar el audio, ya que no se proporcionó una vista previa válida.")
            text_to_speech(news_text_input, temp_audio_file)

        audio_duration = get_audio_duration(temp_audio_file)
        if audio_duration == 0:
            raise ValueError("La duración del audio es cero.")

        target_width, target_height = (720, 1280) if video_ratio == "9:16" else (1280, 720)

        # gr.File(type="filepath") yields plain path strings on newer Gradio
        # versions and tempfile wrappers (with .name) on older ones — the
        # original's unconditional `f.name` breaks on strings; accept both.
        processed_images_paths = [
            process_image(
                f if isinstance(f, str) else f.name,
                target_width, target_height, processed_image_folder, i,
            )
            for i, f in enumerate(image_files)
        ]
        processed_images_paths = [p for p in processed_images_paths if p]
        if not processed_images_paths:
            raise ValueError("No se pudieron procesar las imágenes.")

        create_video_with_ken_burns(
            processed_images_paths, audio_duration, 30,
            (target_width, target_height), temp_video_no_audio_path,
        )
        combine_video_and_audio(temp_video_no_audio_path, temp_audio_file, final_output_video_path)
        return "Video generado con éxito.", final_output_video_path
    except Exception as e:
        return f"Ocurrió un error: {e}", None
    finally:
        # Best-effort cleanup of every intermediate artifact.
        if os.path.exists(processed_image_folder):
            shutil.rmtree(processed_image_folder)
        if os.path.exists(temp_video_no_audio_path):
            os.remove(temp_video_no_audio_path)
        if os.path.exists(temp_audio_file):
            os.remove(temp_audio_file)
        if os.path.exists("audio_temp_preview.mp3"):
            os.remove("audio_temp_preview.mp3")


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Header emoji restored: the source contained a mojibake replacement char.
    gr.Markdown("# 📰 Creador de Videos de Noticias")
    with gr.Row():
        with gr.Column(scale=2):
            news_input = gr.Textbox(label="1. Escribe tu noticia aquí", lines=5)
            image_upload = gr.File(
                label="2. Sube tus imágenes",
                file_count="multiple",
                type="filepath",
                file_types=[".jpg", ".jpeg", ".png"],
            )
            video_ratio_dropdown = gr.Dropdown(
                label="3. Elige el Formato del Video",
                choices=["16:9", "9:16"],
                value="9:16",
                interactive=True,
            )
            with gr.Accordion("Opciones de Audio (Opcional)", open=False):
                generate_audio_button = gr.Button("Generar Solo Audio (Vista Previa)")
                audio_status_message = gr.Textbox(label="Estado del Audio", interactive=False)
                audio_output_preview = gr.Audio(label="Audio de Noticia (Vista Previa)", interactive=False)
            generate_video_button = gr.Button("🎬 Generar Video Completo", variant="primary")
        with gr.Column(scale=3):
            output_message = gr.Textbox(label="Estado del Proceso", interactive=False)
            video_output = gr.Video(label="Video de la Noticia Generado")

    generate_audio_button.click(
        fn=generate_tts_only,
        inputs=[news_input],
        outputs=[audio_status_message, audio_output_preview],
    )
    # NOTE(review): audio_output_preview is passed as an input; when the
    # preview was never generated the handler falls back to fresh TTS.
    generate_video_button.click(
        fn=create_news_video_app,
        inputs=[news_input, image_upload, video_ratio_dropdown, audio_output_preview],
        outputs=[output_message, video_output],
    )

if __name__ == "__main__":
    demo.launch()