# Hugging Face Space app: creates Spanish news videos (gTTS narration + ffmpeg slideshow + Gradio UI)
import math
import os
import shutil
import subprocess
import sys

import gradio as gr
from gtts import gTTS
from PIL import Image
from pydub import AudioSegment
def text_to_speech(text: str, output_filename: str = "audio.mp3") -> str:
    """Synthesize Spanish speech for *text* and save it as an MP3.

    Args:
        text: Text to narrate.
        output_filename: Destination path of the generated MP3.

    Returns:
        The path of the saved audio file.

    Raises:
        Exception: If gTTS fails (e.g. no network connectivity).
    """
    try:
        tts = gTTS(text=text, lang='es')
        tts.save(output_filename)
        return output_filename
    except Exception as e:
        # Chain the original exception so the root cause is not lost.
        raise Exception(f"Error al generar el audio con gTTS: {e}") from e
def get_audio_duration(audio_path):
    """Return the duration of *audio_path* in seconds.

    A missing or zero-byte file yields 0; decoding failures are re-raised
    with a descriptive message.
    """
    file_is_missing = not os.path.exists(audio_path)
    if file_is_missing or os.path.getsize(audio_path) == 0:
        return 0
    try:
        return AudioSegment.from_file(audio_path).duration_seconds
    except Exception as e:
        raise Exception(f"Error al obtener la duración del audio: {e}")
def process_image(img_path, target_width, target_height, output_folder, index):
    """Center-crop and resize one image to the target dimensions.

    The image is cropped (centered) to match the target aspect ratio, then
    resized to exactly (target_width, target_height) and saved as PNG.

    Args:
        img_path: Path of the source image.
        target_width: Output width in pixels.
        target_height: Output height in pixels.
        output_folder: Directory where the processed PNG is written.
        index: Sequence number used in the output file name.

    Returns:
        The processed PNG path, or None on failure (callers treat this as
        best-effort and simply skip the image).
    """
    try:
        img = Image.open(img_path).convert("RGB")
        original_width, original_height = img.size
        target_ratio = target_width / target_height
        image_ratio = original_width / original_height
        if image_ratio > target_ratio:
            # Too wide: trim the sides, keeping the horizontal center.
            new_width = int(original_height * target_ratio)
            left = (original_width - new_width) // 2
            img = img.crop((left, 0, left + new_width, original_height))
        elif image_ratio < target_ratio:
            # Too tall: trim top/bottom, keeping the vertical center.
            new_height = int(original_width / target_ratio)
            top = (original_height - new_height) // 2
            img = img.crop((0, top, original_width, top + new_height))
        img = img.resize((target_width, target_height), Image.Resampling.LANCZOS)
        output_path = os.path.join(output_folder, f"processed_image_{index:03d}.png")
        img.save(output_path)
        return output_path
    except Exception as e:
        # Best-effort: report the failure instead of swallowing it silently,
        # but still return None so the caller can skip this image.
        print(f"process_image failed for {img_path}: {e}", file=sys.stderr)
        return None
def create_video_with_ken_burns(processed_images, audio_duration, fps, video_size, output_filename):
    """Build a silent slideshow video with a Ken Burns (slow zoom) effect.

    Each image is shown for IMAGE_DURATION seconds with a gradual zoom plus
    fade-in/fade-out; the sequence loops until it covers the audio length,
    and ffmpeg's ``-t`` trims the result to exactly *audio_duration*.

    Args:
        processed_images: Paths of pre-cropped images (all the same size).
        audio_duration: Target video length in seconds.
        fps: Frames per second used by the zoompan effect.
        video_size: (width, height) of the output video.
        output_filename: Path of the resulting H.264/yuv420p mp4.

    Raises:
        ValueError: If no images were supplied.
        Exception: If ffmpeg fails (its stderr is included in the message).
    """
    if not processed_images:
        raise ValueError("No hay imágenes procesadas para crear el video.")
    IMAGE_DURATION = 3  # seconds each image stays on screen
    num_images = len(processed_images)
    width, height = video_size
    cycle_seconds = num_images * IMAGE_DURATION
    # At least one pass through the images, even for very short audio
    # (the original expression could yield 0 loops and an empty concat).
    num_loops = max(1, math.ceil(audio_duration / cycle_seconds)) if cycle_seconds > 0 else 1
    total_clips = num_images * num_loops
    input_args = []
    for img_path in processed_images * num_loops:
        input_args.extend(["-i", img_path])
    # Oversize each frame before zoompan so zooming never reveals borders.
    # The scale filter needs integer dimensions (width*zoom is a float).
    zoom = 1.2
    scaled_w, scaled_h = int(width * zoom), int(height * zoom)
    filter_chains = []
    clip_labels = []
    for i in range(total_clips):
        filter_chains.append(
            f"[{i}:v]scale={scaled_w}:{scaled_h},"
            f"zoompan=z='min(zoom+0.0015,1.5)':d={fps*IMAGE_DURATION}:"
            f"x='iw/2-(iw/zoom/2)':y='ih/2-(ih/zoom/2)':s={width}x{height},"
            f"fade=t=in:st=0:d=1,fade=t=out:st={IMAGE_DURATION-1}:d=1[v{i}]"
        )
        clip_labels.append(f"[v{i}]")
    concat_filter = f"{''.join(clip_labels)}concat=n={total_clips}:v=1:a=0,format=yuv420p[v]"
    filter_complex = ";".join(filter_chains) + ";" + concat_filter
    command = ["ffmpeg", "-y"]
    command.extend(input_args)
    command.extend([
        "-filter_complex", filter_complex,
        "-map", "[v]",
        "-t", str(audio_duration),
        "-c:v", "libx264",
        "-pix_fmt", "yuv420p",
        output_filename
    ])
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"Error al crear video con efecto Ken Burns: {e.stderr}") from e
def combine_video_and_audio(video_path, audio_path, output_path):
    """Mux the silent video with the audio track into *output_path*.

    The video stream is copied untouched, the audio is encoded as AAC, and
    the result is cut to the shorter of the two inputs (-shortest).
    """
    mux_command = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-i", audio_path,
        "-c:v", "copy",
        "-c:a", "aac",
        "-map", "0:v:0",
        "-map", "1:a:0",
        "-shortest",
        output_path,
    ]
    try:
        subprocess.run(mux_command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        raise Exception(f"Error al combinar video y audio: {e.stderr}")
def generate_tts_only(news_text_input):
    """Generate a preview MP3 for the news text.

    Returns a (status_message, audio_path) pair; audio_path is None when
    the input is empty or synthesis fails.
    """
    if not news_text_input:
        return "Por favor, escribe una noticia para generar el audio.", None
    try:
        preview_path = text_to_speech(news_text_input, "audio_temp_preview.mp3")
    except Exception as e:
        return f"Ocurrió un error al generar solo el audio: {e}", None
    return "Audio generado con éxito.", preview_path
def create_news_video_app(news_text_input, image_files, video_ratio, input_audio_file):
    """Full pipeline: narration (TTS or supplied audio) + images -> final video.

    Args:
        news_text_input: News text to narrate (used when no preview audio).
        image_files: Uploaded images; gradio's File component with
            type="filepath" yields plain str paths, while older versions
            yield tempfile wrappers exposing ``.name`` — both are handled.
        video_ratio: "9:16" for vertical output, anything else gives 16:9.
        input_audio_file: Optional path of a previously generated preview.

    Returns:
        (status_message, video_path_or_None)
    """
    processed_image_folder = "temp_processed_images"
    final_output_video_path = "video_noticia_final.mp4"
    temp_video_no_audio_path = "video_sin_audio.mp4"
    temp_audio_file = "audio_para_video.mp3"
    if os.path.exists(processed_image_folder):
        shutil.rmtree(processed_image_folder)
    os.makedirs(processed_image_folder)
    try:
        if not image_files:
            raise ValueError("Por favor, sube al menos una imagen.")
        # Reuse the preview audio only when it is a valid, non-empty file.
        if isinstance(input_audio_file, str) and os.path.exists(input_audio_file) and os.path.getsize(input_audio_file) > 0:
            shutil.copy(input_audio_file, temp_audio_file)
        else:
            if not news_text_input:
                raise ValueError("Escribe una noticia para generar el audio, ya que no se proporcionó una vista previa válida.")
            text_to_speech(news_text_input, temp_audio_file)
        audio_duration = get_audio_duration(temp_audio_file)
        if audio_duration == 0:
            raise ValueError("La duración del audio es cero.")
        target_width, target_height = (720, 1280) if video_ratio == "9:16" else (1280, 720)
        # Accept both str paths and file objects with .name; the original
        # f.name call raised AttributeError on the str paths that
        # gr.File(type="filepath") delivers.
        image_paths = [f if isinstance(f, str) else f.name for f in image_files]
        processed_images_paths = [
            process_image(path, target_width, target_height, processed_image_folder, i)
            for i, path in enumerate(image_paths)
        ]
        processed_images_paths = [p for p in processed_images_paths if p]
        if not processed_images_paths:
            raise ValueError("No se pudieron procesar las imágenes.")
        create_video_with_ken_burns(processed_images_paths, audio_duration, 30, (target_width, target_height), temp_video_no_audio_path)
        combine_video_and_audio(temp_video_no_audio_path, temp_audio_file, final_output_video_path)
        return "Video generado con éxito.", final_output_video_path
    except Exception as e:
        return f"Ocurrió un error: {e}", None
    finally:
        # Always remove intermediates, whether the pipeline succeeded or not.
        if os.path.exists(processed_image_folder):
            shutil.rmtree(processed_image_folder)
        if os.path.exists(temp_video_no_audio_path):
            os.remove(temp_video_no_audio_path)
        if os.path.exists(temp_audio_file):
            os.remove(temp_audio_file)
        if os.path.exists("audio_temp_preview.mp3"):
            os.remove("audio_temp_preview.mp3")
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # NOTE(review): the original title contained a mojibake character (�);
    # restored with a newspaper emoji matching the app's purpose.
    gr.Markdown("# 📰 Creador de Videos de Noticias")
    with gr.Row():
        with gr.Column(scale=2):
            # Inputs: news text, images, and the output aspect ratio.
            news_input = gr.Textbox(label="1. Escribe tu noticia aquí", lines=5)
            image_upload = gr.File(label="2. Sube tus imágenes", file_count="multiple", type="filepath", file_types=[".jpg", ".jpeg", ".png"])
            video_ratio_dropdown = gr.Dropdown(label="3. Elige el Formato del Video", choices=["16:9", "9:16"], value="9:16", interactive=True)
            with gr.Accordion("Opciones de Audio (Opcional)", open=False):
                generate_audio_button = gr.Button("Generar Solo Audio (Vista Previa)")
                audio_status_message = gr.Textbox(label="Estado del Audio", interactive=False)
                audio_output_preview = gr.Audio(label="Audio de Noticia (Vista Previa)", interactive=False)
            generate_video_button = gr.Button("🎬 Generar Video Completo", variant="primary")
        with gr.Column(scale=3):
            # Outputs: pipeline status and the rendered video.
            output_message = gr.Textbox(label="Estado del Proceso", interactive=False)
            video_output = gr.Video(label="Video de la Noticia Generado")
    generate_audio_button.click(
        fn=generate_tts_only,
        inputs=[news_input],
        outputs=[audio_status_message, audio_output_preview]
    )
    # The preview audio component doubles as an optional input for the
    # full pipeline, so a generated preview is reused instead of re-synthesized.
    generate_video_button.click(
        fn=create_news_video_app,
        inputs=[news_input, image_upload, video_ratio_dropdown, audio_output_preview],
        outputs=[output_message, video_output]
    )
demo.launch()