Spaces:

maldons77
/

ai-storyboard-creator

Running on Zero

App Files Files Community

ai-storyboard-creator / app.py

maldons77

Update app.py

eba2043 verified 26 days ago

raw

history blame contribute delete

7.9 kB

	import os, io, math, tempfile
	import gradio as gr
	from PIL import Image, ImageDraw, ImageFont
	import torch
	from transformers import pipeline
	from diffusers import StableDiffusionPipeline
	from spaces import GPU # ZeroGPU support
	from fpdf import FPDF # make sure requirements.txt includes: fpdf==1.7.2

	# Silence the tokenizers fork/parallelism warning unless the variable is already set.
	os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

	# ------------------ Globals (CPU-safe) ------------------
	# Lazily-filled pipeline caches; both start empty and are populated by the loaders.
	_txtgen = None  # text-generation pipeline (kept on CPU)
	_t2i_cpu = None  # Stable Diffusion pipeline used as the CPU fallback

	# Style label -> prompt fragment appended to every panel prompt for that style.
	STYLE_PRESETS = dict(
	    Realistic="realistic photography, finely detailed, natural lighting, 35mm",
	    Anime="anime, vibrant colors, cel shading, clean lineart",
	    Comic="comic book style, halftone, bold lines, dramatic shading",
	    Watercolor="watercolor painting, soft edges, gentle colors, textured paper",
	    Sketch="pencil sketch, cross-hatching, grayscale, paper texture",
	)

	# Negative prompt shared by the GPU and CPU image-generation paths.
	NEGATIVE = ", ".join(
	    (
	        "nsfw",
	        "nudity",
	        "gore",
	        "deformed",
	        "extra limbs",
	        "low quality",
	        "blurry",
	        "worst quality",
	        "lowres",
	        "text artifacts",
	        "watermark",
	        "logo",
	    )
	)


	# ------------------ Loaders ------------------
	def get_txtgen_cpu():
	    """Return the shared distilgpt2 text-generation pipeline, building it on first use.

	    The pipeline is created with ``device=-1`` and cached in the module-level
	    ``_txtgen`` so later calls reuse the same object.
	    """
	    global _txtgen
	    if _txtgen is not None:
	        return _txtgen
	    _txtgen = pipeline("text-generation", model="distilgpt2", device=-1)
	    return _txtgen


	def get_t2i_cpu():
	    """Return the shared float32 SD-Turbo pipeline used as the CPU fallback.

	    Built once with the safety checker disabled and attention slicing enabled,
	    then cached in the module-level ``_t2i_cpu``.
	    """
	    global _t2i_cpu
	    if _t2i_cpu is not None:
	        return _t2i_cpu
	    load_kwargs = {"torch_dtype": torch.float32, "safety_checker": None}
	    _t2i_cpu = StableDiffusionPipeline.from_pretrained("stabilityai/sd-turbo", **load_kwargs)
	    _t2i_cpu.enable_attention_slicing()
	    return _t2i_cpu


	# ------------------ GPU path (ZeroGPU) ------------------
	@GPU(duration=120)
	def t2i_generate_batch_gpu(prompts, width, height, steps, guidance, negative_prompt, seed=None):
	    """Generate one image per prompt with SD-Turbo on CUDA (runs in a ZeroGPU context).

	    Args:
	        prompts: iterable of prompt strings; one image is produced per entry.
	        width: output width in pixels, forwarded to the pipeline.
	        height: output height in pixels, forwarded to the pipeline.
	        steps: value passed as ``num_inference_steps``.
	        guidance: value passed as ``guidance_scale``.
	        negative_prompt: negative prompt applied to every image.
	        seed: optional seed (int or digit-only string). Anything else, including
	            ``None`` or an empty string, means "do not seed".

	    Returns:
	        List with the first image of each pipeline call, in prompt order.
	    """
	    pipe = StableDiffusionPipeline.from_pretrained(
	        "stabilityai/sd-turbo",
	        torch_dtype=torch.float16,
	        safety_checker=None,
	    ).to("cuda")

	    # Only build a generator when a usable seed was supplied. A freshly
	    # constructed torch.Generator starts from PyTorch's fixed default seed, so
	    # passing an unseeded one would make every "unseeded" run produce the same
	    # images; generator=None lets the pipeline draw from the global RNG instead.
	    generator = None
	    seed_text = "" if seed is None else str(seed).strip()
	    if seed_text.isdigit():
	        generator = torch.Generator(device="cuda").manual_seed(int(seed_text))

	    images = []
	    for p in prompts:
	        # The same generator is reused across prompts, so a seeded run is
	        # reproducible while the panels still differ from each other.
	        result = pipe(
	            prompt=p,
	            negative_prompt=negative_prompt,
	            num_inference_steps=steps,
	            guidance_scale=guidance,
	            width=width,
	            height=height,
	            generator=generator,
	        )
	        images.append(result.images[0])
	    return images


	# ------------------ Helpers ------------------
	def build_prompt(user_prompt: str, style: str, panel_idx: int, num_panels: int) -> str:
	    """Compose the text-to-image prompt for a single storyboard panel.

	    The result joins, in order: the user prompt, the style preset text (empty
	    when ``style`` is not a key of ``STYLE_PRESETS``), a "panel i of n" marker,
	    a narrative beat chosen by ``panel_idx``, and two fixed composition hints.
	    """
	    beats = ("opening shot", "rising action", "key moment", "twist", "resolution")
	    # Panels beyond the last named beat reuse the final one.
	    beat_text = beats[panel_idx] if panel_idx < len(beats) else beats[-1]
	    parts = [
	        user_prompt,
	        STYLE_PRESETS.get(style, ""),
	        f"storyboard panel {panel_idx+1} of {num_panels}",
	        beat_text,
	        "cinematic composition",
	        "wide shot",
	    ]
	    return ", ".join(str(part) for part in parts)


	def generate_captions(user_prompt: str, n: int = 3):
	    """Generate ``n`` short captions for ``user_prompt`` with the CPU text generator.

	    Args:
	        user_prompt: story prompt the captions should relate to.
	        n: number of captions to produce; coerced with ``int()`` so a float
	            count (e.g. from a UI slider) is accepted.

	    Returns:
	        List of ``n`` single-line strings, each at most 80 characters. When the
	        model yields fewer than two words the caption falls back to
	        ``"Scene <i>"``.
	    """
	    gen = get_txtgen_cpu()
	    instruction = f"Write a very short scene caption (<=10 words) about: {user_prompt}"
	    outputs = []
	    for i in range(int(n)):
	        result = gen(
	            instruction,
	            max_new_tokens=30,
	            do_sample=True,
	            temperature=0.9,
	            top_p=0.95,
	            num_return_sequences=1,
	            # By default the pipeline returns the prompt followed by the
	            # continuation, which made every caption start with the instruction
	            # text itself. Ask for the newly generated part only.
	            return_full_text=False,
	        )
	        # Collapse newlines and repeated whitespace into a single-line caption.
	        text = " ".join(result[0]["generated_text"].split())
	        # Fallback if the model produced nothing usable.
	        if len(text.split()) < 2:
	            text = f"Scene {i+1}"
	        outputs.append(text[:80])
	    return outputs


	def add_caption_strip(img: Image.Image, text: str, width_hint: int) -> Image.Image:
	    """Return a new image made of ``img`` with a black caption strip appended below.

	    The caption is drawn in white, horizontally centred, and word-wrapped so
	    that it stays inside the image width instead of being clipped at the edges.
	    Uses ``ImageDraw.textbbox`` (Pillow>=10 per the original note).

	    Args:
	        img: source panel; it is not modified.
	        text: caption text; whitespace runs are treated as word separators.
	        width_hint: width used to scale the font (``max(16, width_hint // 28)``).

	    Returns:
	        RGB image of size ``(img.width, img.height + strip_height)``.
	    """
	    try:
	        font = ImageFont.truetype("DejaVuSans.ttf", size=max(16, width_hint // 28))
	    except Exception:
	        # Font file not available: fall back to Pillow's built-in font.
	        font = ImageFont.load_default()

	    pad = 7  # vertical/horizontal padding around the text, in pixels
	    # Scratch canvas used only to measure text; nothing is drawn on it.
	    measure = ImageDraw.Draw(Image.new("RGB", (1, 1)))
	    max_text_w = max(1, img.width - 2 * pad)

	    # Greedy word wrap: start a new line when adding the next word would exceed
	    # the available width. A single word wider than the strip is left as-is.
	    lines, current = [], ""
	    for word in text.split():
	        candidate = f"{current} {word}" if current else word
	        box = measure.textbbox((0, 0), candidate, font=font)
	        if current and box[2] - box[0] > max_text_w:
	            lines.append(current)
	            current = word
	        else:
	            current = candidate
	    if current:
	        lines.append(current)
	    wrapped = "\n".join(lines)

	    bbox = measure.textbbox((0, 0), wrapped, font=font, align="center")
	    text_w = bbox[2] - bbox[0]
	    text_h = bbox[3] - bbox[1]
	    strip_h = text_h + 2 * pad

	    # The canvas is created black, so the area below the pasted panel already
	    # forms the caption strip.
	    combined = Image.new("RGB", (img.width, img.height + strip_h), (0, 0, 0))
	    combined.paste(img, (0, 0))
	    # Subtract the bbox origin so the inked area (not the text origin) is
	    # centred horizontally and padded evenly top and bottom.
	    ImageDraw.Draw(combined).text(
	        ((img.width - text_w) // 2 - bbox[0], img.height + pad - bbox[1]),
	        wrapped,
	        font=font,
	        fill=(255, 255, 255),
	        align="center",
	    )
	    return combined


	def images_to_pdf_with_fpdf(images):
	    """Write ``images`` to a multi-page PDF (one image per page) using FPDF.

	    Args:
	        images: sequence of image objects exposing ``save(path)``, ``width`` and
	            ``height`` (PIL images in this app).

	    Returns:
	        Path of the generated PDF, or ``None`` when ``images`` is empty/None.
	        The caller owns the returned file; the intermediate PNGs are removed.
	    """
	    if not images:
	        return None

	    # Drawable area in mm, assuming FPDF()'s default A4 portrait page
	    # (210 x 297) with 10 mm margins on every side.
	    margin, max_w, max_h = 10, 190, 277

	    # mkstemp creates the file atomically; mktemp only returns a name, which
	    # is race-prone and deprecated.
	    fd, pdf_path = tempfile.mkstemp(suffix=".pdf")
	    os.close(fd)

	    pdf = FPDF()
	    png_paths = []
	    succeeded = False
	    try:
	        for img in images:
	            # FPDF inserts images from disk, so stage each one as a temp PNG.
	            fd, png_path = tempfile.mkstemp(suffix=".png")
	            os.close(fd)
	            png_paths.append(png_path)
	            img.save(png_path)
	            pdf.add_page()
	            # Full width when the image fits; tall images are scaled down (and
	            # centred) so they do not run off the bottom of the page.
	            w = min(max_w, max_h * img.width / img.height)
	            pdf.image(png_path, x=margin + (max_w - w) / 2, y=margin, w=w)
	        pdf.output(pdf_path)
	        succeeded = True
	    finally:
	        # Always drop the staged PNGs; drop the PDF too if it was not completed.
	        stale = png_paths if succeeded else png_paths + [pdf_path]
	        for path in stale:
	            try:
	                os.remove(path)
	            except OSError:
	                pass
	    return pdf_path


	# ------------------ Core logic ------------------
	def create_storyboard(user_prompt, style, num_panels, width, height, seed):
	    """Build a captioned storyboard and a PDF of it from a single prompt.

	    Args:
	        user_prompt: story prompt; blank or empty input short-circuits.
	        style: key of ``STYLE_PRESETS`` selecting the visual style.
	        num_panels: number of panels to generate.
	        width: panel width in pixels.
	        height: panel height in pixels.
	        seed: optional seed (int or digit-only string); used on both the GPU
	            and the CPU path.

	    Returns:
	        ``(final_images, pdf_path)``; ``([], None)`` when the prompt is blank.
	    """
	    if not user_prompt or not user_prompt.strip():
	        return [], None

	    import logging  # local import: the module header does not import logging

	    # Coerce in case the UI delivers floats; range() and the image pipeline
	    # need integers.
	    num_panels, width, height = int(num_panels), int(width), int(height)

	    # Build prompts + captions
	    captions = generate_captions(user_prompt, n=num_panels)
	    prompts = [build_prompt(user_prompt, style, i, num_panels) for i in range(num_panels)]

	    # Try GPU (ZeroGPU). If it fails (no GPU), fall back to CPU.
	    try:
	        images = t2i_generate_batch_gpu(
	            prompts,
	            width,
	            height,
	            steps=2,
	            guidance=0.0,
	            negative_prompt=NEGATIVE,
	            seed=seed,
	        )
	    except Exception:
	        # Keep the best-effort fallback, but record why the GPU path failed so
	        # real errors are not silently hidden behind the slower CPU route.
	        logging.getLogger(__name__).exception("GPU generation failed; falling back to CPU")
	        pipe = get_t2i_cpu()
	        # Honour the seed on the CPU path as well; None leaves it unseeded.
	        seed_text = "" if seed is None else str(seed).strip()
	        generator = None
	        if seed_text.isdigit():
	            generator = torch.Generator("cpu").manual_seed(int(seed_text))
	        images = []
	        for p in prompts:
	            img = pipe(
	                prompt=p,
	                negative_prompt=NEGATIVE,
	                num_inference_steps=4,
	                guidance_scale=0.0,
	                width=width,
	                height=height,
	                generator=generator,
	            ).images[0]
	            images.append(img)

	    # Add caption strips
	    final_images = [
	        add_caption_strip(img, cap, width_hint=width) for img, cap in zip(images, captions)
	    ]
	    # Build PDF
	    pdf_path = images_to_pdf_with_fpdf(final_images)
	    return final_images, pdf_path


	# ------------------ UI ------------------
	# Gradio Blocks app: input controls, a gallery preview and a PDF download,
	# wired to create_storyboard by the button's click handler.
	# NOTE(review): the nesting of the `with` blocks below cannot be confirmed from
	# this copy because the original indentation was lost — verify against the repo.
	with gr.Blocks(title="AI Storyboard Creator") as demo:
	gr.Markdown(
	"""
	# 🎬 AI Storyboard Creator
	Turn a single prompt into a mini storyboard: 3–6 panels, captions, and a downloadable PDF.
	Works on CPU basic and supports ZeroGPU (GPU on-demand).
	"""
	)
	with gr.Row():
	with gr.Column():
	# Input controls; their values are passed, in the order listed in `inputs`
	# below, to create_storyboard(user_prompt, style, num_panels, width, height, seed).
	prompt = gr.Textbox(label="Story prompt", placeholder="A cyberpunk detective in the rain", lines=2)
	# Choices come from STYLE_PRESETS so the dropdown and prompt builder stay in sync.
	style = gr.Dropdown(choices=list(STYLE_PRESETS.keys()), value="Comic", label="Style")
	num_panels = gr.Slider(3, 6, value=3, step=1, label="Number of panels")
	width = gr.Slider(384, 768, value=448, step=64, label="Panel width (px)")
	height = gr.Slider(384, 768, value=448, step=64, label="Panel height (px)")
	# Seed is a free-text field, so create_storyboard receives it as a string.
	seed = gr.Textbox(label="Seed (optional)", placeholder="e.g., 42")
	run_btn = gr.Button("Create Storyboard")
	with gr.Column():
	# NOTE: no .style(); use columns=2 instead
	gallery = gr.Gallery(label="Preview (grid)", columns=2, height="auto")
	pdf_file = gr.File(label="Download PDF")

	# create_storyboard returns (images, pdf_path), mapped onto the gallery and file outputs.
	run_btn.click(
	create_storyboard,
	inputs=[prompt, style, num_panels, width, height, seed],
	outputs=[gallery, pdf_file],
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch()