maldons77's picture
Update app.py
eba2043 verified
import io
import logging
import math
import os
import tempfile

import gradio as gr
from PIL import Image, ImageDraw, ImageFont
import torch
from transformers import pipeline
from diffusers import StableDiffusionPipeline
from spaces import GPU  # ZeroGPU support
from fpdf import FPDF  # make sure requirements.txt includes: fpdf==1.7.2
# Avoid tokenizers parallelism warning after fork.
# An explicit value already present in the environment is left untouched.
if "TOKENIZERS_PARALLELISM" not in os.environ:
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

# ------------------ Globals (CPU-safe) ------------------
# Lazily created pipeline singletons; both start empty and are filled in by
# the loader functions on first use.
_txtgen = None  # text generator stays on CPU
_t2i_cpu = None  # CPU fallback pipeline

# Maps the style name offered in the UI to the prompt fragment appended to
# every panel prompt for that style.
STYLE_PRESETS = {
    "Realistic": "realistic photography, finely detailed, natural lighting, 35mm",
    "Anime": "anime, vibrant colors, cel shading, clean lineart",
    "Comic": "comic book style, halftone, bold lines, dramatic shading",
    "Watercolor": "watercolor painting, soft edges, gentle colors, textured paper",
    "Sketch": "pencil sketch, cross-hatching, grayscale, paper texture",
}

# Negative prompt handed to every image-generation call.
NEGATIVE = "nsfw, nudity, gore, deformed, extra limbs, low quality, blurry, worst quality, lowres, text artifacts, watermark, logo"
# ------------------ Loaders ------------------
def get_txtgen_cpu():
    """Return the shared text-generation pipeline, building it on first use.

    The pipeline wraps ``distilgpt2`` and is created with ``device=-1`` so it
    stays on the CPU (ZeroGPU-safe). The instance is cached in the
    module-level ``_txtgen`` and reused by later calls.
    """
    global _txtgen
    if _txtgen is not None:
        return _txtgen
    _txtgen = pipeline("text-generation", model="distilgpt2", device=-1)
    return _txtgen
def get_t2i_cpu():
    """Return the shared CPU Stable Diffusion pipeline (fallback path).

    On first use the ``stabilityai/sd-turbo`` weights are loaded in float32
    with the safety checker disabled, and attention slicing is switched on.
    The instance is cached in the module-level ``_t2i_cpu``.
    """
    global _t2i_cpu
    if _t2i_cpu is not None:
        return _t2i_cpu
    _t2i_cpu = StableDiffusionPipeline.from_pretrained(
        "stabilityai/sd-turbo", torch_dtype=torch.float32, safety_checker=None
    )
    _t2i_cpu.enable_attention_slicing()
    return _t2i_cpu
# ------------------ GPU path (ZeroGPU) ------------------
@GPU(duration=120)
def t2i_generate_batch_gpu(prompts, width, height, steps, guidance, negative_prompt, seed=None):
    """Generate one image per prompt on CUDA (runs inside a ZeroGPU allocation).

    Args:
        prompts: Iterable of prompt strings; one image is produced for each.
        width: Output width in pixels, passed through to the pipeline.
        height: Output height in pixels, passed through to the pipeline.
        steps: Number of inference steps per image.
        guidance: Guidance scale passed to the pipeline.
        negative_prompt: Negative prompt applied to every image.
        seed: Optional seed (int or numeric string). Anything that is not a
            plain non-negative integer is treated as "no seed".

    Returns:
        A list with the first generated image for each prompt, in prompt order.
    """
    # NOTE: the pipeline is rebuilt on every call; nothing is cached here.
    pipe = StableDiffusionPipeline.from_pretrained(
        "stabilityai/sd-turbo",
        torch_dtype=torch.float16,
        safety_checker=None,
    ).to("cuda")

    # A freshly constructed torch.Generator starts from a fixed default seed,
    # so handing an unseeded one to the pipeline would reproduce the same
    # images on every "random" run. Only build a generator for a real seed.
    seed_text = "" if seed is None else str(seed).strip()
    generator = None
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
    if seed_text.isdecimal():
        # Reduce modulo 2**64 so very large inputs cannot overflow manual_seed.
        generator = torch.Generator(device="cuda").manual_seed(int(seed_text) % 2**64)

    # One generator is shared across the batch, so panels differ from each
    # other while the batch as a whole stays reproducible for a given seed.
    return [
        pipe(
            prompt=p,
            negative_prompt=negative_prompt,
            num_inference_steps=steps,
            guidance_scale=guidance,
            width=width,
            height=height,
            generator=generator,
        ).images[0]
        for p in prompts
    ]
# ------------------ Helpers ------------------
def build_prompt(user_prompt: str, style: str, panel_idx: int, num_panels: int) -> str:
    """Compose the text-to-image prompt for a single storyboard panel.

    The result joins the user's prompt, the preset fragment for ``style``
    (empty when the style is unknown), a "panel X of N" marker, a narrative
    beat chosen by ``panel_idx`` and fixed composition hints.
    """
    style_desc = STYLE_PRESETS.get(style, "")
    beats = ("opening shot", "rising action", "key moment", "twist", "resolution")
    # Panels beyond the last named beat all reuse the final one.
    if panel_idx < len(beats):
        beat_text = beats[panel_idx]
    else:
        beat_text = beats[-1]
    return f"{user_prompt}, {style_desc}, storyboard panel {panel_idx+1} of {num_panels}, {beat_text}, cinematic composition, wide shot"
def generate_captions(user_prompt: str, n: int = 3):
    """Generate ``n`` short captions for the storyboard panels.

    Args:
        user_prompt: The story prompt the captions should relate to.
        n: Number of captions to produce. Values <= 0 yield an empty list
            without loading the text model.

    Returns:
        A list of ``n`` single-line strings, each at most 80 characters.
        When the model produces fewer than two words the caption falls back
        to ``"Scene <k>"``.
    """
    count = max(0, int(n))
    if count == 0:
        return []

    gen = get_txtgen_cpu()
    # Simple, fast prompt; keep it short.
    instruction = f"Write a very short scene caption (<=10 words) about: {user_prompt}"

    captions = []
    for i in range(count):
        result = gen(
            instruction,
            max_new_tokens=30,
            do_sample=True,
            temperature=0.9,
            top_p=0.95,
            num_return_sequences=1,
            # By default the pipeline returns prompt + continuation, which
            # would make the caption consist mostly of the instruction text.
            return_full_text=False,
        )
        text = result[0]["generated_text"]
        # Defensive: drop the prompt if it was echoed back anyway.
        if text.startswith(instruction):
            text = text[len(instruction):]
        # Collapse newlines/runs of whitespace so the caption is one line.
        text = " ".join(text.split())
        # Fallback if something weird comes out.
        if len(text.split()) < 2:
            text = f"Scene {i+1}"
        captions.append(text[:80])
    return captions
def add_caption_strip(img: Image.Image, text: str, width_hint: int) -> Image.Image:
    """Return a new image with a black caption strip (white text) below ``img``.

    Text is measured with ``ImageDraw.textbbox`` (the older ``textsize`` API
    was removed in Pillow 10). The caption is word-wrapped to the image width
    so long captions are not clipped at the sides, and the strip grows to fit
    the wrapped lines.

    Args:
        img: Source panel; it is not modified.
        text: Caption to render. Runs of whitespace are collapsed.
        width_hint: Nominal panel width in pixels, used only to pick the
            font size (``max(16, width_hint // 28)``).

    Returns:
        An RGB image of size ``(img.width, img.height + strip_height)``.
    """
    pad = 7  # vertical padding above and below the caption text

    try:
        font = ImageFont.truetype("DejaVuSans.ttf", size=max(16, int(width_hint) // 28))
    except (OSError, ImportError):
        # Font file missing or FreeType unavailable: use Pillow's built-in font.
        font = ImageFont.load_default()

    # Measure on a throwaway canvas so the input image is never touched.
    probe = ImageDraw.Draw(Image.new("RGB", (1, 1)))

    def line_width(line):
        left, _, right, _ = probe.textbbox((0, 0), line, font=font)
        return right - left

    # Greedy word wrap. A single word wider than the image is kept on its own
    # line (it cannot be broken further here).
    max_text_w = max(1, img.width - 2 * pad)
    lines, current = [], ""
    for word in str(text).split():
        candidate = f"{current} {word}" if current else word
        if current and line_width(candidate) > max_text_w:
            lines.append(current)
            current = word
        else:
            current = candidate
    if current:
        lines.append(current)
    wrapped = "\n".join(lines)

    left, top, right, bottom = probe.textbbox((0, 0), wrapped, font=font, align="center")
    # Bounding-box values may be fractional; image sizes must be integers.
    text_w = math.ceil(right - left)
    text_h = math.ceil(bottom - top)
    strip_h = text_h + 2 * pad

    combined = Image.new("RGB", (img.width, img.height + strip_h), (0, 0, 0))
    combined.paste(img, (0, 0))

    # The bbox origin is usually offset from the drawing origin; subtracting
    # it keeps the glyphs centred instead of drifting down/right and clipping.
    x = (img.width - text_w) // 2 - math.floor(left)
    y = img.height + pad - math.floor(top)
    ImageDraw.Draw(combined).text(
        (x, y), wrapped, font=font, fill=(255, 255, 255), align="center"
    )
    return combined
def images_to_pdf_with_fpdf(images):
    """Write the images to a multipage PDF (one image per page) using FPDF.

    Each image is scaled to fit inside the page margins on both axes, so tall
    panels no longer run off the bottom of the page. Intermediate PNG files
    live in a temporary directory that is removed before returning.

    Args:
        images: Sequence of PIL images. Empty or ``None`` yields ``None``.

    Returns:
        Path of the generated PDF file, or ``None`` when there is nothing to
        write. The caller owns the returned file.
    """
    if not images:
        return None

    margin = 10  # page margin in FPDF user units (mm by default)
    pdf = FPDF()
    max_w = pdf.w - 2 * margin
    max_h = pdf.h - 2 * margin

    with tempfile.TemporaryDirectory() as workdir:
        for idx, img in enumerate(images):
            # FPDF embeds images from files, so stage each panel as a PNG.
            png_path = os.path.join(workdir, f"panel_{idx}.png")
            img.save(png_path)
            # Largest uniform scale that fits within both margins.
            scale = min(max_w / img.width, max_h / img.height)
            pdf.add_page()
            pdf.image(
                png_path,
                x=margin,
                y=margin,
                w=img.width * scale,
                h=img.height * scale,
            )

        # mkstemp creates the file atomically; tempfile.mktemp is deprecated
        # because the name can be claimed by another process before use.
        fd, pdf_path = tempfile.mkstemp(suffix=".pdf")
        os.close(fd)
        # Write while the staged PNG files still exist.
        pdf.output(pdf_path)
    return pdf_path
# ------------------ Core logic ------------------
def create_storyboard(user_prompt, style, num_panels, width, height, seed):
    """Build a captioned storyboard and its PDF from a single prompt.

    Args:
        user_prompt: Story prompt; blank or ``None`` short-circuits.
        style: Style name used when building the panel prompts.
        num_panels: Number of panels to generate.
        width: Panel width in pixels.
        height: Panel height in pixels.
        seed: Optional seed (int or numeric string); anything else means
            "no seed".

    Returns:
        ``(final_images, pdf_path)`` where ``final_images`` is the list of
        captioned panels and ``pdf_path`` the generated PDF, or ``([], None)``
        when the prompt is empty.
    """
    if not user_prompt or not user_prompt.strip():
        return [], None

    # Slider values may arrive as floats; range() and the pipelines need ints.
    num_panels, width, height = int(num_panels), int(width), int(height)

    # Build prompts + captions
    captions = generate_captions(user_prompt, n=num_panels)
    prompts = [build_prompt(user_prompt, style, i, num_panels) for i in range(num_panels)]

    # Try GPU (ZeroGPU). If it fails (no GPU), fall back to CPU.
    try:
        images = t2i_generate_batch_gpu(
            prompts, width, height, steps=2, guidance=0.0,
            negative_prompt=NEGATIVE, seed=seed,
        )
    except Exception:
        # Deliberate best-effort fallback, but record why the GPU path failed
        # instead of discarding the error.
        logging.getLogger(__name__).warning(
            "GPU generation failed; falling back to CPU", exc_info=True
        )

        # Honour the user's seed on the CPU path as well.
        seed_text = "" if seed is None else str(seed).strip()
        generator = None
        if seed_text.isdecimal():
            # Reduce modulo 2**64 so huge inputs cannot overflow manual_seed.
            generator = torch.Generator(device="cpu").manual_seed(int(seed_text) % 2**64)

        pipe = get_t2i_cpu()
        images = [
            pipe(
                prompt=p,
                negative_prompt=NEGATIVE,
                num_inference_steps=4,
                guidance_scale=0.0,
                width=width,
                height=height,
                generator=generator,
            ).images[0]
            for p in prompts
        ]

    # Add caption strips
    final_images = [
        add_caption_strip(img, cap, width_hint=width)
        for img, cap in zip(images, captions)
    ]

    # Build PDF
    pdf_path = images_to_pdf_with_fpdf(final_images)
    return final_images, pdf_path
# ------------------ UI ------------------
# Gradio front end: inputs in the left column, results in the right column.
with gr.Blocks(title="AI Storyboard Creator") as demo:
    gr.Markdown(
        """
# 🎬 AI Storyboard Creator
Turn a single prompt into a mini storyboard: 3–6 panels, captions, and a downloadable PDF.
Works on **CPU basic** and supports **ZeroGPU** (GPU on-demand).
"""
    )

    with gr.Row():
        # Left column: everything the user can configure.
        with gr.Column():
            prompt = gr.Textbox(
                lines=2,
                label="Story prompt",
                placeholder="A cyberpunk detective in the rain",
            )
            style = gr.Dropdown(
                label="Style",
                choices=list(STYLE_PRESETS),
                value="Comic",
            )
            num_panels = gr.Slider(
                minimum=3, maximum=6, value=3, step=1, label="Number of panels"
            )
            width = gr.Slider(
                minimum=384, maximum=768, value=448, step=64, label="Panel width (px)"
            )
            height = gr.Slider(
                minimum=384, maximum=768, value=448, step=64, label="Panel height (px)"
            )
            seed = gr.Textbox(label="Seed (optional)", placeholder="e.g., 42")
            run_btn = gr.Button(value="Create Storyboard")

        # Right column: generated panels and the downloadable PDF.
        with gr.Column():
            # NOTE: no .style(); use columns=2 instead
            gallery = gr.Gallery(label="Preview (grid)", columns=2, height="auto")
            pdf_file = gr.File(label="Download PDF")

    # Input order must match the parameter order of create_storyboard.
    run_btn.click(
        fn=create_storyboard,
        inputs=[prompt, style, num_panels, width, height, seed],
        outputs=[gallery, pdf_file],
    )

if __name__ == "__main__":
    demo.launch()