import os, tempfile
import gradio as gr
from PIL import Image, ImageDraw, ImageFont
import torch
from transformers import pipeline
from diffusers import StableDiffusionPipeline
from spaces import GPU  # ZeroGPU support
from fpdf import FPDF   # make sure requirements.txt includes: fpdf==1.7.2
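# Implied requirements.txt entries (everything imported above; pins other than fpdf left open):
#   gradio, torch, transformers, diffusers, Pillow, spaces, fpdf==1.7.2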

# Avoid tokenizers parallelism warning after fork
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

# ------------------ Globals (CPU-safe) ------------------
_txtgen = None           # text generator stays on CPU
_t2i_cpu = None          # CPU fallback pipeline

STYLE_PRESETS = {
    "Realistic": "realistic photography, finely detailed, natural lighting, 35mm",
    "Anime": "anime, vibrant colors, cel shading, clean lineart",
    "Comic": "comic book style, halftone, bold lines, dramatic shading",
    "Watercolor": "watercolor painting, soft edges, gentle colors, textured paper",
    "Sketch": "pencil sketch, cross-hatching, grayscale, paper texture",
}
NEGATIVE = "nsfw, nudity, gore, deformed, extra limbs, low quality, blurry, worst quality, lowres, text artifacts, watermark, logo"


# ------------------ Loaders ------------------
def get_txtgen_cpu():
    """Load text generator on CPU (ZeroGPU-safe)."""
    global _txtgen
    if _txtgen is None:
        _txtgen = pipeline("text-generation", model="distilgpt2", device=-1)
    return _txtgen


def get_t2i_cpu():
    """CPU Stable Diffusion pipeline (fallback)."""
    global _t2i_cpu
    if _t2i_cpu is None:
        _t2i_cpu = StableDiffusionPipeline.from_pretrained(
            "stabilityai/sd-turbo",
            torch_dtype=torch.float32,
            safety_checker=None,
        )
        _t2i_cpu.enable_attention_slicing()
    return _t2i_cpu


# ------------------ GPU path (ZeroGPU) ------------------
@GPU(duration=120)
def t2i_generate_batch_gpu(prompts, width, height, steps, guidance, negative_prompt, seed=None):
    """Runs inside a GPU-allocated context (ZeroGPU)."""
    pipe = StableDiffusionPipeline.from_pretrained(
        "stabilityai/sd-turbo",
        torch_dtype=torch.float16,
        safety_checker=None,
    ).to("cuda")

    generator = torch.Generator(device="cuda")
    if seed is not None and str(seed).strip().isdigit():
        generator = generator.manual_seed(int(seed))

    images = []
    for p in prompts:
        img = pipe(
            prompt=p,
            negative_prompt=negative_prompt,
            num_inference_steps=steps,
            guidance_scale=guidance,
            width=width,
            height=height,
            generator=generator,
        ).images[0]
        images.append(img)
    return images


# ------------------ Helpers ------------------
def build_prompt(user_prompt: str, style: str, panel_idx: int, num_panels: int) -> str:
    style_desc = STYLE_PRESETS.get(style, "")
    beat = ["opening shot", "rising action", "key moment", "twist", "resolution"]
    beat_text = beat[min(panel_idx, len(beat) - 1)]
    return f"{user_prompt}, {style_desc}, storyboard panel {panel_idx+1} of {num_panels}, {beat_text}, cinematic composition, wide shot"
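# Example: build_prompt("a cyberpunk detective", "Comic", 0, 3) ->
#   "a cyberpunk detective, comic book style, halftone, bold lines, dramatic shading,
#    storyboard panel 1 of 3, opening shot, cinematic composition, wide shot"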


def generate_captions(user_prompt: str, n: int = 3):
    gen = get_txtgen_cpu()
    # Simple, fast prompts; keep it short
    outputs = []
    for i in range(n):
        text = gen(
            f"Write a very short scene caption (<=10 words) about: {user_prompt}",
            max_new_tokens=30,
            do_sample=True,
            temperature=0.9,
            top_p=0.95,
            num_return_sequences=1,
            return_full_text=False,  # return only the continuation, not the instruction prompt
        )[0]["generated_text"].strip()
        # Fallback if something weird comes out
        if not text or len(text.split()) < 2:
            text = f"Scene {i+1}"
        outputs.append(text[:80])
    return outputs


def add_caption_strip(img: Image.Image, text: str, width_hint: int) -> Image.Image:
    """Add a black strip with white text at the bottom. Uses textbbox (Pillow>=10)."""
    out = img.copy()
    draw = ImageDraw.Draw(out)
    try:
        font = ImageFont.truetype("DejaVuSans.ttf", size=max(16, width_hint // 28))
    except Exception:
        font = ImageFont.load_default()

    bbox = draw.textbbox((0, 0), text, font=font)
    text_w = bbox[2] - bbox[0]
    text_h = bbox[3] - bbox[1]
    strip_h = text_h + 14

    strip = Image.new("RGB", (out.width, strip_h), (0, 0, 0))
    d2 = ImageDraw.Draw(strip)
    d2.text(((out.width - text_w) // 2, 7), text, font=font, fill=(255, 255, 255))

    combined = Image.new("RGB", (out.width, out.height + strip_h), (0, 0, 0))
    combined.paste(out, (0, 0))
    combined.paste(strip, (0, out.height))
    return combined


def images_to_pdf_with_fpdf(images):
    """Write a simple multipage PDF using FPDF."""
    if not images:
        return None
    # NamedTemporaryFile(delete=False) sidesteps the race condition of the deprecated tempfile.mktemp
    pdf_path = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False).name
    pdf = FPDF()
    for img in images:
        # Save a temp PNG to embed in the PDF
        tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False).name
        img.save(tmp)
        pdf.add_page()
        # Fit the image nicely within margins
        pdf.image(tmp, x=10, y=10, w=190)
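        # (FPDF defaults to A4 in mm: the page is 210 mm wide, so x=10 with w=190 leaves
        #  10 mm margins; height scales automatically to preserve the aspect ratio)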
    pdf.output(pdf_path)
    return pdf_path


# ------------------ Core logic ------------------
def create_storyboard(user_prompt, style, num_panels, width, height, seed):
    if not user_prompt or not user_prompt.strip():
        return [], None

    # Gradio sliders may deliver floats; cast before using as counts/dimensions
    num_panels = int(num_panels)
    width, height = int(width), int(height)

    # Build prompts + captions
    captions = generate_captions(user_prompt, n=num_panels)
    prompts = [build_prompt(user_prompt, style, i, num_panels) for i in range(num_panels)]

    # Try GPU (ZeroGPU). If it fails (no GPU), fallback to CPU.
    images = None
    try:
        images = t2i_generate_batch_gpu(prompts, width, height, steps=2, guidance=0.0,
                                        negative_prompt=NEGATIVE, seed=seed)
    except Exception:
        # GPU not available → CPU fallback (slower)
        pipe = get_t2i_cpu()
        images = []
        # No seed control on CPU path by default; can be added with torch.Generator("cpu")
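        # Minimal sketch if seeded CPU runs are wanted (mirrors the GPU path above):
        #   gen_cpu = None
        #   if seed is not None and str(seed).strip().isdigit():
        #       gen_cpu = torch.Generator("cpu").manual_seed(int(seed))
        #   ...then pass generator=gen_cpu to pipe(...) in the loop below.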
        for p in prompts:
            img = pipe(
                prompt=p,
                negative_prompt=NEGATIVE,
                num_inference_steps=4,
                guidance_scale=0.0,
                width=width,
                height=height,
            ).images[0]
            images.append(img)

    # Add caption strips
    final_images = [add_caption_strip(img, cap, width_hint=width) for img, cap in zip(images, captions)]
    # Build PDF
    pdf_path = images_to_pdf_with_fpdf(final_images)
    return final_images, pdf_path


# ------------------ UI ------------------
with gr.Blocks(title="AI Storyboard Creator") as demo:
    gr.Markdown(
        """
        # 🎬 AI Storyboard Creator
        Turn a single prompt into a mini storyboard: 3–6 panels, captions, and a downloadable PDF.  
        Works on **CPU basic** and supports **ZeroGPU** (GPU on-demand).
        """
    )
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Story prompt", placeholder="A cyberpunk detective in the rain", lines=2)
            style = gr.Dropdown(choices=list(STYLE_PRESETS.keys()), value="Comic", label="Style")
            num_panels = gr.Slider(3, 6, value=3, step=1, label="Number of panels")
            width = gr.Slider(384, 768, value=448, step=64, label="Panel width (px)")
            height = gr.Slider(384, 768, value=448, step=64, label="Panel height (px)")
            seed = gr.Textbox(label="Seed (optional)", placeholder="e.g., 42")
            run_btn = gr.Button("Create Storyboard")
        with gr.Column():
            # NOTE: no .style(); use columns=2 instead
            gallery = gr.Gallery(label="Preview (grid)", columns=2, height="auto")
            pdf_file = gr.File(label="Download PDF")

    run_btn.click(
        create_storyboard,
        inputs=[prompt, style, num_panels, width, height, seed],
        outputs=[gallery, pdf_file],
    )

if __name__ == "__main__":
    demo.launch()