# gradio_blip3o_next_min.py
import time
from dataclasses import dataclass

import torch
from PIL import Image
from transformers import AutoTokenizer
from blip3o.model import *
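# (blip3oQwenForInferenceLM, used in _load_models below, comes in via this star import)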
import gradio as gr
from huggingface_hub import snapshot_download

# -----------------------------
# Minimal config and runner
# -----------------------------
@dataclass
class T2IConfig:
    device: str = "cuda:0"
    dtype: torch.dtype = torch.bfloat16
    # fixed generation config (no UI controls)
    scale: int = 0
    seq_len: int = 729
    top_p: float = 0.95
    top_k: int = 1200
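    # NOTE: seq_len = 729 = 27 * 27, presumably one full grid of image tokens
    # at this scale (an assumption inferred from the fixed value).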


class TextToImageInference:
    def __init__(self, config: T2IConfig):
        self.config = config
        self.device = torch.device(config.device)
        self._load_models()

    def _load_models(self):
        model_path = snapshot_download(repo_id="BLIP3o/BLIP3o-NEXT-GRPO-Geneval-3B")
        self.model = blip3oQwenForInferenceLM.from_pretrained(
            model_path, torch_dtype=self.config.dtype
        ).to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
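        # Decoder-only models append new tokens on the right, so batched
        # prompts are left-padded to keep the prompt flush against them.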
        if hasattr(self.tokenizer, "padding_side"):
            self.tokenizer.padding_side = "left"

    def generate_image(self, prompt: str) -> Image.Image:
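        # Wrap the raw prompt in a fixed instruction; this phrasing presumably
        # matches the prompt format the model saw during training.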
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": f"Please generate image based on the following caption: {prompt}",
            },
        ]
        input_text = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
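        # <im_start> plus a scale token (<S0> by default) cues the model to
        # start emitting image tokens; these are model-specific special tokens.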
        input_text += f"<im_start><S{self.config.scale}>"
        inputs = self.tokenizer(
            [input_text], return_tensors="pt", padding=True, truncation=True
        )
        _, images = self.model.generate_images(
            inputs.input_ids.to(self.device),
            inputs.attention_mask.to(self.device),
            max_new_tokens=self.config.seq_len,
            do_sample=True,
            top_p=self.config.top_p,
            top_k=self.config.top_k,
        )
        return images[0]


# Try loading once at startup for simplicity
LOAD_ERROR = None
inference = None
try:
    inference = TextToImageInference(T2IConfig())
except Exception as e:
    LOAD_ERROR = f"❌ Failed to load model: {e}"
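# Deferring load failures into LOAD_ERROR (instead of raising at import time)
# keeps the Gradio UI reachable so the error can be shown to the user.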


def run_generate(prompt, progress=gr.Progress(track_tqdm=True)):
    t0 = time.time()
    if LOAD_ERROR:
        return None, LOAD_ERROR
    if not prompt or not prompt.strip():
        return None, "⚠️ Please enter a prompt."
    try:
        img = inference.generate_image(prompt.strip())
        return img, f"✅ Done in {time.time() - t0:.2f}s."
    except torch.cuda.OutOfMemoryError:
        if torch.cuda.is_available():
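            # Drop cached allocator blocks so a later request can retry.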
            torch.cuda.empty_cache()
        return None, "❌ CUDA OOM. Try reducing other GPU workloads."
    except Exception as e:
        return None, f"❌ Error: {e}"


with gr.Blocks(title="BLIP3o-NEXT-GRPO-Geneval: Text → Image") as demo:
    gr.Markdown("# BLIP3o-NEXT-GRPO-Geneval: Text → Image")
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Describe the image you want to generate...",
                lines=4,
            )
            run_btn = gr.Button("Generate", variant="primary")
        with gr.Column(scale=4):
            out_img = gr.Image(label="Generated Image", format="png")
            status = gr.Markdown("")
    run_btn.click(
        fn=run_generate,
        inputs=[prompt],
        outputs=[out_img, status],
        queue=True,
        api_name="generate",
    )
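    # api_name="generate" also exposes this handler over the Gradio API.
    # A minimal sketch, assuming a local launch on the default port:
    #   from gradio_client import Client
    #   client = Client("http://127.0.0.1:7860")
    #   img_path, status = client.predict("a red cube on a wooden table", api_name="/generate")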


if __name__ == "__main__":
    demo.queue().launch(share=True)