Spaces:

LPX55
/

QwenStorytellerV2

Running on Zero

App Files Files Community

LPX55 committed on May 21

Commit

754d2f6

verified ·

1 Parent(s): 1110cf5

Create app.py

Browse files

Files changed (1) hide show

app.py +78 -0

app.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import gradio as gr
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
+from qwen_vl_utils import process_vision_info
+from PIL import Image
+import torch
+# Load the model and processor
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    "daniel3303/QwenStoryteller",
+    torch_dtype="auto",
+    device_map="auto"
+)
+processor = AutoProcessor.from_pretrained("daniel3303/QwenStoryteller")
+def generate_story(images):
+    image_content = []
+    for img in images[:6]:
+        image_content.append({
+            "type": "image",
+            "image": img,
+        })
+    # Add text prompt at the end
+    image_content.append({"type": "text", "text": "Generate a story based on these images."})
+    # Create messages with system prompt
+    messages = [
+        {
+            "role": "system",
+            "content": "You are an AI storyteller that can analyze sequences of images and create creative narratives. First think step-by-step to analyze characters, objects, settings, and narrative structure. Then create a grounded story that maintains consistent character identity and object references across frames. Use 🧠 tags to show your reasoning process before writing the final story."
+        },
+        {
+            "role": "user",
+            "content": image_content,
+        }
+    ]
+    # Preparation for inference
+    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    image_inputs, video_inputs = process_vision_info(messages)
+    inputs = processor(
+        text=[text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt"
+    )
+    inputs = inputs.to(model.device)
+    # Inference: Generate the output
+    generated_ids = model.generate(
+        **inputs,
+        max_new_tokens=4096,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.9
+    )
+    generated_ids_trimmed = [
+        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+    ]
+    story = processor.batch_decode(
+        generated_ids_trimmed,
+        skip_special_tokens=True,
+        clean_up_tokenization_spaces=False
+    )[0]
+    return story
+demo = gr.Interface(
+    fn=generate_story,
+    inputs=gr.Image(type="pil", label="Upload up to 6 images", image_mode="RGB", height=300, width=300, file_types=[".jpg", ".jpeg", ".png", ".webp"]),
+    outputs=gr.Textbox(label="Generated Story", lines=10),
+    title="Qwen Storyteller",
+    description="Upload up to 6 images to generate a creative story."
+)
+if __name__ == "__main__":
+    demo.launch()