LPX55 committed
Commit 754d2f6 · verified · 1 Parent(s): 1110cf5

Create app.py

Files changed (1)
  app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
import gradio as gr
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info
from PIL import Image
import torch

# Load the model and processor once at startup
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "daniel3303/QwenStoryteller",
    torch_dtype="auto",
    device_map="auto",
)
processor = AutoProcessor.from_pretrained("daniel3303/QwenStoryteller")


def generate_story(images):
    if not images:
        return "Please upload at least one image."

    # Build the multimodal content list: up to 6 uploaded files, opened as RGB images
    image_content = []
    for path in images[:6]:
        image_content.append({
            "type": "image",
            "image": Image.open(path).convert("RGB"),
        })

    # Add the text prompt after the images
    image_content.append({"type": "text", "text": "Generate a story based on these images."})

    # Create messages with system prompt
    messages = [
        {
            "role": "system",
            "content": "You are an AI storyteller that can analyze sequences of images and create creative narratives. First think step-by-step to analyze characters, objects, settings, and narrative structure. Then create a grounded story that maintains consistent character identity and object references across frames. Use <think></think> tags to show your reasoning process before writing the final story."
        },
        {
            "role": "user",
            "content": image_content,
        },
    ]

    # Prepare the inputs for inference
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    inputs = inputs.to(model.device)

    # Inference: generate, then strip the prompt tokens from each output sequence
    generated_ids = model.generate(
        **inputs,
        max_new_tokens=4096,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    story = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]

    return story


# gr.Image holds a single image (and has no file_types parameter), so use gr.File
# with file_count="multiple"; generate_story then receives a list of file paths.
demo = gr.Interface(
    fn=generate_story,
    inputs=gr.File(
        file_count="multiple",
        file_types=[".jpg", ".jpeg", ".png", ".webp"],
        label="Upload up to 6 images",
    ),
    outputs=gr.Textbox(label="Generated Story", lines=10),
    title="Qwen Storyteller",
    description="Upload up to 6 images to generate a creative story.",
)

if __name__ == "__main__":
    demo.launch()
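
For a quick sanity check outside the Gradio UI, generate_story can also be called directly with a list of image paths. A minimal sketch, assuming the model weights download successfully; frame1.jpg and frame2.jpg are hypothetical placeholders for real files, not part of this commit:

# smoke_test.py — hypothetical local check; run next to app.py.
# The image paths below are placeholders for your own files.
from app import generate_story

story = generate_story(["frame1.jpg", "frame2.jpg"])
print(story)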
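
Because the system prompt asks the model to wrap its step-by-step reasoning in <think></think> tags before the final story, the Space may want to display only the narrative. A small post-processing sketch, assuming the output actually follows that tag format; strip_reasoning is a hypothetical helper, not part of the original commit:

import re

def strip_reasoning(raw_output: str) -> str:
    # Drop any <think>...</think> reasoning block and return only the story text.
    return re.sub(r"<think>.*?</think>", "", raw_output, flags=re.DOTALL).strip()

If used, the last line of generate_story would become return strip_reasoning(story).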