File size: 4,518 Bytes
1cca399
4748a2b
1cca399
de31d7b
 
 
 
 
1cca399
4748a2b
 
 
 
 
 
 
1cca399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4748a2b
1cca399
4748a2b
1cca399
 
 
4748a2b
 
 
1cca399
de31d7b
1cca399
 
 
 
4748a2b
1cca399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4748a2b
 
 
1cca399
 
 
 
be2aa05
1cca399
be2aa05
 
191a377
be2aa05
1cca399
be2aa05
1cca399
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import openai, gradio as gr, json, plotly.graph_objects as go
from pathlib import Path

# --- Load the optional stylesheet; fall back to "" if it cannot be read ---
try:
    # Explicit UTF-8 so the result does not depend on the host's locale encoding.
    CUSTOM_CSS = Path("style.css").read_text(encoding="utf-8")
except (OSError, UnicodeDecodeError):
    # Styling is optional: a missing, unreadable or non-UTF-8 style.css must
    # not prevent the app from starting. Other errors are left to surface.
    CUSTOM_CSS = ""

# System message placed first in every chat request (see build_messages).
# It asks the model to answer in exactly one of three forms: a JSON object
# with "type": "image", a JSON object with "type": "chart", or plain text.
# multimodal_chat parses the reply against these same shapes, so the JSON
# keys named here ("type", "prompt", "title", "data", "x", "y", "label")
# must stay in sync with that function.
SYSTEM_PROMPT = """
You are ZEN Multimodal Assistant by ZEN AI Co.
Choose only ONE of these output modes per reply:
- Image: when a visual or illustration is most useful. Respond only with JSON: {"type":"image","prompt":"<prompt for DALL-E-3>"}
- Chart: when a user requests or needs a data visualization. Respond only with JSON: {"type":"chart","title":"<chart title>","data":[{"x":[...], "y":[...], "label":"<series name>"}]}
- Text: for all other situations, reply with a helpful, complete, conversational answer. Never reply with the word "text" or any label, just the response itself. Never reply in JSON unless for image or chart.
Never use markdown code fences, never add comments.
"""

def build_messages(history, user_msg):
    """Assemble the chat-completions message list for one request.

    Args:
        history: Iterable of two-item ``(user, assistant)`` pairs holding the
            earlier turns of the conversation, oldest first.
        user_msg: The new user message to append as the final entry.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts: the system prompt,
        then each past turn as a user message followed by an assistant
        message, then ``user_msg`` as the last user message.
    """
    past_turns = [
        {"role": speaker, "content": text}
        for user_turn, assistant_turn in history
        for speaker, text in (("user", user_turn), ("assistant", assistant_turn))
    ]
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        *past_turns,
        {"role": "user", "content": user_msg},
    ]

def _parse_structured_reply(content):
    """Return *content* decoded as a JSON object, or ``None`` if it is not one."""
    try:
        parsed = json.loads(content)
    except json.JSONDecodeError:
        return None
    # Valid JSON that is not an object ("42", "true", "[1, 2]") is an ordinary
    # text answer; only dicts can carry the "type" key used for routing.
    return parsed if isinstance(parsed, dict) else None


def _build_chart(spec):
    """Build a line-and-marker figure from a chart reply; raises if malformed."""
    chart = go.Figure()
    for series in spec["data"]:
        chart.add_trace(
            go.Scatter(
                x=series["x"],
                y=series["y"],
                mode="lines+markers",
                name=series.get("label", ""),
            )
        )
    chart.update_layout(title=spec.get("title", "Chart"))
    return chart


def multimodal_chat(api_key, user_msg, history):
    """Send one user message to the model and route its reply.

    The reply is treated as an image request, a chart request, or plain text
    depending on whether it is a JSON object whose ``"type"`` is ``"image"``
    or ``"chart"``. Anything else (including JSON that is not an object, or a
    chart description that cannot be rendered) is shown as text.

    Args:
        api_key: OpenAI API key to use for this request only.
        user_msg: The new user message.
        history: Previous ``[user, assistant]`` pairs, or ``None``/empty for a
            new conversation. The passed list is not modified.

    Returns:
        A ``(history, img_url, fig)`` tuple: a new history list with this
        turn appended, the generated image URL or ``None``, and the Plotly
        figure or ``None``.

    Raises:
        gr.Error: If ``api_key`` is empty.
    """
    if not api_key:
        raise gr.Error("🔑  Please paste your OpenAI API key first.")
    # Use a per-request client instead of assigning openai.api_key: the module
    # attribute is process-wide, so concurrent users of the queued app could
    # end up sending requests with each other's keys.
    client = openai.OpenAI(api_key=api_key)

    # Copy so the caller's list is never mutated behind its back.
    history = list(history) if history else []
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=build_messages(history, user_msg),
        temperature=0.6,
    )
    # The API may return no text content at all; treat that as an empty reply.
    assistant_content = (response.choices[0].message.content or "").strip()

    # The prompt forbids answering with the bare mode label; if the model does
    # it anyway, show a polite placeholder instead of the word "text".
    if assistant_content.lower() == "text":
        assistant_content = "(I'm sorry, I didn't understand. Could you rephrase?)"

    img_url, fig = None, None
    reply_text = assistant_content  # default: show the reply as plain text

    parsed = _parse_structured_reply(assistant_content)
    reply_type = parsed.get("type") if parsed is not None else None

    if reply_type == "image":
        dalle = client.images.generate(
            model="dall-e-3",
            prompt=parsed.get("prompt", "high quality illustration, cinematic, best quality"),
            n=1,
            size="1024x1024",
        )
        img_url = dalle.data[0].url
        reply_text = f"![generated image]({img_url})"
    elif reply_type == "chart":
        try:
            fig = _build_chart(parsed)
        except (KeyError, TypeError, AttributeError, ValueError):
            # Malformed chart description: keep fig as None (never return a
            # half-built figure) and fall back to showing the raw reply.
            fig = None
        else:
            reply_text = parsed.get("title", "Chart below")

    history.append([user_msg, reply_text])
    return history, img_url, fig

# Use the stylesheet text loaded into CUSTOM_CSS (empty when style.css could
# not be read) rather than the literal "style.css": CUSTOM_CSS was otherwise
# unused, and depending on the Gradio version a bare filename passed as `css`
# may be treated as CSS source instead of a path.
with gr.Blocks(css=CUSTOM_CSS) as demo:
    gr.Markdown(
        "🧠 ZEN Multimodal Assistant\n"
        "Paste your OpenAI API key (never saved).\n"
        "This assistant intelligently responds with text, an image, or an interactive chart. MODULE 3",
        elem_id="zen-header",  # hook for styling the header from the stylesheet
    )
    api_key   = gr.Textbox(label="OpenAI API Key", type="password", placeholder="sk-...")
    chatbot   = gr.Chatbot(label="Conversation")
    with gr.Row():
        user_msg = gr.Textbox(placeholder="Ask me anything…", label="Your message", scale=4)
        send_btn = gr.Button("Send", variant="primary")
    img_out   = gr.Image(label="Generated image")
    chart_out = gr.Plot(label="Interactive chart")

    def respond(api_key, user_msg, chat_history):
        """Run one chat turn and translate its result into component updates.

        Returns the updated chat history plus updates for the image and plot
        components; each of those is cleared when this turn produced no
        image or no chart respectively.
        """
        chat_history, img_url, fig = multimodal_chat(api_key, user_msg, chat_history)
        img_update  = gr.update(value=img_url) if img_url else gr.update(value=None)
        fig_update  = gr.update(value=fig)     if fig     else gr.update(value=None)
        return chat_history, img_update, fig_update

    # Clicking "Send" and submitting the message box run the same handler
    # with the same inputs and outputs.
    send_btn.click(
        respond,
        inputs=[api_key, user_msg, chatbot],
        outputs=[chatbot, img_out, chart_out],
    )
    user_msg.submit(
        respond,
        inputs=[api_key, user_msg, chatbot],
        outputs=[chatbot, img_out, chart_out],
    )

if __name__ == "__main__":
    # Script entry point: configure the event queue (max_size=50), then
    # start the app.
    queued_demo = demo.queue(max_size=50)
    queued_demo.launch()