# gradio_manim_gemini_app.py – **v3**
"""Gradio demo 
============
β€” third revision β€”
β€’ **ΠŸΡ€Π°Π²ΠΈΠ»ΡŒΠ½Π°Ρ структура history** β€” Ρ‚Π΅ΠΏΠ΅Ρ€ΡŒ `Chatbot` ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅Ρ‚ список *ΠΏΠ°Ρ€*
  `(user_text, bot_text)`.  Π§Π°Π½ΠΊΠΈ Π±ΠΎΡ‚Π° апдСй‑тят Π²Ρ‚ΠΎΡ€ΠΎΠΉ элСмСнт послСднСй ΠΏΠ°Ρ€Ρ‹,
  поэтому Β«Π΄ΡƒΠ±Π»ΠΈΒ» ΠΈ Β«Ρ€ΠΎΠ±ΠΎΡ‚β€‘ΡŽΠ·Π΅Ρ€Β» ΠΈΡΡ‡Π΅Π·Π°ΡŽΡ‚.  
β€’ **Ошибки Ρ€Π΅Π½Π΄Π΅Ρ€Π°** ΠΏΡƒΠ±Π»ΠΈΠΊΡƒΡŽΡ‚ΡΡ *ΠΊΠ°ΠΊ ΠΏΠΎΠ»ΡŒΠ·ΠΎΠ²Π°Ρ‚Π΅Π»ΡŒΡΠΊΠΎΠ΅ сообщСниС* ΠΈ Π½Π΅ΠΌΠ΅Π΄Π»Π΅Π½Π½ΠΎ
  ΠΎΡ‚ΠΏΡ€Π°Π²Π»ΡΡŽΡ‚ΡΡ Π² Gemini; модСль ΠΎΡ‚Π²Π΅Ρ‡Π°Π΅Ρ‚, ΠΌΡ‹ снова пытаСмся ΡΠ³Π΅Π½Π΅Ρ€ΠΈΡ€ΠΎΠ²Π°Ρ‚ΡŒ ΠΊΠΎΠ΄ β€”
  ΠΏΠΎΠ»Π½ΠΎΡΡ‚ΡŒΡŽ автоматичСский Ρ†ΠΈΠΊΠ», ΠΊΠ°ΠΊ Π² вашСм CLI‑скриптС.  
β€’ Π£ΠΏΡ€Π°Π²Π»Π΅Π½ΠΈΠ΅ состояниСм свСдСно ΠΊ Ρ‡Ρ‘Ρ‚ΠΊΠΈΠΌ этапам: `await_task`, `coding_loop`,
  `await_feedback`, `finished`.
β€’ ПослС ΠΊΠ°ΠΆΠ΄ΠΎΠ³ΠΎ Ρ€Π΅Π½Π΄Π΅Ρ€Π° ΠΏΠΎΠ»ΡŒΠ·ΠΎΠ²Π°Ρ‚Π΅Π»ΡŒ ΠΌΠΎΠΆΠ΅Ρ‚ Π΄Π°Ρ‚ΡŒ Π΄ΠΎΠΏΠΎΠ»Π½ΠΈΡ‚Π΅Π»ΡŒΠ½Ρ‹Π΅ указания β€”
  Π²ΠΈΠ΄Π΅ΠΎ отправляСтся Π² Gemini ΠΈ ΠΊΠΎΠ΄ гСнСрируСтся Π·Π°Π½ΠΎΠ²ΠΎ с ΡƒΡ‡Ρ‘Ρ‚ΠΎΠΌ Π·Π°ΠΌΠ΅Ρ‡Π°Π½ΠΈΠΉ.
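
For example, after the first exchange `history == [("Explain sorting",
"Here is a plan…")]` (illustrative values); streaming only ever mutates
`history[-1][1]`.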

Run:
```bash
pip install --upgrade gradio google-genai manim_video_generator manim-ml
export GEMINI_API_KEY="YOUR_KEY"
python gradio_manim_gemini_app.py
```
"""
from __future__ import annotations

import asyncio
import os
import re
import traceback
from pathlib import Path
from typing import List, Tuple

import gradio as gr
from google import genai
from google.genai.chats import AsyncChat
from google.genai.types import GenerateContentConfig, ThinkingConfig, UploadFileConfig

from manim_video_generator.video_executor import VideoExecutor  # type: ignore
from prompts import SYSTEM_PROMPT_SCENARIO_GENERATOR, SYSTEM_PROMPT_CODEGEN

# ────────────────────────────────  Config  ─────────────────────────────────────

API_KEY = os.getenv("GEMINI_API_KEY")
if not API_KEY:
    raise EnvironmentError("GEMINI_API_KEY env variable not set.")

client = genai.Client(api_key=API_KEY)
MODEL = "gemini-2.5-flash-preview-05-20"
video_executor = VideoExecutor()

# ───────────────────────  Helpers to work with Chatbot  ─────────────────────────

def add_user_msg(history: List[Tuple[str, str]], text: str):
    """Append a new (user, "") pair to the chat history."""
    history.append((text, ""))


def append_bot_chunk(history: List[Tuple[str, str]], chunk: str):
    """Append a chunk to the bot part of the last pair."""
    user, bot = history[-1]
    history[-1] = (user, bot + chunk)
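
# Usage sketch (hypothetical values): streaming chunks into a single bot turn.
#     h: List[Tuple[str, str]] = []
#     add_user_msg(h, "hi")        # h == [("hi", "")]
#     append_bot_chunk(h, "Hel")   # h == [("hi", "Hel")]
#     append_bot_chunk(h, "lo!")   # h == [("hi", "Hello!")]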


class StreamPart:
    def __init__(self, text: str):
        self.text = text

class ThinkingStreamPart(StreamPart): pass
class TextStreamPart(StreamPart): pass


async def stream_parts(chat, prompt):
    cfg = GenerateContentConfig(thinking_config=ThinkingConfig(include_thoughts=True))
    async for chunk in await chat.send_message_stream(prompt, config=cfg):
        if chunk.candidates:
            cand = chunk.candidates[0]
            if cand.content and cand.content.parts:
                for part in cand.content.parts:
                    if part.text:
                        if part.thought:
                            yield ThinkingStreamPart(part.text)
                        else:
                            yield TextStreamPart(part.text)
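
# Consumption sketch: a caller can filter on the part type, e.g.
#     async for part in stream_parts(chat, "Explain bubble sort"):
#         if isinstance(part, TextStreamPart):
#             append_bot_chunk(history, part.text)
# (The handlers below currently append every part, thoughts included.)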


def extract_python(md: str) -> str:
    m = re.search(r"```python(.*?)```", md, re.S)
    if not m:
        raise ValueError("No ```python``` block found in model output.")
    return m.group(1).strip()
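
# Example: extract_python("intro\n```python\nprint('hi')\n```") == "print('hi')".
# The non-greedy regex stops at the first closing fence, so only the first
# block is extracted.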


async def coding_cycle(state: "Session", history: List[Tuple[str, str]], prompt):
    """Generate code, render video and return once rendering succeeds."""
    while True:
        async for chunk in stream_parts(state.chat, prompt):
            append_bot_chunk(history, chunk.text)
            yield history, state, state.last_video
            await asyncio.sleep(0)

        full_answer = history[-1][1]
        try:
            py_code = extract_python(full_answer)
        except ValueError as e:
            err_msg = f"Error: {e}. Please wrap the code in ```python``` fence."
            prompt = err_msg
            add_user_msg(history, err_msg)
            yield history, state, state.last_video
            continue

        try:
            append_bot_chunk(history, "\n⏳ Rendering... This can take a few minutes.")
            yield history, state, state.last_video
            await asyncio.sleep(0)
            video_path = video_executor.execute_manim_code(py_code)
            state.last_video = video_path
        except Exception as e:
            tb = traceback.format_exc(limit=10)
            err_msg = (
                f"Error, your code is not valid: {e}. Traceback: {tb}. "
                "Please fix this error and regenerate the code."
            )
            prompt = err_msg
            add_user_msg(history, err_msg)
            yield history, state, state.last_video
            continue

        append_bot_chunk(history, "\n🎞️ Rendering done! Feel free to request changes, or press **Next Step** to finish.")
        state.phase = "await_feedback"
        yield history, state, state.last_video
        return

# ──────────────────────────  Session state  ────────────────────────────────────
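
# Phase machine (as implemented below):
#   await_task ──("Next Step", or typing "c"/"continue")──▶ coding_loop
#   coding_loop ──(successful render)──▶ await_feedback
#   await_feedback ──(text feedback + video upload)──▶ coding_loop
#   await_feedback ──("finish"/"done"/"f", or "Next Step")──▶ finished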

class Session(dict):
    phase: str  # await_task | coding_loop | await_feedback | finished
    chat: AsyncChat | None
    last_video: Path | None

    def __init__(self):
        super().__init__(phase="await_task", chat=None, last_video=None)
        self.phase = "await_task"
        self.chat = None
        self.last_video = None

# ────────────────────────  Main chat handler  ──────────────────────────────────

async def chat_handler(user_msg: str, history: List[Tuple[str, str]], state: Session):
    history = history or []

    # 0. Always reflect user input
    add_user_msg(history, user_msg)
    yield history, state, state.last_video

    # bootstrap chat on very first user request
    if state.phase == "await_task":
        if not state.chat:
            # First time - create chat and generate scenario
            state.chat = client.aio.chats.create(model=MODEL)
            scenario_prompt = f"{SYSTEM_PROMPT_SCENARIO_GENERATOR}\n\n{user_msg}"
            async for part in stream_parts(state.chat, scenario_prompt):
                append_bot_chunk(history, part.text)
                yield history, state, state.last_video
                await asyncio.sleep(0)
            append_bot_chunk(history, "\n\n*(press **Next Step** to proceed to code generation)*")
            yield history, state, state.last_video
            return
        else:
            # Chat exists - check if user wants to proceed or modify scenario
            if user_msg.strip().lower() in {"c", "continue", "с"}:  # the last entry is the Cyrillic "с"
                # Legacy behaviour: typed command to proceed
                state.phase = "coding_loop"
            else:
                # User wants to discuss/modify scenario
                async for chunk in stream_parts(state.chat, user_msg):
                    append_bot_chunk(history, chunk.text)
                    yield history, state, state.last_video
                    await asyncio.sleep(0)
                append_bot_chunk(history, "\n\n*(press **Next Step** when ready to proceed to code generation)*")
                yield history, state, state.last_video
                return

    # later phases require chat obj
    if not state.chat:
        raise ValueError("Chat not found")

    # ── Coding loop ─────────────────────────────────────────────────────────────
    if state.phase == "coding_loop":
        prompt = "Thanks. It is good scenario. Now generate code for it.\n\n" + SYSTEM_PROMPT_CODEGEN
        async for out in coding_cycle(state, history, prompt):
            yield out
        return
    # ── Awaiting user feedback after rendering ────────────────────────────────
    if state.phase == "await_feedback":
        if user_msg.strip().lower() in {"finish", "done", "f"}:
            state.phase = "finished"
            append_bot_chunk(history, "Session complete. Refresh page to start over.")
            yield history, state, state.last_video
            return
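        # Upload the freshly rendered video through the Files API and poll
        # until Gemini finishes processing it (state leaves PROCESSING).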
        file_ref = client.files.upload(
            file=state.last_video,
            config=UploadFileConfig(display_name=state.last_video.name),
        )
        while file_ref.state and file_ref.state.name == "PROCESSING":
            await asyncio.sleep(3)
            if file_ref.name:
                file_ref = client.files.get(name=file_ref.name)
        if file_ref.state and file_ref.state.name == "FAILED":
            raise RuntimeError("Gemini failed to process upload")
        prompt = [file_ref, f"{user_msg}\n\n{SYSTEM_PROMPT_CODEGEN}"]
        state.phase = "coding_loop"
        async for out in coding_cycle(state, history, prompt):
            yield out
        return

    # ── Finished phase ──────────────────────────────────────────────────────────
    if state.phase == "finished":
        append_bot_chunk(history, "Session complete. Refresh page to start over.")
        yield history, state, state.last_video

async def next_step_handler(history: List[Tuple[str, str]], state: Session):
    """Advance the conversation without typing control words."""
    history = history or []
    if state.phase == "await_task" and state.chat:
        state.phase = "coding_loop"
        prompt = "Thanks. It is good scenario. Now generate code for it.\n\n" + SYSTEM_PROMPT_CODEGEN
        async for out in coding_cycle(state, history, prompt):
            yield out
        return

    if state.phase == "await_feedback":
        state.phase = "finished"
        append_bot_chunk(history, "Session complete. Refresh page to start over.")
        yield history, state, state.last_video
        return

    yield history, state, state.last_video


# ───────────────────────────────  UI  ──────────────────────────────────────────

def build_app():
    with gr.Blocks(title="Gemini‑Manim Video Creator") as demo:
        gr.Markdown("# 🎬 Gemini‑Manim Video Creator\nCreate an explanatory animation from a single prompt.")

        history = gr.Chatbot(height=850)
        session = gr.State(Session())
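        # Note: gr.State deep-copies this initial value for every new browser
        # session, so each visitor gets an independent Session object.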

        with gr.Row():
            txt = gr.Textbox(placeholder="Describe the concept…", scale=4)
            btn = gr.Button("Send", variant="primary")
            next_btn = gr.Button("Next Step")

        vid = gr.Video(label="Rendered video", interactive=False)

        btn.click(chat_handler, [txt, history, session], [history, session, vid]) \
           .then(lambda: "", None, txt)

        next_btn.click(next_step_handler, [history, session], [history, session, vid])

    return demo


if __name__ == "__main__":
    build_app().launch()