# gradio_manim_gemini_app.py – **v3**
"""Gradio demo 
============
β€” third revision β€”
β€’ **ΠŸΡ€Π°Π²ΠΈΠ»ΡŒΠ½Π°Ρ структура history** β€” Ρ‚Π΅ΠΏΠ΅Ρ€ΡŒ `Chatbot` ΠΏΠΎΠ»ΡƒΡ‡Π°Π΅Ρ‚ список *ΠΏΠ°Ρ€*
  `(user_text, bot_text)`.  Π§Π°Π½ΠΊΠΈ Π±ΠΎΡ‚Π° апдСй‑тят Π²Ρ‚ΠΎΡ€ΠΎΠΉ элСмСнт послСднСй ΠΏΠ°Ρ€Ρ‹,
  поэтому Β«Π΄ΡƒΠ±Π»ΠΈΒ» ΠΈ Β«Ρ€ΠΎΠ±ΠΎΡ‚β€‘ΡŽΠ·Π΅Ρ€Β» ΠΈΡΡ‡Π΅Π·Π°ΡŽΡ‚.  
β€’ **Ошибки Ρ€Π΅Π½Π΄Π΅Ρ€Π°** ΠΏΡƒΠ±Π»ΠΈΠΊΡƒΡŽΡ‚ΡΡ *ΠΊΠ°ΠΊ ΠΏΠΎΠ»ΡŒΠ·ΠΎΠ²Π°Ρ‚Π΅Π»ΡŒΡΠΊΠΎΠ΅ сообщСниС* ΠΈ Π½Π΅ΠΌΠ΅Π΄Π»Π΅Π½Π½ΠΎ
  ΠΎΡ‚ΠΏΡ€Π°Π²Π»ΡΡŽΡ‚ΡΡ Π² Gemini; модСль ΠΎΡ‚Π²Π΅Ρ‡Π°Π΅Ρ‚, ΠΌΡ‹ снова пытаСмся ΡΠ³Π΅Π½Π΅Ρ€ΠΈΡ€ΠΎΠ²Π°Ρ‚ΡŒ ΠΊΠΎΠ΄ β€”
  ΠΏΠΎΠ»Π½ΠΎΡΡ‚ΡŒΡŽ автоматичСский Ρ†ΠΈΠΊΠ», ΠΊΠ°ΠΊ Π² вашСм CLI‑скриптС.  
β€’ Π£ΠΏΡ€Π°Π²Π»Π΅Π½ΠΈΠ΅ состояниСм свСдСно ΠΊ Ρ‡Ρ‘Ρ‚ΠΊΠΈΠΌ этапам: `await_task`, `coding_loop`,
  `await_feedback`, `finished`.
β€’ ПослС ΠΊΠ°ΠΆΠ΄ΠΎΠ³ΠΎ Ρ€Π΅Π½Π΄Π΅Ρ€Π° ΠΏΠΎΠ»ΡŒΠ·ΠΎΠ²Π°Ρ‚Π΅Π»ΡŒ ΠΌΠΎΠΆΠ΅Ρ‚ Π΄Π°Ρ‚ΡŒ Π΄ΠΎΠΏΠΎΠ»Π½ΠΈΡ‚Π΅Π»ΡŒΠ½Ρ‹Π΅ указания β€”
  Π²ΠΈΠ΄Π΅ΠΎ отправляСтся Π² Gemini ΠΈ ΠΊΠΎΠ΄ гСнСрируСтся Π·Π°Π½ΠΎΠ²ΠΎ с ΡƒΡ‡Ρ‘Ρ‚ΠΎΠΌ Π·Π°ΠΌΠ΅Ρ‡Π°Π½ΠΈΠΉ.
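
For example, after the first exchange `history == [("Explain sorting",
"Here is a plan…")]` (illustrative values); streaming only ever mutates
`history[-1][1]`.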

Run:
```bash
pip install --upgrade gradio google-genai manim_video_generator manim-ml
export GEMINI_API_KEY="YOUR_KEY"
python gradio_manim_gemini_app.py
```
"""
from __future__ import annotations

import asyncio
import os
import re
import traceback
from pathlib import Path
from typing import List, Tuple

import gradio as gr
from google import genai
from google.genai.chats import AsyncChat
from google.genai.types import GenerateContentConfig, ThinkingConfig, UploadFileConfig

from manim_video_generator.video_executor import VideoExecutor  # type: ignore
from prompts import SYSTEM_PROMPT_SCENARIO_GENERATOR, SYSTEM_PROMPT_CODEGEN

# ────────────────────────────────  Config  ─────────────────────────────────────

API_KEY = os.getenv("GEMINI_API_KEY")
if not API_KEY:
    raise EnvironmentError("GEMINI_API_KEY env variable not set.")

client = genai.Client(api_key=API_KEY)
MODEL = "gemini-2.5-flash-preview-05-20"
video_executor = VideoExecutor()

# ───────────────────────  Helpers to work with Chatbot  ─────────────────────────

def add_user_msg(history: List[Tuple[str, str]], text: str):
    """Append a new (user, "") pair to the chat history."""
    history.append((text, ""))


def append_bot_chunk(history: List[Tuple[str, str]], chunk: str):
    """Append a chunk to the bot part of the last pair."""
    user, bot = history[-1]
    history[-1] = (user, bot + chunk)
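
# Usage sketch (hypothetical values): streaming chunks into a single bot turn.
#     h: List[Tuple[str, str]] = []
#     add_user_msg(h, "hi")        # h == [("hi", "")]
#     append_bot_chunk(h, "Hel")   # h == [("hi", "Hel")]
#     append_bot_chunk(h, "lo!")   # h == [("hi", "Hello!")]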


class StreamPart:
    def __init__(self, text: str):
        self.text = text

class ThinkingStreamPart(StreamPart): pass
class TextStreamPart(StreamPart): pass


async def stream_parts(chat, prompt):
    cfg = GenerateContentConfig(thinking_config=ThinkingConfig(include_thoughts=True))
    async for chunk in await chat.send_message_stream(prompt, config=cfg):
        if chunk.candidates:
            cand = chunk.candidates[0]
            if cand.content and cand.content.parts:
                for part in cand.content.parts:
                    if part.text:
                        if part.thought:
                            yield ThinkingStreamPart(part.text)
                        else:
                            yield TextStreamPart(part.text)
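
# Consumption sketch: a caller can filter on the part type, e.g.
#     async for part in stream_parts(chat, "Explain bubble sort"):
#         if isinstance(part, TextStreamPart):
#             append_bot_chunk(history, part.text)
# (The handlers below currently append every part, thoughts included.)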


def extract_python(md: str) -> str:
    m = re.search(r"```python(.*?)```", md, re.S)
    if not m:
        raise ValueError("No ```python``` block found in model output.")
    return m.group(1).strip()
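
# Example: extract_python("intro\n```python\nprint('hi')\n```") == "print('hi')".
# The non-greedy regex stops at the first closing fence, so only the first
# block is extracted.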


async def coding_cycle(state: "Session", history: List[Tuple[str, str]], prompt):
    """Generate code, render video and return once rendering succeeds."""
    while True:
        async for chunk in stream_parts(state.chat, prompt):
            append_bot_chunk(history, chunk.text)
            yield history, state, state.last_video
            await asyncio.sleep(0)

        full_answer = history[-1][1]
        try:
            py_code = extract_python(full_answer)
        except ValueError as e:
            err_msg = f"Error: {e}. Please wrap the code in ```python``` fence."
            prompt = err_msg
            add_user_msg(history, err_msg)
            yield history, state, state.last_video
            continue

        try:
            append_bot_chunk(history, "\n⏳ Rendering... This can take a few minutes.")
            yield history, state, state.last_video
            await asyncio.sleep(0)
            video_path = video_executor.execute_manim_code(py_code)
            state.last_video = video_path
        except Exception as e:
            tb = traceback.format_exc(limit=10)
            err_msg = (
                f"Error, your code is not valid: {e}. Traceback: {tb}. "
                "Please fix this error and regenerate the code."
            )
            prompt = err_msg
            add_user_msg(history, err_msg)
            yield history, state, state.last_video
            continue

        append_bot_chunk(history, "\n🎞️ Rendering done! Feel free to request changes, or press **Next Step** to finish.")
        state.phase = "await_feedback"
        yield history, state, state.last_video
        return

# ──────────────────────────  Session state  ────────────────────────────────────
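
# Phase machine (as implemented below):
#   await_task ──("Next Step", or typing "c"/"continue")──▶ coding_loop
#   coding_loop ──(successful render)──▶ await_feedback
#   await_feedback ──(text feedback + video upload)──▶ coding_loop
#   await_feedback ──("finish"/"done"/"f", or "Next Step")──▶ finished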

class Session(dict):
    phase: str  # await_task | coding_loop | await_feedback | finished
    chat: AsyncChat | None
    last_video: Path | None

    def __init__(self):
        super().__init__(phase="await_task", chat=None, last_video=None)
        self.phase = "await_task"
        self.chat = None
        self.last_video = None

# ────────────────────────  Main chat handler  ──────────────────────────────────

async def chat_handler(user_msg: str, history: List[Tuple[str, str]], state: Session):
    history = history or []

    # 0. Always reflect user input
    add_user_msg(history, user_msg)
    yield history, state, state.last_video

    # bootstrap chat on very first user request
    if state.phase == "await_task":
        if not state.chat:
            # First time - create chat and generate scenario
            state.chat = client.aio.chats.create(model=MODEL)
            scenario_prompt = f"{SYSTEM_PROMPT_SCENARIO_GENERATOR}\n\n{user_msg}"
            async for part in stream_parts(state.chat, scenario_prompt):
                append_bot_chunk(history, part.text)
                yield history, state, state.last_video
                await asyncio.sleep(0)
            append_bot_chunk(history, "\n\n*(press **Next Step** to proceed to code generation)*")
            yield history, state, state.last_video
            return
        else:
            # Chat exists - check if user wants to proceed or modify scenario
            if user_msg.strip().lower() in {"c", "continue", "с"}:  # the last entry is the Cyrillic "с"
                # Legacy behaviour: typed command to proceed
                state.phase = "coding_loop"
            else:
                # User wants to discuss/modify scenario
                async for chunk in stream_parts(state.chat, user_msg):
                    append_bot_chunk(history, chunk.text)
                    yield history, state, state.last_video
                    await asyncio.sleep(0)
                append_bot_chunk(history, "\n\n*(press **Next Step** when ready to proceed to code generation)*")
                yield history, state, state.last_video
                return

    # later phases require chat obj
    if not state.chat:
        raise ValueError("Chat not found")

    # ── Coding loop ─────────────────────────────────────────────────────────────
    if state.phase == "coding_loop":
        prompt = "Thanks. It is good scenario. Now generate code for it.\n\n" + SYSTEM_PROMPT_CODEGEN
        async for out in coding_cycle(state, history, prompt):
            yield out
        return
    # ── Awaiting user feedback after rendering ────────────────────────────────
    if state.phase == "await_feedback":
        if user_msg.strip().lower() in {"finish", "done", "f"}:
            state.phase = "finished"
            append_bot_chunk(history, "Session complete. Refresh page to start over.")
            yield history, state, state.last_video
            return
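        # Upload the freshly rendered video through the Files API and poll
        # until Gemini finishes processing it (state leaves PROCESSING).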
        file_ref = client.files.upload(
            file=state.last_video,
            config=UploadFileConfig(display_name=state.last_video.name),
        )
        while file_ref.state and file_ref.state.name == "PROCESSING":
            await asyncio.sleep(3)
            if file_ref.name:
                file_ref = client.files.get(name=file_ref.name)
        if file_ref.state and file_ref.state.name == "FAILED":
            raise RuntimeError("Gemini failed to process upload")
        prompt = [file_ref, f"{user_msg}\n\n{SYSTEM_PROMPT_CODEGEN}"]
        state.phase = "coding_loop"
        async for out in coding_cycle(state, history, prompt):
            yield out
        return

    # ── Finished phase ──────────────────────────────────────────────────────────
    if state.phase == "finished":
        append_bot_chunk(history, "Session complete. Refresh page to start over.")
        yield history, state, state.last_video

async def next_step_handler(history: List[Tuple[str, str]], state: Session):
    """Advance the conversation without typing control words."""
    history = history or []
    if state.phase == "await_task" and state.chat:
        state.phase = "coding_loop"
        prompt = "Thanks. It is good scenario. Now generate code for it.\n\n" + SYSTEM_PROMPT_CODEGEN
        async for out in coding_cycle(state, history, prompt):
            yield out
        return

    if state.phase == "await_feedback":
        state.phase = "finished"
        append_bot_chunk(history, "Session complete. Refresh page to start over.")
        yield history, state, state.last_video
        return

    yield history, state, state.last_video


# ───────────────────────────────  UI  ──────────────────────────────────────────

def build_app():
    with gr.Blocks(title="Gemini‑Manim Video Creator") as demo:
        gr.Markdown("# 🎬 Gemini‑Manim Video Creator\nCreate an explanatory animation from a single prompt.")

        history = gr.Chatbot(height=850)
        session = gr.State(Session())
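        # Note: gr.State deep-copies this initial value for every new browser
        # session, so each visitor gets an independent Session object.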

        with gr.Row():
            txt = gr.Textbox(placeholder="Describe the concept…", scale=4)
            btn = gr.Button("Send", variant="primary")
            next_btn = gr.Button("Next Step")

        vid = gr.Video(label="Rendered video", interactive=False)

        btn.click(chat_handler, [txt, history, session], [history, session, vid]) \
           .then(lambda: "", None, txt)

        next_btn.click(next_step_handler, [history, session], [history, session, vid])

    return demo


if __name__ == "__main__":
    build_app().launch()