VibeGame / src /lib /server /langgraph-agent.ts
dylanebert
improved prompting/UX
db9635c
import { InferenceClient } from "@huggingface/inference";
import { StateGraph, Annotation, START, END } from "@langchain/langgraph";
import {
HumanMessage,
AIMessage,
BaseMessage,
ToolMessage,
} from "@langchain/core/messages";
import { observeConsoleTool } from "./tools";
import { mcpClientManager, setMCPWebSocketConnection } from "./mcp-client";
import { planTasksTool, updateTaskTool, viewTasksTool } from "./task-tracker";
import { documentationService } from "./documentation";
import {
StreamingToolCallParser,
extractToolCalls,
} from "../utils/tool-call-parser";
import { virtualFileSystem } from "../services/virtual-fs";
import type { WebSocket } from "ws";
/**
 * State shared by every node of the agent graph.
 *
 * - `messages`: the conversation transcript. Each node update is appended to
 *   the list that is already in the state.
 * - `hasToolCalls`: flag written by the agent node. Every update replaces the
 *   previous value; the initial value is `false`.
 */
const AgentState = Annotation.Root({
  messages: Annotation<BaseMessage[]>({
    reducer: (existing, incoming) => existing.concat(incoming),
  }),
  hasToolCalls: Annotation<boolean>({
    reducer: (_previous, latest) => latest,
    default: () => false,
  }),
});
export class LangGraphAgent {
private client: InferenceClient | null = null;
private graph!: ReturnType<typeof StateGraph.prototype.compile>;
private model: string = "Qwen/Qwen3-Next-80B-A3B-Instruct";
private documentation: string = "";
private ws: WebSocket | null = null;
/**
 * Builds and compiles the agent graph right away, so `this.graph` is assigned
 * during construction. The inference client is not created here; see
 * `initialize`.
 */
constructor() {
  this.setupGraph();
}
/**
 * Prepares the agent for use.
 *
 * Creates the inference client from the token, optionally stores the
 * WebSocket (and hands it to the MCP client module), initializes the MCP
 * client manager, and loads the documentation text that is later embedded in
 * the system prompt.
 *
 * @param hfToken - Hugging Face token; must be non-empty.
 * @param ws - Optional WebSocket. When omitted, any previously stored socket
 *   is left in place.
 * @throws Error when `hfToken` is empty.
 */
async initialize(hfToken: string, ws?: WebSocket) {
  if (!hfToken) {
    throw new Error("Hugging Face authentication required");
  }

  this.client = new InferenceClient(hfToken);

  if (ws) {
    this.ws = ws;
    setMCPWebSocketConnection(ws);
  }

  await mcpClientManager.initialize();

  // Fall back to an empty string when no documentation could be loaded.
  this.documentation = (await documentationService.load()) || "";
}
/**
 * Builds the agent graph and stores the compiled result in `this.graph`.
 *
 * The graph has a single "agent" node wired as START -> agent, followed by a
 * conditional edge (decided by `shouldContinue`) that either loops back to
 * the agent or ends the run.
 *
 * On every invocation the agent node:
 * 1. Streams a model response. Each raw token is forwarded to
 *    `config.writer` as `{ type: "token", content }`.
 * 2. Feeds the tokens through a `StreamingToolCallParser` and forwards only
 *    the parser's "safe" text to the WebSocket as text segments
 *    (`segment_start` / `segment_token` / `segment_end`).
 * 3. Extracts tool calls from the full response. If there are any, it runs
 *    them and returns the AI message plus the tool results.
 * 4. Otherwise returns the response as a plain AI message (or the reminder
 *    message, see the note inside the node).
 *
 * WebSocket events are only sent while the socket is OPEN, matching the guard
 * used by `executeToolsWithSegments`.
 */
private setupGraph() {
  const graph = new StateGraph(AgentState);

  graph.addNode("agent", async (state, config) => {
    const systemPrompt = this.buildSystemPrompt();
    const messages = this.formatMessages(state.messages, systemPrompt);
    const parser = new StreamingToolCallParser();
    const messageId = config?.metadata?.messageId;
    const abortSignal = config?.metadata?.abortSignal as
      | AbortSignal
      | undefined;

    // True only when a socket is attached and currently open.
    const socketReady = (): boolean =>
      this.ws !== null && this.ws.readyState === this.ws.OPEN;

    // Sends one event to the client; silently does nothing when the socket
    // is missing or not open, so a disconnected client cannot break the run.
    const emit = (type: string, payload: Record<string, unknown>): void => {
      if (this.ws && this.ws.readyState === this.ws.OPEN) {
        this.ws.send(JSON.stringify({ type, payload, timestamp: Date.now() }));
      }
    };

    let fullResponse = "";
    let currentSegmentId: string | null = null;

    for await (const token of this.streamModelResponse(
      messages,
      abortSignal,
    )) {
      fullResponse += token;
      config?.writer?.({ type: "token", content: token });

      const parseResult = parser.process(token);

      // Only text the parser marks as safe (no tool-call markup) is streamed.
      if (parseResult.safeText) {
        // Open a text segment on the first non-blank safe text.
        if (!currentSegmentId && parseResult.safeText.trim() && socketReady()) {
          currentSegmentId = `seg_${Date.now()}_${Math.random()}`;
          emit("segment_start", {
            segmentId: currentSegmentId,
            segmentType: "text",
            messageId,
          });
        }
        if (currentSegmentId) {
          emit("segment_token", {
            segmentId: currentSegmentId,
            token: parseResult.safeText,
            messageId,
          });
        }
      }

      // A tool call is starting: close the text segment that was open.
      if (parseResult.pendingToolCall && currentSegmentId) {
        emit("segment_end", { segmentId: currentSegmentId, messageId });
        currentSegmentId = null;
      }
    }

    // Close a text segment that is still open once the stream has finished.
    if (currentSegmentId) {
      // NOTE(review): buffered text is only flushed when a segment is already
      // open, and the flush relies on `parser.process("")` releasing the
      // buffer - confirm against the parser implementation.
      const remainingBuffer = parser.getBuffer();
      if (remainingBuffer && !parser.isInToolCall()) {
        const finalResult = parser.process("");
        if (finalResult.safeText) {
          emit("segment_token", {
            segmentId: currentSegmentId,
            token: finalResult.safeText,
            messageId,
          });
        }
      }
      emit("segment_end", { segmentId: currentSegmentId, messageId });
    }

    const toolCalls = extractToolCalls(fullResponse);
    if (toolCalls.length > 0) {
      const toolResults = await this.executeToolsWithSegments(
        toolCalls,
        messageId as string | undefined,
      );
      return {
        messages: [
          new AIMessage({
            content: fullResponse,
            additional_kwargs: { has_tool_calls: true },
          }),
          ...toolResults,
        ],
        hasToolCalls: true,
      };
    }

    // No tool call was produced. If the newest message is a user request that
    // looks like it needs tools, a reminder message is recorded in place of
    // the model's answer.
    // NOTE(review): `shouldContinue` returns "end" for this message, so no
    // retry actually happens and `fullResponse` is not stored in the state -
    // confirm whether that is intended.
    const lastUserMessage = state.messages[state.messages.length - 1];
    if (
      lastUserMessage instanceof HumanMessage &&
      this.shouldUseTools(lastUserMessage.content as string) &&
      !state.hasToolCalls
    ) {
      return {
        messages: [
          new AIMessage(
            "I need to use tools to complete this task. Let me try again with the appropriate tool.",
          ),
        ],
        hasToolCalls: false,
      };
    }

    return {
      messages: [new AIMessage(fullResponse)],
      hasToolCalls: false,
    };
  });

  // @ts-expect-error - LangGraph type mismatch with START constant
  graph.addEdge(START, "agent");
  // @ts-expect-error - LangGraph type mismatch with conditional edges
  graph.addConditionalEdges("agent", (state) => this.shouldContinue(state), {
    continue: "agent",
    end: END,
  });

  this.graph = graph.compile();
}
/**
 * Routing function for the conditional edge after the agent node.
 *
 * @param state - Current graph state.
 * @returns `"continue"` when the newest message is a tool result or an AI
 *   message flagged with `additional_kwargs.has_tool_calls`; `"end"`
 *   otherwise (including when there are no messages).
 */
private shouldContinue(state: typeof AgentState.State): string {
  const newest = state.messages[state.messages.length - 1];

  const isToolResult = newest instanceof ToolMessage;
  const requestedTools =
    newest instanceof AIMessage &&
    Boolean(newest.additional_kwargs?.has_tool_calls);

  return isToolResult || requestedTools ? "continue" : "end";
}
/**
 * Returns the system prompt sent as the first message of every model call.
 *
 * The prompt is a fixed template that describes the single-editor
 * environment, the `<tool name="...">{...}</tool>` call format, the available
 * tools and the expected workflow. The only dynamic part is
 * `this.documentation`, interpolated under the "VibeGame Expert Knowledge"
 * heading.
 *
 * Everything between the backticks is runtime text: whitespace and line
 * breaks inside the template are part of the prompt.
 *
 * NOTE(review): the "Sequential Tool Execution" section refers to "TOOL:
 * commands", which does not match the mandated `<tool ...>` format described
 * earlier in the prompt - the wording looks stale; confirm before changing.
 */
private buildSystemPrompt(): string {
  return `## Role & Primary Guidance
You are a VibeGame Engine Specialist operating in a single-file editor environment. You work with ONE game file that users can see and edit in real-time, similar to JSFiddle or CodePen.
## Live Coding Environment Context
- **SINGLE EDITOR**: There is exactly ONE editor file containing the game's XML/HTML code
- **"The code" ALWAYS refers to**: The content currently in the editor
- **Live Preview**: Changes to the editor automatically reload the game
- **User's View**: Users see the same editor you're modifying
- **GAME is PRE-IMPORTED**: The GAME object is automatically available - NEVER write \`import * as GAME from 'vibegame'\`
- **Auto-run enabled**: GAME.run() is called automatically - use \`GAME.withPlugin(MyPlugin)\` NOT \`GAME.withPlugin(MyPlugin).run()\`
## VibeGame Expert Knowledge
${this.documentation}
## MCP Tool Execution Protocol
### Format Requirements
- MANDATORY format: <tool name="tool_name">{"param": "value"}</tool>
- JSON arguments must be valid JSON (use double quotes for strings)
- **CRITICAL**: Execute ONE tool at a time and wait for the result
- The parser will handle unclosed tags gracefully for recovery
- **NEVER** generate multiple tool calls in a single response
- After each tool execution, analyze the result before deciding next action
- ALWAYS check console after making changes
- The game auto-reloads after editor changes
### Tool Execution Rules
1. Execute a single tool
2. Read and analyze the tool's response
3. Only then decide if another tool is needed
4. If multiple edits are needed, use plan_tasks first to organize them
### Available MCP Tools (All operate on the SINGLE editor file)
#### Understanding the Editor Content
- search_editor: Find text/patterns in the current game code
Parameters:
- query: string - Text or regex pattern to search for
- mode: "text" | "regex" - Search mode (optional, default: "text")
- contextLines: number - Lines of context (optional, default: 2, max: 5)
Example: <tool name="search_editor">{"query": "dynamic-part", "mode": "text"}</tool>
- read_editor_lines: Read specific lines from the current game code
Parameters:
- startLine: number - Starting line (1-indexed)
- endLine: number - Ending line (optional, defaults to startLine)
Example: <tool name="read_editor_lines">{"startLine": 10, "endLine": 20}</tool>
- read_editor: Read the complete game code currently in the editor
Parameters: none
Example: <tool name="read_editor">{}</tool>
Note: Use when user asks to "explain the code", "show the code", or needs full context
#### Modifying the Game
- edit_editor: Make targeted changes to the game code
Parameters:
- oldText: string - Exact text to find and replace (max ~20 lines). MUST include actual content, not just whitespace
- newText: string - Replacement text
Example: <tool name="edit_editor">{"oldText": "color='#ff0000'", "newText": "color='#00ff00'"}</tool>
IMPORTANT: Always include meaningful content (tags, comments, or code) in oldText, never just spaces/newlines
Note: Returns standardized response with game status and console output
- write_editor: Replace all game code with new content
Parameters:
- content: string - Complete new game code
Example: <tool name="write_editor">{"content": "<world>...</world>"}</tool>
Note: Use ONLY for starting fresh or complete rewrites
#### Task Management
- plan_tasks: Create task list for complex operations
Parameters:
- tasks: string[] - Array of task descriptions
Example: <tool name="plan_tasks">{"tasks": ["Add physics component", "Create gravity system", "Test gravity effect"]}</tool>
- update_task: Update task status
Parameters:
- taskId: number - Task ID to update
- status: "pending" | "in_progress" | "completed"
Example: <tool name="update_task">{"taskId": 1, "status": "completed"}</tool>
- view_tasks: View current task list
Parameters: none
Example: <tool name="view_tasks">{}</tool>
#### Runtime Monitoring
- observe_console: Check console messages and game state
Parameters: none
Example: <tool name="observe_console">{}</tool>
Note: Returns recent console messages and game status
## Tool Response Format
All editor modification tools return:
- Success indicator (✅ or ❌)
- Action description
- Game status (Running/Error/Loading)
- Console output from the game
## Common User Requests (Context Clarification)
When users say:
- "explain the code" → Read and explain the CURRENT editor content
- "what's in the game?" → Describe the entities/elements in the editor
- "fix the error" → Check console, then modify the editor content
- "add a..." → Add new elements to the existing editor content
- "change the..." → Modify existing elements in the editor
## Execution Patterns
### Code Writing Rules (Live Environment)
- **NO IMPORTS**: GAME is pre-imported globally - NEVER write \`import * as GAME from 'vibegame'\`
- **NO .run()**: Auto-run is enabled - write \`GAME.withPlugin(MyPlugin)\` not \`GAME.withPlugin(MyPlugin).run()\`
- **Direct access**: Use GAME.defineComponent, GAME.Types, etc. directly
- **Script tags**: When adding JavaScript, use \`<script>\` tags in the XML/HTML
### Standard Workflow (ONE TOOL AT A TIME)
1. Execute: <tool name="read_editor">{}</tool> (if needed) → WAIT for result
2. Execute: <tool name="search_editor">{"query": "target_element"}</tool> → WAIT for result
3. Execute: <tool name="read_editor_lines">{"startLine": X, "endLine": Y}</tool> → WAIT for result
4. Execute: <tool name="edit_editor">{"oldText": "...", "newText": "..."}</tool> → WAIT for result
5. Execute: <tool name="observe_console">{}</tool> → WAIT for result
6. Based on console output, decide if another edit is needed
### IMPORTANT: Sequential Tool Execution
- NEVER chain multiple TOOL: commands in one response
- Each tool call must be in its own separate message
- Always analyze the tool result before proceeding
- If you need to make multiple edits, use plan_tasks to organize them first
### Error Recovery
When you see an error in tool results:
1. Read the error message carefully
2. Search for the problematic code
3. Make corrections with edit_editor
4. Verify fix with observe_console
### Complex Changes
For changes requiring multiple edits:
1. <tool name="plan_tasks">{"tasks": [...]}</tool>
2. Update task status as you progress
3. Break edits into small chunks (<20 lines each)
4. Test after each significant change
## Critical Rules
- REMEMBER: You work with ONE editor file - all references to "the code" mean this single file
- ALWAYS check console output after edits
- NEVER skip tool execution - implement directly
- Use exact text matches for edit_editor
- Keep individual edits small and focused
- When unsure what user refers to, assume they mean the editor content
Remember: You're in a live coding environment. Users see the same editor you're modifying. Tool responses include console output - read them carefully to understand game state.`;
}
/**
 * Streams a chat completion from the configured model.
 *
 * Yields every non-empty content delta as it arrives and, once the stream is
 * exhausted, returns the concatenation of everything that was yielded.
 *
 * @param messages - Chat messages (role/content pairs) sent to the model.
 * @param abortSignal - Optional signal, checked before each chunk is handled.
 * @throws Error("Agent not initialized") when no client has been created.
 * @throws Error("AbortError") when the signal is aborted while streaming.
 */
private async *streamModelResponse(
  messages: Array<{ role: string; content: string }>,
  abortSignal?: AbortSignal,
): AsyncGenerator<string, string, unknown> {
  const client = this.client;
  if (!client) {
    throw new Error("Agent not initialized");
  }

  const completion = client.chatCompletionStream({
    model: this.model,
    messages,
    temperature: 0.3,
    max_tokens: 2048,
  });

  const pieces: string[] = [];
  for await (const chunk of completion) {
    if (abortSignal?.aborted) {
      throw new Error("AbortError");
    }

    // Chunks without text content (missing or empty delta) are skipped.
    const piece = chunk.choices[0]?.delta?.content;
    if (!piece) {
      continue;
    }

    pieces.push(piece);
    yield piece;
  }

  return pieces.join("");
}
/**
 * Heuristic that guesses whether a user message asks for something that
 * requires a tool.
 *
 * The check is a case-insensitive substring match, so a keyword also matches
 * inside longer words.
 *
 * @param content - Raw user message text.
 * @returns `true` when the lower-cased text contains any action keyword.
 */
private shouldUseTools(content: string): boolean {
  const haystack = content.toLowerCase();
  const triggers = [
    "find",
    "search",
    "look for",
    "where",
    "change",
    "modify",
    "update",
    "edit",
    "replace",
    "show",
    "display",
    "read",
    "what",
    "check",
    "create",
    "write",
    "add",
    "implement",
    "fix",
    "debug",
    "error",
    "console",
  ];

  for (const trigger of triggers) {
    if (haystack.includes(trigger)) {
      return true;
    }
  }
  return false;
}
/**
 * Deduplicate tool calls to prevent redundant operations.
 *
 * Two calls are duplicates when they have the same signature:
 * - `edit_editor`: tool name plus the complete `oldText`, so two edits that
 *   target the same text collapse into one while edits that merely share a
 *   long common prefix are kept apart.
 * - `write_editor`: every call shares one signature, so only the first
 *   full replacement in a batch is kept.
 * - any other tool: tool name plus the JSON-serialized arguments.
 *
 * This runs before `validateToolCall`, so the arguments are untrusted: an
 * `edit_editor` call whose `oldText` is not a string falls back to the
 * JSON-based signature instead of throwing.
 *
 * @param toolCalls - Calls in the order the model produced them.
 * @returns The first occurrence of each distinct call, in original order.
 */
private deduplicateToolCalls(
  toolCalls: Array<{ name: string; args: Record<string, unknown> }>,
): Array<{ name: string; args: Record<string, unknown> }> {
  const unique: Array<{ name: string; args: Record<string, unknown> }> = [];
  const seenSignatures = new Set<string>();

  for (const call of toolCalls) {
    const oldText = call.args.oldText;

    let signature = `${call.name}:`;
    if (call.name === "edit_editor" && typeof oldText === "string") {
      signature += oldText;
    } else if (call.name === "write_editor") {
      signature += "FULL_REPLACE";
    } else {
      signature += JSON.stringify(call.args);
    }

    if (seenSignatures.has(signature)) {
      console.warn(`Skipping duplicate tool call: ${call.name}`);
      continue;
    }

    seenSignatures.add(signature);
    unique.push(call);
  }

  return unique;
}
/**
 * Converts the LangChain message history into the role/content pairs sent to
 * the chat completion API, with the system prompt placed first.
 *
 * Role mapping:
 * - `HumanMessage` -> "user"
 * - `ToolMessage` -> "assistant", with the content prefixed by
 *   `Tool result for <tool name>: `
 * - anything else -> "assistant"
 *
 * Content that is not a string is JSON-serialized.
 *
 * @param messages - Conversation history in order.
 * @param systemPrompt - Text of the leading system message.
 */
private formatMessages(
  messages: BaseMessage[],
  systemPrompt: string,
): Array<{ role: string; content: string }> {
  const asText = (content: unknown): string =>
    typeof content === "string" ? content : JSON.stringify(content);

  const history = messages.map((msg) => {
    if (msg instanceof ToolMessage) {
      return {
        role: "assistant",
        content: `Tool result for ${msg.name}: ${asText(msg.content)}`,
      };
    }

    const role = msg instanceof HumanMessage ? "user" : "assistant";
    return { role, content: asText(msg.content) };
  });

  return [{ role: "system", content: systemPrompt }, ...history];
}
/**
 * Runs the given tool calls one after another and reports progress to the
 * client over the WebSocket.
 *
 * For every call (after deduplication):
 * 1. The arguments are validated; an invalid call yields an error result and
 *    is not executed.
 * 2. An `edit_editor` call issued after an earlier edit in the same batch is
 *    skipped when its `oldText` is no longer present in the game file.
 * 3. Large `edit_editor` / `write_editor` arguments are logged, and an
 *    `edit_editor` whose `oldText` exceeds 20 lines is rejected.
 * 4. The call is dispatched to the matching MCP tool if one exists, otherwise
 *    to one of the built-in tools (`observe_console`, `plan_tasks`,
 *    `update_task`, `view_tasks`); anything else yields an "Unknown tool"
 *    result.
 * 5. `segment_start` / `segment_end` events describing the invocation and its
 *    result are sent while the socket is open.
 *
 * Failures while executing a tool are caught and turned into error results,
 * so one failing call does not abort the remaining ones.
 *
 * @param toolCalls - Tool calls extracted from the model response.
 * @param messageId - Identifier attached to every WebSocket event.
 * @returns One `ToolMessage` per processed call, in execution order.
 */
private async executeToolsWithSegments(
  toolCalls: Array<{ name: string; args: Record<string, unknown> }>,
  messageId?: string,
): Promise<BaseMessage[]> {
  const results: BaseMessage[] = [];
  const processedCalls = this.deduplicateToolCalls(toolCalls);

  // Set once an edit_editor / write_editor MCP call has run in this batch.
  let editorModified = false;

  const socketOpen = (): boolean =>
    this.ws !== null && this.ws.readyState === this.ws.OPEN;

  // Sends one event to the client; does nothing unless the socket is open.
  const emit = (type: string, payload: Record<string, unknown>): void => {
    if (this.ws && this.ws.readyState === this.ws.OPEN) {
      this.ws.send(JSON.stringify({ type, payload, timestamp: Date.now() }));
    }
  };

  for (const call of processedCalls) {
    const segmentId = `seg_tool_${Date.now()}_${Math.random()}`;
    const isEditorWrite =
      call.name === "edit_editor" || call.name === "write_editor";

    const validation = this.validateToolCall(call);
    if (!validation.valid) {
      console.error(`Invalid tool call: ${call.name}`, validation.error);
      results.push(
        new ToolMessage({
          content: `Error: ${validation.error}`,
          tool_call_id: segmentId,
          name: call.name,
        }),
      );
      continue;
    }

    // A previous edit in this batch may already have replaced the text this
    // edit targets; in that case the edit is skipped.
    if (call.name === "edit_editor" && editorModified) {
      const currentContent = virtualFileSystem.getGameFile().content;
      const oldText = call.args.oldText as string;
      if (!currentContent.includes(oldText)) {
        console.warn(
          `Skipping edit_editor: Text no longer exists after previous edit`,
        );
        results.push(
          new ToolMessage({
            content: `Skipped: The text to replace was already modified by a previous edit. The current operation is no longer needed.`,
            tool_call_id: segmentId,
            name: call.name,
          }),
        );
        continue;
      }
    }

    try {
      // Rough size estimate: about four characters per token.
      const estimatedTokens = JSON.stringify(call.args).length / 4;
      if (estimatedTokens > 1000 && isEditorWrite) {
        console.warn(
          `Warning: Tool ${call.name} arguments are large (${estimatedTokens} estimated tokens)`,
        );
        if (call.name === "edit_editor" && call.args.oldText) {
          const oldText = call.args.oldText as string;
          const lineCount = oldText.split("\n").length;
          if (lineCount > 20) {
            results.push(
              new ToolMessage({
                content: `Error: The edit is too large (${lineCount} lines). Please break this into smaller edits of max 20 lines each. Use plan_tasks to organize multiple edits.`,
                tool_call_id: segmentId,
                name: call.name,
              }),
            );
            continue;
          }
        }
      }

      // Announce the invocation and mark it as running.
      emit("segment_start", {
        segmentId,
        segmentType: "tool-invocation",
        messageId,
        toolName: call.name,
        toolArgs: call.args,
      });
      emit("segment_end", { segmentId, toolStatus: "running", messageId });

      let result;
      let consoleOutput: string[] = [];

      const tool = mcpClientManager
        .getTools()
        .find((t) => t.name === call.name);

      if (tool) {
        result = await tool.func(call.args);
        if (isEditorWrite) {
          editorModified = true;
        }
        // Everything after the "Console output:" marker is split into
        // non-blank lines and forwarded separately to the client.
        const consoleMatch = result.match(/Console output:\n([\s\S]*?)$/);
        if (consoleMatch) {
          consoleOutput = consoleMatch[1]
            .split("\n")
            .filter((line: string) => line.trim());
        }
      } else if (call.name === "observe_console") {
        result = await observeConsoleTool.func("");
      } else if (call.name === "plan_tasks") {
        result = await planTasksTool.func(call.args as { tasks: string[] });
      } else if (call.name === "update_task") {
        result = await updateTaskTool.func(
          call.args as {
            taskId: number;
            status: "pending" | "in_progress" | "completed";
          },
        );
      } else if (call.name === "view_tasks") {
        result = await viewTasksTool.func({});
      } else {
        result = `Unknown tool: ${call.name}`;
      }

      if (socketOpen()) {
        emit("segment_end", {
          segmentId,
          toolStatus: "completed",
          messageId,
        });

        // The result is delivered as its own segment.
        const resultSegmentId = `seg_result_${Date.now()}_${Math.random()}`;
        emit("segment_start", {
          segmentId: resultSegmentId,
          segmentType: "tool-result",
          messageId,
          toolName: call.name,
        });
        emit("segment_end", {
          segmentId: resultSegmentId,
          toolOutput: result,
          consoleOutput: consoleOutput.length > 0 ? consoleOutput : undefined,
          toolStatus: "completed",
          messageId,
        });
      }

      results.push(
        new ToolMessage({
          content: result,
          tool_call_id: segmentId,
          name: call.name,
        }),
      );
    } catch (error) {
      emit("segment_end", {
        segmentId,
        toolStatus: "error",
        toolError: error instanceof Error ? error.message : String(error),
        messageId,
      });
      results.push(
        new ToolMessage({
          content: `Error executing ${call.name}: ${error}`,
          tool_call_id: segmentId,
          name: call.name,
        }),
      );
    }
  }

  return results;
}
/**
 * Checks the arguments of a tool call before it is executed.
 *
 * Only the tools listed in the validation table are checked; a call to any
 * other tool is reported as valid.
 *
 * @param call - Tool name and parsed arguments.
 * @returns `{ valid: true }` when the arguments pass, otherwise
 *   `{ valid: false, error }` with a message describing the first problem.
 */
private validateToolCall(call: {
  name: string;
  args: Record<string, unknown>;
}): { valid: boolean; error?: string } {
  // Each validator returns an error message, or null when the args are fine.
  const toolValidations: Record<
    string,
    (args: Record<string, unknown>) => string | null
  > = {
    edit_editor: (args) => {
      if (!args.oldText || typeof args.oldText !== "string") {
        return "Missing or invalid 'oldText' parameter";
      }
      // An empty replacement is a legitimate edit (it deletes `oldText`), so
      // only the type of `newText` is checked.
      if (typeof args.newText !== "string") {
        return "Missing or invalid 'newText' parameter";
      }
      const lines = (args.oldText as string).split("\n").length;
      if (lines > 30) {
        return `Edit too large (${lines} lines). Break into smaller edits.`;
      }
      return null;
    },
    write_editor: (args) => {
      if (!args.content || typeof args.content !== "string") {
        return "Missing or invalid 'content' parameter";
      }
      return null;
    },
    search_editor: (args) => {
      if (!args.query || typeof args.query !== "string") {
        return "Missing or invalid 'query' parameter";
      }
      if (args.mode && !["text", "regex"].includes(args.mode as string)) {
        return "Invalid 'mode' parameter. Must be 'text' or 'regex'";
      }
      return null;
    },
    read_editor_lines: (args) => {
      if (!args.startLine || typeof args.startLine !== "number") {
        return "Missing or invalid 'startLine' parameter";
      }
      if (args.startLine < 1) {
        return "'startLine' must be >= 1";
      }
      // endLine is optional; it is only checked when present.
      if (args.endLine && typeof args.endLine !== "number") {
        return "Invalid 'endLine' parameter";
      }
      return null;
    },
    plan_tasks: (args) => {
      if (!args.tasks || !Array.isArray(args.tasks)) {
        return "Missing or invalid 'tasks' parameter. Must be an array.";
      }
      if (args.tasks.length === 0) {
        return "'tasks' array cannot be empty";
      }
      return null;
    },
    update_task: (args) => {
      if (typeof args.taskId !== "number") {
        return "Missing or invalid 'taskId' parameter";
      }
      if (
        !args.status ||
        !["pending", "in_progress", "completed"].includes(
          args.status as string,
        )
      ) {
        return "Invalid 'status' parameter. Must be 'pending', 'in_progress', or 'completed'";
      }
      return null;
    },
  };

  const validator = toolValidations[call.name];
  if (validator) {
    const error = validator(call.args);
    if (error) {
      return { valid: false, error };
    }
  }
  return { valid: true };
}
/**
 * Runs one user message through the agent graph.
 *
 * The graph is streamed in "custom" and "updates" modes; only custom events
 * of type "token" are consumed. Each token is appended to the returned text
 * and forwarded to `onStream`.
 *
 * @param message - New user message.
 * @param messageHistory - Earlier messages placed before the new one.
 * @param onStream - Optional callback invoked with every streamed token.
 * @param messageId - Identifier passed to the graph through run metadata.
 * @param abortSignal - Optional signal, checked before each stream chunk is
 *   handled and also passed to the graph through run metadata.
 * @returns The concatenation of all streamed tokens.
 * @throws Error("Agent not initialized") when `initialize` has not created a
 *   client.
 * @throws Error("AbortError") when the signal is aborted while streaming.
 */
async processMessage(
  message: string,
  messageHistory: BaseMessage[] = [],
  onStream?: (chunk: string) => void,
  messageId?: string,
  abortSignal?: AbortSignal,
): Promise<string> {
  if (!this.client) {
    throw new Error("Agent not initialized");
  }

  const stream = await this.graph.stream(
    {
      messages: [...messageHistory, new HumanMessage(message)],
      hasToolCalls: false,
    },
    {
      streamMode: ["custom", "updates"] as const,
      metadata: { messageId, abortSignal },
    },
  );

  let collected = "";
  for await (const chunk of stream) {
    if (abortSignal?.aborted) {
      throw new Error("AbortError");
    }

    // With several stream modes, chunks arrive as [mode, data] tuples.
    if (!Array.isArray(chunk)) {
      continue;
    }

    const [mode, data] = chunk;
    if (mode !== "custom" || data?.type !== "token") {
      continue;
    }

    collected += data.content;
    onStream?.(data.content);
  }

  return collected;
}
}