Spaces:

dylanebert
/

VibeGame

Running

VibeGame / src /lib /server /langgraph-agent.ts

dylanebert

improved prompting/UX

db9635c 2 days ago

27.6 kB

	import { InferenceClient } from "@huggingface/inference";
	import { StateGraph, Annotation, START, END } from "@langchain/langgraph";
	import {
	HumanMessage,
	AIMessage,
	BaseMessage,
	ToolMessage,
	} from "@langchain/core/messages";
	import { observeConsoleTool } from "./tools";
	import { mcpClientManager, setMCPWebSocketConnection } from "./mcp-client";
	import { planTasksTool, updateTaskTool, viewTasksTool } from "./task-tracker";
	import { documentationService } from "./documentation";
	import {
	StreamingToolCallParser,
	extractToolCalls,
	} from "../utils/tool-call-parser";
	import { virtualFileSystem } from "../services/virtual-fs";
	import type { WebSocket } from "ws";

	/**
	 * State shared between executions of the agent graph.
	 *
	 * - `messages`: conversation log; each node update is appended to the
	 *   existing list.
	 * - `hasToolCalls`: flag that is overwritten by the most recent update and
	 *   starts out as `false`.
	 */
	const AgentState = Annotation.Root({
	  messages: Annotation<BaseMessage[]>({
	    // Append-only: new messages are concatenated onto the existing history.
	    reducer: (existing, incoming) => existing.concat(incoming),
	  }),
	  hasToolCalls: Annotation<boolean>({
	    default: () => false,
	    // Last write wins.
	    reducer: (_previous, latest) => latest,
	  }),
	});

	export class LangGraphAgent {
	private client: InferenceClient \| null = null;
	private graph!: ReturnType<typeof StateGraph.prototype.compile>;
	private model: string = "Qwen/Qwen3-Next-80B-A3B-Instruct";
	private documentation: string = "";
	private ws: WebSocket \| null = null;

	/**
	 * Builds and compiles the agent graph immediately. The inference client,
	 * WebSocket and documentation are supplied later through `initialize()`.
	 */
	constructor() {
	this.setupGraph();
	}

	/**
	 * Prepares the agent for processing messages.
	 *
	 * Creates the Hugging Face inference client, optionally registers the
	 * WebSocket (both on this agent and with the MCP client module),
	 * initializes the MCP client manager and loads the documentation text that
	 * is embedded in the system prompt.
	 *
	 * @param hfToken Hugging Face access token; must be non-empty.
	 * @param ws Optional WebSocket used for streaming segment events.
	 * @throws Error when `hfToken` is empty.
	 */
	async initialize(hfToken: string, ws?: WebSocket) {
	  if (!hfToken) {
	    throw new Error("Hugging Face authentication required");
	  }

	  this.client = new InferenceClient(hfToken);

	  if (ws) {
	    this.ws = ws;
	    setMCPWebSocketConnection(ws);
	  }

	  await mcpClientManager.initialize();

	  // Fall back to an empty string when no documentation could be loaded.
	  this.documentation = (await documentationService.load()) || "";
	}

	/**
	 * Builds and compiles the agent graph: START -> "agent" -> ("agent" | END).
	 *
	 * The single "agent" node:
	 * 1. streams a model completion for the current message list;
	 * 2. forwards every raw token to `config.writer`, and forwards the text the
	 *    streaming parser reports as safe (tool-call markup withheld) to the
	 *    WebSocket as text segments;
	 * 3. extracts tool calls from the complete response and, if there are any,
	 *    executes them and returns the AI message plus the tool results, which
	 *    makes `shouldContinue` loop back into the node;
	 * 4. otherwise returns the plain AI response (or the reminder message, see
	 *    the note further down).
	 *
	 * Segment events are only written while the socket is OPEN, matching the
	 * guard used by `executeToolsWithSegments`.
	 */
	private setupGraph() {
	  const graph = new StateGraph(AgentState);

	  graph.addNode("agent", async (state, config) => {
	    const systemPrompt = this.buildSystemPrompt();
	    const messages = this.formatMessages(state.messages, systemPrompt);

	    let fullResponse = "";
	    const parser = new StreamingToolCallParser();
	    let currentSegmentId: string | null = null;
	    const messageId = config?.metadata?.messageId;
	    const abortSignal = config?.metadata?.abortSignal as
	      | AbortSignal
	      | undefined;

	    // Single place that serializes a segment event; silently skips the
	    // write when there is no socket or it is not open.
	    const sendSegmentEvent = (
	      type: "segment_start" | "segment_token" | "segment_end",
	      payload: Record<string, unknown>,
	    ): void => {
	      const socket = this.ws;
	      if (!socket || socket.readyState !== socket.OPEN) {
	        return;
	      }
	      socket.send(JSON.stringify({ type, payload, timestamp: Date.now() }));
	    };

	    for await (const token of this.streamModelResponse(
	      messages,
	      abortSignal,
	    )) {
	      fullResponse += token;
	      // Raw token stream (includes tool-call markup) for graph consumers.
	      config?.writer?.({ type: "token", content: token });

	      const parseResult = parser.process(token);

	      // Only text the parser marks as safe is shown as a text segment.
	      if (parseResult.safeText) {
	        // Open a text segment on the first non-whitespace safe text.
	        if (!currentSegmentId && parseResult.safeText.trim() && this.ws) {
	          currentSegmentId = `seg_${Date.now()}_${Math.random()}`;
	          sendSegmentEvent("segment_start", {
	            segmentId: currentSegmentId,
	            segmentType: "text",
	            messageId,
	          });
	        }

	        if (currentSegmentId) {
	          sendSegmentEvent("segment_token", {
	            segmentId: currentSegmentId,
	            token: parseResult.safeText,
	            messageId,
	          });
	        }
	      }

	      // A tool call is starting: close the text segment that precedes it.
	      if (parseResult.pendingToolCall && currentSegmentId) {
	        sendSegmentEvent("segment_end", {
	          segmentId: currentSegmentId,
	          messageId,
	        });
	        currentSegmentId = null;
	      }
	    }

	    // Stream finished with a text segment still open: flush and close it.
	    if (currentSegmentId) {
	      const remainingBuffer = parser.getBuffer();
	      if (remainingBuffer && !parser.isInToolCall()) {
	        const finalResult = parser.process("");
	        if (finalResult.safeText) {
	          sendSegmentEvent("segment_token", {
	            segmentId: currentSegmentId,
	            token: finalResult.safeText,
	            messageId,
	          });
	        }
	      }

	      sendSegmentEvent("segment_end", {
	        segmentId: currentSegmentId,
	        messageId,
	      });
	    }

	    const toolCalls = extractToolCalls(fullResponse);

	    if (toolCalls.length > 0) {
	      const toolResults = await this.executeToolsWithSegments(
	        toolCalls,
	        messageId as string | undefined,
	      );
	      return {
	        messages: [
	          new AIMessage({
	            content: fullResponse,
	            additional_kwargs: { has_tool_calls: true },
	          }),
	          ...toolResults,
	        ],
	        hasToolCalls: true,
	      };
	    }

	    // No tool call was produced. If the request (last message is the
	    // user's) looks like it needs a tool, answer with a reminder instead.
	    // NOTE(review): the reminder replaces the model's response in the
	    // history and `shouldContinue` then routes to END (the last message is
	    // an AIMessage without `has_tool_calls`), so no retry actually happens.
	    // Confirm whether that is intended.
	    if (state.messages.length > 0) {
	      const lastUserMessage = state.messages[state.messages.length - 1];
	      if (lastUserMessage instanceof HumanMessage) {
	        const needsTools = this.shouldUseTools(
	          lastUserMessage.content as string,
	        );
	        if (needsTools && !state.hasToolCalls) {
	          const reminderMessage = new AIMessage(
	            "I need to use tools to complete this task. Let me try again with the appropriate tool.",
	          );
	          return {
	            messages: [reminderMessage],
	            hasToolCalls: false,
	          };
	        }
	      }
	    }

	    return {
	      messages: [new AIMessage(fullResponse)],
	      hasToolCalls: false,
	    };
	  });

	  // @ts-expect-error - LangGraph type mismatch with START constant
	  graph.addEdge(START, "agent");

	  // @ts-expect-error - LangGraph type mismatch with conditional edges
	  graph.addConditionalEdges("agent", (state) => this.shouldContinue(state), {
	    continue: "agent",
	    end: END,
	  });

	  this.graph = graph.compile();
	}

	/**
	 * Routing decision for the conditional edge that leaves the "agent" node.
	 *
	 * @param state Current graph state; only the last message is inspected.
	 * @returns `"continue"` when the last message is a tool result, or an AI
	 *   message flagged with `additional_kwargs.has_tool_calls`; `"end"`
	 *   otherwise.
	 */
	private shouldContinue(state: typeof AgentState.State): string {
	  const latest = state.messages[state.messages.length - 1];

	  const endedWithToolResult = latest instanceof ToolMessage;
	  const aiRequestedTools =
	    latest instanceof AIMessage &&
	    Boolean(latest.additional_kwargs?.has_tool_calls);

	  return endedWithToolResult || aiRequestedTools ? "continue" : "end";
	}

	/**
	 * Returns the system prompt that `formatMessages` places first in every
	 * model request (the agent node calls this on each invocation).
	 *
	 * The text is static except for `this.documentation`, which is interpolated
	 * under the "VibeGame Expert Knowledge" heading. It describes the editing
	 * environment, the `<tool name="...">{json}</tool>` call format, and the
	 * parameters of each tool.
	 *
	 * The prompt is runtime text: changing any character changes what the model
	 * is told. Inside the template literal `\`` produces a literal backtick and
	 * `\|` evaluates to a plain `|`.
	 *
	 * NOTE(review): the "Sequential Tool Execution" section says "NEVER chain
	 * multiple TOOL: commands", which does not match the `<tool ...>` format
	 * mandated earlier in the same prompt - confirm the intended wording.
	 */
	private buildSystemPrompt(): string {
	return `## Role & Primary Guidance
	You are a VibeGame Engine Specialist operating in a single-file editor environment. You work with ONE game file that users can see and edit in real-time, similar to JSFiddle or CodePen.

	## Live Coding Environment Context
	- SINGLE EDITOR: There is exactly ONE editor file containing the game's XML/HTML code
	- "The code" ALWAYS refers to: The content currently in the editor
	- Live Preview: Changes to the editor automatically reload the game
	- User's View: Users see the same editor you're modifying
	- GAME is PRE-IMPORTED: The GAME object is automatically available - NEVER write \`import * as GAME from 'vibegame'\`
	- Auto-run enabled: GAME.run() is called automatically - use \`GAME.withPlugin(MyPlugin)\` NOT \`GAME.withPlugin(MyPlugin).run()\`

	## VibeGame Expert Knowledge
	${this.documentation}

	## MCP Tool Execution Protocol

	### Format Requirements
	- MANDATORY format: <tool name="tool_name">{"param": "value"}</tool>
	- JSON arguments must be valid JSON (use double quotes for strings)
	- CRITICAL: Execute ONE tool at a time and wait for the result
	- The parser will handle unclosed tags gracefully for recovery
	- NEVER generate multiple tool calls in a single response
	- After each tool execution, analyze the result before deciding next action
	- ALWAYS check console after making changes
	- The game auto-reloads after editor changes

	### Tool Execution Rules
	1. Execute a single tool
	2. Read and analyze the tool's response
	3. Only then decide if another tool is needed
	4. If multiple edits are needed, use plan_tasks first to organize them

	### Available MCP Tools (All operate on the SINGLE editor file)

	#### Understanding the Editor Content
	- search_editor: Find text/patterns in the current game code
	Parameters:
	- query: string - Text or regex pattern to search for
	- mode: "text" \| "regex" - Search mode (optional, default: "text")
	- contextLines: number - Lines of context (optional, default: 2, max: 5)
	Example: <tool name="search_editor">{"query": "dynamic-part", "mode": "text"}</tool>

	- read_editor_lines: Read specific lines from the current game code
	Parameters:
	- startLine: number - Starting line (1-indexed)
	- endLine: number - Ending line (optional, defaults to startLine)
	Example: <tool name="read_editor_lines">{"startLine": 10, "endLine": 20}</tool>

	- read_editor: Read the complete game code currently in the editor
	Parameters: none
	Example: <tool name="read_editor">{}</tool>
	Note: Use when user asks to "explain the code", "show the code", or needs full context

	#### Modifying the Game
	- edit_editor: Make targeted changes to the game code
	Parameters:
	- oldText: string - Exact text to find and replace (max ~20 lines). MUST include actual content, not just whitespace
	- newText: string - Replacement text
	Example: <tool name="edit_editor">{"oldText": "color='#ff0000'", "newText": "color='#00ff00'"}</tool>
	IMPORTANT: Always include meaningful content (tags, comments, or code) in oldText, never just spaces/newlines
	Note: Returns standardized response with game status and console output

	- write_editor: Replace all game code with new content
	Parameters:
	- content: string - Complete new game code
	Example: <tool name="write_editor">{"content": "<world>...</world>"}</tool>
	Note: Use ONLY for starting fresh or complete rewrites

	#### Task Management
	- plan_tasks: Create task list for complex operations
	Parameters:
	- tasks: string[] - Array of task descriptions
	Example: <tool name="plan_tasks">{"tasks": ["Add physics component", "Create gravity system", "Test gravity effect"]}</tool>

	- update_task: Update task status
	Parameters:
	- taskId: number - Task ID to update
	- status: "pending" \| "in_progress" \| "completed"
	Example: <tool name="update_task">{"taskId": 1, "status": "completed"}</tool>

	- view_tasks: View current task list
	Parameters: none
	Example: <tool name="view_tasks">{}</tool>

	#### Runtime Monitoring
	- observe_console: Check console messages and game state
	Parameters: none
	Example: <tool name="observe_console">{}</tool>
	Note: Returns recent console messages and game status

	## Tool Response Format
	All editor modification tools return:
	- Success indicator (✅ or ❌)
	- Action description
	- Game status (Running/Error/Loading)
	- Console output from the game

	## Common User Requests (Context Clarification)
	When users say:
	- "explain the code" → Read and explain the CURRENT editor content
	- "what's in the game?" → Describe the entities/elements in the editor
	- "fix the error" → Check console, then modify the editor content
	- "add a..." → Add new elements to the existing editor content
	- "change the..." → Modify existing elements in the editor

	## Execution Patterns

	### Code Writing Rules (Live Environment)
	- NO IMPORTS: GAME is pre-imported globally - NEVER write \`import * as GAME from 'vibegame'\`
	- NO .run(): Auto-run is enabled - write \`GAME.withPlugin(MyPlugin)\` not \`GAME.withPlugin(MyPlugin).run()\`
	- Direct access: Use GAME.defineComponent, GAME.Types, etc. directly
	- Script tags: When adding JavaScript, use \`<script>\` tags in the XML/HTML

	### Standard Workflow (ONE TOOL AT A TIME)
	1. Execute: <tool name="read_editor">{}</tool> (if needed) → WAIT for result
	2. Execute: <tool name="search_editor">{"query": "target_element"}</tool> → WAIT for result
	3. Execute: <tool name="read_editor_lines">{"startLine": X, "endLine": Y}</tool> → WAIT for result
	4. Execute: <tool name="edit_editor">{"oldText": "...", "newText": "..."}</tool> → WAIT for result
	5. Execute: <tool name="observe_console">{}</tool> → WAIT for result
	6. Based on console output, decide if another edit is needed

	### IMPORTANT: Sequential Tool Execution
	- NEVER chain multiple TOOL: commands in one response
	- Each tool call must be in its own separate message
	- Always analyze the tool result before proceeding
	- If you need to make multiple edits, use plan_tasks to organize them first

	### Error Recovery
	When you see an error in tool results:
	1. Read the error message carefully
	2. Search for the problematic code
	3. Make corrections with edit_editor
	4. Verify fix with observe_console

	### Complex Changes
	For changes requiring multiple edits:
	1. <tool name="plan_tasks">{"tasks": [...]}</tool>
	2. Update task status as you progress
	3. Break edits into small chunks (<20 lines each)
	4. Test after each significant change

	## Critical Rules
	- REMEMBER: You work with ONE editor file - all references to "the code" mean this single file
	- ALWAYS check console output after edits
	- NEVER skip tool execution - implement directly
	- Use exact text matches for edit_editor
	- Keep individual edits small and focused
	- When unsure what user refers to, assume they mean the editor content

	Remember: You're in a live coding environment. Users see the same editor you're modifying. Tool responses include console output - read them carefully to understand game state.`;
	}

	/**
	 * Streams a chat completion from the configured model, yielding each
	 * non-empty content delta as it arrives.
	 *
	 * The abort signal is polled once per received chunk; it is not handed to
	 * the underlying request.
	 *
	 * @param messages Role/content pairs sent to the model.
	 * @param abortSignal Optional signal checked between chunks.
	 * @returns (generator return value) the concatenation of all yielded deltas.
	 * @throws Error("Agent not initialized") when no client has been created.
	 * @throws Error("AbortError") when the signal is aborted mid-stream.
	 */
	private async *streamModelResponse(
	  messages: Array<{ role: string; content: string }>,
	  abortSignal?: AbortSignal,
	): AsyncGenerator<string, string, unknown> {
	  const client = this.client;
	  if (!client) {
	    throw new Error("Agent not initialized");
	  }

	  const pieces: string[] = [];

	  const completion = client.chatCompletionStream({
	    model: this.model,
	    messages,
	    temperature: 0.3,
	    max_tokens: 2048,
	  });

	  for await (const part of completion) {
	    if (abortSignal?.aborted) {
	      throw new Error("AbortError");
	    }

	    // Chunks without text content (missing or empty delta) are skipped.
	    const delta = part.choices[0]?.delta?.content;
	    if (!delta) {
	      continue;
	    }

	    pieces.push(delta);
	    yield delta;
	  }

	  return pieces.join("");
	}

	/**
	 * Heuristic: does the user's message look like a request that requires a
	 * tool? True when the lower-cased text contains any trigger keyword as a
	 * substring (so "add" also matches inside longer words).
	 *
	 * @param content Raw user message text.
	 * @returns `true` if at least one keyword occurs in the message.
	 */
	private shouldUseTools(content: string): boolean {
	  const haystack = content.toLowerCase();

	  const triggers: readonly string[] = [
	    // locating
	    "find", "search", "look for", "where",
	    // modifying
	    "change", "modify", "update", "edit", "replace",
	    // inspecting
	    "show", "display", "read", "what", "check",
	    // creating
	    "create", "write", "add", "implement",
	    // troubleshooting
	    "fix", "debug", "error", "console",
	  ];

	  for (const trigger of triggers) {
	    if (haystack.includes(trigger)) {
	      return true;
	    }
	  }
	  return false;
	}

	/**
	 * Removes redundant tool calls while preserving order (first occurrence
	 * wins).
	 *
	 * Two calls are considered the same operation when:
	 * - `edit_editor`: they target the same complete `oldText` (regardless of
	 *   `newText`; once the first edit ran the text is gone anyway);
	 * - `write_editor`: always, so only the first full rewrite in a batch is
	 *   kept;
	 * - any other tool: name and JSON-serialized arguments are equal.
	 *
	 * The whole `oldText` is used as the key. Keying on a truncated prefix
	 * would silently drop distinct edits whose targets merely start with the
	 * same text.
	 *
	 * @param toolCalls Calls in the order the model produced them.
	 * @returns The calls with duplicates removed; skipped calls are only logged.
	 */
	private deduplicateToolCalls(
	  toolCalls: Array<{ name: string; args: Record<string, unknown> }>,
	): Array<{ name: string; args: Record<string, unknown> }> {
	  const deduplicated: Array<{ name: string; args: Record<string, unknown> }> =
	    [];
	  const seenOperations = new Set<string>();

	  for (const call of toolCalls) {
	    let signature: string;

	    if (
	      call.name === "edit_editor" &&
	      typeof call.args.oldText === "string"
	    ) {
	      signature = `${call.name}:${call.args.oldText}`;
	    } else if (call.name === "write_editor") {
	      signature = `${call.name}:FULL_REPLACE`;
	    } else {
	      // Also covers malformed edit_editor calls (no string oldText), so
	      // each distinct malformed call is still reported by validation.
	      signature = `${call.name}:${JSON.stringify(call.args)}`;
	    }

	    if (seenOperations.has(signature)) {
	      console.warn(`Skipping duplicate tool call: ${call.name}`);
	      continue;
	    }

	    seenOperations.add(signature);
	    deduplicated.push(call);
	  }

	  return deduplicated;
	}

	/**
	 * Converts the LangChain message history into the plain role/content list
	 * sent to the chat-completion endpoint, with the system prompt first.
	 *
	 * Role mapping: `HumanMessage` -> "user"; `ToolMessage` -> "assistant" with
	 * the content prefixed by `Tool result for <name>: `; everything else ->
	 * "assistant". Non-string content is JSON-serialized.
	 *
	 * @param messages Conversation history in order.
	 * @param systemPrompt Text for the leading system message.
	 */
	private formatMessages(
	  messages: BaseMessage[],
	  systemPrompt: string,
	): Array<{ role: string; content: string }> {
	  const asText = (content: unknown): string =>
	    typeof content === "string" ? content : JSON.stringify(content);

	  const formatted: Array<{ role: string; content: string }> = [
	    { role: "system", content: systemPrompt },
	  ];

	  for (const msg of messages) {
	    if (msg instanceof HumanMessage) {
	      formatted.push({ role: "user", content: asText(msg.content) });
	    } else if (msg instanceof ToolMessage) {
	      formatted.push({
	        role: "assistant",
	        content: `Tool result for ${msg.name}: ${asText(msg.content)}`,
	      });
	    } else {
	      formatted.push({ role: "assistant", content: asText(msg.content) });
	    }
	  }

	  return formatted;
	}

	/**
	 * Executes the given tool calls one after another and reports progress to
	 * the WebSocket as "tool-invocation" / "tool-result" segments.
	 *
	 * For each (deduplicated) call:
	 * 1. validate the arguments; on failure record an error result and move on;
	 * 2. for an `edit_editor` that follows an earlier editor change in the same
	 *    batch, skip it when its `oldText` is no longer in the game file;
	 * 3. reject very large `edit_editor` calls spanning more than 20 lines;
	 * 4. run the tool: an MCP tool with a matching name takes precedence, then
	 *    the built-in console/task tools, otherwise "Unknown tool";
	 * 5. emit the segment events and record a `ToolMessage` with the result.
	 *
	 * Errors thrown while running a tool are caught, reported on the socket and
	 * recorded as an error `ToolMessage`; they never abort the batch.
	 *
	 * @param toolCalls Tool calls parsed from the model response.
	 * @param messageId Id of the chat message the segments belong to.
	 * @returns One `ToolMessage` per processed call, in execution order.
	 */
	private async executeToolsWithSegments(
	  toolCalls: Array<{ name: string; args: Record<string, unknown> }>,
	  messageId?: string,
	): Promise<BaseMessage[]> {
	  const results: BaseMessage[] = [];

	  const processedCalls = this.deduplicateToolCalls(toolCalls);
	  // Set once any edit_editor/write_editor in this batch has executed.
	  let editorModified = false;

	  // Writes one event envelope, but only while the socket is open.
	  const sendEvent = (type: string, payload: Record<string, unknown>): void => {
	    if (this.ws && this.ws.readyState === this.ws.OPEN) {
	      this.ws.send(JSON.stringify({ type, payload, timestamp: Date.now() }));
	    }
	  };

	  for (const call of processedCalls) {
	    const segmentId = `seg_tool_${Date.now()}_${Math.random()}`;

	    // The segment id doubles as the tool_call_id of the recorded message.
	    const toolMessage = (content: string): ToolMessage =>
	      new ToolMessage({
	        content,
	        tool_call_id: segmentId,
	        name: call.name,
	      });

	    const validation = this.validateToolCall(call);
	    if (!validation.valid) {
	      console.error(`Invalid tool call: ${call.name}`, validation.error);
	      results.push(toolMessage(`Error: ${validation.error}`));
	      continue;
	    }

	    if (call.name === "edit_editor" && editorModified) {
	      const currentContent = virtualFileSystem.getGameFile().content;
	      const oldText = call.args.oldText as string;

	      if (!currentContent.includes(oldText)) {
	        console.warn(
	          `Skipping edit_editor: Text no longer exists after previous edit`,
	        );
	        results.push(
	          toolMessage(
	            `Skipped: The text to replace was already modified by a previous edit. The current operation is no longer needed.`,
	          ),
	        );
	        continue;
	      }
	    }

	    try {
	      const argString = JSON.stringify(call.args);
	      // Rough size estimate: about four characters per token.
	      const estimatedTokens = argString.length / 4;

	      if (
	        estimatedTokens > 1000 &&
	        (call.name === "edit_editor" || call.name === "write_editor")
	      ) {
	        console.warn(
	          `Warning: Tool ${call.name} arguments are large (${estimatedTokens} estimated tokens)`,
	        );

	        if (call.name === "edit_editor" && call.args.oldText) {
	          const oldText = call.args.oldText as string;
	          const lineCount = oldText.split("\n").length;

	          if (lineCount > 20) {
	            results.push(
	              toolMessage(
	                `Error: The edit is too large (${lineCount} lines). Please break this into smaller edits of max 20 lines each. Use plan_tasks to organize multiple edits.`,
	              ),
	            );
	            continue;
	          }
	        }
	      }

	      // Announce the invocation, then mark it as running.
	      sendEvent("segment_start", {
	        segmentId,
	        segmentType: "tool-invocation",
	        messageId,
	        toolName: call.name,
	        toolArgs: call.args,
	      });
	      sendEvent("segment_end", {
	        segmentId,
	        toolStatus: "running",
	        messageId,
	      });

	      let result;
	      let consoleOutput: string[] = [];

	      const mcpTools = mcpClientManager.getTools();
	      const tool = mcpTools.find((t) => t.name === call.name);

	      if (tool) {
	        result = await tool.func(call.args);

	        if (call.name === "edit_editor" || call.name === "write_editor") {
	          editorModified = true;
	        }

	        // The tool has already run at this point; a non-string result must
	        // not turn a successful execution into a reported failure.
	        if (typeof result === "string") {
	          const consoleMatch = result.match(/Console output:\n([\s\S]*?)$/);
	          if (consoleMatch) {
	            consoleOutput = consoleMatch[1]
	              .split("\n")
	              .filter((line: string) => line.trim());
	          }
	        }
	      } else if (call.name === "observe_console") {
	        result = await observeConsoleTool.func("");
	      } else if (call.name === "plan_tasks") {
	        result = await planTasksTool.func(call.args as { tasks: string[] });
	      } else if (call.name === "update_task") {
	        result = await updateTaskTool.func(
	          call.args as {
	            taskId: number;
	            status: "pending" | "in_progress" | "completed";
	          },
	        );
	      } else if (call.name === "view_tasks") {
	        result = await viewTasksTool.func({});
	      } else {
	        result = `Unknown tool: ${call.name}`;
	      }

	      // Close the invocation segment, then publish the result segment.
	      sendEvent("segment_end", {
	        segmentId,
	        toolStatus: "completed",
	        messageId,
	      });

	      const resultSegmentId = `seg_result_${Date.now()}_${Math.random()}`;
	      sendEvent("segment_start", {
	        segmentId: resultSegmentId,
	        segmentType: "tool-result",
	        messageId,
	        toolName: call.name,
	      });
	      sendEvent("segment_end", {
	        segmentId: resultSegmentId,
	        toolOutput: result,
	        consoleOutput: consoleOutput.length > 0 ? consoleOutput : undefined,
	        toolStatus: "completed",
	        messageId,
	      });

	      results.push(
	        new ToolMessage({
	          content: result,
	          tool_call_id: segmentId,
	          name: call.name,
	        }),
	      );
	    } catch (error) {
	      sendEvent("segment_end", {
	        segmentId,
	        toolStatus: "error",
	        toolError: error instanceof Error ? error.message : String(error),
	        messageId,
	      });

	      results.push(toolMessage(`Error executing ${call.name}: ${error}`));
	    }
	  }

	  return results;
	}

	/**
	 * Checks the arguments of a tool call before it is executed.
	 *
	 * Only tools listed in the validation table are checked; any other tool
	 * name (including unknown ones) is reported as valid and left to the
	 * dispatcher.
	 *
	 * Notable rules:
	 * - `edit_editor`: `oldText` must be a non-empty string of at most 30
	 *   lines; `newText` must be a string and MAY be empty, so that text can
	 *   be deleted.
	 * - `write_editor` / `search_editor`: `content` / `query` must be non-empty
	 *   strings; `mode`, when given, must be "text" or "regex".
	 * - `read_editor_lines`: `startLine` must be a number >= 1; `endLine`, when
	 *   given, must be a number.
	 * - `plan_tasks`: `tasks` must be a non-empty array.
	 * - `update_task`: `taskId` must be a number and `status` one of the three
	 *   known states.
	 *
	 * @param call Tool name and parsed arguments.
	 * @returns `{ valid: true }`, or `{ valid: false, error }` with a message
	 *   suitable for returning to the model.
	 */
	private validateToolCall(call: {
	  name: string;
	  args: Record<string, unknown>;
	}): { valid: boolean; error?: string } {
	  const toolValidations: Record<
	    string,
	    (args: Record<string, unknown>) => string | null
	  > = {
	    edit_editor: (args) => {
	      if (typeof args.oldText !== "string" || args.oldText === "") {
	        return "Missing or invalid 'oldText' parameter";
	      }
	      // An empty replacement is a legitimate deletion, so only the type is
	      // checked here.
	      if (typeof args.newText !== "string") {
	        return "Missing or invalid 'newText' parameter";
	      }
	      const lines = args.oldText.split("\n").length;
	      if (lines > 30) {
	        return `Edit too large (${lines} lines). Break into smaller edits.`;
	      }
	      return null;
	    },
	    write_editor: (args) => {
	      if (!args.content || typeof args.content !== "string") {
	        return "Missing or invalid 'content' parameter";
	      }
	      return null;
	    },
	    search_editor: (args) => {
	      if (!args.query || typeof args.query !== "string") {
	        return "Missing or invalid 'query' parameter";
	      }
	      if (args.mode && !["text", "regex"].includes(args.mode as string)) {
	        return "Invalid 'mode' parameter. Must be 'text' or 'regex'";
	      }
	      return null;
	    },
	    read_editor_lines: (args) => {
	      if (
	        typeof args.startLine !== "number" ||
	        Number.isNaN(args.startLine)
	      ) {
	        return "Missing or invalid 'startLine' parameter";
	      }
	      // Covers 0 as well as negative values with the precise message.
	      if (args.startLine < 1) {
	        return "'startLine' must be >= 1";
	      }
	      if (args.endLine && typeof args.endLine !== "number") {
	        return "Invalid 'endLine' parameter";
	      }
	      return null;
	    },
	    plan_tasks: (args) => {
	      if (!args.tasks || !Array.isArray(args.tasks)) {
	        return "Missing or invalid 'tasks' parameter. Must be an array.";
	      }
	      if (args.tasks.length === 0) {
	        return "'tasks' array cannot be empty";
	      }
	      return null;
	    },
	    update_task: (args) => {
	      if (typeof args.taskId !== "number") {
	        return "Missing or invalid 'taskId' parameter";
	      }
	      if (
	        !args.status ||
	        !["pending", "in_progress", "completed"].includes(
	          args.status as string,
	        )
	      ) {
	        return "Invalid 'status' parameter. Must be 'pending', 'in_progress', or 'completed'";
	      }
	      return null;
	    },
	  };

	  // The tool name comes from model output. Look it up as an OWN property so
	  // names such as "constructor" or "__proto__" do not resolve to members
	  // inherited from Object.prototype.
	  const validator = Object.prototype.hasOwnProperty.call(
	    toolValidations,
	    call.name,
	  )
	    ? toolValidations[call.name]
	    : undefined;

	  if (validator) {
	    const error = validator(call.args);
	    if (error) {
	      return { valid: false, error };
	    }
	  }

	  return { valid: true };
	}

	/**
	 * Runs one user message through the agent graph and collects the streamed
	 * model output.
	 *
	 * The graph is streamed in "custom" and "updates" modes; only custom events
	 * of type "token" are consumed here. Their content is appended to the
	 * result and passed to `onStream`.
	 *
	 * @param message The new user message.
	 * @param messageHistory Earlier conversation messages, oldest first.
	 * @param onStream Optional callback invoked with every token.
	 * @param messageId Id forwarded to the graph via run metadata.
	 * @param abortSignal Optional signal; checked for every stream event and
	 *   forwarded via run metadata.
	 * @returns All tokens emitted during the run, concatenated.
	 * @throws Error("Agent not initialized") when `initialize()` has not run.
	 * @throws Error("AbortError") when the signal is aborted during streaming.
	 */
	async processMessage(
	  message: string,
	  messageHistory: BaseMessage[] = [],
	  onStream?: (chunk: string) => void,
	  messageId?: string,
	  abortSignal?: AbortSignal,
	): Promise<string> {
	  if (!this.client) {
	    throw new Error("Agent not initialized");
	  }

	  const conversation = [...messageHistory, new HumanMessage(message)];

	  const events = await this.graph.stream(
	    { messages: conversation, hasToolCalls: false },
	    {
	      streamMode: ["custom", "updates"] as const,
	      metadata: { messageId, abortSignal },
	    },
	  );

	  let collected = "";

	  for await (const event of events) {
	    if (abortSignal?.aborted) {
	      throw new Error("AbortError");
	    }

	    // With several stream modes each event is a [mode, data] tuple.
	    if (!Array.isArray(event)) {
	      continue;
	    }

	    const [mode, data] = event;
	    if (mode !== "custom" || data?.type !== "token") {
	      continue;
	    }

	    collected += data.content;
	    onStream?.(data.content);
	  }

	  return collected;
	}
	}