import json
import logging

from fastapi import APIRouter, HTTPException
from fastapi.responses import StreamingResponse

from core.llm import LLMClient
from core.memory import save_user_state, load_user_state
from core.session import session_manager

router = APIRouter()
logger = logging.getLogger(__name__)
llm_client = LLMClient(provider="ollama")  # Default to Ollama


@router.post("/chat")
async def chat(user_id: str, message: str):
    if not message:
        raise HTTPException(status_code=400, detail="Message is required")

    try:
        # Use session manager for better session handling
        session = session_manager.get_session(user_id)
        conversation_history = session.get("conversation", [])

        # Add user message to history
        conversation_history.append({"role": "user", "content": message})

        # Generate AI response
        full_response = ""
        response_stream = llm_client.generate(prompt=message, stream=True)

        # Collect streamed response
        for chunk in response_stream:
            full_response += chunk

        # Save updated conversation using session manager
        conversation_history.append({"role": "assistant", "content": full_response})
        session_manager.update_session(user_id, {"conversation": conversation_history})

        logger.info(f"Successfully processed chat for user {user_id}")
        return {"response": full_response}
    except Exception as e:
        logger.error(f"LLM generation failed for user {user_id}: {e}")
        raise HTTPException(status_code=500, detail=f"LLM generation failed: {e}")
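

# Usage sketch (illustrative, not part of the original module): because user_id and
# message are declared as plain function parameters, FastAPI exposes them as query
# parameters, so a client passes them via `params` rather than a JSON body. The
# FastAPI `app` created here is a hypothetical wrapper for local testing only.
if __name__ == "__main__":
    from fastapi import FastAPI
    from fastapi.testclient import TestClient

    app = FastAPI()
    app.include_router(router)

    client = TestClient(app)
    resp = client.post("/chat", params={"user_id": "demo-user", "message": "Hello!"})
    print(resp.status_code, resp.json())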