# modules/rag/rag_chain.py
from langchain.prompts import PromptTemplate

from modules.rag.vector_store import VectorStore


class RAGChain:
    """Retrieval-augmented generation (RAG) chain: retrieves relevant
    documents and builds an enriched prompt for the downstream LLM."""

    def __init__(self):
        self.vector_store = VectorStore()
        # Similarity search over the vector store, returning the top 5 chunks
        self.retriever = self.vector_store.vector_store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 5},
        )
        # Custom prompt template tuned for the target model
        self.prompt_template = """
You are an expert research assistant with access to relevant documents.
Use the following context to answer the question accurately.
If the context doesn't contain enough information, say so.
Always cite specific parts of the context in your response.

Context: {context}

Question: {question}

Answer:
"""
        self.prompt = PromptTemplate(
            template=self.prompt_template,
            input_variables=["context", "question"],
        )
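
    # Note: `self.retriever` is not used by `query()` below; it is kept so the
    # chain can also be wired into LangChain directly. A minimal sketch,
    # assuming an LLM instance `llm` is configured elsewhere (placeholder name):
    #
    #     from langchain.chains import RetrievalQA
    #     qa = RetrievalQA.from_chain_type(
    #         llm=llm,
    #         chain_type="stuff",
    #         retriever=self.retriever,
    #         chain_type_kwargs={"prompt": self.prompt},
    #     )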
    def query(self, question):
        """Run retrieval for `question` and return the enhanced prompt.

        For streaming, the formatted prompt is returned instead of an LLM
        response so the analyzer can stream the completion itself.
        """
        try:
            # Search for relevant documents
            search_result = self.vector_store.search(question)
            if search_result["status"] != "success":
                return {"status": "error", "message": search_result["message"]}

            # Concatenate the retrieved chunks into a single context block
            context = "\n\n".join(doc.page_content for doc in search_result["documents"])

            # Fill the prompt template with the retrieved context
            enhanced_prompt = self.prompt.format(context=context, question=question)

            # Return the prompt for the analyzer to handle (streaming)
            return {
                "status": "success",
                "prompt": enhanced_prompt,
                "context_docs": search_result["documents"],
            }
        except Exception as e:
            return {"status": "error", "message": str(e)}