# Spaces: Sleeping
import os | |
import gradio as gr | |
from openai import AzureOpenAI | |
from langchain_community.document_loaders import PyPDFLoader | |
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
from langchain_community.vectorstores import Chroma | |
from langchain_community.embeddings import HuggingFaceEmbeddings | |
# --- Knowledge base -------------------------------------------------------
# Everything below runs at import time, so the index is rebuilt from the PDF
# on every start of the app.

# Read the Tiruvāsagam PDF; the path is relative to the working directory.
loader = PyPDFLoader("tiru.pdf")
docs = loader.load()

# Cut the loaded documents into overlapping chunks for retrieval.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
chunks = splitter.split_documents(docs)

# Multilingual sentence-transformers model, run locally (picked by the
# original author as Tamil capable).
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)

# Embed the chunks into a Chroma store and expose a retriever that returns
# the 3 most similar chunks for a query.
vectorstore = Chroma.from_documents(documents=chunks, embedding=embedding_model)
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
# Azure OpenAI client
def _require_env(name):
    """Return the stripped value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or contains only whitespace.
            Without this check ``os.getenv(name)`` returns ``None`` for an
            unset variable and the following ``.strip()`` fails with an
            ``AttributeError`` that does not say which setting is missing.
    """
    value = os.getenv(name)
    if value is None or not value.strip():
        raise RuntimeError(
            f"Required environment variable {name!r} is not set or is empty."
        )
    return value.strip()


client = AzureOpenAI(
    api_key=_require_env("AZURE_OPENAI_API_KEY"),
    api_version="2025-01-01-preview",
    azure_endpoint=_require_env("AZURE_OPENAI_ENDPOINT"),
)
# Chat function
def chat_fn(message, history):
    """Answer one chat message using chunks retrieved from the vector store.

    The message is sent to the module-level ``retriever``; the text of the
    returned chunks is joined into a context string and passed, together
    with the question, to the Azure OpenAI chat completion endpoint.

    Args:
        message: The user's latest message.
        history: Earlier turns supplied by ``gr.ChatInterface``. It is not
            used: every question is answered independently of prior turns.

    Returns:
        The assistant's reply text, or an empty string when the completion
        carries no text content.
    """
    # `invoke` is the supported retriever entry point; the previously used
    # `get_relevant_documents` is deprecated in langchain-core.
    retrieved = retriever.invoke(message)
    context = "\n\n".join(doc.page_content for doc in retrieved)

    completion = client.chat.completions.create(
        model="gpt-4.1",  # your Azure deployment name
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant answering only from Tiruvāsagam. "
                    "Always reply in Tamil with simple, clear, and correct grammar. "
                    "Be token efficient. "
                    "If the question is not related to Tiruvāsagam, Lord Shiva, or "
                    "Manikkavasagar, just reply: 'எனக்கு தெரியாது'."
                ),
            },
            {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {message}",
            },
        ],
        temperature=0.8,
        max_tokens=500,
    )

    # The SDK types `content` as optional; coerce a missing value to "" so
    # the UI always receives a string.
    return completion.choices[0].message.content or ""
# Gradio UI: a chat interface whose replies come from chat_fn.
_ui_text = {
    "title": "திருவாசகம் RAG Chatbot",
    "description": "திருவாசகத்தை அடிப்படையாகக் கொண்டு கேள்விகளை கேளுங்கள் (Tamil/English supported).",
}
chatbot = gr.ChatInterface(fn=chat_fn, **_ui_text)

# Start the server only when this file is run as a script, listening on all
# interfaces at port 7860 with debug mode switched on.
if __name__ == "__main__":
    _launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "debug": True,
    }
    chatbot.launch(**_launch_options)