thiruvasagmbot / app.py
PraneshJs's picture
added new model for embeddings
9d59494 verified
import os
import gradio as gr
from openai import AzureOpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
# ---------------------------------------------------------------------------
# Knowledge base: PDF -> overlapping chunks -> embeddings -> Chroma retriever
# ---------------------------------------------------------------------------

# Read the Tiruvāsagam source PDF from the working directory.
loader = PyPDFLoader("tiru.pdf")
docs = loader.load()

# Break the loaded documents into chunks (chunk_size=500, chunk_overlap=50).
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
chunks = splitter.split_documents(docs)

# Locally-run multilingual embedding model (noted by the author as Tamil capable).
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
)

# Index every chunk in Chroma, then expose a similarity retriever that
# returns the 3 best matches per query.
vectorstore = Chroma.from_documents(chunks, embedding=embedding_model)
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
def _require_env(name):
    """Return the whitespace-stripped value of environment variable *name*.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's value with leading/trailing whitespace removed.

    Raises:
        RuntimeError: If the variable is unset or contains only whitespace.
    """
    # os.getenv returns None for an unset variable; calling .strip() on that
    # directly would fail with an AttributeError that never names the variable.
    value = (os.getenv(name) or "").strip()
    if not value:
        raise RuntimeError(
            f"Environment variable {name} must be set to a non-empty value."
        )
    return value


# Azure OpenAI client; credentials and endpoint come from the environment and
# are validated up front so a misconfigured deployment fails with a clear message.
client = AzureOpenAI(
    api_key=_require_env("AZURE_OPENAI_API_KEY"),
    api_version="2025-01-01-preview",
    azure_endpoint=_require_env("AZURE_OPENAI_ENDPOINT"),
)
# Chat function
def chat_fn(message, history):
    """Answer one chat message using passages retrieved from the vector store.

    The message is used to fetch matching chunks from the module-level
    ``retriever``; those chunks are joined into a context string and sent,
    together with the question, to the Azure OpenAI deployment through the
    module-level ``client``.

    Args:
        message: The user's latest message text.
        history: Earlier conversation turns passed in by the chat UI. Accepted
            for interface compatibility only; it is not used, so every
            question is answered independently of previous ones.

    Returns:
        The reply text from the model. When the message is blank, or the model
        returns no text, the fixed reply 'எனக்கு தெரியாது' is returned instead.
    """
    # Same phrase the system prompt tells the model to use for off-topic input.
    fallback_reply = "எனக்கு தெரியாது"

    # A blank message has nothing to retrieve or answer; skip the embedding
    # lookup and the completion call entirely.
    question = (message or "").strip()
    if not question:
        return fallback_reply

    # `invoke` replaces the deprecated `get_relevant_documents` retriever method.
    matches = retriever.invoke(question)
    context = "\n\n".join(match.page_content for match in matches)

    completion = client.chat.completions.create(
        model="gpt-4.1",  # Azure deployment name
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant answering only from Tiruvāsagam. "
                    "Always reply in Tamil with simple, clear, and correct grammar. "
                    "Be token efficient. "
                    "If the question is not related to Tiruvāsagam, Lord Shiva, or "
                    "Manikkavasagar, just reply: 'எனக்கு தெரியாது'."
                ),
            },
            {
                "role": "user",
                "content": f"Context:\n{context}\n\nQuestion: {question}",
            },
        ],
        temperature=0.8,
        max_tokens=500,
    )

    # `content` may be None (no text produced); never hand None back to the UI.
    reply = completion.choices[0].message.content
    return reply or fallback_reply
# Gradio chat front end: each submitted message is handled by chat_fn.
UI_TITLE = "திருவாசகம் RAG Chatbot"
UI_DESCRIPTION = "திருவாசகத்தை அடிப்படையாகக் கொண்டு கேள்விகளை கேளுங்கள் (Tamil/English supported)."

chatbot = gr.ChatInterface(
    fn=chat_fn,
    title=UI_TITLE,
    description=UI_DESCRIPTION,
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "debug": True,
    }
    chatbot.launch(**launch_options)