Spaces:

shivamkrai
/

dhamm-ai-chatbot

Sleeping

dhamm-ai-chatbot / app.py

shivam rai

Initial commit - AI Chatbot

1d16be5 6 months ago

2.65 kB

	import os
	import faiss
	import numpy as np
	import re
	import nltk
	import google.generativeai as genai
	from sentence_transformers import SentenceTransformer
	from nltk.tokenize import sent_tokenize
	import gradio as gr

	# Download only the Punkt sentence-tokenizer data that sent_tokenize() needs.
	# The earlier nltk.download('all') fetched every NLTK corpus and model on each
	# start-up although nothing else in this file uses them.
	# Older NLTK releases load the tokenizer from "punkt", newer ones from
	# "punkt_tab", so both are requested.
	nltk.download("punkt")
	nltk.download("punkt_tab")

	# Path of the lecture transcript that is read, chunked and indexed below.
	# It is a relative path, so it is resolved against the current working directory.
	TRANSCRIPT_FILE = "transcript.txt" # The file must already exist: upload it manually or reuse an existing one

	# Read and clean transcript
	def clean_text(text: str) -> str:
	    """Collapse every run of whitespace in *text* to a single space.

	    Newlines, tabs and repeated spaces all count as whitespace; leading and
	    trailing whitespace is removed from the result.

	    Args:
	        text: Raw text, e.g. the contents of the transcript file.

	    Returns:
	        The text on a single line with single-space separators.
	    """
	    # r"\s+" already matches newlines, so no separate "\n" replacement is
	    # needed after this substitution.
	    return re.sub(r"\s+", " ", text).strip()

	# Read the whole transcript into memory as UTF-8 text; the context manager
	# closes the file as soon as the read has finished.
	with open(TRANSCRIPT_FILE, mode="r", encoding="utf-8") as f:
	    transcript_text = f.read()

	# Collapse all whitespace so the sentence tokenizer sees one continuous line.
	cleaned_text = clean_text(transcript_text)

	# Tokenize into sentences
	sentences = sent_tokenize(cleaned_text)

	# Greedily pack consecutive sentences into chunks of roughly chunk_size
	# characters. A chunk is closed as soon as adding the next sentence would
	# reach the limit. Sentences are never split, so a sentence longer than
	# chunk_size ends up as a chunk on its own.
	chunk_size = 500
	chunks = []
	current_chunk = ""

	for sentence in sentences:
	    if len(current_chunk) + len(sentence) < chunk_size:
	        current_chunk += " " + sentence
	    else:
	        # Flush only a non-empty chunk: when the very first sentence is
	        # already at or over the limit, current_chunk is still "" and an
	        # empty string must not be added to the list that gets indexed.
	        if current_chunk:
	            chunks.append(current_chunk.strip())
	        current_chunk = sentence

	# Flush whatever is left after the last sentence.
	if current_chunk:
	    chunks.append(current_chunk.strip())

	# Load embedding model
	embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

	# Embed all chunks with one batched encode() call instead of one call per
	# chunk, and make the float32 dtype explicit because that is what FAISS
	# indexes accept.
	# NOTE: despite its name, query_embeddings holds the *chunk* embeddings
	# (one row per chunk); the name is kept so existing references still work.
	query_embeddings = np.asarray(
	    embedding_model.encode(chunks, convert_to_numpy=True), dtype="float32"
	)

	# Row i of the index corresponds to chunks[i]; chunk_map turns a row id
	# returned by a search back into the chunk text.
	chunk_map = dict(enumerate(chunks))

	# Build a flat L2-distance index over the chunk embeddings.
	dimension = query_embeddings.shape[1]
	index = faiss.IndexFlatL2(dimension)
	index.add(query_embeddings)

	# Authenticate the Google Generative AI client. The key is read from the
	# GOOGLE_API_KEY environment variable so it does not have to live in the code.
	genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

	# Model handle used by generate_response() to produce the answers.
	model = genai.GenerativeModel("gemini-1.5-pro-latest")

	# Function to search transcript
	def search_transcript(query, top_k=3):
	    """Return the transcript chunks most similar to *query*, joined by spaces.

	    Args:
	        query: Question text to embed and look up in the FAISS index.
	        top_k: Maximum number of chunks to retrieve.

	    Returns:
	        The matching chunk texts concatenated with single spaces, in the
	        order the index returns them; an empty string when no chunk is
	        available.
	    """
	    # Never ask for more neighbours than the index holds: FAISS pads the
	    # missing result slots with the label -1, which is not a key of
	    # chunk_map and used to raise KeyError for short transcripts.
	    top_k = min(top_k, index.ntotal)
	    if top_k <= 0:
	        return ""

	    # FAISS expects a 2-D float32 array with one row per query vector.
	    query_embedding = embedding_model.encode(query).astype("float32").reshape(1, -1)
	    _distances, indices = index.search(query_embedding, top_k)

	    # Skip any -1 padding defensively and use plain ints as dictionary keys.
	    return " ".join(chunk_map[int(i)] for i in indices[0] if i >= 0)

	# Function to generate AI response
	def generate_response(query):
	    """Answer *query* with the generative model, using retrieved transcript text.

	    The passages returned by ``search_transcript`` are placed in a tutoring
	    prompt together with the question, the prompt is sent to ``model``, and
	    the text of the reply is returned.
	    """
	    relevant_text = search_transcript(query)
	    prompt = f"""
	You are an AI tutor. Answer the following question based on the given lecture transcript:

	Lecture Context: {relevant_text}

	Question: {query}
	"""
	    return model.generate_content(prompt).text

	# Gradio Interface
	def chatbot(query):
	    """Gradio callback: produce the reply for one user message.

	    Args:
	        query: Text typed by the user (may be empty or ``None``).

	    Returns:
	        "Goodbye!" when the message is "exit" (any letter case, surrounding
	        whitespace ignored), a short hint when the message is blank, and
	        otherwise the generated answer.
	    """
	    # Normalise once so that " Exit " is recognised and blank input never
	    # triggers a model call with an empty question.
	    question = (query or "").strip()
	    if not question:
	        return "Please enter a question about the lecture."
	    if question.lower() == "exit":
	        return "Goodbye!"
	    return generate_response(question)

	# Web UI: a single free-text box whose content is passed to chatbot(), with
	# the returned string shown as plain text.
	iface = gr.Interface(
	    title="Dhamm AI Chatbot",
	    description="Ask questions about any topic and get AI-generated answers!",
	    fn=chatbot,
	    inputs=gr.Textbox(placeholder="Ask anything about the lecture..."),
	    outputs="text",
	)

	# Start serving the interface.
	iface.launch()