# shivam rai
# Initial commit - AI Chatbot
# 1d16be5
import os
import faiss
import numpy as np
import re
import nltk
import google.generativeai as genai
from sentence_transformers import SentenceTransformer
from nltk.tokenize import sent_tokenize
import gradio as gr
# Download necessary NLTK data
# Only the sentence tokenizer models are needed by sent_tokenize(). "punkt" is
# the resource name used by older NLTK releases and "punkt_tab" the one used by
# newer releases, so both are fetched. Downloading "all" pulled in every NLTK
# corpus and model on each start-up, none of which this script uses.
nltk.download("punkt")
nltk.download("punkt_tab")
# Load transcript file
# Path of the lecture transcript to index. Defaults to "transcript.txt" in the
# working directory (upload this file manually or use an existing one). Set the
# TRANSCRIPT_FILE environment variable to point at a different file; an unset
# or empty variable falls back to the default.
TRANSCRIPT_FILE = os.getenv("TRANSCRIPT_FILE") or "transcript.txt"
# Read and clean transcript
def clean_text(text: str) -> str:
    """Collapse all whitespace in *text* to single spaces and trim the ends.

    Args:
        text: Raw transcript text, possibly spanning several lines.

    Returns:
        The text with every run of whitespace (spaces, tabs, newlines)
        replaced by one space and no leading or trailing whitespace.
    """
    # r"\s+" already matches newlines, so no separate "\n" replacement is
    # needed after the substitution.
    return re.sub(r"\s+", " ", text).strip()
# Read the whole transcript from disk, decoding it as UTF-8.
with open(TRANSCRIPT_FILE, mode="r", encoding="utf-8") as transcript_handle:
    transcript_text = transcript_handle.read()

# Normalise whitespace before handing the text to the sentence splitter.
cleaned_text = clean_text(transcript_text)

# Tokenize into sentences
sentences = sent_tokenize(cleaned_text)
# Split into chunks
# Consecutive sentences are packed greedily into chunks of roughly
# `chunk_size` characters; sentences are never split, so a single sentence
# longer than `chunk_size` becomes a chunk of its own.
chunk_size = 500
chunks = []
current_chunk = ""
for sentence in sentences:
    if not current_chunk:
        # Start a fresh chunk. Doing this explicitly avoids appending an empty
        # chunk when the very first sentence is already over the limit, and
        # keeps a stray leading space out of the first chunk's length.
        current_chunk = sentence
    elif len(current_chunk) + len(sentence) < chunk_size:
        current_chunk += " " + sentence
    else:
        # The sentence does not fit: close the current chunk and start the
        # next one with this sentence.
        chunks.append(current_chunk.strip())
        current_chunk = sentence
# Flush whatever is left after the last sentence.
if current_chunk:
    chunks.append(current_chunk.strip())
# Load embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode chunks
if not chunks:
    # Fail with a clear message instead of an opaque IndexError on `.shape[1]`.
    raise ValueError("No text chunks to index; the transcript appears to be empty.")
# Encode every chunk in one batched call rather than one model call per chunk,
# and force float32 because that is the dtype FAISS indexes store.
# NOTE: despite its name this array holds the *chunk* embeddings; the name is
# kept so existing references to it continue to work.
query_embeddings = np.asarray(embedding_model.encode(chunks), dtype="float32")
# Map FAISS row number -> chunk text (rows are added in chunk order below).
chunk_map = dict(enumerate(chunks))

# Save to FAISS index
dimension = query_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(query_embeddings)
# Configure Google Generative AI
# The API key is taken from the GOOGLE_API_KEY environment variable so that it
# is never hard-coded in the source.
_google_api_key = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=_google_api_key)

# Model handle used for every answer generated by this script.
model = genai.GenerativeModel("gemini-1.5-pro-latest")
# Function to search transcript
def search_transcript(query: str, top_k: int = 3) -> str:
    """Return the transcript chunks closest to *query*, joined by spaces.

    Args:
        query: Free-text question; it is embedded with ``embedding_model``
            and looked up in the FAISS ``index``.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        The retrieved chunks concatenated with single spaces, in the order
        FAISS returns them, or an empty string when nothing can be retrieved.
    """
    # Never request more neighbours than the index holds: FAISS pads missing
    # results with -1, which is not a key of chunk_map.
    top_k = min(top_k, index.ntotal)
    if top_k <= 0:
        return ""
    query_embedding = embedding_model.encode(query).astype("float32").reshape(1, -1)
    _, indices = index.search(query_embedding, top_k)
    # Skip any -1 padding defensively and use plain ints as dictionary keys.
    return " ".join(chunk_map[int(i)] for i in indices[0] if i >= 0)
# Function to generate AI response
def generate_response(query: str) -> str:
    """Answer *query* with the generative model, grounded in the transcript.

    The transcript chunks most relevant to the question are retrieved with
    ``search_transcript`` and placed in the prompt as lecture context.

    Args:
        query: The user's question.

    Returns:
        The model's answer text, or a short apology when the model returned
        no usable text.
    """
    relevant_text = search_transcript(query)
    # Built from adjacent literals so the prompt text does not depend on how
    # this function is indented.
    prompt = (
        "\n"
        "You are an AI tutor. Answer the following question based on the given lecture transcript:\n"
        f"Lecture Context: {relevant_text}\n"
        f"Question: {query}\n"
    )
    response = model.generate_content(prompt)
    try:
        return response.text
    except ValueError:
        # The `.text` accessor raises ValueError when the response contains
        # no text part (for example when the reply was blocked).
        return "Sorry, I couldn't generate an answer to that question. Please try rephrasing it."
# Gradio Interface
def chatbot(query):
    """Gradio callback: answer a question, or say goodbye on "exit".

    Args:
        query: Text typed by the user; may be empty or None.

    Returns:
        "Goodbye!" when the input is "exit" (case-insensitive, surrounding
        whitespace ignored), a prompt to enter a question when the input is
        blank, otherwise the answer produced by ``generate_response``.
    """
    question = (query or "").strip()
    if not question:
        # Do not spend an embedding lookup and a model call on blank input.
        return "Please enter a question."
    if question.lower() == "exit":
        return "Goodbye!"
    return generate_response(question)
# Single-line text input shown to the user.
question_box = gr.Textbox(placeholder="Ask anything about the lecture...")

# Wire the `chatbot` callback to a text-in / text-out web UI.
iface = gr.Interface(
    fn=chatbot,
    inputs=question_box,
    outputs="text",
    title="Dhamm AI Chatbot",
    description="Ask questions about any topic and get AI-generated answers!",
)

# Start serving the UI.
iface.launch()