"""Lecture-transcript question-answering chatbot.

Pipeline, executed top to bottom when the module is loaded:

1. Read ``TRANSCRIPT_FILE`` and normalise its whitespace.
2. Split the text into sentences and pack them into chunks of roughly
   ``chunk_size`` characters.
3. Embed every chunk with a SentenceTransformer model and store the vectors
   in a FAISS L2 index.
4. For each user question, retrieve the closest chunks and pass them as
   context to a Gemini model.
5. Expose the whole thing through a Gradio text interface.
"""
import os
import re

import faiss
import google.generativeai as genai
import gradio as gr
import nltk
import numpy as np
from nltk.tokenize import sent_tokenize
from sentence_transformers import SentenceTransformer

# Only the Punkt sentence-tokenizer data is needed by sent_tokenize.
# Newer NLTK releases look the tokenizer up under "punkt_tab", older ones
# under "punkt", so both are requested instead of downloading every corpus.
nltk.download("punkt")
nltk.download("punkt_tab")

# Load transcript file
TRANSCRIPT_FILE = "transcript.txt"  # Upload this file manually or use an existing one


def clean_text(text):
    """Collapse every run of whitespace (newlines included) into one space.

    Args:
        text: Raw transcript text.

    Returns:
        The text with whitespace runs replaced by single spaces and with
        leading/trailing whitespace removed.
    """
    return re.sub(r"\s+", " ", text).strip()


def chunk_sentences(sentences, chunk_size=500):
    """Greedily pack consecutive sentences into chunks.

    A sentence is appended to the current chunk as long as the joined text
    stays within ``chunk_size`` characters; otherwise the current chunk is
    closed and the sentence starts a new one. A single sentence longer than
    ``chunk_size`` becomes a chunk of its own (it is never split).

    Args:
        sentences: Iterable of sentence strings, in document order.
        chunk_size: Target maximum chunk length in characters.

    Returns:
        A list of non-empty chunk strings.
    """
    packed = []
    current = ""
    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue
        if not current:
            # Starting a fresh chunk: never emit an empty chunk first.
            current = sentence
        elif len(current) + 1 + len(sentence) <= chunk_size:
            current = f"{current} {sentence}"
        else:
            packed.append(current)
            current = sentence
    if current:
        packed.append(current)
    return packed


# Read and clean transcript
with open(TRANSCRIPT_FILE, "r", encoding="utf-8") as f:
    transcript_text = f.read()

cleaned_text = clean_text(transcript_text)

# Tokenize into sentences
sentences = sent_tokenize(cleaned_text)

# Split into chunks
chunk_size = 500
chunks = chunk_sentences(sentences, chunk_size)
if not chunks:
    # Fail early with a clear message instead of an IndexError further down
    # when the embedding matrix turns out to have no second dimension.
    raise ValueError(f"No text found in transcript file {TRANSCRIPT_FILE!r}")

# Load embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode all chunks in one batched call; FAISS expects a contiguous float32
# matrix with one row per vector. (The name is kept for compatibility even
# though these are the chunk embeddings, not query embeddings.)
query_embeddings = np.ascontiguousarray(
    embedding_model.encode(chunks, convert_to_numpy=True), dtype=np.float32
)
chunk_map = dict(enumerate(chunks))

# Save to FAISS index
dimension = query_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(query_embeddings)

# Configure Google Generative AI
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))  # Use environment variable for security
model = genai.GenerativeModel("gemini-1.5-pro-latest")


def search_transcript(query, top_k=3):
    """Return the transcript chunks closest to ``query``.

    Args:
        query: The user's question.
        top_k: Maximum number of chunks to retrieve.

    Returns:
        The retrieved chunks joined by single spaces, nearest first. An
        empty string is returned when nothing can be retrieved.
    """
    # Asking FAISS for more neighbours than there are vectors yields -1
    # placeholders, which are not valid chunk_map keys.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return ""
    query_embedding = embedding_model.encode(query).astype("float32").reshape(1, -1)
    _distances, indices = index.search(query_embedding, k)
    return " ".join(chunk_map[int(i)] for i in indices[0] if i >= 0)


def generate_response(query):
    """Answer ``query`` with the Gemini model, grounded in the transcript.

    Args:
        query: The user's question.

    Returns:
        The model's answer text, or a short fallback message when the model
        returned no usable text.
    """
    relevant_text = search_transcript(query)
    prompt = f"""
    You are an AI tutor.
    Answer the following question based on the given lecture transcript:

    Lecture Context: {relevant_text}

    Question: {query}
    """
    response = model.generate_content(prompt)
    try:
        return response.text
    except ValueError:
        # response.text raises ValueError when the response carries no text
        # part (for example when the candidate was blocked).
        return "Sorry, I could not generate an answer for that question."


def chatbot(query):
    """Gradio callback: answer a question, or say goodbye on ``exit``.

    Args:
        query: Text typed by the user; may be empty or ``None``.

    Returns:
        The text to display in the output box.
    """
    text = (query or "").strip()
    if not text:
        # Avoid spending a model call on empty input.
        return "Please enter a question about the lecture."
    if text.lower() == "exit":
        return "Goodbye!"
    return generate_response(text)


# Gradio Interface
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(placeholder="Ask anything about the lecture..."),
    outputs="text",
    title="Dhamm AI Chatbot",
    description="Ask questions about any topic and get AI-generated answers!",
)

if __name__ == "__main__":
    iface.launch()