import os
import gradio as gr
import fitz  # PyMuPDF
from datetime import date
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub


# ✅ 1. Extract text from PDF
def extract_text_from_pdf(file_path):
    doc = fitz.open(file_path)
    text = ""
    for page in doc:
        text += page.get_text()
    doc.close()
    return text


# ✅ 2. Chunk text
def chunk_text(text):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100
    )
    return splitter.split_text(text)


# ✅ 3. Create embeddings and retriever
def create_retriever(chunks):
    print("⚙️ Generating embeddings...")
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = FAISS.from_texts(chunks, embedding=embeddings)
    retriever = vectorstore.as_retriever()
    return retriever


# ✅ 4. Set up LLM chain (e.g., HuggingFaceHub, GROQ, etc.)
def create_chain(retriever):
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-base",
        model_kwargs={"temperature": 0.1, "max_length": 256}
    )
    chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return chain


# 🔄 5. Upload file → extract → chunk → embed
def upload_file(file):
    try:
        text = extract_text_from_pdf(file.name)
        print("📄 Extracted text:", text[:300])
        chunks = chunk_text(text)
        print("✅ Chunks created:", len(chunks))
        return chunks
    except Exception as e:
        print("❌ Upload error:", e)
        # Return an empty list so the downstream state stays a list of chunks,
        # not an error string.
        return []


# 💬 6. Query & Generate Answer
def generate_response(name, today, query, chunks, model_choice):
    try:
        print("🧑‍🏫 Teacher:", name, "| 📅 Date:", today)
        print("🔍 Query:", query)
        print("📦 Chunks received:", len(chunks))
        print("🤖 Model selected:", model_choice)

        if not chunks:
            return "⚠️ Please upload a curriculum PDF and click Extract first."

        retriever = create_retriever(chunks)
        rag_chain = create_chain(retriever)
        answer = rag_chain.run(query)

        print("✅ Answer:", answer)
        formatted = (
            f"👩‍🏫 Teacher: {name}\n📅 Date: {today}\n\n"
            f"📌 Question: {query}\n\n📘 Answer:\n{answer}"
        )
        return formatted
    except Exception as e:
        print("❌ Error in response generation:", e)
        return f"❌ Error:\n{e}"


# 🧱 7. Gradio UI
with gr.Blocks(title="📚 Curriculum Assistant") as demo:
    gr.Markdown("# 🧠 Curriculum Assistant\nUpload your curriculum PDF and ask questions from it!")

    with gr.Row():
        teacher_name = gr.Textbox(label="👩‍🏫 Teacher Name")
        today_date = gr.Textbox(value=str(date.today()), label="📅 Date")

    with gr.Row():
        file_input = gr.File(label="📎 Upload Curriculum PDF")
        upload_btn = gr.Button("📥 Extract")

    chunks_state = gr.State([])

    with gr.Row():
        query_input = gr.Textbox(label="💡 Enter your question")
        model_selector = gr.Dropdown(choices=["flan-t5-base"], value="flan-t5-base", label="🤖 Choose Model")

    submit_button = gr.Button("🚀 Generate Resource")
    answer_output = gr.Textbox(label="📘 Output", lines=12)

    # 🔁 Button connections
    upload_btn.click(fn=upload_file, inputs=[file_input], outputs=[chunks_state])
    submit_button.click(
        fn=generate_response,
        inputs=[teacher_name, today_date, query_input, chunks_state, model_selector],
        outputs=[answer_output]
    )

# 🚀 Launch the app
demo.launch()
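
# ── Setup notes (a sketch of assumptions, not pinned versions) ──
# • HuggingFaceHub reads the HUGGINGFACEHUB_API_TOKEN environment variable, so
#   export a Hugging Face token before launching, e.g.
#   export HUGGINGFACEHUB_API_TOKEN="hf_..."   # placeholder token
# • The langchain.* import paths above assume an older LangChain release; on
#   newer versions the same classes are importable from langchain_community.
# • Rough dependency list (assumed): gradio, pymupdf, langchain, faiss-cpu,
#   sentence-transformers, huggingface_hub. Run the script directly with
#   Python to start the Gradio UI.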