"""Streamlit curriculum assistant.

Upload a curriculum PDF, embed its text into a FAISS index, retrieve the
passages most relevant to a grade/subject/topic/SLO query, and render a Word
document (lesson plan, flashcards, or worksheet) from fixed templates.
"""

import io
import os
from datetime import datetime

import faiss
import numpy as np
import PyPDF2
import pytesseract
import streamlit as st
import torch
from docx import Document
from docx.shared import Inches
from PIL import Image
from transformers import AutoTokenizer, AutoModel

# Set page config (kept as the first Streamlit call in the script).
st.set_page_config(page_title="Curriculum Assistant", layout="wide")

# Initialize session state for FAISS index and chunks. Each key is checked on
# its own so a partially populated session cannot leave one of them missing.
for _state_key, _state_default in (
    ("faiss_index", None),
    ("chunks", []),
    ("embeddings", None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Multilingual embedding model
MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"


@st.cache_resource
def _load_embedding_model():
    """Load the tokenizer and encoder once and reuse them across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, both ``from_pretrained`` calls would run again each time.

    Returns:
        A ``(tokenizer, model)`` tuple for ``MODEL_NAME``.
    """
    return (
        AutoTokenizer.from_pretrained(MODEL_NAME),
        AutoModel.from_pretrained(MODEL_NAME),
    )


tokenizer, model = _load_embedding_model()


# Helper functions
def _rewind(stream):
    """Seek *stream* back to its start when it supports seeking."""
    if hasattr(stream, "seek"):
        stream.seek(0)


def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF, falling back to OCR when no text is found.

    Args:
        pdf_file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The extracted text, or ``""`` if extraction failed (the error is
        reported through ``st.error``).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Join pages with a newline so the last word of one page is not fused
        # with the first word of the next.
        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        if not text.strip():
            st.warning("No text found in PDF. Attempting OCR...")
            # The OCR helper returns None only through unexpected paths;
            # normalise so callers always get a string.
            text = extract_text_with_ocr(pdf_file) or ""
        return text
    except Exception as e:
        st.error(f"Error extracting text: {e} / متن نکالنے میں خرابی: {e}")
        return ""


def extract_text_with_ocr(pdf_file):
    """Run Tesseract OCR over every image embedded in the PDF's pages.

    Args:
        pdf_file: A path or binary file-like object accepted by
            ``PyPDF2.PdfReader``.

    Returns:
        The recognised text (English + Urdu), or ``""`` if OCR failed (the
        error is reported through ``st.error``).
    """
    try:
        # Rewind in case an earlier reader left the stream position elsewhere.
        _rewind(pdf_file)
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        recognised = []
        for page in pdf_reader.pages:
            # Each entry of page.images carries the raw encoded bytes in
            # ``.data``; OCR all of them, not only the first.
            for image_file in page.images:
                with Image.open(io.BytesIO(image_file.data)) as page_image:
                    recognised.append(
                        pytesseract.image_to_string(page_image, lang="eng+urd")
                    )
        return "\n".join(recognised)
    except Exception as e:
        st.error(f"OCR failed: {e} / OCR ناکام: {e}")
        return ""


def chunk_text(text, chunk_size=400, overlap=80):
    """Split *text* into overlapping chunks of whitespace-separated words.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared by consecutive chunks; must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings (empty when *text* contains no words).

    Raises:
        ValueError: If *chunk_size* or *overlap* is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + chunk_size]))
        # Once a chunk reaches the final word, any further window would be
        # fully contained in this one, so stop instead of emitting a duplicate.
        if start + chunk_size >= len(words):
            break
    return chunks


def _embed_texts(texts, batch_size=16):
    """Embed *texts* with attention-mask-aware mean pooling.

    Padding positions are excluded from the average so that batching several
    texts of different lengths does not change any individual embedding.

    Args:
        texts: Non-empty sequence of strings.
        batch_size: Number of texts encoded per forward pass.

    Returns:
        A C-contiguous ``float32`` array of shape ``(len(texts), dim)``.
    """
    texts = list(texts)
    pooled_batches = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        inputs = tokenizer(
            batch, return_tensors="pt", truncation=True, padding=True
        )
        with torch.no_grad():
            hidden = model(**inputs).last_hidden_state
            mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
            summed = (hidden * mask).sum(dim=1)
            counts = mask.sum(dim=1).clamp(min=1e-9)
            pooled_batches.append((summed / counts).numpy())
    # FAISS expects float32, C-contiguous input.
    return np.ascontiguousarray(np.vstack(pooled_batches), dtype=np.float32)


def create_embeddings(chunks):
    """Return one embedding row per chunk.

    Args:
        chunks: Non-empty list of text chunks.

    Returns:
        A ``float32`` array of shape ``(len(chunks), dim)``.

    Raises:
        ValueError: If *chunks* is empty.
    """
    if not chunks:
        raise ValueError("create_embeddings requires at least one chunk")
    return _embed_texts(chunks)


def setup_faiss_index(embeddings):
    """Build an exact L2 FAISS index over *embeddings*.

    Args:
        embeddings: 2-D array with one vector per row.

    Returns:
        A ``faiss.IndexFlatL2`` containing all rows of *embeddings*.
    """
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index


def retrieve_relevant_chunks(query, index, chunks, k=3):
    """Return up to *k* chunks nearest to *query* in the index.

    Args:
        query: The search text.
        index: FAISS index whose rows correspond positionally to *chunks*.
        chunks: The chunk strings that were indexed.
        k: Maximum number of chunks to return.

    Returns:
        A list of at most *k* chunk strings, nearest first; empty when there
        is nothing to search.
    """
    if index is None or not chunks:
        return []
    # Never ask for more neighbours than exist: FAISS pads missing results
    # with -1, which would otherwise index the last chunk by accident.
    k = min(k, len(chunks))
    if k <= 0:
        return []
    query_embedding = _embed_texts([query])
    _distances, indices = index.search(query_embedding, k)
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]


def generate_lesson_plan_boppps(grade, subject, topic, slo, duration, context):
    """Build a BOPPPS-model lesson plan document.

    Args:
        grade: Grade level shown in the title.
        subject: Subject shown in the title.
        topic: Lesson topic.
        slo: Student learning outcome text.
        duration: Lesson length in minutes.
        context: Retrieved curriculum text inserted into the template.

    Returns:
        The populated ``docx`` document.
    """
    doc = Document()
    doc.add_heading(f"Grade {grade} {subject} Lesson Plan: {topic}", 0)
    doc.add_paragraph(f"SLO: {slo}")
    doc.add_paragraph(f"Duration: {duration} minutes")
    doc.add_heading("BOPPPS Model", level=1)

    sections = (
        (
            "Bridge-in",
            f"Engaging activity for {topic}: "
            f"[Generated activity based on {context}]",
        ),
        ("Outcome", f"Objective: {slo}"),
        ("Pre-assessment", "Quick quiz or question to gauge prior knowledge."),
        ("Participatory Learning", f"Interactive activity: [Generated from {context}]"),
        ("Post-assessment", "Evaluate SLO achievement with a short task."),
        ("Summary", "Recap key points of the lesson."),
    )
    for heading, body in sections:
        doc.add_heading(heading, level=2)
        doc.add_paragraph(body)
    return doc


def generate_lesson_plan_backward(grade, subject, topic, slo, duration, context):
    """Build a Backward Design lesson plan document.

    Args:
        grade: Grade level shown in the title.
        subject: Subject shown in the title.
        topic: Lesson topic.
        slo: Student learning outcome text.
        duration: Lesson length in minutes.
        context: Retrieved curriculum text inserted into the template.

    Returns:
        The populated ``docx`` document.
    """
    doc = Document()
    doc.add_heading(f"Grade {grade} {subject} Lesson Plan: {topic}", 0)
    doc.add_paragraph(f"SLO: {slo}")
    doc.add_paragraph(f"Duration: {duration} minutes")
    doc.add_heading("Backward Design", level=1)

    sections = (
        ("Desired Results", f"Goals: {slo}"),
        ("Acceptable Evidence", "Assessment criteria based on SLO."),
        ("Learning Experiences", f"Instructional strategies: [Generated from {context}]"),
    )
    for heading, body in sections:
        doc.add_heading(heading, level=2)
        doc.add_paragraph(body)
    return doc


def generate_flashcards(grade, subject, topic, slo, context):
    """Build a five-card flashcard table document.

    Args:
        grade: Grade level shown in the title.
        subject: Subject shown in the title.
        topic: Flashcard topic.
        slo: Student learning outcome text.
        context: Retrieved curriculum text inserted into each answer cell.

    Returns:
        The populated ``docx`` document.
    """
    doc = Document()
    doc.add_heading(f"Grade {grade} {subject} Flashcards: {topic}", 0)
    doc.add_paragraph(f"SLO: {slo}")

    # One header row plus five question/answer rows.
    table = doc.add_table(rows=6, cols=2)
    table.style = "Table Grid"
    table.cell(0, 0).text = "Front (Question)"
    table.cell(0, 1).text = "Back (Answer)"
    for i in range(1, 6):
        table.cell(i, 0).text = f"Question {i} about {topic}?"
        table.cell(i, 1).text = f"Answer {i} based on {context}."
    return doc


def generate_worksheet(grade, subject, topic, slo, context):
    """Build a worksheet with MCQs, short-answer questions, and an activity.

    Args:
        grade: Grade level shown in the title.
        subject: Subject shown in the title.
        topic: Worksheet topic.
        slo: Student learning outcome text.
        context: Retrieved curriculum text inserted into the activity.

    Returns:
        The populated ``docx`` document.
    """
    doc = Document()
    doc.add_heading(f"Grade {grade} {subject} Worksheet: {topic}", 0)
    doc.add_paragraph(f"SLO: {slo}")
    doc.add_heading("Instructions", level=1)
    doc.add_paragraph("Complete the following questions.")

    doc.add_heading("Multiple Choice", level=2)
    for i in range(1, 4):
        doc.add_paragraph(
            f"{i}. Sample MCQ about {topic}? "
            "a) Option1 b) Option2 c) Option3 d) Option4"
        )

    doc.add_heading("Short Answer", level=2)
    for i in range(1, 3):
        doc.add_paragraph(f"{i}. Short answer question about {topic}?")

    doc.add_heading("Activity", level=2)
    doc.add_paragraph(f"Activity based on {context}.")
    return doc


def save_docx(doc, filename):
    """Serialise *doc* into an in-memory buffer positioned at its start.

    Args:
        doc: The ``docx`` document to serialise.
        filename: Unused; kept so existing callers keep working.

    Returns:
        An ``io.BytesIO`` holding the .docx bytes.
    """
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer


# Streamlit UI
st.title("Curriculum Assistant / نصابی اسسٹنٹ")
st.write("Upload a curriculum PDF and generate lesson plans, flashcards, or worksheets / نصابی پی ڈی ایف اپ لوڈ کریں اور سبق کے منصوبے، فلیش کارڈز، یا ورک شیٹس بنائیں")
st.info("BOPPPS is great for structured lessons; Backward Design focuses on learning goals / BOPPPS منظم اسباق کے لیے بہترین ہے؛ Backward Design سیکھنے کے اہداف پر مرکوز ہے")

# File uploader
uploaded_file = st.file_uploader("Upload Curriculum PDF / پی ڈی ایف اپ لوڈ کریں", type="pdf")

# Input form
with st.form("input_form"):
    grade = st.selectbox("Grade / گریڈ", list(range(1, 13)))
    subject = st.selectbox("Subject / مضمون", ["Math", "Science", "Social Studies", "English"])
    topic = st.text_input("Topic / موضوع", placeholder="e.g., Photosynthesis / مثلاً، فوٹوسنتھیسز")
    slo = st.text_input("Specific SLO (optional) / مخصوص SLO (اختیاری)", placeholder="e.g., Understand cell structure / مثلاً، خلیے کی ساخت کو سمجھیں")
    duration = st.selectbox("Lesson Duration (minutes) / سبق کا دورانیہ (منٹ)", [30, 45, 60])
    output_type = st.radio(
        "Output Type / آؤٹ پٹ کی قسم",
        ["Lesson Plan (BOPPPS)", "Lesson Plan (Backward Design)", "Flashcards", "Worksheet"],
    )
    submitted = st.form_submit_button("Generate / بنائیں")

# Process PDF and generate output
if submitted and uploaded_file:
    with st.spinner("Processing PDF / پی ڈی ایف پر عمل ہو رہا ہے..."):
        # Extract and chunk text
        text = extract_text_from_pdf(uploaded_file)
        # Whitespace-only text would produce zero chunks and nothing to
        # embed, so treat it the same as no text at all.
        if text.strip():
            st.session_state.chunks = chunk_text(text)
            st.session_state.embeddings = create_embeddings(st.session_state.chunks)
            st.session_state.faiss_index = setup_faiss_index(st.session_state.embeddings)

            # Retrieve relevant context
            query = f"Grade {grade} {subject} {topic} {slo}"
            relevant_chunks = retrieve_relevant_chunks(
                query, st.session_state.faiss_index, st.session_state.chunks
            )
            context = " ".join(relevant_chunks)

            # Generate output
            effective_slo = slo or "General SLO"
            if output_type == "Lesson Plan (BOPPPS)":
                doc = generate_lesson_plan_boppps(grade, subject, topic, effective_slo, duration, context)
                filename = f"Grade_{grade}_{subject}_BOPPPS_Lesson_Plan.docx"
            elif output_type == "Lesson Plan (Backward Design)":
                doc = generate_lesson_plan_backward(grade, subject, topic, effective_slo, duration, context)
                filename = f"Grade_{grade}_{subject}_Backward_Design_Lesson_Plan.docx"
            elif output_type == "Flashcards":
                doc = generate_flashcards(grade, subject, topic, effective_slo, context)
                filename = f"Grade_{grade}_{subject}_Flashcards.docx"
            else:
                doc = generate_worksheet(grade, subject, topic, effective_slo, context)
                filename = f"Grade_{grade}_{subject}_Worksheet.docx"

            # Preview and download
            st.write("**Preview / پیش منظر**:")
            for paragraph in doc.paragraphs:
                st.write(paragraph.text)
            for table in doc.tables:
                for row in table.rows:
                    st.write(" | ".join(cell.text for cell in row.cells))

            buffer = save_docx(doc, filename)
            st.download_button(
                label="Download as Word / ورڈ کے طور پر ڈاؤن لوڈ کریں",
                data=buffer,
                file_name=filename,
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            )
        else:
            st.error("No text extracted. Please upload a valid PDF / کوئی متن نہیں نکالا گیا۔ براہ کرم ایک درست پی ڈی ایف اپ لوڈ کریں")
elif submitted:
    st.error("Please upload a PDF file / براہ کرم پی ڈی ایف فائل اپ لوڈ کریں")

# Feedback
st.text_area("Feedback (optional) / رائے (اختیاری)", placeholder="Report issues or suggestions / مسائل یا تجاویز کی اطلاع دیں")