# Spaces: Build error
import streamlit as st | |
import PyPDF2 | |
import pytesseract | |
from PIL import Image | |
import io | |
import faiss | |
import numpy as np | |
from transformers import AutoTokenizer, AutoModel | |
from docx import Document | |
from docx.shared import Inches | |
import torch | |
import os | |
from datetime import datetime | |
# Configure the browser tab title and use the full page width.
st.set_page_config(page_title="Curriculum Assistant", layout="wide")

# On the first run of a session, create empty slots for the vector index,
# the text chunks it is built from, and the chunk embedding matrix.
if "faiss_index" not in st.session_state:
    st.session_state["faiss_index"] = None
    st.session_state["chunks"] = []
    st.session_state["embeddings"] = None
# Multilingual sentence-embedding checkpoint used for both chunks and queries.
MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"


@st.cache_resource
def _load_embedding_model(model_name):
    """Load the tokenizer and encoder for ``model_name`` and cache them.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded objects avoids re-reading the checkpoint on each rerun.

    Args:
        model_name: Hugging Face model identifier to load.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModel.from_pretrained(model_name),
    )


tokenizer, model = _load_embedding_model(MODEL_NAME)
# Helper functions | |
def extract_text_from_pdf(pdf_file):
    """Return the text layer of ``pdf_file``, falling back to OCR if it is blank.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts (here, the uploaded
            file object).

    Returns:
        The concatenated text of all pages. When that text is empty or only
        whitespace, a warning is shown and the result of
        ``extract_text_with_ocr`` is returned instead. On any exception an
        error is shown in the UI and ``""`` is returned.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        # ``extract_text`` may yield None for a page; treat that as no text.
        text = "".join(page.extract_text() or "" for page in reader.pages)
        if text.strip():
            return text
        st.warning("No text found in PDF. Attempting OCR...")
        return extract_text_with_ocr(pdf_file)
    except Exception as exc:
        detail = str(exc)
        st.error(f"Error extracting text: {detail} / متن نکالنے میں خرابی: {detail}")
        return ""
def extract_text_with_ocr(pdf_file):
    """OCR the images embedded in ``pdf_file`` and return the recognised text.

    Every image on every page is passed to Tesseract with the English and
    Urdu language packs (``eng+urd``).

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts.

    Returns:
        The OCR output of all images concatenated in page order, or ``""``
        when the PDF has no images. On any exception an error is shown in
        the UI and ``""`` is returned.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        recognised = []
        for page in reader.pages:
            # A page can carry more than one image, so OCR all of them
            # rather than only the first.
            for embedded in page.images:
                # ``data`` holds the encoded image bytes; decode them with
                # PIL before handing the picture to Tesseract.
                picture = Image.open(io.BytesIO(embedded.data))
                recognised.append(
                    pytesseract.image_to_string(picture, lang="eng+urd")
                )
        return "".join(recognised)
    except Exception as e:
        st.error(f"OCR failed: {str(e)} / OCR ناکام: {str(e)}")
        return ""
def chunk_text(text, chunk_size=400, overlap=80):
    """Split ``text`` into overlapping chunks of whitespace-separated words.

    Consecutive chunks start ``chunk_size - overlap`` words apart. Chunking
    stops as soon as a chunk reaches the last word, so no trailing chunk is
    emitted that is entirely contained in the previous one.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared by neighbouring chunks; must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings (words joined by single spaces); empty when
        ``text`` contains no words.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        end = start + chunk_size
        chunks.append(" ".join(words[start:end]))
        # This chunk already covers the tail; a further window would only
        # repeat words that are already in it.
        if end >= len(words):
            break
    return chunks
def create_embeddings(chunks):
    """Embed each chunk with the module-level ``tokenizer`` and ``model``.

    Each chunk is tokenised on its own (truncated to the tokenizer's limit)
    and represented by the mean of the encoder's last hidden states over
    the token axis.

    Args:
        chunks: List of text chunks to embed.

    Returns:
        A 2-D NumPy array with one row per chunk. For an empty ``chunks``
        list an empty ``(0, hidden_size)`` float32 array is returned instead
        of raising from ``np.vstack``.
    """
    if not chunks:
        return np.empty((0, model.config.hidden_size), dtype=np.float32)

    vectors = []
    for chunk in chunks:
        inputs = tokenizer(chunk, return_tensors="pt", truncation=True, padding=True)
        # Inference only: skip gradient tracking.
        with torch.no_grad():
            hidden_states = model(**inputs).last_hidden_state
        vectors.append(hidden_states.mean(dim=1).numpy())
    return np.vstack(vectors)
def setup_faiss_index(embeddings):
    """Build a flat L2 FAISS index containing every row of ``embeddings``.

    Args:
        embeddings: 2-D array whose second axis is the vector dimension.

    Returns:
        The populated ``faiss.IndexFlatL2`` instance.
    """
    flat_index = faiss.IndexFlatL2(embeddings.shape[1])
    flat_index.add(embeddings)
    return flat_index
def retrieve_relevant_chunks(query, index, chunks, k=3):
    """Return up to ``k`` chunks whose embeddings are nearest to ``query``.

    The query is embedded the same way as the chunks (mean of the encoder's
    last hidden states) and looked up in ``index``.

    Args:
        query: Free-text search query.
        index: FAISS index built over the embeddings of ``chunks``.
        chunks: The chunk strings, in the order they were added to ``index``.
        k: Maximum number of chunks to return.

    Returns:
        A list of at most ``k`` chunk strings, nearest first. Empty when
        ``chunks`` is empty or ``k`` is not positive.
    """
    if not chunks or k <= 0:
        return []

    query_inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        query_embedding = model(**query_inputs).last_hidden_state.mean(dim=1).numpy()

    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, which Python would treat as "the last chunk".
    neighbour_count = min(k, len(chunks))
    _, indices = index.search(query_embedding, neighbour_count)
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
def generate_lesson_plan_boppps(grade, subject, topic, slo, duration, context):
    """Build a Word document holding a BOPPPS-structured lesson plan template.

    Args:
        grade: Grade level shown in the title.
        subject: Subject name shown in the title.
        topic: Lesson topic.
        slo: Student learning outcome text.
        duration: Lesson length in minutes.
        context: Retrieved curriculum text inserted into the placeholders.

    Returns:
        The populated ``Document``.
    """
    # (section heading, paragraph text) in the order they appear in the plan.
    stages = (
        ("Bridge-in", f"Engaging activity for {topic}: [Generated activity based on {context}]"),
        ("Outcome", f"Objective: {slo}"),
        ("Pre-assessment", "Quick quiz or question to gauge prior knowledge."),
        ("Participatory Learning", f"Interactive activity: [Generated from {context}]"),
        ("Post-assessment", "Evaluate SLO achievement with a short task."),
        ("Summary", "Recap key points of the lesson."),
    )

    plan = Document()
    plan.add_heading(f"Grade {grade} {subject} Lesson Plan: {topic}", 0)
    plan.add_paragraph(f"SLO: {slo}")
    plan.add_paragraph(f"Duration: {duration} minutes")
    plan.add_heading("BOPPPS Model", level=1)
    for stage_title, stage_text in stages:
        plan.add_heading(stage_title, level=2)
        plan.add_paragraph(stage_text)
    return plan
def generate_lesson_plan_backward(grade, subject, topic, slo, duration, context):
    """Build a Word document holding a Backward Design lesson plan template.

    Args:
        grade: Grade level shown in the title.
        subject: Subject name shown in the title.
        topic: Lesson topic.
        slo: Student learning outcome text.
        duration: Lesson length in minutes.
        context: Retrieved curriculum text inserted into the placeholder.

    Returns:
        The populated ``Document``.
    """
    # (section heading, paragraph text) in the order they appear in the plan.
    phases = (
        ("Desired Results", f"Goals: {slo}"),
        ("Acceptable Evidence", "Assessment criteria based on SLO."),
        ("Learning Experiences", f"Instructional strategies: [Generated from {context}]"),
    )

    plan = Document()
    plan.add_heading(f"Grade {grade} {subject} Lesson Plan: {topic}", 0)
    plan.add_paragraph(f"SLO: {slo}")
    plan.add_paragraph(f"Duration: {duration} minutes")
    plan.add_heading("Backward Design", level=1)
    for phase_title, phase_text in phases:
        plan.add_heading(phase_title, level=2)
        plan.add_paragraph(phase_text)
    return plan
def generate_flashcards(grade, subject, topic, slo, context):
    """Build a Word document with a two-column table of five flashcards.

    Args:
        grade: Grade level shown in the title.
        subject: Subject name shown in the title.
        topic: Topic the placeholder questions refer to.
        slo: Student learning outcome text.
        context: Retrieved curriculum text inserted into the answers.

    Returns:
        The populated ``Document``.
    """
    # Header row followed by the five placeholder cards.
    card_rows = [("Front (Question)", "Back (Answer)")]
    card_rows.extend(
        (f"Question {n} about {topic}?", f"Answer {n} based on {context}.")
        for n in range(1, 6)
    )

    deck = Document()
    deck.add_heading(f"Grade {grade} {subject} Flashcards: {topic}", 0)
    deck.add_paragraph(f"SLO: {slo}")
    grid = deck.add_table(rows=len(card_rows), cols=2)
    grid.style = "Table Grid"
    for row_no, (front, back) in enumerate(card_rows):
        grid.cell(row_no, 0).text = front
        grid.cell(row_no, 1).text = back
    return deck
def generate_worksheet(grade, subject, topic, slo, context):
    """Build a Word document holding a worksheet template.

    The worksheet has three multiple-choice questions, two short-answer
    questions and one activity, all as placeholders.

    Args:
        grade: Grade level shown in the title.
        subject: Subject name shown in the title.
        topic: Topic the placeholder questions refer to.
        slo: Student learning outcome text.
        context: Retrieved curriculum text inserted into the activity.

    Returns:
        The populated ``Document``.
    """
    mcq_lines = [
        f"{n}. Sample MCQ about {topic}? a) Option1 b) Option2 c) Option3 d) Option4"
        for n in (1, 2, 3)
    ]
    short_answer_lines = [
        f"{n}. Short answer question about {topic}?" for n in (1, 2)
    ]
    # (level-2 heading, paragraphs under it) in document order.
    question_sections = (
        ("Multiple Choice", mcq_lines),
        ("Short Answer", short_answer_lines),
        ("Activity", [f"Activity based on {context}."]),
    )

    sheet = Document()
    sheet.add_heading(f"Grade {grade} {subject} Worksheet: {topic}", 0)
    sheet.add_paragraph(f"SLO: {slo}")
    sheet.add_heading("Instructions", level=1)
    sheet.add_paragraph("Complete the following questions.")
    for section_title, lines in question_sections:
        sheet.add_heading(section_title, level=2)
        for line in lines:
            sheet.add_paragraph(line)
    return sheet
def save_docx(doc, filename):
    """Serialise ``doc`` into an in-memory buffer positioned at its start.

    Args:
        doc: Object exposing ``save(file_like)``.
        filename: Accepted for interface compatibility; not used here.

    Returns:
        An ``io.BytesIO`` holding the saved document, rewound to offset 0.
    """
    stream = io.BytesIO()
    doc.save(stream)
    # Rewind so the consumer reads from the first byte.
    stream.seek(0, io.SEEK_SET)
    return stream
# Streamlit UI: page heading, short usage hint and a note on the two
# lesson-plan models.
st.title("Curriculum Assistant / نصابی اسسٹنٹ")
st.write("Upload a curriculum PDF and generate lesson plans, flashcards, or worksheets / نصابی پی ڈی ایف اپ لوڈ کریں اور سبق کے منصوبے، فلیش کارڈز، یا ورک شیٹس بنائیں")
st.info("BOPPPS is great for structured lessons; Backward Design focuses on learning goals / BOPPPS منظم اسباق کے لیے بہترین ہے؛ Backward Design سیکھنے کے اہداف پر مرکوز ہے")

# File uploader (PDF only).
uploaded_file = st.file_uploader("Upload Curriculum PDF / پی ڈی ایف اپ لوڈ کریں", type="pdf")

# Choices offered by the input form.
GRADE_OPTIONS = list(range(1, 13))
SUBJECT_OPTIONS = ["Math", "Science", "Social Studies", "English"]
DURATION_OPTIONS = [30, 45, 60]
OUTPUT_TYPE_OPTIONS = [
    "Lesson Plan (BOPPPS)",
    "Lesson Plan (Backward Design)",
    "Flashcards",
    "Worksheet",
]

# Input form: the widget values are submitted together when the button is pressed.
with st.form("input_form"):
    grade = st.selectbox("Grade / گریڈ", GRADE_OPTIONS)
    subject = st.selectbox("Subject / مضمون", SUBJECT_OPTIONS)
    topic = st.text_input(
        "Topic / موضوع",
        placeholder="e.g., Photosynthesis / مثلاً، فوٹوسنتھیسز",
    )
    slo = st.text_input(
        "Specific SLO (optional) / مخصوص SLO (اختیاری)",
        placeholder="e.g., Understand cell structure / مثلاً، خلیے کی ساخت کو سمجھیں",
    )
    duration = st.selectbox("Lesson Duration (minutes) / سبق کا دورانیہ (منٹ)", DURATION_OPTIONS)
    output_type = st.radio("Output Type / آؤٹ پٹ کی قسم", OUTPUT_TYPE_OPTIONS)
    submitted = st.form_submit_button("Generate / بنائیں")
# Process the uploaded PDF and generate the requested document.
if submitted and uploaded_file:
    with st.spinner("Processing PDF / پی ڈی ایف پر عمل ہو رہا ہے..."):
        # Extract and chunk text. Whitespace-only text is truthy but yields
        # zero chunks, which would crash embedding/index creation, so the
        # decision below is made on the chunk list rather than on the text.
        text = extract_text_from_pdf(uploaded_file)
        chunks = chunk_text(text) if text else []
        if chunks:
            st.session_state.chunks = chunks
            st.session_state.embeddings = create_embeddings(st.session_state.chunks)
            st.session_state.faiss_index = setup_faiss_index(st.session_state.embeddings)

            # Retrieve the curriculum passages closest to the request.
            query = f"Grade {grade} {subject} {topic} {slo}"
            relevant_chunks = retrieve_relevant_chunks(
                query, st.session_state.faiss_index, st.session_state.chunks
            )
            context = " ".join(relevant_chunks)

            # The SLO field is optional; fall back to a generic label.
            effective_slo = slo or "General SLO"

            # Generate output
            if output_type == "Lesson Plan (BOPPPS)":
                doc = generate_lesson_plan_boppps(grade, subject, topic, effective_slo, duration, context)
                filename = f"Grade_{grade}_{subject}_BOPPPS_Lesson_Plan.docx"
            elif output_type == "Lesson Plan (Backward Design)":
                doc = generate_lesson_plan_backward(grade, subject, topic, effective_slo, duration, context)
                filename = f"Grade_{grade}_{subject}_Backward_Design_Lesson_Plan.docx"
            elif output_type == "Flashcards":
                doc = generate_flashcards(grade, subject, topic, effective_slo, context)
                filename = f"Grade_{grade}_{subject}_Flashcards.docx"
            else:
                doc = generate_worksheet(grade, subject, topic, effective_slo, context)
                filename = f"Grade_{grade}_{subject}_Worksheet.docx"

            # Preview: paragraphs first, then table rows with cells joined by " | ".
            st.write("**Preview / پیش منظر**:")
            for paragraph in doc.paragraphs:
                st.write(paragraph.text)
            for table in doc.tables:
                for row in table.rows:
                    st.write(" | ".join(cell.text for cell in row.cells))

            # Offer the generated document as a .docx download.
            buffer = save_docx(doc, filename)
            st.download_button(
                label="Download as Word / ورڈ کے طور پر ڈاؤن لوڈ کریں",
                data=buffer,
                file_name=filename,
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            )
        else:
            st.error("No text extracted. Please upload a valid PDF / کوئی متن نہیں نکالا گیا۔ براہ کرم ایک درست پی ڈی ایف اپ لوڈ کریں")
elif submitted:
    # The form was submitted without a file.
    st.error("Please upload a PDF file / براہ کرم پی ڈی ایف فائل اپ لوڈ کریں")
# Feedback box; the entered text is not read anywhere in this script.
feedback_label = "Feedback (optional) / رائے (اختیاری)"
feedback_hint = "Report issues or suggestions / مسائل یا تجاویز کی اطلاع دیں"
st.text_area(feedback_label, placeholder=feedback_hint)