Spaces:
Build error
Build error
File size: 10,314 Bytes
3a84894 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
import streamlit as st
import PyPDF2
import pytesseract
from PIL import Image
import io
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModel
from docx import Document
from docx.shared import Inches
import torch
import os
from datetime import datetime
# Set page config (must run before any other Streamlit command).
st.set_page_config(page_title="Curriculum Assistant", layout="wide")

# Initialize session state for FAISS index and chunks.
# The three keys are seeded together, keyed on the presence of "faiss_index",
# so values stored by an earlier run of the same session are not overwritten.
if "faiss_index" not in st.session_state:
    st.session_state.faiss_index = None
    st.session_state.chunks = []
    st.session_state.embeddings = None

# Multilingual embedding model
MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"


@st.cache_resource
def _load_embedding_model(model_name):
    """Load the tokenizer and encoder for ``model_name`` once and reuse them.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the model weights would be reloaded on each rerun.
    ``st.cache_resource`` keeps one shared instance per server process.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModel.from_pretrained(model_name),
    )


# Module-level names used by the embedding helpers.
tokenizer, model = _load_embedding_model(MODEL_NAME)
# Helper functions
def extract_text_from_pdf(pdf_file):
    """Extract the text layer of a PDF, falling back to OCR when it is empty.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts; the UI passes the
            uploaded file object.

    Returns:
        The extracted text. An empty string is returned when extraction
        fails; the error is reported to the user through ``st.error``.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Join pages with a newline: concatenating them directly fuses the
        # last word of one page with the first word of the next, which
        # corrupts any later whitespace-based splitting.
        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        if not text.strip():
            st.warning("No text found in PDF. Attempting OCR...")
            text = extract_text_with_ocr(pdf_file)
        return text
    except Exception as e:
        # UI boundary: surface any parser failure instead of crashing the app.
        st.error(f"Error extracting text: {str(e)} / متن نکالنے میں خرابی: {str(e)}")
        return ""
def extract_text_with_ocr(pdf_file):
    """OCR every image embedded in a PDF and return the recognised text.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts.

    Returns:
        The OCR output of all embedded images, in page order, joined by
        newlines. An empty string is returned when OCR fails; the error is
        reported to the user through ``st.error``.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        recognised = []
        for page in pdf_reader.pages:
            # A page may carry several images; OCR all of them, not only the first.
            for embedded in page.images:
                # `.data` holds the encoded image bytes, which PIL decodes.
                # (A decoded PIL image has no `.data` attribute, so the bytes
                # must be taken from the PDF image entry itself.)
                picture = Image.open(io.BytesIO(embedded.data))
                recognised.append(
                    pytesseract.image_to_string(picture, lang="eng+urd")
                )
        return "\n".join(recognised)
    except Exception as e:
        # UI boundary: report the failure and let the caller handle "".
        st.error(f"OCR failed: {str(e)} / OCR ناکام: {str(e)}")
        return ""
def chunk_text(text, chunk_size=400, overlap=80):
    """Split text into overlapping chunks of whitespace-separated words.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared between consecutive chunks; must
            satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of chunk strings (words re-joined with single spaces).
        Empty or whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + chunk_size]))
        # Once a chunk reaches the final word, any further window would only
        # repeat words already covered by this chunk, so stop here.
        if start + chunk_size >= len(words):
            break
    return chunks
def create_embeddings(chunks):
    """Embed each chunk with the module-level tokenizer and model.

    Every chunk is tokenized on its own (with truncation), run through the
    model without gradient tracking, and reduced by averaging
    ``last_hidden_state`` over ``dim=1``.

    Args:
        chunks: Iterable of text chunks.

    Returns:
        The per-chunk embeddings stacked vertically into one NumPy array.
    """
    def _embed(passage):
        # Encode a single passage and mean-pool its hidden states.
        encoded = tokenizer(
            passage, return_tensors="pt", truncation=True, padding=True
        )
        with torch.no_grad():
            hidden_states = model(**encoded).last_hidden_state
            pooled = hidden_states.mean(dim=1).numpy()
        return pooled

    return np.vstack([_embed(passage) for passage in chunks])
def setup_faiss_index(embeddings):
    """Build a flat L2 FAISS index over the given embedding matrix.

    Args:
        embeddings: 2-D array-like with one embedding per row.

    Returns:
        A ``faiss.IndexFlatL2`` containing all rows of ``embeddings``.
    """
    # FAISS only accepts C-contiguous float32 matrices; coerce here so the
    # function also works for float64 or non-contiguous input.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index
def retrieve_relevant_chunks(query, index, chunks, k=3):
    """Return the chunks whose embeddings are nearest to the query.

    Args:
        query: Query text; embedded the same way as the chunks (mean of
            ``last_hidden_state`` over ``dim=1``).
        index: FAISS index built over the chunk embeddings.
        chunks: Chunk texts, positionally aligned with the index rows.
        k: Maximum number of chunks to return.

    Returns:
        Up to ``k`` chunk strings in the order returned by the index search.
        Returns an empty list when there is nothing to search.
    """
    # Never request more neighbours than exist: FAISS pads missing results
    # with label -1, and chunks[-1] would silently return the last chunk.
    k = min(k, index.ntotal, len(chunks))
    if k <= 0:
        return []

    query_inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        query_embedding = model(**query_inputs).last_hidden_state.mean(dim=1).numpy()

    _distances, indices = index.search(query_embedding, k)
    # Defensive filter in case the index still reports padded/invalid labels.
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
def generate_lesson_plan_boppps(grade, subject, topic, slo, duration, context):
    """Build a Word document containing a BOPPPS-model lesson plan template.

    The document has a title, the SLO and duration, a "BOPPPS Model" heading,
    and one level-2 heading plus one paragraph for each BOPPPS stage.

    Returns:
        The populated ``Document``.
    """
    # (stage heading, stage paragraph) in the order they appear in the plan.
    stages = (
        ("Bridge-in", f"Engaging activity for {topic}: [Generated activity based on {context}]"),
        ("Outcome", f"Objective: {slo}"),
        ("Pre-assessment", "Quick quiz or question to gauge prior knowledge."),
        ("Participatory Learning", f"Interactive activity: [Generated from {context}]"),
        ("Post-assessment", "Evaluate SLO achievement with a short task."),
        ("Summary", "Recap key points of the lesson."),
    )

    plan = Document()
    plan.add_heading(f"Grade {grade} {subject} Lesson Plan: {topic}", 0)
    for intro_line in (f"SLO: {slo}", f"Duration: {duration} minutes"):
        plan.add_paragraph(intro_line)
    plan.add_heading("BOPPPS Model", level=1)
    for stage_title, stage_body in stages:
        plan.add_heading(stage_title, level=2)
        plan.add_paragraph(stage_body)
    return plan
def generate_lesson_plan_backward(grade, subject, topic, slo, duration, context):
    """Build a Word document containing a Backward Design lesson plan template.

    The document has a title, the SLO and duration, a "Backward Design"
    heading, and one level-2 heading plus one paragraph for each design stage.

    Returns:
        The populated ``Document``.
    """
    # (stage heading, stage paragraph) in the order they appear in the plan.
    stages = (
        ("Desired Results", f"Goals: {slo}"),
        ("Acceptable Evidence", "Assessment criteria based on SLO."),
        ("Learning Experiences", f"Instructional strategies: [Generated from {context}]"),
    )

    plan = Document()
    plan.add_heading(f"Grade {grade} {subject} Lesson Plan: {topic}", 0)
    for intro_line in (f"SLO: {slo}", f"Duration: {duration} minutes"):
        plan.add_paragraph(intro_line)
    plan.add_heading("Backward Design", level=1)
    for stage_title, stage_body in stages:
        plan.add_heading(stage_title, level=2)
        plan.add_paragraph(stage_body)
    return plan
def generate_flashcards(grade, subject, topic, slo, context):
    """Build a Word document with a two-column flashcard table.

    The table has a header row followed by five placeholder question/answer
    rows derived from ``topic`` and ``context``.

    Returns:
        The populated ``Document``.
    """
    # Header row first, then cards numbered 1 through 5.
    rows = [("Front (Question)", "Back (Answer)")]
    rows.extend(
        (f"Question {n} about {topic}?", f"Answer {n} based on {context}.")
        for n in range(1, 6)
    )

    deck = Document()
    deck.add_heading(f"Grade {grade} {subject} Flashcards: {topic}", 0)
    deck.add_paragraph(f"SLO: {slo}")
    grid = deck.add_table(rows=len(rows), cols=2)
    grid.style = "Table Grid"
    for row_number, (front, back) in enumerate(rows):
        grid.cell(row_number, 0).text = front
        grid.cell(row_number, 1).text = back
    return deck
def generate_worksheet(grade, subject, topic, slo, context):
    """Build a Word document containing a worksheet template.

    The worksheet has instructions, three placeholder multiple-choice
    questions, two placeholder short-answer questions, and one activity
    paragraph derived from ``context``.

    Returns:
        The populated ``Document``.
    """
    mcq_lines = [
        f"{n}. Sample MCQ about {topic}? a) Option1 b) Option2 c) Option3 d) Option4"
        for n in range(1, 4)
    ]
    short_answer_lines = [
        f"{n}. Short answer question about {topic}?" for n in range(1, 3)
    ]
    # (heading text, heading level, paragraphs under that heading), in order.
    layout = (
        ("Instructions", 1, ["Complete the following questions."]),
        ("Multiple Choice", 2, mcq_lines),
        ("Short Answer", 2, short_answer_lines),
        ("Activity", 2, [f"Activity based on {context}."]),
    )

    sheet = Document()
    sheet.add_heading(f"Grade {grade} {subject} Worksheet: {topic}", 0)
    sheet.add_paragraph(f"SLO: {slo}")
    for heading_text, heading_level, paragraphs in layout:
        sheet.add_heading(heading_text, level=heading_level)
        for paragraph_text in paragraphs:
            sheet.add_paragraph(paragraph_text)
    return sheet
def save_docx(doc, filename):
    """Serialize ``doc`` into an in-memory buffer positioned at its start.

    Args:
        doc: Object exposing ``save(stream)``.
        filename: Accepted for interface compatibility; not used here.

    Returns:
        An ``io.BytesIO`` holding the saved bytes, rewound to offset 0.
    """
    stream = io.BytesIO()
    doc.save(stream)
    # Rewind so a consumer reading the buffer starts from the first byte.
    stream.seek(0, io.SEEK_SET)
    return stream
# Streamlit UI: header, PDF upload, input form, generation, preview/download.
st.title("Curriculum Assistant / نصابی اسسٹنٹ")
st.write("Upload a curriculum PDF and generate lesson plans, flashcards, or worksheets / نصابی پی ڈی ایف اپ لوڈ کریں اور سبق کے منصوبے، فلیش کارڈز، یا ورک شیٹس بنائیں")
st.info("BOPPPS is great for structured lessons; Backward Design focuses on learning goals / BOPPPS منظم اسباق کے لیے بہترین ہے؛ Backward Design سیکھنے کے اہداف پر مرکوز ہے")

# File uploader
uploaded_file = st.file_uploader("Upload Curriculum PDF / پی ڈی ایف اپ لوڈ کریں", type="pdf")

# Input form: all widgets are submitted together by the form button.
with st.form("input_form"):
    grade = st.selectbox("Grade / گریڈ", list(range(1, 13)))
    subject = st.selectbox("Subject / مضمون", ["Math", "Science", "Social Studies", "English"])
    topic = st.text_input("Topic / موضوع", placeholder="e.g., Photosynthesis / مثلاً، فوٹوسنتھیسز")
    slo = st.text_input("Specific SLO (optional) / مخصوص SLO (اختیاری)", placeholder="e.g., Understand cell structure / مثلاً، خلیے کی ساخت کو سمجھیں")
    duration = st.selectbox("Lesson Duration (minutes) / سبق کا دورانیہ (منٹ)", [30, 45, 60])
    output_type = st.radio("Output Type / آؤٹ پٹ کی قسم", ["Lesson Plan (BOPPPS)", "Lesson Plan (Backward Design)", "Flashcards", "Worksheet"])
    submitted = st.form_submit_button("Generate / بنائیں")

# Process PDF and generate output
if submitted and not uploaded_file:
    st.error("Please upload a PDF file / براہ کرم پی ڈی ایف فائل اپ لوڈ کریں")
elif submitted:
    with st.spinner("Processing PDF / پی ڈی ایف پر عمل ہو رہا ہے..."):
        text = extract_text_from_pdf(uploaded_file)
        if not text:
            st.error("No text extracted. Please upload a valid PDF / کوئی متن نہیں نکالا گیا۔ براہ کرم ایک درست پی ڈی ایف اپ لوڈ کریں")
        else:
            # Chunk, embed and index the document; results live in session state.
            st.session_state.chunks = chunk_text(text)
            st.session_state.embeddings = create_embeddings(st.session_state.chunks)
            st.session_state.faiss_index = setup_faiss_index(st.session_state.embeddings)

            # Retrieve the chunks closest to the teacher's request.
            query = f"Grade {grade} {subject} {topic} {slo}"
            context = " ".join(
                retrieve_relevant_chunks(
                    query, st.session_state.faiss_index, st.session_state.chunks
                )
            )

            # Pick the generator. Lesson plans take a duration, the other
            # materials do not; any unlisted output type is a worksheet.
            effective_slo = slo or "General SLO"
            lesson_plan_builders = {
                "Lesson Plan (BOPPPS)": (generate_lesson_plan_boppps, "BOPPPS_Lesson_Plan"),
                "Lesson Plan (Backward Design)": (generate_lesson_plan_backward, "Backward_Design_Lesson_Plan"),
            }
            if output_type in lesson_plan_builders:
                build, file_suffix = lesson_plan_builders[output_type]
                doc = build(grade, subject, topic, effective_slo, duration, context)
            else:
                if output_type == "Flashcards":
                    build, file_suffix = generate_flashcards, "Flashcards"
                else:
                    build, file_suffix = generate_worksheet, "Worksheet"
                doc = build(grade, subject, topic, effective_slo, context)
            filename = f"Grade_{grade}_{subject}_{file_suffix}.docx"

            # Preview: paragraphs first, then each table row as a pipe-joined line.
            st.write("**Preview / پیش منظر**:")
            for paragraph in doc.paragraphs:
                st.write(paragraph.text)
            for table in doc.tables:
                for row in table.rows:
                    st.write(" | ".join(cell.text for cell in row.cells))

            # Download
            st.download_button(
                label="Download as Word / ورڈ کے طور پر ڈاؤن لوڈ کریں",
                data=save_docx(doc, filename),
                file_name=filename,
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            )

# Feedback (the entered value is not read anywhere in this script).
st.text_area("Feedback (optional) / رائے (اختیاری)", placeholder="Report issues or suggestions / مسائل یا تجاویز کی اطلاع دیں")