import streamlit as st
import PyPDF2
import pytesseract
from PIL import Image
import io
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModel
from docx import Document
from docx.shared import Inches
import torch
import os
from datetime import datetime
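# Assumed external dependencies (nothing beyond the imports above): streamlit, PyPDF2,
# pytesseract, Pillow, faiss-cpu, numpy, transformers, torch, python-docx. pytesseract also
# needs the Tesseract binary installed with "eng" and "urd" language data for lang="eng+urd".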

# Set page config
st.set_page_config(page_title="Curriculum Assistant", layout="wide")

# Initialize session state for FAISS index and chunks
if "faiss_index" not in st.session_state:
    st.session_state.faiss_index = None
    st.session_state.chunks = []
    st.session_state.embeddings = None

# Multilingual embedding model, cached so it is loaded once rather than on every Streamlit rerun
MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

@st.cache_resource
def load_embedding_model():
    return AutoTokenizer.from_pretrained(MODEL_NAME), AutoModel.from_pretrained(MODEL_NAME)

tokenizer, model = load_embedding_model()

# Helper functions
def extract_text_from_pdf(pdf_file):
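    # Try the PDF's native text layer first; fall back to OCR when the document is a scanned image.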
    try:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        text = ""
        for page in pdf_reader.pages:
            text += page.extract_text() or ""
        if not text.strip():
            st.warning("No text found in PDF. Attempting OCR...")
            text = extract_text_with_ocr(pdf_file)
        return text
    except Exception as e:
        st.error(f"Error extracting text: {str(e)} / متن نکالنے میں خرابی: {str(e)}")
        return ""

def extract_text_with_ocr(pdf_file):
    try:
        pdf_file.seek(0)  # rewind: the stream was already consumed by the first extraction pass
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        text = ""
        for page in pdf_reader.pages:
            # OCR every embedded image on the page (img.data holds the raw image bytes)
            for img in page.images:
                text += pytesseract.image_to_string(Image.open(io.BytesIO(img.data)), lang="eng+urd")
        return text
    except Exception as e:
        st.error(f"OCR failed: {str(e)} / OCR ناکام: {str(e)}")
        return ""

def chunk_text(text, chunk_size=400, overlap=80):
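    # Sliding-window chunking: ~400-word windows with an 80-word overlap so context is not lost at boundaries.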
    words = text.split()
    chunks = []
    for i in range(0, len(words), chunk_size - overlap):
        chunk = " ".join(words[i:i + chunk_size])
        chunks.append(chunk)
    return chunks

def create_embeddings(chunks):
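    # Encode each chunk and mean-pool the last hidden state into a single fixed-size vector.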
    embeddings = []
    for chunk in chunks:
        inputs = tokenizer(chunk, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            embedding = model(**inputs).last_hidden_state.mean(dim=1).numpy()
        embeddings.append(embedding)
    return np.vstack(embeddings)

def setup_faiss_index(embeddings):
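    # Exact (brute-force) L2 index; sufficient for the small number of chunks from a single PDF.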
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    return index

def retrieve_relevant_chunks(query, index, chunks, k=3):
    # Embed the query the same way as the chunks, then take the k nearest neighbours.
    query_inputs = tokenizer(query, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        query_embedding = model(**query_inputs).last_hidden_state.mean(dim=1).numpy()
    distances, indices = index.search(query_embedding, min(k, len(chunks)))
    return [chunks[i] for i in indices[0] if i != -1]

def generate_lesson_plan_boppps(grade, subject, topic, slo, duration, context):
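    # Build a BOPPPS-structured Word document; bracketed placeholders are meant to be filled from the retrieved context.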
    doc = Document()
    doc.add_heading(f"Grade {grade} {subject} Lesson Plan: {topic}", 0)
    doc.add_paragraph(f"SLO: {slo}")
    doc.add_paragraph(f"Duration: {duration} minutes")
    doc.add_heading("BOPPPS Model", level=1)
    doc.add_heading("Bridge-in", level=2)
    doc.add_paragraph(f"Engaging activity for {topic}: [Generated activity based on {context}]")
    doc.add_heading("Outcome", level=2)
    doc.add_paragraph(f"Objective: {slo}")
    doc.add_heading("Pre-assessment", level=2)
    doc.add_paragraph("Quick quiz or question to gauge prior knowledge.")
    doc.add_heading("Participatory Learning", level=2)
    doc.add_paragraph(f"Interactive activity: [Generated from {context}]")
    doc.add_heading("Post-assessment", level=2)
    doc.add_paragraph("Evaluate SLO achievement with a short task.")
    doc.add_heading("Summary", level=2)
    doc.add_paragraph("Recap key points of the lesson.")
    return doc

def generate_lesson_plan_backward(grade, subject, topic, slo, duration, context):
    doc = Document()
    doc.add_heading(f"Grade {grade} {subject} Lesson Plan: {topic}", 0)
    doc.add_paragraph(f"SLO: {slo}")
    doc.add_paragraph(f"Duration: {duration} minutes")
    doc.add_heading("Backward Design", level=1)
    doc.add_heading("Desired Results", level=2)
    doc.add_paragraph(f"Goals: {slo}")
    doc.add_heading("Acceptable Evidence", level=2)
    doc.add_paragraph("Assessment criteria based on SLO.")
    doc.add_heading("Learning Experiences", level=2)
    doc.add_paragraph(f"Instructional strategies: [Generated from {context}]")
    return doc

def generate_flashcards(grade, subject, topic, slo, context):
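    # Two-column table: a header row plus five placeholder question/answer pairs.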
    doc = Document()
    doc.add_heading(f"Grade {grade} {subject} Flashcards: {topic}", 0)
    doc.add_paragraph(f"SLO: {slo}")
    table = doc.add_table(rows=6, cols=2)
    table.style = "Table Grid"
    table.cell(0, 0).text = "Front (Question)"
    table.cell(0, 1).text = "Back (Answer)"
    for i in range(1, 6):
        table.cell(i, 0).text = f"Question {i} about {topic}?"
        table.cell(i, 1).text = f"Answer {i} based on {context}."
    return doc

def generate_worksheet(grade, subject, topic, slo, context):
    doc = Document()
    doc.add_heading(f"Grade {grade} {subject} Worksheet: {topic}", 0)
    doc.add_paragraph(f"SLO: {slo}")
    doc.add_heading("Instructions", level=1)
    doc.add_paragraph("Complete the following questions.")
    doc.add_heading("Multiple Choice", level=2)
    for i in range(1, 4):
        doc.add_paragraph(f"{i}. Sample MCQ about {topic}? a) Option1 b) Option2 c) Option3 d) Option4")
    doc.add_heading("Short Answer", level=2)
    for i in range(1, 3):
        doc.add_paragraph(f"{i}. Short answer question about {topic}?")
    doc.add_heading("Activity", level=2)
    doc.add_paragraph(f"Activity based on {context}.")
    return doc

def save_docx(doc, filename):
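    # Serialize the document into an in-memory buffer for st.download_button; the filename
    # argument is unused here, the caller passes the download name separately.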
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer

# Streamlit UI
st.title("Curriculum Assistant / نصابی اسسٹنٹ")
st.write("Upload a curriculum PDF and generate lesson plans, flashcards, or worksheets / نصابی پی ڈی ایف اپ لوڈ کریں اور سبق کے منصوبے، فلیش کارڈز، یا ورک شیٹس بنائیں")
st.info("BOPPPS is great for structured lessons; Backward Design focuses on learning goals / BOPPPS منظم اسباق کے لیے بہترین ہے؛ Backward Design سیکھنے کے اہداف پر مرکوز ہے")

# File uploader
uploaded_file = st.file_uploader("Upload Curriculum PDF / پی ڈی ایف اپ لوڈ کریں", type="pdf")

# Input form
with st.form("input_form"):
    grade = st.selectbox("Grade / گریڈ", list(range(1, 13)))
    subject = st.selectbox("Subject / مضمون", ["Math", "Science", "Social Studies", "English"])
    topic = st.text_input("Topic / موضوع", placeholder="e.g., Photosynthesis / مثلاً، فوٹوسنتھیسز")
    slo = st.text_input("Specific SLO (optional) / مخصوص SLO (اختیاری)", placeholder="e.g., Understand cell structure / مثلاً، خلیے کی ساخت کو سمجھیں")
    duration = st.selectbox("Lesson Duration (minutes) / سبق کا دورانیہ (منٹ)", [30, 45, 60])
    output_type = st.radio("Output Type / آؤٹ پٹ کی قسم", ["Lesson Plan (BOPPPS)", "Lesson Plan (Backward Design)", "Flashcards", "Worksheet"])
    submitted = st.form_submit_button("Generate / بنائیں")

# Process PDF and generate output
if submitted and uploaded_file:
    with st.spinner("Processing PDF / پی ڈی ایف پر عمل ہو رہا ہے..."):
        # Extract and chunk text
        text = extract_text_from_pdf(uploaded_file)
        if text:
            st.session_state.chunks = chunk_text(text)
            st.session_state.embeddings = create_embeddings(st.session_state.chunks)
            st.session_state.faiss_index = setup_faiss_index(st.session_state.embeddings)

            # Retrieve relevant context
            query = f"Grade {grade} {subject} {topic} {slo}"
            relevant_chunks = retrieve_relevant_chunks(query, st.session_state.faiss_index, st.session_state.chunks)
            context = " ".join(relevant_chunks)

            # Generate output
            if output_type == "Lesson Plan (BOPPPS)":
                doc = generate_lesson_plan_boppps(grade, subject, topic, slo or "General SLO", duration, context)
                filename = f"Grade_{grade}_{subject}_BOPPPS_Lesson_Plan.docx"
            elif output_type == "Lesson Plan (Backward Design)":
                doc = generate_lesson_plan_backward(grade, subject, topic, slo or "General SLO", duration, context)
                filename = f"Grade_{grade}_{subject}_Backward_Design_Lesson_Plan.docx"
            elif output_type == "Flashcards":
                doc = generate_flashcards(grade, subject, topic, slo or "General SLO", context)
                filename = f"Grade_{grade}_{subject}_Flashcards.docx"
            else:
                doc = generate_worksheet(grade, subject, topic, slo or "General SLO", context)
                filename = f"Grade_{grade}_{subject}_Worksheet.docx"

            # Preview and download
            st.write("**Preview / پیش منظر**:")
            for paragraph in doc.paragraphs:
                st.write(paragraph.text)
            for table in doc.tables:
                for row in table.rows:
                    st.write(" | ".join(cell.text for cell in row.cells))
            
            buffer = save_docx(doc, filename)
            st.download_button(
                label="Download as Word / ورڈ کے طور پر ڈاؤن لوڈ کریں",
                data=buffer,
                file_name=filename,
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            )
        else:
            st.error("No text extracted. Please upload a valid PDF / کوئی متن نہیں نکالا گیا۔ براہ کرم ایک درست پی ڈی ایف اپ لوڈ کریں")
elif submitted:
    st.error("Please upload a PDF file / براہ کرم پی ڈی ایف فائل اپ لوڈ کریں")

# Feedback
st.text_area("Feedback (optional) / رائے (اختیاری)", placeholder="Report issues or suggestions / مسائل یا تجاویز کی اطلاع دیں")