File size: 3,632 Bytes
92f0bd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import gradio as gr
import fitz  # PyMuPDF
from datetime import date
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub

# βœ… 1. Extract text from PDF
def extract_text_from_pdf(file_path):
    """Extract the plain text of every page of a PDF using PyMuPDF.

    Args:
        file_path: Filesystem path to the PDF document.

    Returns:
        The concatenated text of all pages, in page order.
    """
    # Context manager closes the document handle even if get_text()
    # raises — the original left the file open (resource leak).
    with fitz.open(file_path) as doc:
        # join() avoids quadratic += concatenation on long documents.
        return "".join(page.get_text() for page in doc)

# βœ… 2. Chunk text
def chunk_text(text):
    """Split a document's text into overlapping chunks for embedding.

    Args:
        text: Full extracted document text.

    Returns:
        List of chunk strings (500 chars each, 100-char overlap).
    """
    return RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,
    ).split_text(text)

# βœ… 3. Create embeddings and retriever
def create_retriever(chunks):
    """Embed the text chunks and expose them via a FAISS retriever.

    Args:
        chunks: List of text chunks to index.

    Returns:
        A LangChain retriever backed by an in-memory FAISS index.
    """
    print("βš™οΈ Generating embeddings...")
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    store = FAISS.from_texts(chunks, embedding=embedding_model)
    return store.as_retriever()

# βœ… 4. Set up LLM chain (e.g., HuggingFaceHub, GROQ, etc.)
def create_chain(retriever):
    """Build a RetrievalQA chain over *retriever* using flan-t5-base.

    Args:
        retriever: A LangChain retriever supplying relevant chunks.

    Returns:
        A RetrievalQA chain ready to answer queries via .run().
    """
    hub_llm = HuggingFaceHub(
        repo_id="google/flan-t5-base",
        model_kwargs={"temperature": 0.1, "max_length": 256},
    )
    return RetrievalQA.from_chain_type(llm=hub_llm, retriever=retriever)

# πŸ”„ 5. Upload file β†’ extract β†’ chunk β†’ embed
def upload_file(file):
    """Handle a PDF upload: extract its text and split it into chunks.

    Args:
        file: Gradio file wrapper; ``file.name`` is the temp path on disk.

    Returns:
        A list of text chunks on success, or an empty list on failure.
        (The original returned an error *string* into ``chunks_state``,
        which generate_response would then treat as a sequence of
        single characters when building the retriever.)
    """
    try:
        text = extract_text_from_pdf(file.name)
        print("πŸ“„ Extracted text:", text[:300])
        chunks = chunk_text(text)
        print("βœ… Chunks created:", len(chunks))
        return chunks
    except Exception as e:
        print("❌ Upload error:", e)
        # Keep the state a list so downstream len()/iteration stays sane.
        return []

# πŸ’¬ 6. Query & Generate Answer
def generate_response(name, today, query, chunks, model_choice):
    """Answer *query* against the uploaded curriculum chunks via RAG.

    Args:
        name: Teacher's name, echoed into the formatted output.
        today: Date string, echoed into the formatted output.
        query: The question to answer.
        chunks: Text chunks produced by upload_file.
        model_choice: Model label from the dropdown (logged only).

    Returns:
        A formatted answer string, or an error message on failure.
    """
    try:
        print("πŸ§‘β€πŸ« Teacher:", name, "| πŸ“… Date:", today)
        print("πŸ” Query:", query)
        print("πŸ“¦ Chunks received:", len(chunks))
        print("πŸ€– Model selected:", model_choice)

        # Rebuild retriever + chain per request: chunks may have changed.
        answer = create_chain(create_retriever(chunks)).run(query)

        print("βœ… Answer:", answer)
        return (
            f"πŸ‘©β€πŸ« Teacher: {name}\nπŸ“… Date: {today}\n\n"
            f"πŸ“Œ Question: {query}\n\n"
            f"πŸ“˜ Answer:\n{answer}"
        )
    except Exception as e:
        print("❌ Error in response generation:", e)
        return f"❌ Error:\n{e}"

# 🧱 7. Gradio UI
# NOTE: components render in creation order inside gr.Blocks, so the
# statement order below defines the page layout — do not reorder.
with gr.Blocks(title="πŸ“š Curriculum Assistant") as demo:
    gr.Markdown("# 🧠 Curriculum Assistant\nUpload your curriculum PDF and ask questions from it!")

    # Row 1: teacher identity and an editable date, echoed into the answer.
    with gr.Row():
        teacher_name = gr.Textbox(label="πŸ‘©β€πŸ« Teacher Name")
        today_date = gr.Textbox(value=str(date.today()), label="πŸ“… Date")

    # Row 2: PDF picker plus the button that triggers extraction/chunking.
    with gr.Row():
        file_input = gr.File(label="πŸ“Ž Upload Curriculum PDF")
        upload_btn = gr.Button("πŸ“₯ Extract")

    # Invisible per-session state: list of text chunks written by
    # upload_file and read back by generate_response.
    chunks_state = gr.State([])

    # Row 3: question, model picker (single option today), and submit.
    with gr.Row():
        query_input = gr.Textbox(label="πŸ’‘ Enter your question")
        model_selector = gr.Dropdown(choices=["flan-t5-base"], value="flan-t5-base", label="πŸ€– Choose Model")
        submit_button = gr.Button("πŸš€ Generate Resource")

    answer_output = gr.Textbox(label="πŸ“˜ Output", lines=12)

    # πŸ” Button connections
    upload_btn.click(fn=upload_file, inputs=[file_input], outputs=[chunks_state])
    submit_button.click(
        fn=generate_response,
        inputs=[teacher_name, today_date, query_input, chunks_state, model_selector],
        outputs=[answer_output]
    )

# πŸš€ Launch the app
demo.launch()