# app.py — Curriculum Assistant (Gradio + LangChain RAG demo)
# Update app.py — Turbiling (commit ef4baeb, verified)
import os
import gradio as gr
import fitz # PyMuPDF
from datetime import date
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
# βœ… 1. Extract text from PDF
def extract_text_from_pdf(file_path):
    """Return the concatenated plain text of every page in a PDF.

    Args:
        file_path: Path to a PDF file on disk.

    Returns:
        A single string with the text of all pages, in page order.
    """
    # Context manager ensures the underlying file handle is released
    # even if extraction raises (the original never closed the document).
    with fitz.open(file_path) as doc:
        return "".join(page.get_text() for page in doc)
# βœ… 2. Chunk text
def chunk_text(text):
    """Split *text* into overlapping chunks suitable for embedding.

    Uses a recursive character splitter with a 500-character window and a
    100-character overlap between consecutive chunks, so sentences that
    straddle a boundary still appear intact in at least one chunk.
    """
    window_size, window_overlap = 500, 100
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=window_size,
        chunk_overlap=window_overlap,
    )
    return splitter.split_text(text)
# βœ… 3. Create embeddings and retriever
def create_retriever(chunks):
    """Embed the text chunks and return a FAISS-backed retriever."""
    print("βš™οΈ Generating embeddings...")
    # MiniLM sentence-transformer: small and fast, adequate for a demo.
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_texts(chunks, embedding=embedder).as_retriever()
# βœ… 4. Set up LLM chain (e.g., HuggingFaceHub, GROQ, etc.)
def create_chain(retriever):
    """Wire a HuggingFace Hub LLM to *retriever* in a RetrievalQA chain."""
    # Low temperature keeps answers close to the retrieved context.
    generation_kwargs = {"temperature": 0.1, "max_length": 256}
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-base",
        model_kwargs=generation_kwargs,
    )
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
# πŸ”„ 5. Upload file β†’ extract β†’ chunk β†’ embed
def upload_file(file):
    """Extract and chunk the text of an uploaded PDF.

    Args:
        file: Gradio file object; its ``.name`` attribute is the path of
            the uploaded temp file on disk.

    Returns:
        list[str]: the text chunks, or an empty list on failure.

    The original returned an error *string* on failure, which then flowed
    into ``chunks_state`` and broke ``generate_response`` (``len(chunks)``
    over characters, embedding of characters). Returning ``[]`` keeps the
    state's type stable; the error is still logged to the console.
    """
    try:
        text = extract_text_from_pdf(file.name)
        print("πŸ“„ Extracted text:", text[:300])
        chunks = chunk_text(text)
        print("βœ… Chunks created:", len(chunks))
        return chunks
    except Exception as e:
        print("❌ Upload error:", e)
        return []
# πŸ’¬ 6. Query & Generate Answer
def generate_response(name, today, query, chunks, model_choice):
    """Answer *query* against the uploaded curriculum via the RAG chain.

    Args:
        name: Teacher name (echoed into the formatted answer).
        today: Date string (echoed into the formatted answer).
        query: The question to answer.
        chunks: List of text chunks produced by ``upload_file``.
        model_choice: Selected model name (logged only; the chain is
            currently hard-wired to flan-t5-base in ``create_chain``).

    Returns:
        A formatted answer string, or a user-facing error message.
    """
    # Guard: the upload step may have failed (empty state, or an error
    # string from an older version of upload_file). Without this check
    # the embedding step crashes with a confusing traceback.
    if not chunks or not isinstance(chunks, list):
        return "❌ No curriculum chunks available. Please upload a PDF first."
    try:
        print("πŸ§‘β€πŸ« Teacher:", name, "| πŸ“… Date:", today)
        print("πŸ” Query:", query)
        print("πŸ“¦ Chunks received:", len(chunks))
        print("πŸ€– Model selected:", model_choice)
        retriever = create_retriever(chunks)
        rag_chain = create_chain(retriever)
        answer = rag_chain.run(query)
        print("βœ… Answer:", answer)
        formatted = f"πŸ‘©β€πŸ« Teacher: {name}\nπŸ“… Date: {today}\n\nπŸ“Œ Question: {query}\n\nπŸ“˜ Answer:\n{answer}"
        return formatted
    except Exception as e:
        print("❌ Error in response generation:", e)
        return f"❌ Error:\n{e}"
# 🧱 7. Gradio UI
# Top-level app layout: metadata row, PDF upload row, then the query /
# model / submit controls that drive the RAG pipeline defined above.
with gr.Blocks(title="πŸ“š Curriculum Assistant") as demo:
    gr.Markdown("# 🧠 Curriculum Assistant\nUpload your curriculum PDF and ask questions from it!")
    with gr.Row():
        teacher_name = gr.Textbox(label="πŸ‘©β€πŸ« Teacher Name")
        # Pre-filled with today's date at app start; still user-editable.
        today_date = gr.Textbox(value=str(date.today()), label="πŸ“… Date")
    with gr.Row():
        file_input = gr.File(label="πŸ“Ž Upload Curriculum PDF")
        upload_btn = gr.Button("πŸ“₯ Extract")
    # Session state holding the list of text chunks between the upload
    # step and the question-answering step.
    chunks_state = gr.State([])
    with gr.Row():
        query_input = gr.Textbox(label="πŸ’‘ Enter your question")
        # Single choice today; kept as a dropdown so more models can be
        # added later without changing the generate_response signature.
        model_selector = gr.Dropdown(choices=["flan-t5-base"], value="flan-t5-base", label="πŸ€– Choose Model")
    submit_button = gr.Button("πŸš€ Generate Resource")
    answer_output = gr.Textbox(label="πŸ“˜ Output", lines=12)
    # πŸ” Button connections
    # Extract button: PDF file β†’ chunk list stored in chunks_state.
    upload_btn.click(fn=upload_file, inputs=[file_input], outputs=[chunks_state])
    # Submit button: metadata + query + stored chunks β†’ formatted answer.
    submit_button.click(
        fn=generate_response,
        inputs=[teacher_name, today_date, query_input, chunks_state, model_selector],
        outputs=[answer_output]
    )
# πŸš€ Launch the app
demo.launch()