import os
import gradio as gr
import fitz  # PyMuPDF
from datetime import date
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
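
# Note: the import paths above assume an older LangChain layout; newer releases
# move these classes into langchain_community. HuggingFaceHub also expects a
# HUGGINGFACEHUB_API_TOKEN (e.g. set as a Space secret). A minimal startup check,
# as a sketch under that assumption:
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    print("Warning: HUGGINGFACEHUB_API_TOKEN is not set; HuggingFaceHub calls may fail.")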
# 1. Extract text from PDF
def extract_text_from_pdf(file_path):
    doc = fitz.open(file_path)
    text = ""
    for page in doc:
        text += page.get_text()
    return text
# 2. Chunk text
def chunk_text(text):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100
    )
    return splitter.split_text(text)
# 3. Create embeddings and retriever
def create_retriever(chunks):
    print("Generating embeddings...")
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = FAISS.from_texts(chunks, embedding=embeddings)
    retriever = vectorstore.as_retriever()
    return retriever
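
# Design note: generate_response() below rebuilds the embeddings and FAISS index
# for every question. For larger PDFs it may be worth creating the retriever once
# at upload time and keeping it in a gr.State instead of re-embedding per query.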
# 4. Set up LLM chain (e.g., HuggingFaceHub, GROQ, etc.)
def create_chain(retriever):
    llm = HuggingFaceHub(repo_id="google/flan-t5-base", model_kwargs={"temperature": 0.1, "max_length": 256})
    chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return chain
# 5. Upload file -> extract -> chunk -> embed
def upload_file(file):
    try:
        # gr.File may hand over a filepath string or a tempfile-like object, depending on the Gradio version
        file_path = file if isinstance(file, str) else file.name
        text = extract_text_from_pdf(file_path)
        print("Extracted text:", text[:300])
        chunks = chunk_text(text)
        print("Chunks created:", len(chunks))
        return chunks
    except Exception as e:
        print("Upload error:", e)
        # Return an empty list so chunks_state stays a list and generate_response() can handle it
        return []
# 6. Query & generate answer
def generate_response(name, today, query, chunks, model_choice):
    try:
        print("Teacher:", name, "| Date:", today)
        print("Query:", query)
        print("Chunks received:", len(chunks))
        # model_choice is only logged for now; create_chain() uses a hardcoded repo_id
        print("Model selected:", model_choice)
        if not chunks:
            return "Please upload a curriculum PDF and click Extract before asking a question."
        retriever = create_retriever(chunks)
        rag_chain = create_chain(retriever)
        answer = rag_chain.run(query)
        print("Answer:", answer)
        formatted = f"Teacher: {name}\nDate: {today}\n\nQuestion: {query}\n\nAnswer:\n{answer}"
        return formatted
    except Exception as e:
        print("Error in response generation:", e)
        return f"Error:\n{e}"
# 7. Gradio UI
with gr.Blocks(title="Curriculum Assistant") as demo:
    gr.Markdown("# Curriculum Assistant\nUpload your curriculum PDF and ask questions about it!")
    with gr.Row():
        teacher_name = gr.Textbox(label="Teacher Name")
        today_date = gr.Textbox(value=str(date.today()), label="Date")
    with gr.Row():
        file_input = gr.File(label="Upload Curriculum PDF")
        upload_btn = gr.Button("Extract")
    chunks_state = gr.State([])
    with gr.Row():
        query_input = gr.Textbox(label="Enter your question")
        model_selector = gr.Dropdown(choices=["flan-t5-base"], value="flan-t5-base", label="Choose Model")
    submit_button = gr.Button("Generate Resource")
    answer_output = gr.Textbox(label="Output", lines=12)
    # Button connections
    upload_btn.click(fn=upload_file, inputs=[file_input], outputs=[chunks_state])
    submit_button.click(
        fn=generate_response,
        inputs=[teacher_name, today_date, query_input, chunks_state, model_selector],
        outputs=[answer_output]
    )
# Launch the app
demo.launch()
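
# Rough dependency list for the Space's requirements.txt (a sketch; pin versions as needed):
#   gradio, pymupdf, langchain, faiss-cpu, sentence-transformers, huggingface_hub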