import os
import gradio as gr
import fitz # PyMuPDF
from datetime import date
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
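# Note: HuggingFaceHub authenticates via the HUGGINGFACEHUB_API_TOKEN environment
# variable; set it in the Space's secrets (or via os.environ) before launching.
# These imports target an older (pre-0.1) LangChain; in newer releases the same
# classes live in langchain_community.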
# 1. Extract text from PDF
def extract_text_from_pdf(file_path):
    doc = fitz.open(file_path)
    text = ""
    for page in doc:
        text += page.get_text()
    doc.close()
    return text
# 2. Chunk text
def chunk_text(text):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100
    )
    return splitter.split_text(text)
# 3. Create embeddings and retriever
def create_retriever(chunks):
    print("Generating embeddings...")
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = FAISS.from_texts(chunks, embedding=embeddings)
    retriever = vectorstore.as_retriever()
    return retriever
# 4. Set up LLM chain (e.g., HuggingFaceHub, GROQ, etc.)
def create_chain(retriever):
    llm = HuggingFaceHub(repo_id="google/flan-t5-base", model_kwargs={"temperature": 0.1, "max_length": 256})
    chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    return chain
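# Note: the UI's model dropdown is currently informational only; this chain always
# uses flan-t5-base. Changing repo_id would swap the underlying LLM (assumption:
# any text2text-generation model hosted on the Hugging Face Hub should work here).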
# 5. Upload file → extract → chunk → embed
def upload_file(file):
    try:
        text = extract_text_from_pdf(file.name)
        print("Extracted text:", text[:300])
        chunks = chunk_text(text)
        print("Chunks created:", len(chunks))
        return chunks
    except Exception as e:
        print("Upload error:", e)
        return []  # keep chunks_state a list so generate_response can len() it
# 6. Query & generate answer
def generate_response(name, today, query, chunks, model_choice):
    try:
        if not chunks:  # nothing extracted yet (or the upload failed)
            return "Please upload a PDF and click Extract first."
        print("Teacher:", name, "| Date:", today)
        print("Query:", query)
        print("Chunks received:", len(chunks))
        print("Model selected:", model_choice)
        retriever = create_retriever(chunks)
        rag_chain = create_chain(retriever)
        answer = rag_chain.run(query)
        print("Answer:", answer)
        formatted = f"Teacher: {name}\nDate: {today}\n\nQuestion: {query}\n\nAnswer:\n{answer}"
        return formatted
    except Exception as e:
        print("Error in response generation:", e)
        return f"Error:\n{e}"
# 7. Gradio UI
with gr.Blocks(title="Curriculum Assistant") as demo:
    gr.Markdown("# Curriculum Assistant\nUpload your curriculum PDF and ask questions about it!")
    with gr.Row():
        teacher_name = gr.Textbox(label="Teacher Name")
        today_date = gr.Textbox(value=str(date.today()), label="Date")
    with gr.Row():
        file_input = gr.File(label="Upload Curriculum PDF")
        upload_btn = gr.Button("Extract")
    chunks_state = gr.State([])
    with gr.Row():
        query_input = gr.Textbox(label="Enter your question")
        model_selector = gr.Dropdown(choices=["flan-t5-base"], value="flan-t5-base", label="Choose Model")
    submit_button = gr.Button("Generate Resource")
    answer_output = gr.Textbox(label="Output", lines=12)
    # Button connections
    upload_btn.click(fn=upload_file, inputs=[file_input], outputs=[chunks_state])
    submit_button.click(
        fn=generate_response,
        inputs=[teacher_name, today_date, query_input, chunks_state, model_selector],
        outputs=[answer_output]
    )
# Launch the app
demo.launch()
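# Headless usage sketch, bypassing the UI (assumption: a local "curriculum.pdf"
# exists and the HF token is set in the environment):
#   text = extract_text_from_pdf("curriculum.pdf")
#   chunks = chunk_text(text)
#   chain = create_chain(create_retriever(chunks))
#   print(chain.run("Which topics does unit 1 cover?"))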