|
import gradio as gr |
|
import pdfplumber |
|
from transformers import pipeline |
|
|
|
# Build the question-answering pipeline once at import time so that every
# request reuses the same loaded model instead of reloading it per call.
qa_pipeline = pipeline(
    task="question-answering",
    model="distilbert-base-uncased-distilled-squad",
)
|
|
|
def extract_text_from_pdf(file):
    """Return the text of every page of a PDF as a single string.

    Args:
        file: Object whose ``name`` attribute is the path of the PDF to open.

    Returns:
        The text of each page, in page order, joined with newline
        characters. A page for which ``extract_text()`` returns a falsy
        value contributes an empty string.
    """
    page_texts = []
    with pdfplumber.open(file.name) as document:
        for page in document.pages:
            text = page.extract_text()
            # Keep a placeholder for pages without extractable text so the
            # join below never receives a non-string value.
            page_texts.append(text if text else '')
    return "\n".join(page_texts)
|
|
|
def answer_question(file, question):
    """Answer *question* from the text of an uploaded PDF or ``.txt`` file.

    Args:
        file: The upload passed in by the UI. Only its ``name`` attribute
            (the path of the uploaded file on disk) is used. ``None`` when
            nothing has been uploaded.
        question: The question typed by the user.

    Returns:
        The ``"answer"`` field of the ``qa_pipeline`` result, or a
        human-readable message when the inputs cannot be processed
        (no upload, unsupported extension, blank question, or a document
        with no readable text).
    """
    # Submitting without an upload passes None; report it instead of
    # failing on ``None.name``.
    if file is None:
        return "Please upload a PDF or .txt file."

    path = file.name
    # Compare the extension case-insensitively so "REPORT.PDF" is accepted.
    lowered = path.lower()
    is_pdf = lowered.endswith('.pdf')
    is_txt = lowered.endswith('.txt')
    if not (is_pdf or is_txt):
        return "Unsupported file format. Please upload a PDF or .txt file."

    # Validate the question before touching the document so that a blank
    # question does not cost a full PDF parse.
    if not question or not question.strip():
        return "Please enter a question."

    if is_pdf:
        context = extract_text_from_pdf(file)
    else:
        # Read from the path rather than calling ``file.read()``: the upload
        # is not guaranteed to be an open binary file object (recent Gradio
        # releases hand over a path string that only exposes ``.name``).
        # Undecodable bytes are replaced so a stray byte does not abort
        # the whole request.
        with open(path, encoding="utf-8", errors="replace") as handle:
            context = handle.read()

    # The QA pipeline rejects an empty context, e.g. for a scanned PDF
    # without a text layer or a blank text file.
    if not context.strip():
        return "No readable text was found in the uploaded file."

    result = qa_pipeline(question=question, context=context)
    return result["answer"]
|
# Declare the web UI. Components render in the order they are created, so
# the statement order below is the on-screen order.
with gr.Blocks() as demo:
    gr.Markdown("## π Document Question Answering with DistilBERT")

    # NOTE(review): the original indentation was lost in this copy of the
    # file; the upload and question widgets are assumed to share the row,
    # with the answer box and button below it - confirm against the
    # intended layout.
    with gr.Row():
        file_input = gr.File(label="Upload a PDF or TXT file")
        question_input = gr.Textbox(
            label="Ask a question",
            placeholder="e.g. What is the main topic?",
        )

    answer_output = gr.Textbox(
        label="Answer",
        placeholder="The answer will appear here.",
    )

    submit_btn = gr.Button("Get Answer")

    # Wire the button: the uploaded file and the question go in, and the
    # returned string is shown in the answer box.
    submit_btn.click(
        fn=answer_question,
        inputs=[file_input, question_input],
        outputs=answer_output,
    )

# Start the Gradio server for the interface defined above.
demo.launch()