# Hugging Face Space (page status when captured: Sleeping)
import gradio as gr
from PyPDF2 import PdfReader
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Hugging Face Hub id of the checkpoint used for summarization.
model_name = "ArtifactAI/led_large_16384_arxiv_summarization"

# Load the seq2seq model and its tokenizer once at import time; the
# summarization function below reads these module-level objects on every call.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
def summarize_pdf_abstract(pdf_file):
    """Summarize the first PDF page that mentions "Abstract" or "Introduction".

    The pages are scanned in order; the full text of the first page whose
    extracted text contains either keyword is fed to the module-level
    ``tokenizer`` and ``model`` to generate a summary.

    Args:
        pdf_file: Whatever the Gradio file input hands over; it is passed
            straight to ``PdfReader``.

    Returns:
        A dict with a single key, ``"summary"``, holding the decoded summary.

    Raises:
        Exception: If no file was provided, no page contains either keyword,
            or reading/summarizing fails. The message is the text of the
            underlying error, which is attached as ``__cause__``.
    """
    try:
        if pdf_file is None:
            raise ValueError("No PDF file was provided.")

        reader = PdfReader(pdf_file)
        abstract_text = ""
        for page in reader.pages:
            # Extract once per page instead of re-parsing it for every check.
            page_text = page.extract_text() or ""
            if "Abstract" in page_text or "Introduction" in page_text:
                abstract_text = page_text
                break

        if not abstract_text:
            # Summarizing an empty string would only produce noise.
            raise ValueError(
                "No page containing 'Abstract' or 'Introduction' was found "
                "in the PDF."
            )

        inputs = tokenizer(abstract_text, return_tensors="pt")
        outputs = model.generate(**inputs)
        # Drop sentinel tokens (start/end/padding) so only summary text remains.
        summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # NOTE(review): the Textbox output will presumably display this dict's
        # string form; returning the bare string may have been the intent.
        # The dict shape is kept for backward compatibility -- confirm.
        return {"summary": summary}
    except Exception as e:
        # Keep the original contract (plain Exception carrying the message)
        # while preserving the underlying traceback for debugging.
        raise Exception(str(e)) from e
# Gradio UI: a single PDF upload wired to summarize_pdf_abstract, with the
# function's result shown in a text box labelled "Summary".
interface = gr.Interface(
    fn=summarize_pdf_abstract,
    inputs=[gr.File(label="Upload PDF")],
    outputs=[gr.Textbox(label="Summary")],
)

# share=True asks Gradio for a public share link in addition to the local server.
interface.launch(share=True)