"""Gradio app that summarizes the abstract/introduction page of a PDF."""

import gradio as gr
from PyPDF2 import PdfReader
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_name = "ArtifactAI/led_large_16384_arxiv_summarization"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Cap on the number of input tokens fed to the model.
# NOTE(review): 16384 is taken from the checkpoint name; confirm against the
# model config if the checkpoint is ever swapped.
MAX_INPUT_TOKENS = 16384


def _find_abstract_page_text(reader):
    """Return the text of the first page mentioning "Abstract" or "Introduction".

    Returns an empty string when no page matches (or no page has any
    extractable text).
    """
    for page in reader.pages:
        # Extract once per page; guard against extractors that yield None.
        text = page.extract_text() or ""
        if "Abstract" in text or "Introduction" in text:
            return text
    return ""


def summarize_pdf_abstract(pdf_file):
    """Summarize the first page of a PDF that contains an abstract or introduction.

    Args:
        pdf_file: The uploaded PDF. May be a filesystem path, a binary
            file-like object, or an upload object exposing a ``name``
            attribute that holds the path of the file on disk.

    Returns:
        A dict with a single key ``"summary"`` mapping to the generated
        summary text.

    Raises:
        gr.Error: If no file was provided, no matching page with text was
            found, or reading/summarizing the PDF failed. ``gr.Error`` is a
            subclass of ``Exception``, so existing ``except Exception``
            handlers keep working; the original exception is chained as
            ``__cause__``.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file.")

    # Upload objects carry the on-disk path in ``.name``; plain paths and
    # in-memory streams are handed to PdfReader unchanged.
    pdf_source = getattr(pdf_file, "name", pdf_file)

    try:
        reader = PdfReader(pdf_source)
        abstract_text = _find_abstract_page_text(reader)
    except Exception as e:
        raise gr.Error(str(e)) from e

    if not abstract_text.strip():
        # Summarizing an empty string would only produce meaningless output.
        raise gr.Error(
            'No page containing "Abstract" or "Introduction" with extractable '
            "text was found in the PDF."
        )

    try:
        inputs = tokenizer(
            abstract_text,
            return_tensors="pt",
            truncation=True,
            max_length=MAX_INPUT_TOKENS,
        )
        outputs = model.generate(**inputs)
        # Drop special tokens so they do not appear in the displayed summary.
        summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        raise gr.Error(str(e)) from e

    return {"summary": summary}


def _summary_text(pdf_file):
    """Adapter for the UI: return only the summary string.

    The Textbox output expects text; handing it the result dict directly
    would presumably display the dict's string representation instead of
    the summary itself.
    """
    return summarize_pdf_abstract(pdf_file)["summary"]


interface = gr.Interface(
    fn=_summary_text,
    inputs=[gr.File(label="Upload PDF")],
    outputs=[gr.Textbox(label="Summary")],
)

if __name__ == "__main__":
    # Only start the (publicly shared) server when run as a script, so that
    # importing this module does not launch a web server as a side effect.
    interface.launch(share=True)