Spaces:
Runtime error
Runtime error
File size: 1,585 Bytes
a93b356 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# https://huggingface.co/spaces/flintrepa/audio_pdf_summary
import pdfproc
import gradio as gr
from transformers import pipeline, AutoProcessor, AutoModel
# Process-wide cache for the heavyweight models so they are loaded on first
# use only, instead of being rebuilt on every request.
_MODEL_CACHE = {}


def _get_summarizer():
    """Return the BART summarization pipeline, loading it on first use."""
    if "summarizer" not in _MODEL_CACHE:
        _MODEL_CACHE["summarizer"] = pipeline(
            "summarization", model="facebook/bart-large-cnn"
        )
    return _MODEL_CACHE["summarizer"]


def _get_tts():
    """Return the Bark ``(processor, model)`` pair, loading it on first use."""
    if "tts" not in _MODEL_CACHE:
        _MODEL_CACHE["tts"] = (
            AutoProcessor.from_pretrained("suno/bark-small"),
            AutoModel.from_pretrained("suno/bark-small"),
        )
    return _MODEL_CACHE["tts"]


def _extract_abstract(text_per_page):
    """Return the entry that follows the 'Abstract' heading on the first page."""
    # NOTE(review): assumes text_per_page['Page_0'][0] is a sequence of text
    # blocks in reading order -- confirm against pdfproc.read_pdf.
    first_page_blocks = text_per_page['Page_0'][0]
    try:
        heading_pos = first_page_blocks.index('Abstract\n')
    except ValueError as err:
        raise ValueError(
            "No 'Abstract' heading found on the first page of the PDF"
        ) from err
    try:
        abstract = first_page_blocks[heading_pos + 1]
    except IndexError as err:
        raise IndexError(
            "The 'Abstract' heading is not followed by any text"
        ) from err
    # Re-join words hyphenated across line breaks, then flatten to one line.
    return abstract.replace('-\n', '').replace('\n', ' ')


def pdf_summary(file_obj):
    """Turn the abstract of a PDF into a short spoken summary.

    The abstract is read from the first page of the PDF, summarized with
    ``facebook/bart-large-cnn`` and converted to speech with
    ``suno/bark-small``.

    Args:
        file_obj: The uploaded file, passed unchanged to ``pdfproc.read_pdf``.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, where ``waveform`` is the
        generated audio as a squeezed NumPy array.

    Raises:
        ValueError: If the first page has no ``'Abstract\\n'`` entry.
        IndexError: If nothing follows the ``'Abstract\\n'`` entry.
    """
    print(file_obj)

    # Read pdf
    text_per_page = pdfproc.read_pdf(file_obj)

    # Extract Abstract from pdf
    abstract = _extract_abstract(text_per_page)
    print(f"\nABSTRACT\n-----\n{abstract}\n-----\n")

    # Summarize the abstract
    summarizer = _get_summarizer()
    summary = summarizer(abstract, min_length=1, max_length=20, do_sample=False)[0]["summary_text"]
    print(f"\nSUMMARY\n-----\n{summary}\n-----\n")

    # Text-to-Speech
    processor, model = _get_tts()
    inputs = processor(
        text=summary,
        return_tensors="pt",
    )
    speech_values = model.generate(**inputs, do_sample=True)
    sampling_rate = model.generation_config.sample_rate
    print("\nAUDIO GENERATED\n")

    return sampling_rate, speech_values.cpu().numpy().squeeze()
if __name__ == "__main__":
    # Sample PDF passed to the interface as its single example input.
    example_file_name = './hidden-debt-article.pdf'

    # One file input, one audio output, both wired to pdf_summary.
    iface = gr.Interface(
        fn=pdf_summary,
        inputs="file",
        outputs="audio",
        title="Audio PDF summary",
        description="Upload a PDF file with an Abstract to get an audio summary of it",
        examples=[example_file_name],
    )
    iface.launch()