# NOTE: scraped page header, not part of the program -- "Spaces: Runtime error".
# Source: https://huggingface.co/spaces/flintrepa/audio_pdf_summary
import functools

import gradio as gr
from transformers import pipeline, AutoProcessor, AutoModel

import pdfproc
@functools.lru_cache(maxsize=1)
def _load_summarizer():
    """Build the BART summarization pipeline once and reuse it on later calls."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


@functools.lru_cache(maxsize=1)
def _load_tts():
    """Load the Bark processor and model once and reuse them on later calls."""
    processor = AutoProcessor.from_pretrained("suno/bark-small")
    model = AutoModel.from_pretrained("suno/bark-small")
    return processor, model


def _extract_abstract(text_per_page):
    """Return the abstract found on the first page, flattened to a single line.

    Looks up the entry that follows the ``'Abstract\\n'`` heading inside
    ``text_per_page['Page_0'][0]`` (presumably a list of text blocks produced
    by ``pdfproc.read_pdf`` -- confirm against that module).

    Raises:
        gr.Error: if the first page is missing, has no ``'Abstract\\n'``
            heading, or has nothing after the heading.
    """
    try:
        first_page_blocks = text_per_page['Page_0'][0]
        heading_pos = first_page_blocks.index('Abstract\n')
        abstract = first_page_blocks[heading_pos + 1]
    except (KeyError, IndexError, ValueError) as err:
        # Surface a readable message in the UI instead of a bare traceback.
        raise gr.Error(
            "No 'Abstract' section was found on the first page of the PDF."
        ) from err
    # Re-join words hyphenated across line breaks, then unwrap the lines.
    return abstract.replace('-\n', '').replace('\n', ' ')


def pdf_summary(file_obj):
    """Turn the abstract of an uploaded PDF into a short spoken summary.

    The PDF is read with ``pdfproc.read_pdf``, the abstract on the first page
    is summarized with ``facebook/bart-large-cnn`` and the summary is
    synthesized to speech with ``suno/bark-small``.

    Args:
        file_obj: the uploaded file as handed over by the Gradio ``"file"``
            input; it is passed unchanged to ``pdfproc.read_pdf``.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, where ``waveform`` is the
        generated audio as a squeezed NumPy array.

    Raises:
        gr.Error: if no abstract can be located on the first page.
    """
    print(file_obj)

    # Read pdf
    text_per_page = pdfproc.read_pdf(file_obj)

    # Extract Abstract from pdf
    abstract = _extract_abstract(text_per_page)
    print(f"\nABSTRACT\n-----\n{abstract}\n-----\n")

    # Summarize the abstract (the pipeline is cached, not rebuilt per request)
    summarizer = _load_summarizer()
    summary = summarizer(abstract, min_length=1, max_length=20, do_sample=False)[0]["summary_text"]
    print(f"\nSUMMARY\n-----\n{summary}\n-----\n")

    # Text-to-Speech (processor and model are cached, not reloaded per request)
    processor, model = _load_tts()
    inputs = processor(
        text=summary,
        return_tensors="pt",
    )
    speech_values = model.generate(**inputs, do_sample=True)
    sampling_rate = model.generation_config.sample_rate
    print("\nAUDIO GENERATED\n")

    return sampling_rate, speech_values.cpu().numpy().squeeze()
if __name__ == "__main__":
    # Sample PDF passed to the interface as its single example.
    sample_pdf = './hidden-debt-article.pdf'

    # File upload in, audio out, backed by pdf_summary.
    demo = gr.Interface(
        title="Audio PDF summary",
        description="Upload a PDF file with an Abstract to get an audio summary of it",
        fn=pdf_summary,
        inputs="file",
        outputs="audio",
        examples=[sample_pdf],
    )

    demo.launch()