Spaces:

flintrepa
/

audio_pdf_summary

Runtime error

audio_pdf_summary / app.py

Add app files

a93b356 over 1 year ago

1.59 kB

	# https://huggingface.co/spaces/flintrepa/audio_pdf_summary

	import pdfproc

	import gradio as gr
	from transformers import pipeline, AutoProcessor, AutoModel


	# Holds a single (summarizer, processor, tts_model) tuple once the models have
	# been loaded, so they are instantiated once per process instead of on every
	# request.
	_MODEL_CACHE = []


	def _load_models():
	    """Return ``(summarizer, processor, tts_model)``, loading them on first use."""
	    if not _MODEL_CACHE:
	        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
	        processor = AutoProcessor.from_pretrained("suno/bark-small")
	        tts_model = AutoModel.from_pretrained("suno/bark-small")
	        # Store only after all three loads succeeded, so a partial failure is
	        # retried on the next call rather than leaving a half-filled cache.
	        _MODEL_CACHE.append((summarizer, processor, tts_model))
	    return _MODEL_CACHE[0]


	def _extract_abstract(text_per_page):
	    """Return the entry that follows the 'Abstract' heading on the first page.

	    Hyphenated line breaks are joined and remaining newlines become spaces.
	    Raises ``gr.Error`` when the heading or the entry after it is missing.
	    """
	    try:
	        # presumably a list of text blocks for the first page -- confirm
	        # against pdfproc.read_pdf
	        first_page_blocks = text_per_page['Page_0'][0]
	        heading_pos = first_page_blocks.index('Abstract\n')
	        abstract = first_page_blocks[heading_pos + 1]
	    except (KeyError, IndexError, ValueError) as err:
	        # gr.Error is shown to the user in the UI instead of a generic failure.
	        raise gr.Error(
	            "No 'Abstract' section was found on the first page of the PDF."
	        ) from err
	    return abstract.replace('-\n', '').replace('\n', ' ')


	def pdf_summary(file_obj):
	    """Summarize the abstract of a PDF and synthesize the summary as speech.

	    Args:
	        file_obj: The uploaded file as delivered by the Gradio "file" input;
	            it is forwarded unchanged to ``pdfproc.read_pdf``.

	    Returns:
	        A ``(sampling_rate, waveform)`` tuple: the sample rate taken from the
	        speech model's generation config and the generated audio converted
	        to a squeezed NumPy array.

	    Raises:
	        gr.Error: If no 'Abstract' heading (or no text after it) is found on
	            the first page.
	    """
	    print(file_obj)
	    # Read pdf
	    text_per_page = pdfproc.read_pdf(file_obj)

	    # Extract Abstract from pdf (before loading models, so a bad PDF fails fast)
	    abstract = _extract_abstract(text_per_page)
	    print(f"\nABSTRACT\n-----\n{abstract}\n-----\n")

	    summarizer, processor, tts_model = _load_models()

	    # Summarize the abstract
	    summary = summarizer(abstract, min_length=1, max_length=20, do_sample=False)[0]["summary_text"]
	    print(f"\nSUMMARY\n-----\n{summary}\n-----\n")

	    # Text-to-Speech
	    inputs = processor(
	        text=summary,
	        return_tensors="pt",
	    )
	    speech_values = tts_model.generate(**inputs, do_sample=True)
	    sampling_rate = tts_model.generation_config.sample_rate
	    print("\nAUDIO GENERATED\n")

	    return sampling_rate, speech_values.cpu().numpy().squeeze()


	if __name__ == "__main__":
	    # Path of the sample PDF offered as a ready-made example input in the UI.
	    sample_pdf = './hidden-debt-article.pdf'

	    # Interface settings: a file upload feeds pdf_summary, whose result is
	    # rendered by an audio output component.
	    interface_options = dict(
	        fn=pdf_summary,
	        inputs="file",
	        outputs="audio",
	        title="Audio PDF summary",
	        description="Upload a PDF file with an Abstract to get an audio summary of it",
	        examples=[sample_pdf],
	    )

	    demo = gr.Interface(**interface_options)
	    demo.launch()