import concurrent.futures
import os
import tempfile

import cv2
import gradio as gr
import nltk
import speech_recognition as sr
from moviepy.editor import VideoFileClip
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from PIL import Image
from pytesseract import image_to_string
from transformers import pipeline
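
# Download the NLTK resources used below (word tokenizer and POS tagger).
# Note: newer NLTK releases may also require the "punkt_tab" resource.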
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
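
# Load the summarization pipeline once at startup so every request reuses the
# model; sshleifer/distilbart-cnn-12-6 is a distilled BART summarization model.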
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
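

# Extract the audio track from a video file so it can be transcribed.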
def extract_audio(video_path):
    video = VideoFileClip(video_path)
    # Write the WAV into the system temp dir instead of the working directory.
    audio_path = os.path.join(tempfile.gettempdir(), "extracted_audio.wav")
    video.audio.write_audiofile(audio_path, verbose=False, logger=None)
    return audio_path
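

# Transcribe speech via the SpeechRecognition wrapper around the free Google
# Web Speech API. Only the first 30 seconds are sent, which keeps requests
# small but truncates longer lectures.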
def transcribe_audio(audio_path):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio = recognizer.record(source, duration=30)
    return recognizer.recognize_google(audio)
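

# Pull out noun/verb keywords: tokenize, POS-tag, then stem each match.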
def extract_keywords(text):
    tokens = word_tokenize(text)
    pos_tags = nltk.pos_tag(tokens)
    stemmer = PorterStemmer()
    return list(set(
        f"{stemmer.stem(word.lower())} ({tag})"
        for word, tag in pos_tags
        if tag.startswith("NN") or tag.startswith("VB")
    ))
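

# Summarize the transcript in ~1000-character chunks; max_length/min_length
# are token counts for the generated summary, chosen by the requested size.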
def summarize_text(text, ratio="short"):
    max_len, min_len = {
        "short": (100, 30),
        "medium": (150, 50),
    }.get(ratio, (250, 80))
    if len(text.split()) < min_len:
        return "Transcript is too short to summarize."
    chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
    summary = ""
    for chunk in chunks:
        sum_out = summarizer(chunk, max_length=max_len, min_length=min_len, do_sample=False)
        summary += sum_out[0]['summary_text'] + " "
    return summary.strip()
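

# OCR slide/whiteboard text from the video: sample every 30th frame (roughly
# once a second at 30 fps), up to 20 samples, and de-duplicate the results.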
def extract_slide_text(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    sampled = 0
    seen = set()
    ocr_texts = []
    # Bound the number of OCR'd frames, not the number of frames read; the
    # original loop stopped after 20 frames and so only ever OCR'd frame 0.
    while cap.isOpened() and sampled < 20:
        ret, frame = cap.read()
        if not ret:
            break
        if frame_count % 30 == 0:
            image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            text = image_to_string(image).strip()
            if text and text not in seen:
                seen.add(text)
                ocr_texts.append(text)
            sampled += 1
        frame_count += 1
    cap.release()
    return "\n\n".join(ocr_texts)
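

# Run transcription and (for video) OCR concurrently, then clean up any
# intermediate audio file. Gradio hands the handler a path on disk (see
# type="filepath" below), so no temp copy of the upload is needed.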
def process_uploaded_file(file_path):
    video_exts = (".mp4", ".mov", ".avi", ".mkv")
    audio_path = file_path
    slide_text = ""

    try:
        if file_path.lower().endswith(video_exts):
            audio_path = extract_audio(file_path)

        with concurrent.futures.ThreadPoolExecutor() as executor:
            ocr_future = (
                executor.submit(extract_slide_text, file_path)
                if file_path.lower().endswith(video_exts)
                else None
            )
            trans_future = executor.submit(transcribe_audio, audio_path)

            transcript = trans_future.result()
            slide_text = ocr_future.result() if ocr_future else ""

        return transcript, slide_text

    except Exception as e:
        # Surface the failure in the transcript pane.
        return f"Error: {e}", ""

    finally:
        # Remove only the audio file we created; leave Gradio's upload alone.
        if audio_path != file_path and os.path.exists(audio_path):
            os.remove(audio_path)
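

# Build the two output panes: extracted slide text, and the transcript plus a
# generated summary (skipped when the transcript is too short to be useful).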
def generate_notes(file_path):
    transcript, slide_text = process_uploaded_file(file_path)

    if slide_text:
        slide_text_display = f"Slide/Whiteboard Extracted Text:\n{slide_text}"
    else:
        slide_text_display = "No slide/whiteboard text extracted."

    if transcript.startswith("Error:"):
        # Show processing errors directly instead of masking them as a
        # too-short transcript.
        transcript_display = transcript
    elif len(transcript.split()) < 30:
        transcript_display = "Transcript too short for a meaningful summary."
    else:
        summary = summarize_text(transcript, ratio="short")
        transcript_display = (
            f"Full Transcription:\n{transcript}\n\n"
            f"Lecture Summary:\n{summary}"
        )

    return slide_text_display, transcript_display
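

# Wire up the Gradio UI. type="filepath" passes the upload to generate_notes
# as a path string; live=True runs the pipeline as soon as a file is chosen.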
inputs = gr.File(label="Upload Audio/Video File (Any Format)", type="filepath")
outputs = [gr.Textbox(label="Slide Text"), gr.Textbox(label="Lecture Transcript and Summary")]

gr.Interface(fn=generate_notes, inputs=inputs, outputs=outputs, live=True).launch()