Update app.py
Browse files
app.py
CHANGED
@@ -2,25 +2,31 @@ import gradio as gr
|
|
2 |
import os
|
3 |
from transformers import pipeline
|
4 |
|
5 |
-
# Load ASR (Speech-to-Text) pipeline
|
6 |
asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")
|
7 |
|
8 |
# Load Summarization model
|
9 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
10 |
|
11 |
-
# Function to transcribe and summarize
|
12 |
def transcribe_and_summarize(audio_file):
|
13 |
if audio_file is None:
|
14 |
return "Error: No audio file provided.", ""
|
15 |
|
16 |
try:
|
17 |
-
# Transcribe audio
|
18 |
-
transcription_result = asr(audio_file)
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
return transcribed_text, summarized_text
|
26 |
|
|
|
2 |
import os
|
3 |
from transformers import pipeline
|
4 |
|
5 |
+
# Load ASR (Speech-to-Text) pipeline (timestamps are requested per call via return_timestamps=True)
|
6 |
asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")
|
7 |
|
8 |
# Load Summarization model
|
9 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
10 |
|
11 |
+
# Function to transcribe and summarize audio
|
12 |
def transcribe_and_summarize(audio_file):
|
13 |
if audio_file is None:
|
14 |
return "Error: No audio file provided.", ""
|
15 |
|
16 |
try:
|
17 |
+
# Transcribe audio (handling long-form audio)
|
18 |
+
transcription_result = asr(audio_file, return_timestamps=True)
|
19 |
+
|
20 |
+
# Extract transcribed text
|
21 |
+
transcribed_text = " ".join([segment['text'] for segment in transcription_result['chunks']])
|
22 |
+
|
23 |
+
# Ensure the transcribed text isn't too short for summarization
|
24 |
+
if len(transcribed_text.split()) < 50:
|
25 |
+
summarized_text = "Text too short to summarize."
|
26 |
+
else:
|
27 |
+
# Summarize the transcribed text
|
28 |
+
summary_result = summarizer(transcribed_text, max_length=100, min_length=30, do_sample=False)
|
29 |
+
summarized_text = summary_result[0]['summary_text']
|
30 |
|
31 |
return transcribed_text, summarized_text
|
32 |
|