dtkne committed on
Commit
a6886bf
·
verified ·
1 Parent(s): a952e20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -2,25 +2,31 @@ import gradio as gr
2
  import os
3
  from transformers import pipeline
4
 
5
- # Load ASR (Speech-to-Text) pipeline
6
  asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")
7
 
8
  # Load Summarization model
9
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
10
 
11
- # Function to transcribe and summarize
12
  def transcribe_and_summarize(audio_file):
13
  if audio_file is None:
14
  return "Error: No audio file provided.", ""
15
 
16
  try:
17
- # Transcribe audio
18
- transcription_result = asr(audio_file)
19
- transcribed_text = transcription_result['text']
20
-
21
- # Summarize the transcribed text
22
- summary_result = summarizer(transcribed_text, max_length=100, min_length=30, do_sample=False)
23
- summarized_text = summary_result[0]['summary_text']
 
 
 
 
 
 
24
 
25
  return transcribed_text, summarized_text
26
 
 
2
  import os
3
  from transformers import pipeline
4
 
5
+ # Load ASR (Speech-to-Text) pipeline with timestamp handling
6
  asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")
7
 
8
  # Load Summarization model
9
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
10
 
11
+ # Function to transcribe and summarize audio
12
  def transcribe_and_summarize(audio_file):
13
  if audio_file is None:
14
  return "Error: No audio file provided.", ""
15
 
16
  try:
17
+ # Transcribe audio (handling long-form audio)
18
+ transcription_result = asr(audio_file, return_timestamps=True)
19
+
20
+ # Extract transcribed text
21
+ transcribed_text = " ".join([segment['text'] for segment in transcription_result['chunks']])
22
+
23
+ # Ensure the transcribed text isn't too short for summarization
24
+ if len(transcribed_text.split()) < 50:
25
+ summarized_text = "Text too short to summarize."
26
+ else:
27
+ # Summarize the transcribed text
28
+ summary_result = summarizer(transcribed_text, max_length=100, min_length=30, do_sample=False)
29
+ summarized_text = summary_result[0]['summary_text']
30
 
31
  return transcribed_text, summarized_text
32