dtkne committed on
Commit
b51711d
·
verified ·
1 Parent(s): a6886bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -43
app.py CHANGED
@@ -1,50 +1,62 @@
1
- import gradio as gr
 
2
  import os
 
 
 
3
  from transformers import pipeline
4
 
5
- # Load ASR (Speech-to-Text) pipeline with timestamp handling
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")
7
 
8
- # Load Summarization model
 
 
 
 
 
 
 
 
 
9
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
10
 
11
- # Function to transcribe and summarize audio
12
- def transcribe_and_summarize(audio_file):
13
- if audio_file is None:
14
- return "Error: No audio file provided.", ""
15
-
16
- try:
17
- # Transcribe audio (handling long-form audio)
18
- transcription_result = asr(audio_file, return_timestamps=True)
19
-
20
- # Extract transcribed text
21
- transcribed_text = " ".join([segment['text'] for segment in transcription_result['chunks']])
22
-
23
- # Ensure the transcribed text isn't too short for summarization
24
- if len(transcribed_text.split()) < 50:
25
- summarized_text = "Text too short to summarize."
26
- else:
27
- # Summarize the transcribed text
28
- summary_result = summarizer(transcribed_text, max_length=100, min_length=30, do_sample=False)
29
- summarized_text = summary_result[0]['summary_text']
30
-
31
- return transcribed_text, summarized_text
32
-
33
- except Exception as e:
34
- return f"Error: {str(e)}", ""
35
-
36
- # Create Gradio interface
37
- iface = gr.Interface(
38
- fn=transcribe_and_summarize,
39
- inputs=gr.Audio(type="filepath"), # Accepts an audio file
40
- outputs=[
41
- gr.Textbox(label="Transcribed Text"),
42
- gr.Textbox(label="Summarized Text")
43
- ]
44
- )
45
-
46
- # Get port safely (default to 7860 if not set)
47
- port = int(os.environ.get('PORT1', 7860))
48
-
49
- # Launch Gradio app
50
- iface.launch(share=True, server_port=port)
 
1
+ !pip install pytubefix moviepy transformers gradio torch
2
+
3
  import os
4
+ import torch
5
+ from pytubefix import YouTube
6
+ from moviepy.editor import VideoFileClip
7
  from transformers import pipeline
8
 
9
+ # ---- STEP 1: Download YouTube Video ----
10
+ url = "https://www.youtube.com/watch?v=VgxnyKnB3qc&ab"
11
+ yt = YouTube(url)
12
+ title = yt.title
13
+ print(f"Downloading: {title}")
14
+
15
+ video_stream = yt.streams.get_highest_resolution()
16
+ video_path = f"/content/{title}.mp4"
17
+ video_stream.download(filename=video_path)
18
+
19
+ print(f"Video saved as: {video_path}")
20
+
21
+ # ---- STEP 2: Extract Audio from Video ----
22
+ output_audio = f"/content/{title}.wav"
23
+
24
+ video = VideoFileClip(video_path)
25
+ video.audio.write_audiofile(output_audio)
26
+
27
+ print(f"Audio extracted: {output_audio}")
28
+
29
+ # ---- STEP 3: Transcribe Audio ----
30
  asr = pipeline(task="automatic-speech-recognition", model="distil-whisper/distil-small.en")
31
 
32
def transcribe_audio(audio_file):
    """Transcribe an audio recording to plain text.

    Uses the module-level ``asr`` speech-recognition pipeline.

    Args:
        audio_file: Audio input for the pipeline; this script passes the
            path of the extracted ``.wav`` file.

    Returns:
        The full transcription as a single string.
    """
    print("Transcribing audio...")
    # Whisper-style models need timestamp prediction enabled to process
    # recordings longer than a single ~30 s window; without it the pipeline
    # rejects long-form audio. The result still carries the complete
    # transcript under the "text" key.
    transcription_result = asr(audio_file, return_timestamps=True)
    return transcription_result["text"]
37
+
38
+ transcribed_text = transcribe_audio(output_audio)
39
+ print("Transcription Complete:\n", transcribed_text[:500]) # Preview first 500 characters
40
+
41
+ # ---- STEP 4: Summarize Transcription ----
42
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
43
 
44
def summarize_text(text):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize.

    Returns:
        The generated summary, or the fixed message
        ``"Text too short to summarize."`` when *text* has fewer than
        50 whitespace-separated words.
    """
    # Very short inputs are not worth summarizing (and min_length=30 could
    # exceed the input itself), so bail out early.
    if len(text.split()) < 50:
        return "Text too short to summarize."

    print("Summarizing text...")
    # truncation=True clips inputs that exceed the model's maximum input
    # length instead of letting the model fail on over-long transcripts.
    # NOTE: only the leading portion of a very long text is summarized.
    summary_result = summarizer(
        text,
        max_length=100,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return summary_result[0]['summary_text']
51
+
52
+ summarized_text = summarize_text(transcribed_text)
53
+ print("\nSummary:\n", summarized_text)
54
+
55
+ # ---- OPTIONAL: Save Results to File ----
56
+ with open(f"/content/{title}_transcription.txt", "w") as f:
57
+ f.write(transcribed_text)
58
+
59
+ with open(f"/content/{title}_summary.txt", "w") as f:
60
+ f.write(summarized_text)
61
+
62
+ print("Transcription & Summary saved!")