|
!pip install pytubefix moviepy transformers gradio torch |
|
|
|
import os |
|
import torch |
|
from pytubefix import YouTube |
|
from moviepy.editor import VideoFileClip |
|
from transformers import pipeline |
|
|
|
|
|
# --- Step 1: download the source video from YouTube -----------------------
url = "https://www.youtube.com/watch?v=VgxnyKnB3qc&ab"
yt = YouTube(url)
print(f"Downloading: {yt.title}")

# Video titles routinely contain characters such as "/" or "?" that are not
# valid in file names. Every later path in this script is derived from
# `title`, so from here on `title` holds a filesystem-safe version of it.
_UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'
title = "".join(
    "_" if ch in _UNSAFE_FILENAME_CHARS or not ch.isprintable() else ch
    for ch in yt.title
).strip(" .") or "video"  # fall back to a fixed name if nothing usable is left

video_stream = yt.streams.get_highest_resolution()
if video_stream is None:
    raise RuntimeError(f"No downloadable stream found for {url}")

# The target directory belongs in `output_path`; `filename` is meant to be a
# bare file name. Use the path that download() returns rather than assuming
# where the file ended up.
video_path = video_stream.download(output_path="/content", filename=f"{title}.mp4")

print(f"Video saved as: {video_path}")
|
|
|
|
|
# --- Step 2: extract the audio track of the downloaded video to WAV -------
output_audio = f"/content/{title}.wav"

# Open the clip as a context manager so the reader resources it holds are
# released as soon as the audio has been written, even if writing fails.
with VideoFileClip(video_path) as video:
    if video.audio is None:
        # A video without an audio track would otherwise fail with an
        # opaque AttributeError on `None.write_audiofile`.
        raise ValueError(f"No audio track found in {video_path}")
    video.audio.write_audiofile(output_audio)

print(f"Audio extracted: {output_audio}")
|
|
|
|
|
# Speech-recognition pipeline used by transcribe_audio().
# Run on the first GPU when one is available; device=-1 selects the CPU.
asr = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
    device=0 if torch.cuda.is_available() else -1,
)
|
|
|
def transcribe_audio(audio_file, chunk_length_s=15):
    """Transcribe an audio file to text with the module-level ``asr`` pipeline.

    Whisper-style models only look at a window of roughly 30 seconds per
    forward pass. Without chunking, longer recordings are either cut off or
    rejected (depending on the transformers version), so the pipeline is
    asked to split the audio into fixed-length chunks and stitch the pieces
    back together.

    Args:
        audio_file: Path to the audio file to transcribe.
        chunk_length_s: Length, in seconds, of the chunks the pipeline splits
            the audio into.

    Returns:
        The transcribed text as a single string.
    """
    print("Transcribing audio...")
    transcription_result = asr(audio_file, chunk_length_s=chunk_length_s)
    return transcription_result["text"]
|
|
|
# Transcribe the extracted audio, then show only the first 500 characters so
# a long transcript does not flood the output.
transcribed_text = transcribe_audio(audio_file=output_audio)
transcript_preview = transcribed_text[:500]
print("Transcription Complete:\n", transcript_preview)
|
|
|
|
|
# Summarization pipeline used by summarize_text().
# Run on the first GPU when one is available; device=-1 selects the CPU.
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=0 if torch.cuda.is_available() else -1,
)
|
|
|
def _split_into_word_chunks(text, max_words, min_tail_words=50):
    """Split *text* into pieces of at most *max_words* whitespace-separated words.

    A text that already fits is returned unchanged as the only chunk. A final
    piece shorter than *min_tail_words* is folded into the previous one so the
    model is never asked to summarize a handful of trailing words.
    """
    words = text.split()
    if len(words) <= max_words:
        return [text]

    pieces = [words[i:i + max_words] for i in range(0, len(words), max_words)]
    if len(pieces) > 1 and len(pieces[-1]) < min_tail_words:
        tail = pieces.pop()
        pieces[-1].extend(tail)
    return [" ".join(piece) for piece in pieces]


def summarize_text(text, max_words_per_chunk=500):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    The summarization model accepts only a limited number of input tokens
    (1024 for BART, per its published configuration), which a full video
    transcript easily exceeds. The text is therefore summarized in
    word-bounded chunks and the partial summaries are joined; ``truncation``
    is enabled as a safety net for unusually token-dense chunks.

    Args:
        text: The text to summarize.
        max_words_per_chunk: Upper bound on the number of words handed to the
            model in one call.

    Returns:
        The summary, or the fixed message ``"Text too short to summarize."``
        when *text* has fewer than 50 words.
    """
    if len(text.split()) < 50:
        return "Text too short to summarize."

    print("Summarizing text...")
    partial_summaries = [
        summarizer(
            chunk,
            max_length=100,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        for chunk in _split_into_word_chunks(text, max_words_per_chunk)
    ]
    return " ".join(partial_summaries)
|
|
|
# Condense the full transcript and display the resulting summary.
summarized_text = summarize_text(text=transcribed_text)
print("\nSummary:\n", summarized_text)
|
|
|
|
|
# --- Final step: persist the transcript and its summary under /content ----
# Write explicitly as UTF-8 so non-ASCII characters in the text cannot fail
# or be mangled when the platform's default encoding is something else.
with open(f"/content/{title}_transcription.txt", "w", encoding="utf-8") as f:
    f.write(transcribed_text)

with open(f"/content/{title}_summary.txt", "w", encoding="utf-8") as f:
    f.write(summarized_text)

print("Transcription & Summary saved!")
|
|