project1 / app.py
dtkne's picture
Update app.py
b51711d verified
raw
history blame
1.93 kB
# Dependencies -- install them from a shell, or list them in requirements.txt:
#     pip install pytubefix moviepy transformers gradio torch
# NOTE: the "!pip ..." form is IPython/notebook-only syntax and is a SyntaxError
# when this file is executed as a plain Python script, so it is kept as a comment.
import os
import torch
from pytubefix import YouTube
from moviepy.editor import VideoFileClip
from transformers import pipeline
# ---- STEP 1: Download YouTube Video ----
url = "https://www.youtube.com/watch?v=VgxnyKnB3qc&ab"
yt = YouTube(url)
raw_title = yt.title
print(f"Downloading: {raw_title}")

# Every output path in this script is built from `title`. Video titles often
# contain characters such as "/" or ":" that act as path separators or are
# rejected by common filesystems, so replace them once, here.
title = "".join(
    "_" if ch in '\\/:*?"<>|' or ord(ch) < 32 else ch for ch in raw_title
)
# Fall back to a fixed name if nothing usable is left after cleaning.
title = title.strip() or "video"

video_stream = yt.streams.get_highest_resolution()
if video_stream is None:
    # Fail with a clear message instead of an AttributeError on `.download`.
    raise RuntimeError(f"No downloadable stream found for {url}")
video_path = f"/content/{title}.mp4"
video_stream.download(filename=video_path)
print(f"Video saved as: {video_path}")
# ---- STEP 2: Extract Audio from Video ----
output_audio = f"/content/{title}.wav"
video = VideoFileClip(video_path)
try:
    # `audio` is None when the clip has no audio track; report that clearly
    # rather than failing with an AttributeError on `write_audiofile`.
    if video.audio is None:
        raise ValueError(f"No audio track found in {video_path}")
    video.audio.write_audiofile(output_audio)
finally:
    # Release the file readers held by the clip even if extraction fails.
    video.close()
print(f"Audio extracted: {output_audio}")
# ---- STEP 3: Transcribe Audio ----
# Use the first CUDA GPU when one is available; device=-1 keeps the pipeline on CPU.
asr = pipeline(
    task="automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
    device=0 if torch.cuda.is_available() else -1,
)
def transcribe_audio(audio_file):
    """Transcribe an audio file with the module-level ``asr`` pipeline.

    Args:
        audio_file: Input forwarded to ``asr``; this script passes the path
            of the extracted WAV file.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    print("Transcribing audio...")
    # Whisper models consume at most 30 seconds of audio per forward pass, so a
    # full-length video must be transcribed in chunks. 15 s is the chunk length
    # the Distil-Whisper model card recommends for long-form audio.
    transcription_result = asr(audio_file, chunk_length_s=15)
    return transcription_result["text"]
# Transcribe the extracted audio, then show only the first 500 characters
# so a long transcript does not flood the console.
transcribed_text = transcribe_audio(audio_file=output_audio)
transcript_preview = transcribed_text[:500]
print("Transcription Complete:\n", transcript_preview)
# ---- STEP 4: Summarize Transcription ----
# Use the first CUDA GPU when one is available; device=-1 keeps the pipeline on CPU.
summarizer = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=0 if torch.cuda.is_available() else -1,
)
def summarize_text(text):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to condense.

    Returns:
        The generated summary, or the fixed message
        ``"Text too short to summarize."`` when ``text`` has fewer than
        50 whitespace-separated words.
    """
    if len(text.split()) < 50:
        return "Text too short to summarize."
    print("Summarizing text...")
    # facebook/bart-large-cnn accepts at most 1024 input tokens; without
    # truncation a full video transcript overflows the position embeddings
    # and the call fails. NOTE: only the leading part of a very long
    # transcript is therefore summarized.
    summary_result = summarizer(
        text,
        max_length=100,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return summary_result[0]["summary_text"]
# Condense the full transcript and print the result under a header.
summarized_text = summarize_text(text=transcribed_text)
print("\nSummary:\n", summarized_text)
# ---- OPTIONAL: Save Results to File ----
# Write as UTF-8 explicitly: the platform default encoding may be unable to
# represent every character that appears in a transcript or summary.
with open(f"/content/{title}_transcription.txt", "w", encoding="utf-8") as f:
    f.write(transcribed_text)
with open(f"/content/{title}_summary.txt", "w", encoding="utf-8") as f:
    f.write(summarized_text)
print("Transcription & Summary saved!")