Final_Assignment_Template / tools /youtube_tools.py
loadedcheese's picture
Folders' Initial Submission
c66530c verified
raw
history blame
14.6 kB
# tools/youtube_tools.py (Updated with fixes)
"""
YouTube Tools Module - Fixed version using pytubefix
Addresses network issues, deprecation warnings, and playlist errors
"""
from pytubefix import YouTube, Playlist
from pytubefix.cli import on_progress
from typing import Optional, Dict, Any, List
import os
import time
import logging
from .utils import logger, validate_file_exists
class YouTubeTools:
"""YouTube tools with improved error handling and network resilience"""
def __init__(self, max_retries: int = 3, retry_delay: float = 1.0):
self.supported_formats = ['mp4', '3gp', 'webm']
self.supported_audio_formats = ['mp3', 'mp4', 'webm']
self.max_retries = max_retries
self.retry_delay = retry_delay
def _retry_operation(self, operation, *args, **kwargs):
"""Retry operation with exponential backoff for network issues"""
for attempt in range(self.max_retries):
try:
return operation(*args, **kwargs)
except Exception as e:
if attempt == self.max_retries - 1:
raise e
error_msg = str(e).lower()
if any(term in error_msg for term in ['network', 'socket', 'timeout', 'connection']):
wait_time = self.retry_delay * (2 ** attempt)
logger.warning(f"Network error (attempt {attempt + 1}/{self.max_retries}): {e}")
logger.info(f"Retrying in {wait_time} seconds...")
time.sleep(wait_time)
else:
raise e
def get_video_info(self, url: str) -> Optional[Dict[str, Any]]:
"""
Retrieve comprehensive metadata about a YouTube video using pytubefix
"""
try:
def _get_info():
yt = YouTube(url, on_progress_callback=on_progress)
# Get available streams info with better error handling
video_streams = []
try:
streams = yt.streams.filter(progressive=True, file_extension='mp4')
for stream in streams:
try:
video_streams.append({
'resolution': getattr(stream, 'resolution', 'unknown'),
'fps': getattr(stream, 'fps', 'unknown'),
'video_codec': getattr(stream, 'video_codec', 'unknown'),
'audio_codec': getattr(stream, 'audio_codec', 'unknown'),
'filesize': getattr(stream, 'filesize', None),
'mime_type': getattr(stream, 'mime_type', 'unknown')
})
except Exception as stream_error:
logger.debug(f"Error processing stream: {stream_error}")
continue
except Exception as e:
logger.warning(f"Could not retrieve stream details: {e}")
# Get caption languages safely
captions_available = []
try:
if yt.captions:
captions_available = list(yt.captions.keys())
except Exception as e:
logger.warning(f"Could not retrieve captions list: {e}")
info = {
'title': getattr(yt, 'title', 'Unknown'),
'author': getattr(yt, 'author', 'Unknown'),
'channel_url': getattr(yt, 'channel_url', 'Unknown'),
'length': getattr(yt, 'length', 0),
'views': getattr(yt, 'views', 0),
'description': getattr(yt, 'description', ''),
'thumbnail_url': getattr(yt, 'thumbnail_url', ''),
'publish_date': yt.publish_date.isoformat() if getattr(yt, 'publish_date', None) else None,
'keywords': getattr(yt, 'keywords', []),
'video_id': getattr(yt, 'video_id', ''),
'watch_url': getattr(yt, 'watch_url', url),
'available_streams': video_streams,
'captions_available': captions_available
}
return info
info = self._retry_operation(_get_info)
if info is not None:
logger.info(f"Retrieved info for video: {info.get('title', 'Unknown')}")
return info
except Exception as e:
logger.error(f"Failed to get video info for {url}: {e}")
return None
def download_video(self, url: str, output_path: str = './downloads',
resolution: str = 'highest', filename: Optional[str] = None) -> Optional[str]:
"""Download a YouTube video with retry logic"""
try:
def _download():
os.makedirs(output_path, exist_ok=True)
yt = YouTube(url, on_progress_callback=on_progress)
# Select stream based on resolution preference
if resolution == 'highest':
stream = yt.streams.get_highest_resolution()
elif resolution == 'lowest':
stream = yt.streams.get_lowest_resolution()
else:
stream = yt.streams.filter(res=resolution, progressive=True, file_extension='mp4').first()
if not stream:
logger.warning(f"Resolution {resolution} not found, downloading highest instead")
stream = yt.streams.get_highest_resolution()
if not stream:
raise Exception("No suitable stream found for download")
# Download with custom filename if provided
if filename:
safe_filename = "".join(c for c in filename if c.isalnum() or c in (' ', '-', '_')).rstrip()
file_path = stream.download(output_path=output_path, filename=f"{safe_filename}.{stream.subtype}")
else:
file_path = stream.download(output_path=output_path)
return file_path
file_path = self._retry_operation(_download)
logger.info(f"Downloaded video to {file_path}")
return file_path
except Exception as e:
logger.error(f"Failed to download video from {url}: {e}")
return None
def download_audio(self, url: str, output_path: str = './downloads',
filename: Optional[str] = None) -> Optional[str]:
"""Download only audio from a YouTube video with retry logic"""
try:
def _download_audio():
os.makedirs(output_path, exist_ok=True)
yt = YouTube(url, on_progress_callback=on_progress)
audio_stream = yt.streams.get_audio_only()
if not audio_stream:
raise Exception("No audio stream found")
if filename:
safe_filename = "".join(c for c in filename if c.isalnum() or c in (' ', '-', '_')).rstrip()
file_path = audio_stream.download(output_path=output_path, filename=f"{safe_filename}.{audio_stream.subtype}")
else:
file_path = audio_stream.download(output_path=output_path)
return file_path
file_path = self._retry_operation(_download_audio)
logger.info(f"Downloaded audio to {file_path}")
return file_path
except Exception as e:
logger.error(f"Failed to download audio from {url}: {e}")
return None
def get_captions(self, url: str, language_code: str = 'en') -> Optional[str]:
"""
Get captions/subtitles - FIXED: No more deprecation warning
"""
try:
def _get_captions():
yt = YouTube(url, on_progress_callback=on_progress)
if not yt.captions:
logger.warning("No captions available for this video")
return None
# Use modern dictionary-style access instead of deprecated method
if language_code in yt.captions:
caption = yt.captions[language_code]
captions_text = caption.generate_srt_captions()
return captions_text
else:
available_langs = list(yt.captions.keys())
logger.warning(f"Captions not found for language {language_code}. Available: {available_langs}")
return None
result = self._retry_operation(_get_captions)
if result:
logger.info(f"Retrieved captions in {language_code}")
return result
except Exception as e:
logger.error(f"Failed to get captions from {url}: {e}")
return None
def get_playlist_info(self, playlist_url: str) -> Optional[Dict[str, Any]]:
"""
Get information about a YouTube playlist - FIXED: Better error handling
"""
try:
def _get_playlist_info():
playlist = Playlist(playlist_url)
# Get video URLs first (this triggers the playlist loading)
video_urls = list(playlist.video_urls)
# Safely access playlist properties with fallbacks
info = {
'video_count': len(video_urls),
'video_urls': video_urls[:10], # Limit to first 10 for performance
'total_videos': len(video_urls)
}
# Try to get additional info, but don't fail if unavailable
try:
info['title'] = getattr(playlist, 'title', 'Unknown Playlist')
except:
info['title'] = 'Private/Unavailable Playlist'
try:
info['description'] = getattr(playlist, 'description', '')
except:
info['description'] = 'Description unavailable'
try:
info['owner'] = getattr(playlist, 'owner', 'Unknown')
except:
info['owner'] = 'Owner unavailable'
return info
info = self._retry_operation(_get_playlist_info)
if info is not None:
logger.info(f"Retrieved playlist info: {info['title']} ({info['video_count']} videos)")
return info
except Exception as e:
logger.error(f"Failed to get playlist info from {playlist_url}: {e}")
return None
def get_available_qualities(self, url: str) -> Optional[List[Dict[str, Any]]]:
"""
Get all available download qualities - FIXED: Better network handling
"""
try:
def _get_qualities():
yt = YouTube(url, on_progress_callback=on_progress)
streams = []
# Get progressive streams (video + audio)
for stream in yt.streams.filter(progressive=True):
try:
streams.append({
'resolution': getattr(stream, 'resolution', 'unknown'),
'fps': getattr(stream, 'fps', 'unknown'),
'filesize_mb': round(stream.filesize / (1024 * 1024), 2) if getattr(stream, 'filesize', None) else None,
'mime_type': getattr(stream, 'mime_type', 'unknown'),
'video_codec': getattr(stream, 'video_codec', 'unknown'),
'audio_codec': getattr(stream, 'audio_codec', 'unknown')
})
except Exception as stream_error:
logger.debug(f"Error processing stream: {stream_error}")
continue
# Sort by resolution (numeric part)
def sort_key(x):
res = x['resolution']
if res and res != 'unknown' and res[:-1].isdigit():
return int(res[:-1])
return 0
return sorted(streams, key=sort_key, reverse=True)
return self._retry_operation(_get_qualities)
except Exception as e:
logger.error(f"Failed to get qualities for {url}: {e}")
return None
# Convenience functions (unchanged)
def get_video_info(url: str) -> Optional[Dict[str, Any]]:
"""Standalone function to get video information"""
tools = YouTubeTools()
return tools.get_video_info(url)
def download_video(url: str, output_path: str = './downloads',
resolution: str = 'highest', filename: Optional[str] = None) -> Optional[str]:
"""Standalone function to download a video"""
tools = YouTubeTools()
return tools.download_video(url, output_path, resolution, filename)
def download_audio(url: str, output_path: str = './downloads',
filename: Optional[str] = None) -> Optional[str]:
"""Standalone function to download audio only"""
tools = YouTubeTools()
return tools.download_audio(url, output_path, filename)
def get_captions(url: str, language_code: str = 'en') -> Optional[str]:
"""Standalone function to get video captions"""
tools = YouTubeTools()
return tools.get_captions(url, language_code)
def get_playlist_info(playlist_url: str) -> Optional[Dict[str, Any]]:
"""Standalone function to get playlist information"""
tools = YouTubeTools()
return tools.get_playlist_info(playlist_url)