# -*- coding: utf-8 -*- import os import sys # THIS IS THE FIX - PART 1 os.environ['GRADIO_SUPPRESS_PROGRESS'] = 'true' import cv2 import numpy as np import gradio as gr import shutil import subprocess from PIL import Image, ImageDraw, ImageFont, ImageOps from datetime import datetime from threading import Lock import base64 import json import io # --- Dependency Check --- try: from controlnet_aux import ( CannyDetector, MLSDdetector, HEDdetector, LineartDetector, OpenposeDetector, NormalBaeDetector ) from gradio_client import Client from rembg import remove import librosa except ImportError as e: print("="*80) print(f"ERROR: Missing dependency -> {e}") print("Please install all required packages by running:") print("pip install -r requirements.txt") print("="*80) sys.exit(1) # --- AI Model Dependency Check --- try: import whisper except ImportError: print("="*80) print("WARNING: 'openai-whisper' not installed. The Transcription tab will be disabled.") print("To enable it, run: pip install -U openai-whisper") print("="*80) whisper = None # --- Global Variables & Setup --- TEMP_DIR = "temp_gradio" os.makedirs(TEMP_DIR, exist_ok=True) model_load_lock = Lock() loaded_detectors = {} whisper_model = None # --- Default Presets for Transfer Tab (Flat Dictionary) --- DEFAULT_LINK_PRESETS = { # Virtual Try-On & Character "OutfitAnyone": "https://huggingface.co/spaces/HumanAIGC/OutfitAnyone", "Kolors Virtual Try-On": "https://huggingface.co/spaces/Kwai-Kolors/Kolors-Virtual-Try-On", "Miragic Virtual Try-On": "https://huggingface.co/spaces/Miragic-AI/Miragic-Virtual-Try-On", "OutfitAnyway": "https://huggingface.co/spaces/selfit-camera/OutfitAnyway", "IDM-VTON": "https://huggingface.co/spaces/yisol/IDM-VTON", "InstantCharacter": "https://huggingface.co/spaces/InstantX/InstantCharacter", "InstantID": "https://huggingface.co/spaces/InstantX/InstantID", # AI Lip-Sync & Talking Avatars "LivePortrait": "https://huggingface.co/spaces/Han-123/LivePortrait", "LivePortrait (CPU)": "https://huggingface.co/spaces/K00B404/LivePortrait_cpu", "D-ID Live Portrait AI": "https://www.d-id.com/liveportrait-4/", "Synthesia Avatars": "https://www.synthesia.io/features/avatars", "Papercup": "https://www.papercup.com/", "Hedra": "https://www.hedra.com", "LemonSlice": "https://lemonslice.com", "Vozo AI": "https://www.vozo.ai/lip-sync", "Gooey AI Lipsync": "https://gooey.ai/Lipsync", "Sync.so": "https://sync.so", "LipDub AI": "https://www.lipdub.ai", "Magic Hour": "https://magichour.ai", "Lifelike AI": "https://www.lifelikeai.io", "DeepMotion": "https://www.deepmotion.com", "Elai.io": "https://elai.io", "Rephrase.ai": "https://www.rephrase.ai", "Colossyan": "https://www.colossyan.com", "HeyGen (Movio)": "https://www.heygen.com", "Murf Studio": "https://murf.ai", # Image Editing & Upscaling "FLUX Fill/Outpaint": "https://huggingface.co/spaces/multimodalart/flux-fill-outpaint", "ReSize Image Outpainting": "https://huggingface.co/spaces/VIDraft/ReSize-Image-Outpainting", "IC-Light (Relighting)": "https://huggingface.co/spaces/lllyasviel/IC-Light", "Kontext Relight": "https://huggingface.co/spaces/kontext-community/kontext-relight", "SUPIR Upscaler": "https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR", # Video Generation & FramePacks "Framepacks (atunc29)": "https://huggingface.co/spaces/atunc29/Framepacks", "Framepack i2v (ginigen)": "https://huggingface.co/spaces/ginigen/framepack-i2v", "Framepack i2v (beowcow)": "https://huggingface.co/spaces/beowcow/framepack-i2v", "Framepack i2v (lisonallen)": "https://huggingface.co/spaces/lisonallen/framepack-i2v", "FramePack F1 (Latyrine)": "https://huggingface.co/spaces/Latyrine/FramePack-F1", "FramePack F1 (linoyts)": "https://huggingface.co/spaces/linoyts/FramePack-F1", "FramePack Rotate (bep40)": "https://huggingface.co/spaces/bep40/FramePack_rotate_landscape", "FramePack Rotate (VIDraft)": "https://huggingface.co/spaces/VIDraft/FramePack_rotate_landscape", "FramePack Rotate (tori29umai)": "https://huggingface.co/spaces/tori29umai/FramePack_rotate_landscape", "Framepack-H111 (rahul7star)": "https://huggingface.co/spaces/rahul7star/Framepack-H111", "FLUX.1 Kontext Dev": "https://huggingface.co/spaces/black-forest-labs/FLUX.1-Kontext-Dev", "Wan2-1-fast": "https://huggingface.co/spaces/multimodalart/wan2-1-fast", "LTX-video-distilled": "https://huggingface.co/spaces/Lightricks/ltx-video-distilled", "RunwayML": "https://app.runwayml.com/video-tools/teams/rinaabdine1/ai-tools/generate", "Pika Labs": "https://pika.art/", "Kling AI": "https://app.klingai.com/global/image-to-video/frame-mode", # Video Interpolation & Slow Motion "RIFE (remzloev)": "https://huggingface.co/spaces/remzloev/Rife", "VFI Converter (Agung1453)": "https://huggingface.co/spaces/Agung1453/Video-Frame-Interpolation-Converter", "ZeroGPU Upscaler/Interpolation": "https://huggingface.co/spaces/inoculatemedia/zerogpu-upscaler-interpolation", "Frame Interpolation (meta-artem)": "https://huggingface.co/spaces/meta-artem/frame-interpolation", "Video Frame Interpolation (guardiancc)": "https://huggingface.co/spaces/guardiancc/video_frame_interpolation", "Video Frame Interpolation (freealise)": "https://huggingface.co/spaces/freealise/video_frame_interpolation", "Framer (wwen1997)": "https://huggingface.co/spaces/wwen1997/Framer", "Inter4k VideoInterpolator": "https://huggingface.co/spaces/vimleshc57/Inter4k_VideoInterpolator", # AnimateDiff & Advanced Animation "AnimateDiff Lightning (ByteDance)": "https://huggingface.co/spaces/ByteDance/AnimateDiff-Lightning", "AnimateDiff Lightning (SahaniJi)": "https://huggingface.co/spaces/SahaniJi/AnimateDiff-Lightning", "AnimateDiff (fatima14)": "https://huggingface.co/spaces/fatima14/AnimateDiff", "AnimateDiff Video Gen (faizanR)": "https://huggingface.co/spaces/faizanR/animatediff-video-generator", "Text-to-Animation Fast (MisterProton)": "https://huggingface.co/spaces/MisterProton/text-to-Animation-Fast-AnimateDiff", "Text-to-Animation Fast (Rowdy013)": "https://huggingface.co/spaces/Rowdy013/text-to-Animation-Fast", # StyleGAN & Portrait Motion "StyleGAN-Human Interpolation (hysts)": "https://huggingface.co/spaces/hysts/StyleGAN-Human-Interpolation", "StyleGAN-Human (Gradio-Blocks)": "https://huggingface.co/spaces/Gradio-Blocks/StyleGAN-Human", # Film & Style Models "MGM-Film-Diffusion (tonyassi)": "https://huggingface.co/spaces/tonyassi/MGM-Film-Diffusion", "CineDiffusion (takarajordan)": "https://huggingface.co/spaces/takarajordan/CineDiffusion", "FLUX Film Foto (MartsoBodziu1994)": "https://huggingface.co/spaces/MartsoBodziu1994/alvdansen-flux_film_foto", "FLUX Style Shaping": "https://huggingface.co/spaces/multimodalart/flux-style-shaping", "Film (Stijnijzelenberg)": "https://huggingface.co/spaces/Stijnijzelenberg/film", "Film Eras (abbiewoodbridge)": "https://huggingface.co/spaces/abbiewoodbridge/Film_Eras", "Film Genre Classifier (Rezuwan)": "https://huggingface.co/spaces/Rezuwan/film_genre_classifier", "RunwayML (Faizbulbul)": "https://huggingface.co/spaces/Faizbulbul/Runwaymlfaiz", # Text-to-3D "TRELLIS TextTo3D (PUM4CH3N)": "https://huggingface.co/spaces/PUM4CH3N/TRELLIS_TextTo3D", "TRELLIS TextTo3D (cavargas10)": "https://huggingface.co/spaces/cavargas10/TRELLIS-Texto3D", "TRELLIS TextTo3D (dkatz2391)": "https://huggingface.co/spaces/dkatz2391/TRELLIS_TextTo3D_Try2", "Sparc3D": "https://huggingface.co/spaces/ilcve21/Sparc3D", "Hunyuan3D-2.1": "https://huggingface.co/spaces/tencent/Hunyuan3D-2.1", # Image Captioning & Interrogation "BLIP-2 (hysts)": "https://huggingface.co/spaces/hysts/BLIP2", "BLIP-3o": "https://huggingface.co/spaces/BLIP3o/blip-3o", "Blip-Dalle3 (DarwinAnim8or)": "https://huggingface.co/spaces/DarwinAnim8or/Blip-Dalle3", "BLIP API (Jonu1)": "https://huggingface.co/spaces/Jonu1/blip-image-captioning-api", "BLIP API (muxiddin19)": "https://huggingface.co/spaces/muxiddin19/blip-image-captioning-api", # Diffusion & Sketching Tools "DiffSketcher (SVGRender)": "https://huggingface.co/spaces/SVGRender/DiffSketcher", "Diffusion WikiArt (kaupane)": "https://huggingface.co/spaces/kaupane/diffusion-wikiart", "Diffusers Image Fill (OzzyGT)": "https://huggingface.co/spaces/OzzyGT/diffusers-image-fill", "Diffusers Fast Inpaint (OzzyGT)": "https://huggingface.co/spaces/OzzyGT/diffusers-fast-inpaint", # Miscellaneous Tools "EBSynth (NihalGazi)": "https://huggingface.co/spaces/NihalGazi/EBSynth", "MoodSpace (huzey)": "https://huggingface.co/spaces/huzey/MoodSpace", "TR0N (Layer6)": "https://huggingface.co/spaces/Layer6/TR0N", "TUTOR (nathannarrik)": "https://huggingface.co/spaces/nathannarrik/TUTOR", "Sport Model 1 (CHEN11102)": "https://huggingface.co/spaces/CHEN11102/sportmodel1", } # --- Model Loading --- DETECTOR_CONFIG = { "Canny": {"class": CannyDetector, "args": {}}, "Lineart": {"class": LineartDetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}}, "MLSD": {"class": MLSDdetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}}, "OpenPose": {"class": OpenposeDetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}}, "NormalBAE": {"class": NormalBaeDetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}}, "SoftEdge (HED)": {"class": HEDdetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}}, } def get_detector(name): with model_load_lock: if name not in loaded_detectors: print(f"Loading {name} model...") config = DETECTOR_CONFIG[name] if "pretrained_model_or_path" in config["args"]: detector_class = config["class"] loaded_detectors[name] = detector_class.from_pretrained(**config["args"]) else: loaded_detectors[name] = config["class"](**config["args"]) print(f"{name} model loaded.") return loaded_detectors[name] def load_whisper_model(model_name="base"): global whisper_model if whisper: with model_load_lock: if whisper_model is None or whisper_model.name != model_name: print(f"Loading Whisper model '{model_name}'... (This may download files on first run)") whisper_model = whisper.load_model(model_name) print("Whisper model loaded.") return whisper_model return None get_detector("Canny") # Pre-load Canny detector # --- Utility Functions --- def rotate_image(image, rotation): if rotation == "90 Degrees Clockwise": return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) elif rotation == "90 Degrees Counter-Clockwise": return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) elif rotation == "180 Degrees": return cv2.rotate(image, cv2.ROTATE_180) return image def manipulate_image(image, operation): if image is None: raise gr.Error("Please upload an image first.") if operation == "Invert Colors": return cv2.bitwise_not(image) elif operation == "Flip Horizontal": return cv2.flip(image, 1) elif operation == "Flip Vertical": return cv2.flip(image, 0) elif operation == "Rotate 90° Right": return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) elif operation == "Rotate 90° Left": return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) else: return image def manipulate_video(video_path, operation, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video first.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"manipulated_video_{timestamp}.mp4") cap = cv2.VideoCapture(video_path) if not cap.isOpened(): raise gr.Error("Error opening video file.") width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = cap.get(cv2.CAP_PROP_FPS) if fps == 0: fps = 30 frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) fourcc = cv2.VideoWriter_fourcc(*'mp4v') out_width, out_height = width, height if operation in ["Rotate 90° Right", "Rotate 90° Left"]: out_width, out_height = height, width writer = cv2.VideoWriter(output_video_path, fourcc, fps, (out_width, out_height)) for _ in progress.tqdm(range(frame_count), desc=f"Applying '{operation}'"): ret, frame = cap.read() if not ret: break processed_frame = manipulate_image(frame, operation) writer.write(processed_frame) cap.release() writer.release() return output_video_path def get_media_duration(media_path): if not media_path: return 0.0 try: cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", media_path] result = subprocess.run(cmd, capture_output=True, text=True, check=True) return float(result.stdout.strip()) except Exception as e: print(f"Could not get duration for {media_path}: {e}") return 0.0 def get_video_dimensions(video_path): if not video_path: return 0, 0 try: cap = cv2.VideoCapture(video_path) if not cap.isOpened(): return 0, 0 width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) cap.release() return width, height except Exception: return 0, 0 def get_video_fps(video_path): if not video_path: return 24.0 try: cap = cv2.VideoCapture(video_path) if not cap.isOpened(): return 24.0 fps = cap.get(cv2.CAP_PROP_FPS) cap.release() return fps if fps > 0 else 24.0 except Exception: return 24.0 def run_ffmpeg_command(cmd, desc="Processing with FFMPEG..."): try: print(f"Running FFMPEG command: {' '.join(cmd)}") # Use subprocess.run for a more robust, blocking call that waits for completion. process = subprocess.run( cmd, capture_output=True, text=True, encoding='utf-8', check=False # We check the return code manually to provide a better error. ) # If FFMPEG returns a non-zero exit code, it indicates an error. if process.returncode != 0: # Combine stdout and stderr for a complete, easy-to-read log. full_output = f"--- FFMPEG & GRADIO ERROR LOG ---\n\n" \ f"FFMPEG COMMAND:\n{' '.join(cmd)}\n\n" \ f"FFMPEG STDERR:\n{process.stderr}\n\n" \ f"FFMPEG STDOUT:\n{process.stdout}" # Raise our own exception with the detailed output. raise subprocess.CalledProcessError(process.returncode, cmd, output=full_output) except subprocess.CalledProcessError as e: # Catch the exception and raise a user-friendly Gradio error. raise gr.Error(f"FFMPEG failed!\n\nDetails:\n{e.output}") except FileNotFoundError: raise gr.Error("FFMPEG not found. Please ensure ffmpeg is installed and in your system's PATH.") def batch_image_processor(files, processing_function, job_name, progress, **kwargs): if not files: raise gr.Error("Please upload at least one image.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"{job_name}_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True) output_paths = [] for file_obj in progress.tqdm(files, desc=f"Processing batch for {job_name}"): try: base, _ = os.path.splitext(os.path.basename(file_obj.name)) if job_name == "zoom_videos": output_filename = f"{base}.mp4" elif job_name == "bg_removed": output_filename = f"{base}.png" else: output_filename = os.path.basename(file_obj.name) output_path = os.path.join(job_temp_dir, output_filename) processing_function(input_path=file_obj.name, output_path=output_path, **kwargs) output_paths.append(output_path) except Exception as e: print(f"Skipping file {file_obj.name} due to error: {e}") continue if not output_paths: shutil.rmtree(job_temp_dir) raise gr.Error("No images could be processed from the batch.") zip_base_name = os.path.join(TEMP_DIR, f"{job_name}_archive_{timestamp}") zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir) return output_paths, zip_path, job_temp_dir def process_batch_images_with_detector(files, detector_name, progress=gr.Progress(track_tqdm=True)): detector = get_detector(detector_name) def apply_detector(input_path, output_path, **kwargs): with Image.open(input_path).convert("RGB") as img: processed = detector(img, detect_resolution=512, image_resolution=1024) processed.save(output_path) output_paths, zip_path, _ = batch_image_processor(files, apply_detector, f"controlnet_{detector_name}", progress) return output_paths, zip_path def process_video_with_detector(video_path, detector_name, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video first.") detector = get_detector(detector_name) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"job_{timestamp}") input_frames_dir, output_frames_dir = os.path.join(job_temp_dir, "input_frames"), os.path.join(job_temp_dir, "output_frames") os.makedirs(input_frames_dir, exist_ok=True); os.makedirs(output_frames_dir, exist_ok=True) output_video_path = os.path.join(TEMP_DIR, f"{detector_name.lower()}_output_{timestamp}.mp4") cap = cv2.VideoCapture(video_path) frame_count, frame_rate = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), get_video_fps(video_path) for i in progress.tqdm(range(frame_count), desc="Extracting Frames"): success, frame = cap.read() if not success: break cv2.imwrite(os.path.join(input_frames_dir, f"frame_{i:05d}.png"), frame) cap.release() input_files = sorted(os.listdir(input_frames_dir)) for filename in progress.tqdm(input_files, desc=f"Applying {detector_name}"): with Image.open(os.path.join(input_frames_dir, filename)).convert("RGB") as image: result_pil = detector(image, detect_resolution=512, image_resolution=1024) result_np = cv2.cvtColor(np.array(result_pil), cv2.COLOR_RGB2BGR) cv2.imwrite(os.path.join(output_frames_dir, filename), result_np) cmd = ["ffmpeg", "-framerate", str(frame_rate), "-i", os.path.join(output_frames_dir, "frame_%05d.png"), "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path] run_ffmpeg_command(cmd, "Compiling Video") shutil.rmtree(job_temp_dir) return output_video_path def extract_first_last_frame(video_path): if not video_path: raise gr.Error("Please upload a video first.") cap = cv2.VideoCapture(video_path) if not cap.isOpened(): raise gr.Error("Failed to open video file.") frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) if frame_count < 1: cap.release() raise gr.Error("Video has no frames.") if frame_count < 2: success, frame_img = cap.read() cap.release() if not success: raise gr.Error("Could not read the only frame.") frame_rgb = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB) return [frame_rgb, frame_rgb.copy()] success, first_frame_img = cap.read() if not success: raise gr.Error("Could not read the first frame.") cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count - 1) success, last_frame_img = cap.read() if not success: raise gr.Error("Could not read the last frame.") cap.release() return [cv2.cvtColor(first_frame_img, cv2.COLOR_BGR2RGB), cv2.cvtColor(last_frame_img, cv2.COLOR_BGR2RGB)] def video_to_frames_extractor(video_path, skip_rate, rotation, do_resize, out_w, out_h, out_format, jpg_quality, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video first.") if do_resize and (out_w <= 0 or out_h <= 0): raise gr.Error("If resizing, width and height must be positive.") cap = cv2.VideoCapture(video_path) if not cap.isOpened(): raise gr.Error("Failed to open video file.") frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) if frame_count < 1: cap.release(); raise gr.Error("Video appears to have no frames.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"v2f_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True) frame_paths = [] saved_count = 0 for i in progress.tqdm(range(frame_count), desc="Extracting Frames"): success, frame = cap.read() if not success: break if i % skip_rate != 0: continue frame = rotate_image(frame, rotation) if do_resize: frame = cv2.resize(frame, (out_w, out_h), interpolation=cv2.INTER_LANCZOS4) frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) file_ext = out_format.lower() frame_path = os.path.join(job_temp_dir, f"frame_{saved_count:05d}.{file_ext}") if out_format == "JPG": frame_pil.save(frame_path, quality=jpg_quality) else: frame_pil.save(frame_path) frame_paths.append(frame_path) saved_count += 1 cap.release() if not frame_paths: shutil.rmtree(job_temp_dir); raise gr.Error("Could not extract any frames.") zip_base_name = os.path.join(TEMP_DIR, f"frames_archive_{timestamp}") zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir) return frame_paths[:100], zip_path def create_video_from_frames(files, fps, rotation, do_resize, out_w, out_h, progress=gr.Progress(track_tqdm=True)): if not files: raise gr.Error("Please upload frame images first.") if do_resize and (out_w <= 0 or out_h <= 0): raise gr.Error("If resizing, width and height must be positive.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"f2v_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True) filenames = [] for i, file in enumerate(progress.tqdm(files, desc="Preparing Frames")): ext = os.path.splitext(file.name)[1] temp_path = os.path.join(job_temp_dir, f"frame_{i:05d}{ext}") shutil.copy(file.name, temp_path); filenames.append(temp_path) output_video_path = os.path.join(TEMP_DIR, f"video_from_frames_{timestamp}.mp4") first_frame_img = rotate_image(cv2.imread(filenames[0]), rotation) h, w, _ = first_frame_img.shape if do_resize: w, h = out_w, out_h w -= w % 2; h -= h % 2 temp_processed_dir = os.path.join(job_temp_dir, "processed"); os.makedirs(temp_processed_dir, exist_ok=True) for i, filename in enumerate(progress.tqdm(filenames, desc="Processing Frames for Video")): frame = rotate_image(cv2.imread(filename), rotation) frame = cv2.resize(frame, (w, h), interpolation=cv2.INTER_LANCZOS4) cv2.imwrite(os.path.join(temp_processed_dir, f"pframe_{i:05d}.png"), frame) cmd = ["ffmpeg", "-framerate", str(fps), "-i", os.path.join(temp_processed_dir, "pframe_%05d.png"), "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path] run_ffmpeg_command(cmd, "Compiling Video") shutil.rmtree(job_temp_dir) return output_video_path def image_to_looping_video(image_array, duration, audio_path=None): if image_array is None: raise gr.Error("Please upload an image first.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") temp_image_path = os.path.join(TEMP_DIR, f"temp_image_{timestamp}.png") output_video_path = os.path.join(TEMP_DIR, f"looping_video_{timestamp}.mp4") img = Image.fromarray(image_array) img.save(temp_image_path) width, height = img.size width -= width % 2; height -= height % 2 cmd = ["ffmpeg", "-loop", "1", "-i", temp_image_path] if audio_path: cmd.extend(["-i", audio_path, "-c:a", "aac", "-shortest"]) cmd.extend(["-c:v", "libx264", "-t", str(duration), "-pix_fmt", "yuv420p", "-vf", f"scale={width}:{height}", "-y", output_video_path]) run_ffmpeg_command(cmd, "Creating Looping Video...") os.remove(temp_image_path) return output_video_path def create_zoom_videos(files, duration, zoom_ratio, zoom_direction, combine_videos, audio_path=None, progress=gr.Progress(track_tqdm=True)): if not files: raise gr.Error("Please upload at least one image.") fps = 30 total_frames = int(duration * fps) zoom_step = (zoom_ratio - 1.0) / total_frames zoom_coords = { "Center": "x=iw/2-(iw/zoom)/2:y=ih/2-(ih/zoom)/2", "Top": "x=iw/2-(iw/zoom)/2:y=0", "Bottom": "x=iw/2-(iw/zoom)/2:y=ih-(ih/zoom)", "Left": "x=0:y=ih/2-(ih/zoom)/2", "Right": "x=iw-(iw/zoom):y=ih/2-(ih/zoom)/2", "Top-Left": "x=0:y=0", "Top-Right": "x=iw-(iw/zoom):y=0", "Bottom-Left": "x=0:y=ih-(ih/zoom)", "Bottom-Right": "x=iw-(iw/zoom):y=ih-(ih/zoom)", } def process_single_image(input_path, output_path, **kwargs): audio_for_clip = kwargs.get('audio_for_clip') zoom_filter = (f"scale=3840:-1,zoompan=z='min(zoom+{zoom_step},{zoom_ratio})':{zoom_coords[zoom_direction]}:d={total_frames}:s=1920x1080:fps={fps}") cmd = ["ffmpeg", "-loop", "1", "-i", input_path] if audio_for_clip: cmd.extend(["-i", audio_for_clip, "-c:a", "aac", "-shortest"]) cmd.extend(["-vf", zoom_filter, "-c:v", "libx264", "-t", str(duration), "-pix_fmt", "yuv420p", "-b:v", "5M", "-y", output_path]) run_ffmpeg_command(cmd, f"Creating zoom video for {os.path.basename(input_path)}") batch_kwargs = {} if not combine_videos and audio_path: batch_kwargs['audio_for_clip'] = audio_path video_paths, zip_path, job_temp_dir = batch_image_processor(files, process_single_image, "zoom_videos", progress, **batch_kwargs) if not combine_videos: return video_paths, None, zip_path if not video_paths: raise gr.Error("No videos were created to be combined.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") silent_combined_path = os.path.join(job_temp_dir, f"combined_silent_{timestamp}.mp4") if len(video_paths) > 1: file_list_path = os.path.join(job_temp_dir, "files.txt") with open(file_list_path, 'w', encoding='utf-8') as f: for path in video_paths: f.write(f"file '{os.path.abspath(path)}'\n") run_ffmpeg_command(["ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", "-y", silent_combined_path], "Combining Videos") else: shutil.copy(video_paths[0], silent_combined_path) if audio_path: final_video_path = os.path.join(TEMP_DIR, f"combined_audio_{timestamp}.mp4") run_ffmpeg_command(["ffmpeg", "-i", silent_combined_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", "-y", final_video_path], "Adding audio...") else: final_video_path = os.path.join(TEMP_DIR, f"combined_final_{timestamp}.mp4") shutil.move(silent_combined_path, final_video_path) return None, final_video_path, zip_path def change_video_speed(video_path, speed_multiplier): if not video_path: raise gr.Error("Please upload a video first.") if speed_multiplier <= 0: raise gr.Error("Speed multiplier must be positive.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"speed_change_{timestamp}.mp4") pts_value = 1 / speed_multiplier cmd = ["ffmpeg", "-i", video_path, "-filter:v", f"setpts={pts_value}*PTS", "-an", "-y", output_video_path] run_ffmpeg_command(cmd, "Changing Video Speed") return output_video_path def reverse_video(video_path, audio_option): if not video_path: raise gr.Error("Please upload a video first.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"reversed_video_{timestamp}.mp4") filters = ["reverse"] if audio_option == "Reverse Audio": filters.append("areverse") cmd = ["ffmpeg", "-i", video_path, "-vf", filters[0]] if len(filters) > 1: cmd.extend(["-af", filters[1]]) if audio_option == "Remove Audio": cmd.append("-an") cmd.extend(["-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path]) run_ffmpeg_command(cmd, "Reversing video...") return output_video_path def add_audio_to_video(video_path, audio_path): if not video_path: raise gr.Error("Please upload a video.") if not audio_path: raise gr.Error("Please upload an audio file.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"video_with_audio_{timestamp}.mp4") cmd = ["ffmpeg", "-i", video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", "-y", output_video_path] run_ffmpeg_command(cmd, "Adding Audio to Video") return output_video_path def extract_audio(video_path, audio_format="mp3", progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video first.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_audio_path = os.path.join(TEMP_DIR, f"extracted_audio_{timestamp}.{audio_format}") cmd = ["ffmpeg", "-i", video_path, "-vn"] # -vn strips video if audio_format == "mp3": cmd.extend(["-c:a", "libmp3lame", "-q:a", "2"]) # VBR quality elif audio_format == "aac": cmd.extend(["-c:a", "aac", "-b:a", "192k"]) elif audio_format == "wav": cmd.extend(["-c:a", "pcm_s16le"]) cmd.extend(["-y", output_audio_path]) run_ffmpeg_command(cmd, "Extracting audio...") return output_audio_path def create_gif_from_video(video_path, start_time, end_time, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video first.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_gif_path, palette_path = os.path.join(TEMP_DIR, f"video_to_gif_{timestamp}.gif"), os.path.join(TEMP_DIR, f"palette_{timestamp}.png") duration_filter = [] if start_time > 0 or end_time > 0: if end_time > 0 and end_time <= start_time: raise gr.Error("End time must be after start time.") if start_time > 0: duration_filter.extend(["-ss", str(start_time)]) if end_time > 0: duration_filter.extend(["-to", str(end_time)]) progress(0, desc="Generating Color Palette"); run_ffmpeg_command(["ffmpeg", "-i", video_path] + duration_filter + ["-vf", "fps=15,scale=480:-1:flags=lanczos,palettegen", "-y", palette_path]) progress(0.5, desc="Creating GIF"); run_ffmpeg_command(["ffmpeg", "-i", video_path] + duration_filter + ["-i", palette_path, "-filter_complex", "fps=15,scale=480:-1:flags=lanczos[x];[x][1:v]paletteuse", "-y", output_gif_path]) progress(1, desc="Done"); os.remove(palette_path) return output_gif_path def get_frame_at_time(video_path, time_in_seconds=0): if not video_path: return None try: command = ['ffmpeg', '-ss', str(time_in_seconds), '-i', video_path, '-vframes', '1', '-f', 'image2pipe', '-c:v', 'png', '-'] pipe = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) return Image.open(io.BytesIO(pipe.stdout)).convert("RGB") except Exception as e: print(f"Error extracting frame for crop preview: {e}") cap = cv2.VideoCapture(video_path); cap.set(cv2.CAP_PROP_POS_MSEC, time_in_seconds * 1000) success, frame = cap.read(); cap.release() if success: return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) return None def crop_video(video_path, x, y, w, h, do_resize, out_w, out_h): if not video_path: raise gr.Error("Please upload a video first.") w, h, x, y = int(w), int(h), int(x), int(y) w -= w % 2; h -= h % 2 if w <= 0 or h <= 0: raise gr.Error("Crop dimensions must be positive.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"cropped_video_{timestamp}.mp4") vf_filters = [f"crop={w}:{h}:{x}:{y}"] if do_resize: if out_w <= 0 or out_h <= 0: raise gr.Error("Resize dimensions must be positive.") out_w, out_h = int(out_w), int(out_h) out_w -= out_w % 2; out_h -= out_h % 2 vf_filters.append(f"scale={out_w}:{out_h}") cmd = ["ffmpeg", "-i", video_path, "-vf", ",".join(vf_filters), "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path] run_ffmpeg_command(cmd, "Cropping video...") return output_video_path def trim_video(video_path, start_time, end_time): if not video_path: raise gr.Error("Please upload a video first.") if start_time < 0: start_time = 0 if end_time <= start_time: end_time = 0 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"trimmed_video_{timestamp}.mp4") cmd = ["ffmpeg", "-i", video_path, "-ss", str(start_time)] if end_time > 0: cmd.extend(["-to", str(end_time)]) cmd.extend(["-c:v", "libx264", "-c:a", "copy", "-pix_fmt", "yuv420p", "-y", output_video_path]) run_ffmpeg_command(cmd, "Trimming Video") return output_video_path def apply_video_watermark(video_path, text, position, opacity, size_scale, color): if not video_path: raise gr.Error("Please upload a video first.") if not text: raise gr.Error("Watermark text cannot be empty.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"watermarked_video_{timestamp}.mp4") _ , video_h = get_video_dimensions(video_path) if video_h == 0: video_h = 720 # Fallback escaped_text = text.replace("'", r"'\''").replace(":", r"\:").replace(",", r"\,") pos_map = {"Top-Left": "x=20:y=20", "Top-Right": "x=w-tw-20:y=20", "Bottom-Left": "x=20:y=h-th-20", "Bottom-Right": "x=w-tw-20:y=h-th-20", "Center": "x=(w-tw)/2:y=(h-th)/2"} font_opacity = opacity / 100.0 font_size = int(video_h / (50 - (size_scale * 3.5))) # Cleaned up filter. The pre-calculation of fontsize is the most stable method. drawtext_filter = ( f"drawtext=" f"text='{escaped_text}':" f"{pos_map[position]}:" f"fontsize={font_size}:" f"fontcolor={color}@{font_opacity}" ) cmd = [ "ffmpeg", "-i", video_path, "-vf", drawtext_filter, "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path ] run_ffmpeg_command(cmd, "Applying text watermark...") return output_video_path def remove_video_background(video_path, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video first.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"bg_rem_job_{timestamp}"); input_frames_dir, output_frames_dir = os.path.join(job_temp_dir, "input_frames"), os.path.join(job_temp_dir, "output_frames") os.makedirs(input_frames_dir, exist_ok=True); os.makedirs(output_frames_dir, exist_ok=True) cap = cv2.VideoCapture(video_path); frame_count, fps = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), get_video_fps(video_path) for i in progress.tqdm(range(frame_count), desc="Step 1: Extracting Frames"): success, frame = cap.read() if not success: break cv2.imwrite(os.path.join(input_frames_dir, f"frame_{i:05d}.png"), frame) cap.release() for filename in progress.tqdm(sorted(os.listdir(input_frames_dir)), desc="Step 2: Removing Backgrounds"): with Image.open(os.path.join(input_frames_dir, filename)) as img: remove(img).save(os.path.join(output_frames_dir, filename)) output_video_path = os.path.join(TEMP_DIR, f"bg_removed_{timestamp}.webm") progress(0, desc="Step 3: Compiling Video") cmd = ["ffmpeg", "-framerate", str(fps), "-i", os.path.join(output_frames_dir, "frame_%05d.png"), "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p", "-auto-alt-ref", "0", "-b:v", "1M", "-y", output_video_path] run_ffmpeg_command(cmd, "Compiling transparent video...") shutil.rmtree(job_temp_dir) return output_video_path def transcribe_media(media_path, model_name, progress=gr.Progress(track_tqdm=True)): if media_path is None: raise gr.Error("Please upload a video or audio file first.") model = load_whisper_model(model_name) if model is None: raise gr.Error("Whisper model is not available.") # media_path is now a gr.File object, so we use .name audio_path = media_path.name base_name = os.path.splitext(os.path.basename(media_path.name))[0] # Check if the input is a video file to extract audio from if audio_path.lower().endswith(('.mp4', '.mov', '.mkv', '.avi', '.webm')): progress(0, desc="Extracting audio...") audio_path_temp = os.path.join(TEMP_DIR, f"{base_name}.mp3") try: run_ffmpeg_command(["ffmpeg", "-i", audio_path, "-q:a", "0", "-map", "a", "-y", audio_path_temp]) audio_path = audio_path_temp except gr.Error as e: if "does not contain any stream" in str(e): raise gr.Error("The uploaded video has no audio track.") else: raise e progress(0.2, desc=f"Transcribing with Whisper '{model_name}' model...") result = model.transcribe(audio_path, verbose=False) def format_ts(s): h, r = divmod(s, 3600); m, s = divmod(r, 60) return f"{int(h):02}:{int(m):02}:{int(s):02},{int((s-int(s))*1000):03}" srt_path = os.path.join(TEMP_DIR, f"{base_name}.srt") vtt_path = os.path.join(TEMP_DIR, f"{base_name}.vtt") with open(srt_path, "w", encoding="utf-8") as srt_f, open(vtt_path, "w", encoding="utf-8") as vtt_f: vtt_f.write("WEBVTT\n\n") for i, seg in enumerate(result["segments"]): start, end, text = seg['start'], seg['end'], seg['text'].strip() srt_f.write(f"{i + 1}\n{format_ts(start)} --> {format_ts(end)}\n{text}\n\n") vtt_f.write(f"{format_ts(start).replace(',', '.')} --> {format_ts(end).replace(',', '.')}\n{text}\n\n") return result["text"], [srt_path, vtt_path] def transcribe_and_prep_burn(media_file, model_name, progress=gr.Progress(track_tqdm=True)): if not media_file: raise gr.Error("Please upload a file first.") is_video = media_file.name.lower().endswith(('.mp4', '.mov', '.mkv', '.avi', '.webm')) text, files = transcribe_media(media_file, model_name, progress) # Return the original video path and make the burn-in UI visible only if it was a video if is_video: return text, files, media_file.name, gr.update(visible=True) else: return text, files, None, gr.update(visible=False) def burn_subtitles(video_path, srt_file_obj, font_size_scale, font_color, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Original video path not found. Please re-transcribe.") if not srt_file_obj or not srt_file_obj[0].name: raise gr.Error("SRT file not found. Please re-transcribe.") srt_path = srt_file_obj[0].name # srt_file_obj is a list of file objects timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"subtitled_video_{timestamp}.mp4") _, video_h = get_video_dimensions(video_path) if video_h == 0: video_h = 720 # Fallback divisor = 32 - (font_size_scale * 2) calculated_font_size = int(video_h / divisor) color_bgr = font_color[5:7] + font_color[3:5] + font_color[1:3] ffmpeg_color = f"&H00{color_bgr.upper()}" # This filter requires FFMPEG to be compiled with libass. Escaping is crucial for Windows paths. escaped_srt_path = srt_path.replace('\\', '/').replace(':', '\\:') vf_filter = f"subtitles='{escaped_srt_path}':force_style='Fontsize={calculated_font_size},PrimaryColour={ffmpeg_color},BorderStyle=1,Outline=1,Shadow=0.5,MarginV=15'" cmd = ["ffmpeg", "-i", video_path, "-vf", vf_filter, "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path] run_ffmpeg_command(cmd, "Burning subtitles into video...") return output_video_path def remove_background_single(input_path, output_path, **kwargs): with Image.open(input_path) as img: remove(img).save(output_path) def remove_background_batch(files, progress=gr.Progress(track_tqdm=True)): output_paths, zip_path, _ = batch_image_processor(files, remove_background_single, "bg_removed", progress) return output_paths, zip_path def resize_convert_single_image(input_path, output_path, **kwargs): output_format = kwargs.get('output_format', 'JPG') quality = kwargs.get('quality', 95) enable_resize = kwargs.get('enable_resize', False) max_w = kwargs.get('max_w', 1024) max_h = kwargs.get('max_h', 1024) resize_mode = kwargs.get('resize_mode', "Fit (preserve aspect ratio)") with Image.open(input_path) as img: # Handle transparency for formats that don't support it if output_format in ['JPG', 'WEBP'] and img.mode in ['RGBA', 'P', 'LA']: img = img.convert("RGB") if enable_resize: if resize_mode == "Fit (preserve aspect ratio)": img.thumbnail((max_w, max_h), Image.Resampling.LANCZOS) else: # Stretch img = img.resize((max_w, max_h), Image.Resampling.LANCZOS) save_kwargs = {} # Pillow's format name for JPG is 'JPEG' pil_format = 'JPEG' if output_format == 'JPG' else output_format if pil_format in ['JPEG', 'WEBP']: save_kwargs['quality'] = quality img.save(output_path, pil_format, **save_kwargs) def batch_resize_convert_images(files, output_format, quality, enable_resize, max_w, max_h, resize_mode, progress=gr.Progress(track_tqdm=True)): if not files: raise gr.Error("Please upload at least one image.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_name = "resized_converted" job_temp_dir = os.path.join(TEMP_DIR, f"{job_name}_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True) output_paths = [] processing_kwargs = { 'output_format': output_format, 'quality': quality, 'enable_resize': enable_resize, 'max_w': max_w, 'max_h': max_h, 'resize_mode': resize_mode } for file_obj in progress.tqdm(files, desc=f"Processing batch for {job_name}"): try: base, _ = os.path.splitext(os.path.basename(file_obj.name)) output_filename = f"{base}.{output_format.lower()}" output_path = os.path.join(job_temp_dir, output_filename) resize_convert_single_image(file_obj.name, output_path, **processing_kwargs) output_paths.append(output_path) except Exception as e: print(f"Skipping file {file_obj.name} due to error: {e}"); continue if not output_paths: shutil.rmtree(job_temp_dir); raise gr.Error("No images could be processed.") zip_base_name = os.path.join(TEMP_DIR, f"{job_name}_archive_{timestamp}") zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir) return output_paths[:100], zip_path def apply_watermark_single(input_path, output_path, watermark_text, position, opacity): with Image.open(input_path).convert("RGBA") as image: if not watermark_text: raise ValueError("Watermark text cannot be empty.") txt = Image.new("RGBA", image.size, (255, 255, 255, 0)) try: font = ImageFont.truetype("DejaVuSans.ttf", int(image.width / 20)) except IOError: font = ImageFont.load_default() d = ImageDraw.Draw(txt); bbox = d.textbbox((0, 0), watermark_text, font=font); w, h = bbox[2]-bbox[0], bbox[3]-bbox[1] pos_map = {"Top-Left":(10,10), "Top-Right":(image.width-w-10,10), "Bottom-Left":(10,image.height-h-10), "Bottom-Right":(image.width-w-10,image.height-h-10), "Center":((image.width-w)/2,(image.height-h)/2)} d.text(pos_map[position], watermark_text, font=font, fill=(255, 255, 255, int(255 * (opacity / 100)))) Image.alpha_composite(image, txt).convert("RGB").save(output_path) def apply_watermark_batch(files, watermark_text, position, opacity, progress=gr.Progress(track_tqdm=True)): if not watermark_text: raise gr.Error("Please provide watermark text.") processing_func = lambda input_path, output_path: apply_watermark_single( input_path, output_path, watermark_text=watermark_text, position=position, opacity=opacity ) output_paths, zip_path, _ = batch_image_processor(files, processing_func, "watermarked", progress) return output_paths, zip_path def convert_compress_video(video_path, out_format, v_codec, crf_value, scale_option, a_codec, a_bitrate, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video to convert.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_filename = f"converted_{timestamp}.{out_format.lower()}" output_path = os.path.join(TEMP_DIR, output_filename) cmd = ["ffmpeg", "-i", video_path] vf_filters = [] if scale_option != "Original": w, h = get_video_dimensions(video_path) if w > 0 and h > 0: target_h = int(scale_option.replace('p', '')) target_w = round(w * target_h / h / 2) * 2 vf_filters.append(f"scale={target_w}:{target_h}") if vf_filters: cmd.extend(["-vf", ",".join(vf_filters)]) cmd.extend(["-c:v", v_codec]) if v_codec in ["libx264", "libx265"]: cmd.extend(["-crf", str(crf_value)]) cmd.extend(["-pix_fmt", "yuv420p"]) if a_codec == "copy": cmd.extend(["-c:a", "copy"]) else: cmd.extend(["-c:a", a_codec, "-b:a", f"{a_bitrate}k"]) cmd.extend(["-y", output_path]) run_ffmpeg_command(cmd, "Converting and Compressing Video...") return output_path def apply_video_fade(video_path, fade_in_duration, fade_out_duration): if not video_path: raise gr.Error("Please upload a video.") video_duration = get_media_duration(video_path) if fade_in_duration + fade_out_duration > video_duration: raise gr.Error("The sum of fade durations cannot be greater than the video duration.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"faded_video_{timestamp}.mp4") fade_filters = [] if fade_in_duration > 0: fade_filters.append(f"fade=t=in:st=0:d={fade_in_duration}") if fade_out_duration > 0: fade_out_start = video_duration - fade_out_duration; fade_filters.append(f"fade=t=out:st={fade_out_start}:d={fade_out_duration}") if not fade_filters: gr.Info("No fade applied."); return video_path cmd = ["ffmpeg", "-i", video_path, "-vf", ",".join(fade_filters), "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path] run_ffmpeg_command(cmd, "Applying video fade...") return output_video_path def trim_and_fade_audio(audio_path, start_time, end_time, fade_in_duration, fade_out_duration): if not audio_path: raise gr.Error("Please upload an audio file.") audio_duration = get_media_duration(audio_path) if start_time < 0: start_time = 0 if end_time <= 0 or end_time > audio_duration: end_time = audio_duration if start_time >= end_time: raise gr.Error("Start time must be less than end time.") trimmed_duration = end_time - start_time if fade_in_duration + fade_out_duration > trimmed_duration: raise gr.Error("Sum of fade durations cannot be greater than the trimmed audio duration.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_audio_path = os.path.join(TEMP_DIR, f"edited_audio_{timestamp}.mp3") af_filters = [] if fade_in_duration > 0: af_filters.append(f"afade=t=in:st=0:d={fade_in_duration}") if fade_out_duration > 0: fade_out_start = trimmed_duration - fade_out_duration; af_filters.append(f"afade=t=out:st={fade_out_start}:d={fade_out_duration}") cmd = ["ffmpeg", "-ss", str(start_time), "-to", str(end_time), "-i", audio_path] if af_filters: cmd.extend(["-af", ",".join(af_filters)]) cmd.extend(["-y", output_audio_path]) run_ffmpeg_command(cmd, "Trimming and fading audio...") return output_audio_path # --- FLUX API --- FLUX_MODELS = {"FLUX.1-schnell (Fast)": "black-forest-labs/FLUX.1-schnell", "FLUX.1-dev (High Quality)": "black-forest-labs/FLUX.1-dev"} def call_flux_api(prompt, model_choice, width, height, hf_token): if not hf_token: raise gr.Error("Hugging Face User Access Token is required.") try: client = Client(FLUX_MODELS[model_choice], hf_token=hf_token) return client.predict(prompt=prompt, seed=0, randomize_seed=True, width=width, height=height, num_inference_steps=8 if "dev" in model_choice else 4, api_name="/infer")[0] except Exception as e: raise gr.Error(f"API call failed: {e}") def get_image_as_base64(path): try: with open(path, "rb") as f: return f"data:image/png;base64,{base64.b64encode(f.read()).decode('utf-8')}" except FileNotFoundError: return None # --- Transfer Tab Functions (Simplified) --- def filter_presets(query, all_presets): """Filters the preset dropdown based on a search query.""" if not query: return gr.update(choices=sorted(list(all_presets.keys()))) filtered_keys = [key for key in all_presets.keys() if query.lower() in key.lower()] return gr.update(choices=sorted(filtered_keys)) def save_preset(presets, name, url): if not name or not name.strip(): gr.Warning("Preset name cannot be empty."); return presets, gr.update() if not url or not url.strip(): gr.Warning("Target URL cannot be empty."); return presets, gr.update() presets[name] = url gr.Info(f"Preset '{name}' saved!") return presets, gr.update(choices=sorted(list(presets.keys()))) def delete_preset(presets, name): if name in presets: del presets[name] gr.Info(f"Preset '{name}' deleted!") return presets, gr.update(choices=sorted(list(presets.keys())), value=None), "" gr.Warning(f"Preset '{name}' not found.") return presets, gr.update(), gr.update() def load_preset(presets, name): return presets.get(name, "") # --- Join/Beat-Sync/Etc Video Feature Functions --- def add_videos_to_join_list(files, current_list, progress=gr.Progress(track_tqdm=True)): if not files: return current_list session_id = f"join_session_{datetime.now().strftime('%Y%m%d_%H%M%S')}" join_session_dir = os.path.join(TEMP_DIR, session_id); os.makedirs(join_session_dir, exist_ok=True) new_list = list(current_list) for file in progress.tqdm(files, desc="Processing New Videos"): fname = os.path.basename(file.name); new_path = os.path.join(join_session_dir, fname) shutil.copy(file.name, new_path) duration = get_media_duration(new_path) if duration > 0: new_list.append({"path": new_path, "name": fname, "duration": duration}) else: gr.Warning(f"Could not process or get duration for video: {fname}. Skipping.") return new_list def update_video_queue_df(video_list): if not video_list: return gr.update(value=None) return gr.update(value=[[i + 1, v['name']] for i, v in enumerate(video_list)]) def handle_video_list_action(video_list, selected_index, action): if selected_index is None or not (0 <= selected_index < len(video_list)): gr.Warning("Please select a video from the list first.") return video_list, None index = int(selected_index) new_list = list(video_list) if action == "up" and index > 0: new_list.insert(index - 1, new_list.pop(index)) elif action == "down" and index < len(new_list) - 1: new_list.insert(index + 1, new_list.pop(index)) elif action == "remove": new_list.pop(index) return new_list, gr.update(value=None) def get_video_start_end_frames_for_preview(video_list, evt: gr.SelectData): """Universal function to extract first and last frames for a gallery preview.""" if not evt.selected: return None, -1, gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False) index = evt.index[0] if not (0 <= index < len(video_list)): return None, -1, gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False) video_path = video_list[index].get("path") preview_frames = None if video_path: try: frames = extract_first_last_frame(video_path) preview_frames = frames except Exception as e: print(f"Error generating start/end preview for {video_path}: {e}") preview_frames = None can_move_up = index > 0 can_move_down = index < len(video_list) - 1 return preview_frames, index, gr.update(interactive=can_move_up), gr.update(interactive=can_move_down), gr.update(interactive=True) def join_videos_from_list(video_data, audio_path=None, progress=gr.Progress(track_tqdm=True)): if not video_data: raise gr.Error("Please add at least one video to the queue.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"join_{timestamp}") os.makedirs(job_temp_dir, exist_ok=True) video_input_path = "" if len(video_data) > 1: progress(0.1, desc="Joining video streams...") file_list_path = os.path.join(job_temp_dir, "files.txt") with open(file_list_path, 'w', encoding='utf-8') as f: for video_info in video_data: f.write(f"file '{os.path.abspath(video_info['path'])}'\n") concatenated_video_path = os.path.join(job_temp_dir, "concatenated.mp4") run_ffmpeg_command(["ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", "-y", concatenated_video_path], "Joining Videos") video_input_path = concatenated_video_path elif len(video_data) == 1: progress(0.1, desc="Preparing single video...") video_input_path = video_data[0]['path'] if not audio_path: final_output_path = os.path.join(TEMP_DIR, f"joined_video_{timestamp}.mp4") if len(video_data) == 1: shutil.copy(video_input_path, final_output_path) else: shutil.move(video_input_path, final_output_path) if os.path.exists(job_temp_dir): shutil.rmtree(job_temp_dir) return final_output_path else: progress(0.7, desc="Adding audio track...") final_output_path = os.path.join(TEMP_DIR, f"joined_video_with_audio_{timestamp}.mp4") cmd = [ "ffmpeg", "-i", video_input_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0", "-shortest", "-y", final_output_path ] run_ffmpeg_command(cmd, "Adding Audio to Joined Video") if os.path.exists(job_temp_dir): shutil.rmtree(job_temp_dir) return final_output_path def ping_pong_video(video_path, audio_option, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"pingpong_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True) progress(0.2, desc="Reversing video...") reversed_video_path = os.path.join(job_temp_dir, "reversed_temp.mp4") cmd_reverse = ["ffmpeg", "-i", video_path, "-vf", "reverse"] if audio_option == "Reverse Audio": cmd_reverse.extend(["-af", "areverse"]) else: cmd_reverse.append("-an") cmd_reverse.extend(["-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", reversed_video_path]) run_ffmpeg_command(cmd_reverse) progress(0.6, desc="Joining videos...") file_list_path = os.path.join(job_temp_dir, "files.txt") with open(file_list_path, 'w', encoding='utf-8') as f: f.write(f"file '{os.path.abspath(video_path)}'\n") f.write(f"file '{os.path.abspath(reversed_video_path)}'\n") output_video_path = os.path.join(TEMP_DIR, f"pingpong_video_{timestamp}.mp4") cmd_join = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", "-y", output_video_path] if audio_option == "Original Audio Only": cmd_join = ["ffmpeg", "-i", video_path, "-i", reversed_video_path, "-filter_complex", "[0:v][1:v]concat=n=2:v=1[v]", "-map", "[v]", "-map", "0:a?", "-c:a", "copy", "-y", output_video_path] run_ffmpeg_command(cmd_join) shutil.rmtree(job_temp_dir) return output_video_path def create_beat_sync_video(video_data, audio_path, rhythm_source, beat_sensitivity, cuts_per_measure, min_clip_duration, loop_videos, slicing_method, max_slowdown_clip_duration, progress=gr.Progress(track_tqdm=True)): if not video_data: raise gr.Error("Please upload at least one video.") if not audio_path: raise gr.Error("Please upload a music track.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S"); job_temp_dir = os.path.join(TEMP_DIR, f"beatsync_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True) progress(0, desc="Step 1: Analyzing audio..."); beat_times = [] try: y, sr = librosa.load(audio_path) if rhythm_source == "Detect Beats (dynamic)": _, beat_frames = librosa.beat.beat_track(y=y, sr=sr, tightness=beat_sensitivity); beat_times = librosa.frames_to_time(beat_frames, sr=sr) elif rhythm_source == "Generate Rhythmic Grid (BPM-based)": tempo, _ = librosa.beat.beat_track(y=y, sr=sr); audio_duration = librosa.get_duration(y=y, sr=sr) if tempo == 0: raise ValueError("Could not determine BPM.") cut_interval = (60.0 / tempo) * 4 / cuts_per_measure beat_times = list(np.arange(0, audio_duration, cut_interval)) except Exception as e: raise gr.Error(f"Failed to analyze audio: {e}") if len(beat_times) < 2: raise gr.Error("Could not determine enough rhythm points.") progress(0.2, desc="Step 2: Refining intervals..."); intervals = [] if beat_times[0] > min_clip_duration: intervals.append((0.0, beat_times[0])) for i in range(len(beat_times) - 1): start_beat, end_beat = beat_times[i], beat_times[i+1]; duration = end_beat - start_beat if duration >= min_clip_duration: intervals.append((start_beat, end_beat)) if not intervals: raise gr.Error("No beat intervals found meeting minimum duration.") progress(0.3, desc="Step 3: Slicing video clips..."); clip_paths = []; video_idx = 0; current_video_time = 0.0 for i, (start_beat, end_beat) in enumerate(progress.tqdm(intervals, desc="Slicing video clips")): target_clip_duration = end_beat - start_beat; found_clip = False for _ in range(len(video_data)): video_info = video_data[video_idx]; input_video_path = video_info['path']; output_clip_path = os.path.join(job_temp_dir, f"clip_{i:05d}.mp4") if slicing_method == "Cut to Fit": if (video_info['duration'] - current_video_time) >= target_clip_duration: run_ffmpeg_command(["ffmpeg", "-ss", str(current_video_time), "-i", input_video_path, "-t", str(target_clip_duration), "-c", "copy", "-an", "-y", output_clip_path]) clip_paths.append(output_clip_path); current_video_time += target_clip_duration; found_clip = True; break elif slicing_method == "Slowdown to Fit": original_clip_duration = min(target_clip_duration, max_slowdown_clip_duration) if (video_info['duration'] - current_video_time) >= original_clip_duration: speed_multiplier = original_clip_duration / target_clip_duration run_ffmpeg_command(["ffmpeg", "-ss", str(current_video_time), "-i", input_video_path, "-t", str(original_clip_duration), "-vf", f"setpts={1/speed_multiplier:.4f}*PTS", "-an", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_clip_path]) clip_paths.append(output_clip_path); current_video_time += original_clip_duration; found_clip = True; break video_idx = (video_idx + 1) % len(video_data); current_video_time = 0.0 if loop_videos == "End when videos run out" and video_idx == 0: break if not found_clip: gr.Warning("Ran out of video footage."); break if not clip_paths: raise gr.Error("Failed to create any video clips.") progress(0.7, desc="Step 4: Joining clips..."); file_list_path = os.path.join(job_temp_dir, "files.txt") with open(file_list_path, 'w', encoding='utf-8') as f: for path in clip_paths: f.write(f"file '{os.path.abspath(path)}'\n") silent_video_path = os.path.join(job_temp_dir, "silent_final.mp4") run_ffmpeg_command(["ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", "-y", silent_video_path]) progress(0.9, desc="Step 5: Adding music..."); output_video_path = os.path.join(TEMP_DIR, f"beatsynced_video_{timestamp}.mp4") run_ffmpeg_command(["ffmpeg", "-i", silent_video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", "-y", output_video_path]) shutil.rmtree(job_temp_dir) return output_video_path # --- CSS and JS --- footer_css = """ #custom-footer { text-align: center !important; padding: 20px 0 5px 0 !important; font-size: .9em; color: #a0aec0; } """ jkl_video_control_js = """()=>{document.addEventListener("keydown",e=>{const t=document.activeElement;if(t&&("INPUT"===t.tagName||"TEXTAREA"===t.tagName))return;const n=document.querySelector("#video-trim-input video");if(!n)return;const o=document.querySelector("#video-trim-fps input"),a=o?parseFloat(o.value):24,i=1/a;let r=!1;switch(e.key.toLowerCase()){case"k":n.paused?n.play():n.pause(),r=!0;break;case"j":n.currentTime=Math.max(0,n.currentTime-i),r=!0;break;case"l":n.currentTime+=i,r=!0}r&&e.preventDefault()})}""" with gr.Blocks( theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), title="Skriptz - Universal Tool", css=footer_css, js=jkl_video_control_js ) as demo: logo_b64 = get_image_as_base64("logo.png") if logo_b64: gr.HTML(f"""
""") else: gr.Markdown("# Skriptz Universal Tool") gr.Markdown("