# -*- coding: utf-8 -*- import os import sys # THIS IS THE FIX - PART 1 os.environ['GRADIO_SUPPRESS_PROGRESS'] = 'true' import cv2 import numpy as np import gradio as gr import shutil import subprocess from PIL import Image, ImageDraw, ImageFont, ImageOps from datetime import datetime from threading import Lock import base64 import json import io # --- Dependency Check --- try: from controlnet_aux import ( CannyDetector, MLSDdetector, HEDdetector, LineartDetector, OpenposeDetector, NormalBaeDetector ) from gradio_client import Client from rembg import remove import librosa except ImportError as e: print("="*80) print(f"ERROR: Missing dependency -> {e}") print("Please install all required packages by running:") print("pip install -r requirements.txt") print("="*80) sys.exit(1) # --- AI Model Dependency Check --- try: import whisper except ImportError: print("="*80) print("WARNING: 'openai-whisper' not installed. The Transcription tab will be disabled.") print("To enable it, run: pip install -U openai-whisper") print("="*80) whisper = None # --- Global Variables & Setup --- TEMP_DIR = "temp_gradio" os.makedirs(TEMP_DIR, exist_ok=True) model_load_lock = Lock() loaded_detectors = {} whisper_model = None # --- Default Presets for Transfer Tab (Flat Dictionary) --- DEFAULT_LINK_PRESETS = { # Virtual Try-On & Character "OutfitAnyone": "https://huggingface.co/spaces/HumanAIGC/OutfitAnyone", "Kolors Virtual Try-On": "https://huggingface.co/spaces/Kwai-Kolors/Kolors-Virtual-Try-On", "Miragic Virtual Try-On": "https://huggingface.co/spaces/Miragic-AI/Miragic-Virtual-Try-On", "OutfitAnyway": "https://huggingface.co/spaces/selfit-camera/OutfitAnyway", "IDM-VTON": "https://huggingface.co/spaces/yisol/IDM-VTON", "InstantCharacter": "https://huggingface.co/spaces/InstantX/InstantCharacter", "InstantID": "https://huggingface.co/spaces/InstantX/InstantID", # AI Lip-Sync & Talking Avatars "LivePortrait": "https://huggingface.co/spaces/Han-123/LivePortrait", "LivePortrait (CPU)": "https://huggingface.co/spaces/K00B404/LivePortrait_cpu", "D-ID Live Portrait AI": "https://www.d-id.com/liveportrait-4/", "Synthesia Avatars": "https://www.synthesia.io/features/avatars", "Papercup": "https://www.papercup.com/", "Hedra": "https://www.hedra.com", "LemonSlice": "https://lemonslice.com", "Vozo AI": "https://www.vozo.ai/lip-sync", "Gooey AI Lipsync": "https://gooey.ai/Lipsync", "Sync.so": "https://sync.so", "LipDub AI": "https://www.lipdub.ai", "Magic Hour": "https://magichour.ai", "Lifelike AI": "https://www.lifelikeai.io", "DeepMotion": "https://www.deepmotion.com", "Elai.io": "https://elai.io", "Rephrase.ai": "https://www.rephrase.ai", "Colossyan": "https://www.colossyan.com", "HeyGen (Movio)": "https://www.heygen.com", "Murf Studio": "https://murf.ai", # Image Editing & Upscaling "FLUX Fill/Outpaint": "https://huggingface.co/spaces/multimodalart/flux-fill-outpaint", "ReSize Image Outpainting": "https://huggingface.co/spaces/VIDraft/ReSize-Image-Outpainting", "IC-Light (Relighting)": "https://huggingface.co/spaces/lllyasviel/IC-Light", "Kontext Relight": "https://huggingface.co/spaces/kontext-community/kontext-relight", "SUPIR Upscaler": "https://huggingface.co/spaces/Fabrice-TIERCELIN/SUPIR", # Video Generation & FramePacks "Framepacks (atunc29)": "https://huggingface.co/spaces/atunc29/Framepacks", "Framepack i2v (ginigen)": "https://huggingface.co/spaces/ginigen/framepack-i2v", "Framepack i2v (beowcow)": "https://huggingface.co/spaces/beowcow/framepack-i2v", "Framepack i2v (lisonallen)": "https://huggingface.co/spaces/lisonallen/framepack-i2v", "FramePack F1 (Latyrine)": "https://huggingface.co/spaces/Latyrine/FramePack-F1", "FramePack F1 (linoyts)": "https://huggingface.co/spaces/linoyts/FramePack-F1", "FramePack Rotate (bep40)": "https://huggingface.co/spaces/bep40/FramePack_rotate_landscape", "FramePack Rotate (VIDraft)": "https://huggingface.co/spaces/VIDraft/FramePack_rotate_landscape", "FramePack Rotate (tori29umai)": "https://huggingface.co/spaces/tori29umai/FramePack_rotate_landscape", "Framepack-H111 (rahul7star)": "https://huggingface.co/spaces/rahul7star/Framepack-H111", "FLUX.1 Kontext Dev": "https://huggingface.co/spaces/black-forest-labs/FLUX.1-Kontext-Dev", "Wan2-1-fast": "https://huggingface.co/spaces/multimodalart/wan2-1-fast", "LTX-video-distilled": "https://huggingface.co/spaces/Lightricks/ltx-video-distilled", "RunwayML": "https://app.runwayml.com/video-tools/teams/rinaabdine1/ai-tools/generate", "Pika Labs": "https://pika.art/", "Kling AI": "https://app.klingai.com/global/image-to-video/frame-mode", # Video Interpolation & Slow Motion "RIFE (remzloev)": "https://huggingface.co/spaces/remzloev/Rife", "VFI Converter (Agung1453)": "https://huggingface.co/spaces/Agung1453/Video-Frame-Interpolation-Converter", "ZeroGPU Upscaler/Interpolation": "https://huggingface.co/spaces/inoculatemedia/zerogpu-upscaler-interpolation", "Frame Interpolation (meta-artem)": "https://huggingface.co/spaces/meta-artem/frame-interpolation", "Video Frame Interpolation (guardiancc)": "https://huggingface.co/spaces/guardiancc/video_frame_interpolation", "Video Frame Interpolation (freealise)": "https://huggingface.co/spaces/freealise/video_frame_interpolation", "Framer (wwen1997)": "https://huggingface.co/spaces/wwen1997/Framer", "Inter4k VideoInterpolator": "https://huggingface.co/spaces/vimleshc57/Inter4k_VideoInterpolator", # AnimateDiff & Advanced Animation "AnimateDiff Lightning (ByteDance)": "https://huggingface.co/spaces/ByteDance/AnimateDiff-Lightning", "AnimateDiff Lightning (SahaniJi)": "https://huggingface.co/spaces/SahaniJi/AnimateDiff-Lightning", "AnimateDiff (fatima14)": "https://huggingface.co/spaces/fatima14/AnimateDiff", "AnimateDiff Video Gen (faizanR)": "https://huggingface.co/spaces/faizanR/animatediff-video-generator", "Text-to-Animation Fast (MisterProton)": "https://huggingface.co/spaces/MisterProton/text-to-Animation-Fast-AnimateDiff", "Text-to-Animation Fast (Rowdy013)": "https://huggingface.co/spaces/Rowdy013/text-to-Animation-Fast", # StyleGAN & Portrait Motion "StyleGAN-Human Interpolation (hysts)": "https://huggingface.co/spaces/hysts/StyleGAN-Human-Interpolation", "StyleGAN-Human (Gradio-Blocks)": "https://huggingface.co/spaces/Gradio-Blocks/StyleGAN-Human", # Film & Style Models "MGM-Film-Diffusion (tonyassi)": "https://huggingface.co/spaces/tonyassi/MGM-Film-Diffusion", "CineDiffusion (takarajordan)": "https://huggingface.co/spaces/takarajordan/CineDiffusion", "FLUX Film Foto (MartsoBodziu1994)": "https://huggingface.co/spaces/MartsoBodziu1994/alvdansen-flux_film_foto", "FLUX Style Shaping": "https://huggingface.co/spaces/multimodalart/flux-style-shaping", "Film (Stijnijzelenberg)": "https://huggingface.co/spaces/Stijnijzelenberg/film", "Film Eras (abbiewoodbridge)": "https://huggingface.co/spaces/abbiewoodbridge/Film_Eras", "Film Genre Classifier (Rezuwan)": "https://huggingface.co/spaces/Rezuwan/film_genre_classifier", "RunwayML (Faizbulbul)": "https://huggingface.co/spaces/Faizbulbul/Runwaymlfaiz", # Text-to-3D "TRELLIS TextTo3D (PUM4CH3N)": "https://huggingface.co/spaces/PUM4CH3N/TRELLIS_TextTo3D", "TRELLIS TextTo3D (cavargas10)": "https://huggingface.co/spaces/cavargas10/TRELLIS-Texto3D", "TRELLIS TextTo3D (dkatz2391)": "https://huggingface.co/spaces/dkatz2391/TRELLIS_TextTo3D_Try2", "Sparc3D": "https://huggingface.co/spaces/ilcve21/Sparc3D", "Hunyuan3D-2.1": "https://huggingface.co/spaces/tencent/Hunyuan3D-2.1", # Image Captioning & Interrogation "BLIP-2 (hysts)": "https://huggingface.co/spaces/hysts/BLIP2", "BLIP-3o": "https://huggingface.co/spaces/BLIP3o/blip-3o", "Blip-Dalle3 (DarwinAnim8or)": "https://huggingface.co/spaces/DarwinAnim8or/Blip-Dalle3", "BLIP API (Jonu1)": "https://huggingface.co/spaces/Jonu1/blip-image-captioning-api", "BLIP API (muxiddin19)": "https://huggingface.co/spaces/muxiddin19/blip-image-captioning-api", # Diffusion & Sketching Tools "DiffSketcher (SVGRender)": "https://huggingface.co/spaces/SVGRender/DiffSketcher", "Diffusion WikiArt (kaupane)": "https://huggingface.co/spaces/kaupane/diffusion-wikiart", "Diffusers Image Fill (OzzyGT)": "https://huggingface.co/spaces/OzzyGT/diffusers-image-fill", "Diffusers Fast Inpaint (OzzyGT)": "https://huggingface.co/spaces/OzzyGT/diffusers-fast-inpaint", # Miscellaneous Tools "EBSynth (NihalGazi)": "https://huggingface.co/spaces/NihalGazi/EBSynth", "MoodSpace (huzey)": "https://huggingface.co/spaces/huzey/MoodSpace", "TR0N (Layer6)": "https://huggingface.co/spaces/Layer6/TR0N", "TUTOR (nathannarrik)": "https://huggingface.co/spaces/nathannarrik/TUTOR", "Sport Model 1 (CHEN11102)": "https://huggingface.co/spaces/CHEN11102/sportmodel1", } # --- Model Loading --- DETECTOR_CONFIG = { "Canny": {"class": CannyDetector, "args": {}}, "Lineart": {"class": LineartDetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}}, "MLSD": {"class": MLSDdetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}}, "OpenPose": {"class": OpenposeDetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}}, "NormalBAE": {"class": NormalBaeDetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}}, "SoftEdge (HED)": {"class": HEDdetector, "args": {"pretrained_model_or_path": "lllyasviel/Annotators"}}, } def get_detector(name): with model_load_lock: if name not in loaded_detectors: print(f"Loading {name} model...") config = DETECTOR_CONFIG[name] if "pretrained_model_or_path" in config["args"]: detector_class = config["class"] loaded_detectors[name] = detector_class.from_pretrained(**config["args"]) else: loaded_detectors[name] = config["class"](**config["args"]) print(f"{name} model loaded.") return loaded_detectors[name] def load_whisper_model(model_name="base"): global whisper_model if whisper: with model_load_lock: if whisper_model is None or whisper_model.name != model_name: print(f"Loading Whisper model '{model_name}'... (This may download files on first run)") whisper_model = whisper.load_model(model_name) print("Whisper model loaded.") return whisper_model return None get_detector("Canny") # Pre-load Canny detector # --- Utility Functions --- def rotate_image(image, rotation): if rotation == "90 Degrees Clockwise": return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) elif rotation == "90 Degrees Counter-Clockwise": return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) elif rotation == "180 Degrees": return cv2.rotate(image, cv2.ROTATE_180) return image def manipulate_image(image, operation): if image is None: raise gr.Error("Please upload an image first.") if operation == "Invert Colors": return cv2.bitwise_not(image) elif operation == "Flip Horizontal": return cv2.flip(image, 1) elif operation == "Flip Vertical": return cv2.flip(image, 0) elif operation == "Rotate 90° Right": return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE) elif operation == "Rotate 90° Left": return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE) else: return image def manipulate_video(video_path, operation, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video first.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"manipulated_video_{timestamp}.mp4") cap = cv2.VideoCapture(video_path) if not cap.isOpened(): raise gr.Error("Error opening video file.") width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) fps = cap.get(cv2.CAP_PROP_FPS) if fps == 0: fps = 30 frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) fourcc = cv2.VideoWriter_fourcc(*'mp4v') out_width, out_height = width, height if operation in ["Rotate 90° Right", "Rotate 90° Left"]: out_width, out_height = height, width writer = cv2.VideoWriter(output_video_path, fourcc, fps, (out_width, out_height)) for _ in progress.tqdm(range(frame_count), desc=f"Applying '{operation}'"): ret, frame = cap.read() if not ret: break processed_frame = manipulate_image(frame, operation) writer.write(processed_frame) cap.release() writer.release() return output_video_path def get_media_duration(media_path): if not media_path: return 0.0 try: cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", media_path] result = subprocess.run(cmd, capture_output=True, text=True, check=True) return float(result.stdout.strip()) except Exception as e: print(f"Could not get duration for {media_path}: {e}") return 0.0 def get_video_dimensions(video_path): if not video_path: return 0, 0 try: cap = cv2.VideoCapture(video_path) if not cap.isOpened(): return 0, 0 width, height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) cap.release() return width, height except Exception: return 0, 0 def get_video_fps(video_path): if not video_path: return 24.0 try: cap = cv2.VideoCapture(video_path) if not cap.isOpened(): return 24.0 fps = cap.get(cv2.CAP_PROP_FPS) cap.release() return fps if fps > 0 else 24.0 except Exception: return 24.0 def run_ffmpeg_command(cmd, desc="Processing with FFMPEG..."): try: print(f"Running FFMPEG command: {' '.join(cmd)}") # Use subprocess.run for a more robust, blocking call that waits for completion. process = subprocess.run( cmd, capture_output=True, text=True, encoding='utf-8', check=False # We check the return code manually to provide a better error. ) # If FFMPEG returns a non-zero exit code, it indicates an error. if process.returncode != 0: # Combine stdout and stderr for a complete, easy-to-read log. full_output = f"--- FFMPEG & GRADIO ERROR LOG ---\n\n" \ f"FFMPEG COMMAND:\n{' '.join(cmd)}\n\n" \ f"FFMPEG STDERR:\n{process.stderr}\n\n" \ f"FFMPEG STDOUT:\n{process.stdout}" # Raise our own exception with the detailed output. raise subprocess.CalledProcessError(process.returncode, cmd, output=full_output) except subprocess.CalledProcessError as e: # Catch the exception and raise a user-friendly Gradio error. raise gr.Error(f"FFMPEG failed!\n\nDetails:\n{e.output}") except FileNotFoundError: raise gr.Error("FFMPEG not found. Please ensure ffmpeg is installed and in your system's PATH.") def batch_image_processor(files, processing_function, job_name, progress, **kwargs): if not files: raise gr.Error("Please upload at least one image.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"{job_name}_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True) output_paths = [] for file_obj in progress.tqdm(files, desc=f"Processing batch for {job_name}"): try: base, _ = os.path.splitext(os.path.basename(file_obj.name)) if job_name == "zoom_videos": output_filename = f"{base}.mp4" elif job_name == "bg_removed": output_filename = f"{base}.png" else: output_filename = os.path.basename(file_obj.name) output_path = os.path.join(job_temp_dir, output_filename) processing_function(input_path=file_obj.name, output_path=output_path, **kwargs) output_paths.append(output_path) except Exception as e: print(f"Skipping file {file_obj.name} due to error: {e}") continue if not output_paths: shutil.rmtree(job_temp_dir) raise gr.Error("No images could be processed from the batch.") zip_base_name = os.path.join(TEMP_DIR, f"{job_name}_archive_{timestamp}") zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir) return output_paths, zip_path, job_temp_dir def process_batch_images_with_detector(files, detector_name, progress=gr.Progress(track_tqdm=True)): detector = get_detector(detector_name) def apply_detector(input_path, output_path, **kwargs): with Image.open(input_path).convert("RGB") as img: processed = detector(img, detect_resolution=512, image_resolution=1024) processed.save(output_path) output_paths, zip_path, _ = batch_image_processor(files, apply_detector, f"controlnet_{detector_name}", progress) return output_paths, zip_path def process_video_with_detector(video_path, detector_name, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video first.") detector = get_detector(detector_name) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"job_{timestamp}") input_frames_dir, output_frames_dir = os.path.join(job_temp_dir, "input_frames"), os.path.join(job_temp_dir, "output_frames") os.makedirs(input_frames_dir, exist_ok=True); os.makedirs(output_frames_dir, exist_ok=True) output_video_path = os.path.join(TEMP_DIR, f"{detector_name.lower()}_output_{timestamp}.mp4") cap = cv2.VideoCapture(video_path) frame_count, frame_rate = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), get_video_fps(video_path) for i in progress.tqdm(range(frame_count), desc="Extracting Frames"): success, frame = cap.read() if not success: break cv2.imwrite(os.path.join(input_frames_dir, f"frame_{i:05d}.png"), frame) cap.release() input_files = sorted(os.listdir(input_frames_dir)) for filename in progress.tqdm(input_files, desc=f"Applying {detector_name}"): with Image.open(os.path.join(input_frames_dir, filename)).convert("RGB") as image: result_pil = detector(image, detect_resolution=512, image_resolution=1024) result_np = cv2.cvtColor(np.array(result_pil), cv2.COLOR_RGB2BGR) cv2.imwrite(os.path.join(output_frames_dir, filename), result_np) cmd = ["ffmpeg", "-framerate", str(frame_rate), "-i", os.path.join(output_frames_dir, "frame_%05d.png"), "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path] run_ffmpeg_command(cmd, "Compiling Video") shutil.rmtree(job_temp_dir) return output_video_path def extract_first_last_frame(video_path): if not video_path: raise gr.Error("Please upload a video first.") cap = cv2.VideoCapture(video_path) if not cap.isOpened(): raise gr.Error("Failed to open video file.") frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) if frame_count < 1: cap.release() raise gr.Error("Video has no frames.") if frame_count < 2: success, frame_img = cap.read() cap.release() if not success: raise gr.Error("Could not read the only frame.") frame_rgb = cv2.cvtColor(frame_img, cv2.COLOR_BGR2RGB) return [frame_rgb, frame_rgb.copy()] success, first_frame_img = cap.read() if not success: raise gr.Error("Could not read the first frame.") cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count - 1) success, last_frame_img = cap.read() if not success: raise gr.Error("Could not read the last frame.") cap.release() return [cv2.cvtColor(first_frame_img, cv2.COLOR_BGR2RGB), cv2.cvtColor(last_frame_img, cv2.COLOR_BGR2RGB)] def video_to_frames_extractor(video_path, skip_rate, rotation, do_resize, out_w, out_h, out_format, jpg_quality, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video first.") if do_resize and (out_w <= 0 or out_h <= 0): raise gr.Error("If resizing, width and height must be positive.") cap = cv2.VideoCapture(video_path) if not cap.isOpened(): raise gr.Error("Failed to open video file.") frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) if frame_count < 1: cap.release(); raise gr.Error("Video appears to have no frames.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"v2f_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True) frame_paths = [] saved_count = 0 for i in progress.tqdm(range(frame_count), desc="Extracting Frames"): success, frame = cap.read() if not success: break if i % skip_rate != 0: continue frame = rotate_image(frame, rotation) if do_resize: frame = cv2.resize(frame, (out_w, out_h), interpolation=cv2.INTER_LANCZOS4) frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) file_ext = out_format.lower() frame_path = os.path.join(job_temp_dir, f"frame_{saved_count:05d}.{file_ext}") if out_format == "JPG": frame_pil.save(frame_path, quality=jpg_quality) else: frame_pil.save(frame_path) frame_paths.append(frame_path) saved_count += 1 cap.release() if not frame_paths: shutil.rmtree(job_temp_dir); raise gr.Error("Could not extract any frames.") zip_base_name = os.path.join(TEMP_DIR, f"frames_archive_{timestamp}") zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir) return frame_paths[:100], zip_path def create_video_from_frames(files, fps, rotation, do_resize, out_w, out_h, progress=gr.Progress(track_tqdm=True)): if not files: raise gr.Error("Please upload frame images first.") if do_resize and (out_w <= 0 or out_h <= 0): raise gr.Error("If resizing, width and height must be positive.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"f2v_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True) filenames = [] for i, file in enumerate(progress.tqdm(files, desc="Preparing Frames")): ext = os.path.splitext(file.name)[1] temp_path = os.path.join(job_temp_dir, f"frame_{i:05d}{ext}") shutil.copy(file.name, temp_path); filenames.append(temp_path) output_video_path = os.path.join(TEMP_DIR, f"video_from_frames_{timestamp}.mp4") first_frame_img = rotate_image(cv2.imread(filenames[0]), rotation) h, w, _ = first_frame_img.shape if do_resize: w, h = out_w, out_h w -= w % 2; h -= h % 2 temp_processed_dir = os.path.join(job_temp_dir, "processed"); os.makedirs(temp_processed_dir, exist_ok=True) for i, filename in enumerate(progress.tqdm(filenames, desc="Processing Frames for Video")): frame = rotate_image(cv2.imread(filename), rotation) frame = cv2.resize(frame, (w, h), interpolation=cv2.INTER_LANCZOS4) cv2.imwrite(os.path.join(temp_processed_dir, f"pframe_{i:05d}.png"), frame) cmd = ["ffmpeg", "-framerate", str(fps), "-i", os.path.join(temp_processed_dir, "pframe_%05d.png"), "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path] run_ffmpeg_command(cmd, "Compiling Video") shutil.rmtree(job_temp_dir) return output_video_path def image_to_looping_video(image_array, duration, audio_path=None): if image_array is None: raise gr.Error("Please upload an image first.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") temp_image_path = os.path.join(TEMP_DIR, f"temp_image_{timestamp}.png") output_video_path = os.path.join(TEMP_DIR, f"looping_video_{timestamp}.mp4") img = Image.fromarray(image_array) img.save(temp_image_path) width, height = img.size width -= width % 2; height -= height % 2 cmd = ["ffmpeg", "-loop", "1", "-i", temp_image_path] if audio_path: cmd.extend(["-i", audio_path, "-c:a", "aac", "-shortest"]) cmd.extend(["-c:v", "libx264", "-t", str(duration), "-pix_fmt", "yuv420p", "-vf", f"scale={width}:{height}", "-y", output_video_path]) run_ffmpeg_command(cmd, "Creating Looping Video...") os.remove(temp_image_path) return output_video_path def create_zoom_videos(files, duration, zoom_ratio, zoom_direction, combine_videos, audio_path=None, progress=gr.Progress(track_tqdm=True)): if not files: raise gr.Error("Please upload at least one image.") fps = 30 total_frames = int(duration * fps) zoom_step = (zoom_ratio - 1.0) / total_frames zoom_coords = { "Center": "x=iw/2-(iw/zoom)/2:y=ih/2-(ih/zoom)/2", "Top": "x=iw/2-(iw/zoom)/2:y=0", "Bottom": "x=iw/2-(iw/zoom)/2:y=ih-(ih/zoom)", "Left": "x=0:y=ih/2-(ih/zoom)/2", "Right": "x=iw-(iw/zoom):y=ih/2-(ih/zoom)/2", "Top-Left": "x=0:y=0", "Top-Right": "x=iw-(iw/zoom):y=0", "Bottom-Left": "x=0:y=ih-(ih/zoom)", "Bottom-Right": "x=iw-(iw/zoom):y=ih-(ih/zoom)", } def process_single_image(input_path, output_path, **kwargs): audio_for_clip = kwargs.get('audio_for_clip') zoom_filter = (f"scale=3840:-1,zoompan=z='min(zoom+{zoom_step},{zoom_ratio})':{zoom_coords[zoom_direction]}:d={total_frames}:s=1920x1080:fps={fps}") cmd = ["ffmpeg", "-loop", "1", "-i", input_path] if audio_for_clip: cmd.extend(["-i", audio_for_clip, "-c:a", "aac", "-shortest"]) cmd.extend(["-vf", zoom_filter, "-c:v", "libx264", "-t", str(duration), "-pix_fmt", "yuv420p", "-b:v", "5M", "-y", output_path]) run_ffmpeg_command(cmd, f"Creating zoom video for {os.path.basename(input_path)}") batch_kwargs = {} if not combine_videos and audio_path: batch_kwargs['audio_for_clip'] = audio_path video_paths, zip_path, job_temp_dir = batch_image_processor(files, process_single_image, "zoom_videos", progress, **batch_kwargs) if not combine_videos: return video_paths, None, zip_path if not video_paths: raise gr.Error("No videos were created to be combined.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") silent_combined_path = os.path.join(job_temp_dir, f"combined_silent_{timestamp}.mp4") if len(video_paths) > 1: file_list_path = os.path.join(job_temp_dir, "files.txt") with open(file_list_path, 'w', encoding='utf-8') as f: for path in video_paths: f.write(f"file '{os.path.abspath(path)}'\n") run_ffmpeg_command(["ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", "-y", silent_combined_path], "Combining Videos") else: shutil.copy(video_paths[0], silent_combined_path) if audio_path: final_video_path = os.path.join(TEMP_DIR, f"combined_audio_{timestamp}.mp4") run_ffmpeg_command(["ffmpeg", "-i", silent_combined_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", "-y", final_video_path], "Adding audio...") else: final_video_path = os.path.join(TEMP_DIR, f"combined_final_{timestamp}.mp4") shutil.move(silent_combined_path, final_video_path) return None, final_video_path, zip_path def change_video_speed(video_path, speed_multiplier): if not video_path: raise gr.Error("Please upload a video first.") if speed_multiplier <= 0: raise gr.Error("Speed multiplier must be positive.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"speed_change_{timestamp}.mp4") pts_value = 1 / speed_multiplier cmd = ["ffmpeg", "-i", video_path, "-filter:v", f"setpts={pts_value}*PTS", "-an", "-y", output_video_path] run_ffmpeg_command(cmd, "Changing Video Speed") return output_video_path def reverse_video(video_path, audio_option): if not video_path: raise gr.Error("Please upload a video first.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"reversed_video_{timestamp}.mp4") filters = ["reverse"] if audio_option == "Reverse Audio": filters.append("areverse") cmd = ["ffmpeg", "-i", video_path, "-vf", filters[0]] if len(filters) > 1: cmd.extend(["-af", filters[1]]) if audio_option == "Remove Audio": cmd.append("-an") cmd.extend(["-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path]) run_ffmpeg_command(cmd, "Reversing video...") return output_video_path def add_audio_to_video(video_path, audio_path): if not video_path: raise gr.Error("Please upload a video.") if not audio_path: raise gr.Error("Please upload an audio file.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"video_with_audio_{timestamp}.mp4") cmd = ["ffmpeg", "-i", video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", "-y", output_video_path] run_ffmpeg_command(cmd, "Adding Audio to Video") return output_video_path def extract_audio(video_path, audio_format="mp3", progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video first.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_audio_path = os.path.join(TEMP_DIR, f"extracted_audio_{timestamp}.{audio_format}") cmd = ["ffmpeg", "-i", video_path, "-vn"] # -vn strips video if audio_format == "mp3": cmd.extend(["-c:a", "libmp3lame", "-q:a", "2"]) # VBR quality elif audio_format == "aac": cmd.extend(["-c:a", "aac", "-b:a", "192k"]) elif audio_format == "wav": cmd.extend(["-c:a", "pcm_s16le"]) cmd.extend(["-y", output_audio_path]) run_ffmpeg_command(cmd, "Extracting audio...") return output_audio_path def create_gif_from_video(video_path, start_time, end_time, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video first.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_gif_path, palette_path = os.path.join(TEMP_DIR, f"video_to_gif_{timestamp}.gif"), os.path.join(TEMP_DIR, f"palette_{timestamp}.png") duration_filter = [] if start_time > 0 or end_time > 0: if end_time > 0 and end_time <= start_time: raise gr.Error("End time must be after start time.") if start_time > 0: duration_filter.extend(["-ss", str(start_time)]) if end_time > 0: duration_filter.extend(["-to", str(end_time)]) progress(0, desc="Generating Color Palette"); run_ffmpeg_command(["ffmpeg", "-i", video_path] + duration_filter + ["-vf", "fps=15,scale=480:-1:flags=lanczos,palettegen", "-y", palette_path]) progress(0.5, desc="Creating GIF"); run_ffmpeg_command(["ffmpeg", "-i", video_path] + duration_filter + ["-i", palette_path, "-filter_complex", "fps=15,scale=480:-1:flags=lanczos[x];[x][1:v]paletteuse", "-y", output_gif_path]) progress(1, desc="Done"); os.remove(palette_path) return output_gif_path def get_frame_at_time(video_path, time_in_seconds=0): if not video_path: return None try: command = ['ffmpeg', '-ss', str(time_in_seconds), '-i', video_path, '-vframes', '1', '-f', 'image2pipe', '-c:v', 'png', '-'] pipe = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) return Image.open(io.BytesIO(pipe.stdout)).convert("RGB") except Exception as e: print(f"Error extracting frame for crop preview: {e}") cap = cv2.VideoCapture(video_path); cap.set(cv2.CAP_PROP_POS_MSEC, time_in_seconds * 1000) success, frame = cap.read(); cap.release() if success: return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)) return None def crop_video(video_path, x, y, w, h, do_resize, out_w, out_h): if not video_path: raise gr.Error("Please upload a video first.") w, h, x, y = int(w), int(h), int(x), int(y) w -= w % 2; h -= h % 2 if w <= 0 or h <= 0: raise gr.Error("Crop dimensions must be positive.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"cropped_video_{timestamp}.mp4") vf_filters = [f"crop={w}:{h}:{x}:{y}"] if do_resize: if out_w <= 0 or out_h <= 0: raise gr.Error("Resize dimensions must be positive.") out_w, out_h = int(out_w), int(out_h) out_w -= out_w % 2; out_h -= out_h % 2 vf_filters.append(f"scale={out_w}:{out_h}") cmd = ["ffmpeg", "-i", video_path, "-vf", ",".join(vf_filters), "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path] run_ffmpeg_command(cmd, "Cropping video...") return output_video_path def trim_video(video_path, start_time, end_time): if not video_path: raise gr.Error("Please upload a video first.") if start_time < 0: start_time = 0 if end_time <= start_time: end_time = 0 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"trimmed_video_{timestamp}.mp4") cmd = ["ffmpeg", "-i", video_path, "-ss", str(start_time)] if end_time > 0: cmd.extend(["-to", str(end_time)]) cmd.extend(["-c:v", "libx264", "-c:a", "copy", "-pix_fmt", "yuv420p", "-y", output_video_path]) run_ffmpeg_command(cmd, "Trimming Video") return output_video_path def apply_video_watermark(video_path, text, position, opacity, size_scale, color): if not video_path: raise gr.Error("Please upload a video first.") if not text: raise gr.Error("Watermark text cannot be empty.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"watermarked_video_{timestamp}.mp4") _ , video_h = get_video_dimensions(video_path) if video_h == 0: video_h = 720 # Fallback escaped_text = text.replace("'", r"'\''").replace(":", r"\:").replace(",", r"\,") pos_map = {"Top-Left": "x=20:y=20", "Top-Right": "x=w-tw-20:y=20", "Bottom-Left": "x=20:y=h-th-20", "Bottom-Right": "x=w-tw-20:y=h-th-20", "Center": "x=(w-tw)/2:y=(h-th)/2"} font_opacity = opacity / 100.0 font_size = int(video_h / (50 - (size_scale * 3.5))) # Cleaned up filter. The pre-calculation of fontsize is the most stable method. drawtext_filter = ( f"drawtext=" f"text='{escaped_text}':" f"{pos_map[position]}:" f"fontsize={font_size}:" f"fontcolor={color}@{font_opacity}" ) cmd = [ "ffmpeg", "-i", video_path, "-vf", drawtext_filter, "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path ] run_ffmpeg_command(cmd, "Applying text watermark...") return output_video_path def remove_video_background(video_path, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video first.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"bg_rem_job_{timestamp}"); input_frames_dir, output_frames_dir = os.path.join(job_temp_dir, "input_frames"), os.path.join(job_temp_dir, "output_frames") os.makedirs(input_frames_dir, exist_ok=True); os.makedirs(output_frames_dir, exist_ok=True) cap = cv2.VideoCapture(video_path); frame_count, fps = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)), get_video_fps(video_path) for i in progress.tqdm(range(frame_count), desc="Step 1: Extracting Frames"): success, frame = cap.read() if not success: break cv2.imwrite(os.path.join(input_frames_dir, f"frame_{i:05d}.png"), frame) cap.release() for filename in progress.tqdm(sorted(os.listdir(input_frames_dir)), desc="Step 2: Removing Backgrounds"): with Image.open(os.path.join(input_frames_dir, filename)) as img: remove(img).save(os.path.join(output_frames_dir, filename)) output_video_path = os.path.join(TEMP_DIR, f"bg_removed_{timestamp}.webm") progress(0, desc="Step 3: Compiling Video") cmd = ["ffmpeg", "-framerate", str(fps), "-i", os.path.join(output_frames_dir, "frame_%05d.png"), "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p", "-auto-alt-ref", "0", "-b:v", "1M", "-y", output_video_path] run_ffmpeg_command(cmd, "Compiling transparent video...") shutil.rmtree(job_temp_dir) return output_video_path def transcribe_media(media_path, model_name, progress=gr.Progress(track_tqdm=True)): if media_path is None: raise gr.Error("Please upload a video or audio file first.") model = load_whisper_model(model_name) if model is None: raise gr.Error("Whisper model is not available.") # media_path is now a gr.File object, so we use .name audio_path = media_path.name base_name = os.path.splitext(os.path.basename(media_path.name))[0] # Check if the input is a video file to extract audio from if audio_path.lower().endswith(('.mp4', '.mov', '.mkv', '.avi', '.webm')): progress(0, desc="Extracting audio...") audio_path_temp = os.path.join(TEMP_DIR, f"{base_name}.mp3") try: run_ffmpeg_command(["ffmpeg", "-i", audio_path, "-q:a", "0", "-map", "a", "-y", audio_path_temp]) audio_path = audio_path_temp except gr.Error as e: if "does not contain any stream" in str(e): raise gr.Error("The uploaded video has no audio track.") else: raise e progress(0.2, desc=f"Transcribing with Whisper '{model_name}' model...") result = model.transcribe(audio_path, verbose=False) def format_ts(s): h, r = divmod(s, 3600); m, s = divmod(r, 60) return f"{int(h):02}:{int(m):02}:{int(s):02},{int((s-int(s))*1000):03}" srt_path = os.path.join(TEMP_DIR, f"{base_name}.srt") vtt_path = os.path.join(TEMP_DIR, f"{base_name}.vtt") with open(srt_path, "w", encoding="utf-8") as srt_f, open(vtt_path, "w", encoding="utf-8") as vtt_f: vtt_f.write("WEBVTT\n\n") for i, seg in enumerate(result["segments"]): start, end, text = seg['start'], seg['end'], seg['text'].strip() srt_f.write(f"{i + 1}\n{format_ts(start)} --> {format_ts(end)}\n{text}\n\n") vtt_f.write(f"{format_ts(start).replace(',', '.')} --> {format_ts(end).replace(',', '.')}\n{text}\n\n") return result["text"], [srt_path, vtt_path] def transcribe_and_prep_burn(media_file, model_name, progress=gr.Progress(track_tqdm=True)): if not media_file: raise gr.Error("Please upload a file first.") is_video = media_file.name.lower().endswith(('.mp4', '.mov', '.mkv', '.avi', '.webm')) text, files = transcribe_media(media_file, model_name, progress) # Return the original video path and make the burn-in UI visible only if it was a video if is_video: return text, files, media_file.name, gr.update(visible=True) else: return text, files, None, gr.update(visible=False) def burn_subtitles(video_path, srt_file_obj, font_size_scale, font_color, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Original video path not found. Please re-transcribe.") if not srt_file_obj or not srt_file_obj[0].name: raise gr.Error("SRT file not found. Please re-transcribe.") srt_path = srt_file_obj[0].name # srt_file_obj is a list of file objects timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"subtitled_video_{timestamp}.mp4") _, video_h = get_video_dimensions(video_path) if video_h == 0: video_h = 720 # Fallback divisor = 32 - (font_size_scale * 2) calculated_font_size = int(video_h / divisor) color_bgr = font_color[5:7] + font_color[3:5] + font_color[1:3] ffmpeg_color = f"&H00{color_bgr.upper()}" # This filter requires FFMPEG to be compiled with libass. Escaping is crucial for Windows paths. escaped_srt_path = srt_path.replace('\\', '/').replace(':', '\\:') vf_filter = f"subtitles='{escaped_srt_path}':force_style='Fontsize={calculated_font_size},PrimaryColour={ffmpeg_color},BorderStyle=1,Outline=1,Shadow=0.5,MarginV=15'" cmd = ["ffmpeg", "-i", video_path, "-vf", vf_filter, "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path] run_ffmpeg_command(cmd, "Burning subtitles into video...") return output_video_path def remove_background_single(input_path, output_path, **kwargs): with Image.open(input_path) as img: remove(img).save(output_path) def remove_background_batch(files, progress=gr.Progress(track_tqdm=True)): output_paths, zip_path, _ = batch_image_processor(files, remove_background_single, "bg_removed", progress) return output_paths, zip_path def resize_convert_single_image(input_path, output_path, **kwargs): output_format = kwargs.get('output_format', 'JPG') quality = kwargs.get('quality', 95) enable_resize = kwargs.get('enable_resize', False) max_w = kwargs.get('max_w', 1024) max_h = kwargs.get('max_h', 1024) resize_mode = kwargs.get('resize_mode', "Fit (preserve aspect ratio)") with Image.open(input_path) as img: # Handle transparency for formats that don't support it if output_format in ['JPG', 'WEBP'] and img.mode in ['RGBA', 'P', 'LA']: img = img.convert("RGB") if enable_resize: if resize_mode == "Fit (preserve aspect ratio)": img.thumbnail((max_w, max_h), Image.Resampling.LANCZOS) else: # Stretch img = img.resize((max_w, max_h), Image.Resampling.LANCZOS) save_kwargs = {} # Pillow's format name for JPG is 'JPEG' pil_format = 'JPEG' if output_format == 'JPG' else output_format if pil_format in ['JPEG', 'WEBP']: save_kwargs['quality'] = quality img.save(output_path, pil_format, **save_kwargs) def batch_resize_convert_images(files, output_format, quality, enable_resize, max_w, max_h, resize_mode, progress=gr.Progress(track_tqdm=True)): if not files: raise gr.Error("Please upload at least one image.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_name = "resized_converted" job_temp_dir = os.path.join(TEMP_DIR, f"{job_name}_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True) output_paths = [] processing_kwargs = { 'output_format': output_format, 'quality': quality, 'enable_resize': enable_resize, 'max_w': max_w, 'max_h': max_h, 'resize_mode': resize_mode } for file_obj in progress.tqdm(files, desc=f"Processing batch for {job_name}"): try: base, _ = os.path.splitext(os.path.basename(file_obj.name)) output_filename = f"{base}.{output_format.lower()}" output_path = os.path.join(job_temp_dir, output_filename) resize_convert_single_image(file_obj.name, output_path, **processing_kwargs) output_paths.append(output_path) except Exception as e: print(f"Skipping file {file_obj.name} due to error: {e}"); continue if not output_paths: shutil.rmtree(job_temp_dir); raise gr.Error("No images could be processed.") zip_base_name = os.path.join(TEMP_DIR, f"{job_name}_archive_{timestamp}") zip_path = shutil.make_archive(zip_base_name, 'zip', job_temp_dir) return output_paths[:100], zip_path def apply_watermark_single(input_path, output_path, watermark_text, position, opacity): with Image.open(input_path).convert("RGBA") as image: if not watermark_text: raise ValueError("Watermark text cannot be empty.") txt = Image.new("RGBA", image.size, (255, 255, 255, 0)) try: font = ImageFont.truetype("DejaVuSans.ttf", int(image.width / 20)) except IOError: font = ImageFont.load_default() d = ImageDraw.Draw(txt); bbox = d.textbbox((0, 0), watermark_text, font=font); w, h = bbox[2]-bbox[0], bbox[3]-bbox[1] pos_map = {"Top-Left":(10,10), "Top-Right":(image.width-w-10,10), "Bottom-Left":(10,image.height-h-10), "Bottom-Right":(image.width-w-10,image.height-h-10), "Center":((image.width-w)/2,(image.height-h)/2)} d.text(pos_map[position], watermark_text, font=font, fill=(255, 255, 255, int(255 * (opacity / 100)))) Image.alpha_composite(image, txt).convert("RGB").save(output_path) def apply_watermark_batch(files, watermark_text, position, opacity, progress=gr.Progress(track_tqdm=True)): if not watermark_text: raise gr.Error("Please provide watermark text.") processing_func = lambda input_path, output_path: apply_watermark_single( input_path, output_path, watermark_text=watermark_text, position=position, opacity=opacity ) output_paths, zip_path, _ = batch_image_processor(files, processing_func, "watermarked", progress) return output_paths, zip_path def convert_compress_video(video_path, out_format, v_codec, crf_value, scale_option, a_codec, a_bitrate, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video to convert.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_filename = f"converted_{timestamp}.{out_format.lower()}" output_path = os.path.join(TEMP_DIR, output_filename) cmd = ["ffmpeg", "-i", video_path] vf_filters = [] if scale_option != "Original": w, h = get_video_dimensions(video_path) if w > 0 and h > 0: target_h = int(scale_option.replace('p', '')) target_w = round(w * target_h / h / 2) * 2 vf_filters.append(f"scale={target_w}:{target_h}") if vf_filters: cmd.extend(["-vf", ",".join(vf_filters)]) cmd.extend(["-c:v", v_codec]) if v_codec in ["libx264", "libx265"]: cmd.extend(["-crf", str(crf_value)]) cmd.extend(["-pix_fmt", "yuv420p"]) if a_codec == "copy": cmd.extend(["-c:a", "copy"]) else: cmd.extend(["-c:a", a_codec, "-b:a", f"{a_bitrate}k"]) cmd.extend(["-y", output_path]) run_ffmpeg_command(cmd, "Converting and Compressing Video...") return output_path def apply_video_fade(video_path, fade_in_duration, fade_out_duration): if not video_path: raise gr.Error("Please upload a video.") video_duration = get_media_duration(video_path) if fade_in_duration + fade_out_duration > video_duration: raise gr.Error("The sum of fade durations cannot be greater than the video duration.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_video_path = os.path.join(TEMP_DIR, f"faded_video_{timestamp}.mp4") fade_filters = [] if fade_in_duration > 0: fade_filters.append(f"fade=t=in:st=0:d={fade_in_duration}") if fade_out_duration > 0: fade_out_start = video_duration - fade_out_duration; fade_filters.append(f"fade=t=out:st={fade_out_start}:d={fade_out_duration}") if not fade_filters: gr.Info("No fade applied."); return video_path cmd = ["ffmpeg", "-i", video_path, "-vf", ",".join(fade_filters), "-c:a", "copy", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_video_path] run_ffmpeg_command(cmd, "Applying video fade...") return output_video_path def trim_and_fade_audio(audio_path, start_time, end_time, fade_in_duration, fade_out_duration): if not audio_path: raise gr.Error("Please upload an audio file.") audio_duration = get_media_duration(audio_path) if start_time < 0: start_time = 0 if end_time <= 0 or end_time > audio_duration: end_time = audio_duration if start_time >= end_time: raise gr.Error("Start time must be less than end time.") trimmed_duration = end_time - start_time if fade_in_duration + fade_out_duration > trimmed_duration: raise gr.Error("Sum of fade durations cannot be greater than the trimmed audio duration.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") output_audio_path = os.path.join(TEMP_DIR, f"edited_audio_{timestamp}.mp3") af_filters = [] if fade_in_duration > 0: af_filters.append(f"afade=t=in:st=0:d={fade_in_duration}") if fade_out_duration > 0: fade_out_start = trimmed_duration - fade_out_duration; af_filters.append(f"afade=t=out:st={fade_out_start}:d={fade_out_duration}") cmd = ["ffmpeg", "-ss", str(start_time), "-to", str(end_time), "-i", audio_path] if af_filters: cmd.extend(["-af", ",".join(af_filters)]) cmd.extend(["-y", output_audio_path]) run_ffmpeg_command(cmd, "Trimming and fading audio...") return output_audio_path # --- FLUX API --- FLUX_MODELS = {"FLUX.1-schnell (Fast)": "black-forest-labs/FLUX.1-schnell", "FLUX.1-dev (High Quality)": "black-forest-labs/FLUX.1-dev"} def call_flux_api(prompt, model_choice, width, height, hf_token): if not hf_token: raise gr.Error("Hugging Face User Access Token is required.") try: client = Client(FLUX_MODELS[model_choice], hf_token=hf_token) return client.predict(prompt=prompt, seed=0, randomize_seed=True, width=width, height=height, num_inference_steps=8 if "dev" in model_choice else 4, api_name="/infer")[0] except Exception as e: raise gr.Error(f"API call failed: {e}") def get_image_as_base64(path): try: with open(path, "rb") as f: return f"data:image/png;base64,{base64.b64encode(f.read()).decode('utf-8')}" except FileNotFoundError: return None # --- Transfer Tab Functions (Simplified) --- def filter_presets(query, all_presets): """Filters the preset dropdown based on a search query.""" if not query: return gr.update(choices=sorted(list(all_presets.keys()))) filtered_keys = [key for key in all_presets.keys() if query.lower() in key.lower()] return gr.update(choices=sorted(filtered_keys)) def save_preset(presets, name, url): if not name or not name.strip(): gr.Warning("Preset name cannot be empty."); return presets, gr.update() if not url or not url.strip(): gr.Warning("Target URL cannot be empty."); return presets, gr.update() presets[name] = url gr.Info(f"Preset '{name}' saved!") return presets, gr.update(choices=sorted(list(presets.keys()))) def delete_preset(presets, name): if name in presets: del presets[name] gr.Info(f"Preset '{name}' deleted!") return presets, gr.update(choices=sorted(list(presets.keys())), value=None), "" gr.Warning(f"Preset '{name}' not found.") return presets, gr.update(), gr.update() def load_preset(presets, name): return presets.get(name, "") # --- Join/Beat-Sync/Etc Video Feature Functions --- def add_videos_to_join_list(files, current_list, progress=gr.Progress(track_tqdm=True)): if not files: return current_list session_id = f"join_session_{datetime.now().strftime('%Y%m%d_%H%M%S')}" join_session_dir = os.path.join(TEMP_DIR, session_id); os.makedirs(join_session_dir, exist_ok=True) new_list = list(current_list) for file in progress.tqdm(files, desc="Processing New Videos"): fname = os.path.basename(file.name); new_path = os.path.join(join_session_dir, fname) shutil.copy(file.name, new_path) duration = get_media_duration(new_path) if duration > 0: new_list.append({"path": new_path, "name": fname, "duration": duration}) else: gr.Warning(f"Could not process or get duration for video: {fname}. Skipping.") return new_list def update_video_queue_df(video_list): if not video_list: return gr.update(value=None) return gr.update(value=[[i + 1, v['name']] for i, v in enumerate(video_list)]) def handle_video_list_action(video_list, selected_index, action): if selected_index is None or not (0 <= selected_index < len(video_list)): gr.Warning("Please select a video from the list first.") return video_list, None index = int(selected_index) new_list = list(video_list) if action == "up" and index > 0: new_list.insert(index - 1, new_list.pop(index)) elif action == "down" and index < len(new_list) - 1: new_list.insert(index + 1, new_list.pop(index)) elif action == "remove": new_list.pop(index) return new_list, gr.update(value=None) def get_video_start_end_frames_for_preview(video_list, evt: gr.SelectData): """Universal function to extract first and last frames for a gallery preview.""" if not evt.selected: return None, -1, gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False) index = evt.index[0] if not (0 <= index < len(video_list)): return None, -1, gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=False) video_path = video_list[index].get("path") preview_frames = None if video_path: try: frames = extract_first_last_frame(video_path) preview_frames = frames except Exception as e: print(f"Error generating start/end preview for {video_path}: {e}") preview_frames = None can_move_up = index > 0 can_move_down = index < len(video_list) - 1 return preview_frames, index, gr.update(interactive=can_move_up), gr.update(interactive=can_move_down), gr.update(interactive=True) def join_videos_from_list(video_data, audio_path=None, progress=gr.Progress(track_tqdm=True)): if not video_data: raise gr.Error("Please add at least one video to the queue.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"join_{timestamp}") os.makedirs(job_temp_dir, exist_ok=True) video_input_path = "" if len(video_data) > 1: progress(0.1, desc="Joining video streams...") file_list_path = os.path.join(job_temp_dir, "files.txt") with open(file_list_path, 'w', encoding='utf-8') as f: for video_info in video_data: f.write(f"file '{os.path.abspath(video_info['path'])}'\n") concatenated_video_path = os.path.join(job_temp_dir, "concatenated.mp4") run_ffmpeg_command(["ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", "-y", concatenated_video_path], "Joining Videos") video_input_path = concatenated_video_path elif len(video_data) == 1: progress(0.1, desc="Preparing single video...") video_input_path = video_data[0]['path'] if not audio_path: final_output_path = os.path.join(TEMP_DIR, f"joined_video_{timestamp}.mp4") if len(video_data) == 1: shutil.copy(video_input_path, final_output_path) else: shutil.move(video_input_path, final_output_path) if os.path.exists(job_temp_dir): shutil.rmtree(job_temp_dir) return final_output_path else: progress(0.7, desc="Adding audio track...") final_output_path = os.path.join(TEMP_DIR, f"joined_video_with_audio_{timestamp}.mp4") cmd = [ "ffmpeg", "-i", video_input_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-map", "0:v:0", "-map", "1:a:0", "-shortest", "-y", final_output_path ] run_ffmpeg_command(cmd, "Adding Audio to Joined Video") if os.path.exists(job_temp_dir): shutil.rmtree(job_temp_dir) return final_output_path def ping_pong_video(video_path, audio_option, progress=gr.Progress(track_tqdm=True)): if not video_path: raise gr.Error("Please upload a video.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") job_temp_dir = os.path.join(TEMP_DIR, f"pingpong_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True) progress(0.2, desc="Reversing video...") reversed_video_path = os.path.join(job_temp_dir, "reversed_temp.mp4") cmd_reverse = ["ffmpeg", "-i", video_path, "-vf", "reverse"] if audio_option == "Reverse Audio": cmd_reverse.extend(["-af", "areverse"]) else: cmd_reverse.append("-an") cmd_reverse.extend(["-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", reversed_video_path]) run_ffmpeg_command(cmd_reverse) progress(0.6, desc="Joining videos...") file_list_path = os.path.join(job_temp_dir, "files.txt") with open(file_list_path, 'w', encoding='utf-8') as f: f.write(f"file '{os.path.abspath(video_path)}'\n") f.write(f"file '{os.path.abspath(reversed_video_path)}'\n") output_video_path = os.path.join(TEMP_DIR, f"pingpong_video_{timestamp}.mp4") cmd_join = ["ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", "-y", output_video_path] if audio_option == "Original Audio Only": cmd_join = ["ffmpeg", "-i", video_path, "-i", reversed_video_path, "-filter_complex", "[0:v][1:v]concat=n=2:v=1[v]", "-map", "[v]", "-map", "0:a?", "-c:a", "copy", "-y", output_video_path] run_ffmpeg_command(cmd_join) shutil.rmtree(job_temp_dir) return output_video_path def create_beat_sync_video(video_data, audio_path, rhythm_source, beat_sensitivity, cuts_per_measure, min_clip_duration, loop_videos, slicing_method, max_slowdown_clip_duration, progress=gr.Progress(track_tqdm=True)): if not video_data: raise gr.Error("Please upload at least one video.") if not audio_path: raise gr.Error("Please upload a music track.") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S"); job_temp_dir = os.path.join(TEMP_DIR, f"beatsync_{timestamp}"); os.makedirs(job_temp_dir, exist_ok=True) progress(0, desc="Step 1: Analyzing audio..."); beat_times = [] try: y, sr = librosa.load(audio_path) if rhythm_source == "Detect Beats (dynamic)": _, beat_frames = librosa.beat.beat_track(y=y, sr=sr, tightness=beat_sensitivity); beat_times = librosa.frames_to_time(beat_frames, sr=sr) elif rhythm_source == "Generate Rhythmic Grid (BPM-based)": tempo, _ = librosa.beat.beat_track(y=y, sr=sr); audio_duration = librosa.get_duration(y=y, sr=sr) if tempo == 0: raise ValueError("Could not determine BPM.") cut_interval = (60.0 / tempo) * 4 / cuts_per_measure beat_times = list(np.arange(0, audio_duration, cut_interval)) except Exception as e: raise gr.Error(f"Failed to analyze audio: {e}") if len(beat_times) < 2: raise gr.Error("Could not determine enough rhythm points.") progress(0.2, desc="Step 2: Refining intervals..."); intervals = [] if beat_times[0] > min_clip_duration: intervals.append((0.0, beat_times[0])) for i in range(len(beat_times) - 1): start_beat, end_beat = beat_times[i], beat_times[i+1]; duration = end_beat - start_beat if duration >= min_clip_duration: intervals.append((start_beat, end_beat)) if not intervals: raise gr.Error("No beat intervals found meeting minimum duration.") progress(0.3, desc="Step 3: Slicing video clips..."); clip_paths = []; video_idx = 0; current_video_time = 0.0 for i, (start_beat, end_beat) in enumerate(progress.tqdm(intervals, desc="Slicing video clips")): target_clip_duration = end_beat - start_beat; found_clip = False for _ in range(len(video_data)): video_info = video_data[video_idx]; input_video_path = video_info['path']; output_clip_path = os.path.join(job_temp_dir, f"clip_{i:05d}.mp4") if slicing_method == "Cut to Fit": if (video_info['duration'] - current_video_time) >= target_clip_duration: run_ffmpeg_command(["ffmpeg", "-ss", str(current_video_time), "-i", input_video_path, "-t", str(target_clip_duration), "-c", "copy", "-an", "-y", output_clip_path]) clip_paths.append(output_clip_path); current_video_time += target_clip_duration; found_clip = True; break elif slicing_method == "Slowdown to Fit": original_clip_duration = min(target_clip_duration, max_slowdown_clip_duration) if (video_info['duration'] - current_video_time) >= original_clip_duration: speed_multiplier = original_clip_duration / target_clip_duration run_ffmpeg_command(["ffmpeg", "-ss", str(current_video_time), "-i", input_video_path, "-t", str(original_clip_duration), "-vf", f"setpts={1/speed_multiplier:.4f}*PTS", "-an", "-c:v", "libx264", "-pix_fmt", "yuv420p", "-y", output_clip_path]) clip_paths.append(output_clip_path); current_video_time += original_clip_duration; found_clip = True; break video_idx = (video_idx + 1) % len(video_data); current_video_time = 0.0 if loop_videos == "End when videos run out" and video_idx == 0: break if not found_clip: gr.Warning("Ran out of video footage."); break if not clip_paths: raise gr.Error("Failed to create any video clips.") progress(0.7, desc="Step 4: Joining clips..."); file_list_path = os.path.join(job_temp_dir, "files.txt") with open(file_list_path, 'w', encoding='utf-8') as f: for path in clip_paths: f.write(f"file '{os.path.abspath(path)}'\n") silent_video_path = os.path.join(job_temp_dir, "silent_final.mp4") run_ffmpeg_command(["ffmpeg", "-f", "concat", "-safe", "0", "-i", file_list_path, "-c", "copy", "-y", silent_video_path]) progress(0.9, desc="Step 5: Adding music..."); output_video_path = os.path.join(TEMP_DIR, f"beatsynced_video_{timestamp}.mp4") run_ffmpeg_command(["ffmpeg", "-i", silent_video_path, "-i", audio_path, "-c:v", "copy", "-c:a", "aac", "-shortest", "-y", output_video_path]) shutil.rmtree(job_temp_dir) return output_video_path # --- CSS and JS --- footer_css = """ #custom-footer { text-align: center !important; padding: 20px 0 5px 0 !important; font-size: .9em; color: #a0aec0; } """ jkl_video_control_js = """()=>{document.addEventListener("keydown",e=>{const t=document.activeElement;if(t&&("INPUT"===t.tagName||"TEXTAREA"===t.tagName))return;const n=document.querySelector("#video-trim-input video");if(!n)return;const o=document.querySelector("#video-trim-fps input"),a=o?parseFloat(o.value):24,i=1/a;let r=!1;switch(e.key.toLowerCase()){case"k":n.paused?n.play():n.pause(),r=!0;break;case"j":n.currentTime=Math.max(0,n.currentTime-i),r=!0;break;case"l":n.currentTime+=i,r=!0}r&&e.preventDefault()})}""" with gr.Blocks( theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), title="Skriptz - Universal Tool", css=footer_css, js=jkl_video_control_js ) as demo: logo_b64 = get_image_as_base64("logo.png") if logo_b64: gr.HTML(f"""
Skriptz Banner
""") else: gr.Markdown("# Skriptz Universal Tool") gr.Markdown("

Your one-stop shop for video and image processing

") with gr.Tabs(): with gr.TabItem("Image Utilities"): gr.Markdown("## Tools for processing and generating single images.") with gr.Tabs(): with gr.TabItem("Manipulate Image"): gr.Markdown("### Simple Image Manipulation") gr.Info("Apply a single transformation like inverting colors, flipping, or rotating.") with gr.Row(): with gr.Column(): manip_input_image = gr.Image(type="numpy", label="Input Image") manip_operation_radio = gr.Radio( ["Invert Colors", "Flip Horizontal", "Flip Vertical", "Rotate 90° Right", "Rotate 90° Left"], label="Select Operation", value="Invert Colors" ) manip_apply_btn = gr.Button("✨ Apply Manipulation", variant="primary") with gr.Column(): manip_output_image = gr.Image(label="Output Image", interactive=True) manip_apply_btn.click(fn=manipulate_image, inputs=[manip_input_image, manip_operation_radio], outputs=manip_output_image) with gr.TabItem("Image to Looping Video"): gr.Markdown("### Create a short, looping video from a single static image.") with gr.Row(): with gr.Column(): input_image_i2v = gr.Image(type="numpy", label="Input Image") duration_slider_i2v = gr.Slider(1, 30, 5, step=0.1, label="Duration (s)") input_audio_i2v = gr.Audio(label="Add Music (Optional)", type="filepath") compile_i2v_btn = gr.Button("📹 Create Looping Video", variant="primary") with gr.Column(): output_video_i2v = gr.Video(label="Output Looping Video", interactive=True, show_download_button=True) compile_i2v_btn.click(image_to_looping_video, [input_image_i2v, duration_slider_i2v, input_audio_i2v], output_video_i2v) with gr.TabItem("Image to Zoom Video"): gr.Markdown("### Create a 'Ken Burns' style zoom/pan video from an image.") gr.Info("Upload one or more images. The output will be a gallery of videos, or a single combined video if you check the box.") with gr.Row(): with gr.Column(): i2zv_input_images = gr.File(label="Upload Images", file_count="multiple", file_types=["image"]) i2zv_duration = gr.Slider(1, 30, 5, step=0.5, label="Video Duration (s) per Image") i2zv_zoom_ratio = gr.Slider(1.0, 2.0, 1.25, step=0.05, label="Zoom Ratio") i2zv_zoom_dir = gr.Dropdown( ["Center", "Top", "Bottom", "Left", "Right", "Top-Left", "Top-Right", "Bottom-Left", "Bottom-Right"], value="Center", label="Zoom Direction" ) i2zv_combine = gr.Checkbox(label="Combine all videos into one", value=False) i2zv_audio = gr.Audio(label="Add Music (Optional)", type="filepath") i2zv_btn = gr.Button("🔎 Create Zoom Video(s)", variant="primary") with gr.Column(): i2zv_output_gallery = gr.Gallery(label="Output Video Previews", columns=2, object_fit="contain", visible=True) i2zv_output_video = gr.Video(label="Combined Output Video", interactive=True, show_download_button=True, visible=False) i2zv_output_zip = gr.File(label="Download All as .zip", interactive=False) i2zv_combine.change( fn=lambda x: [gr.update(visible=not x), gr.update(visible=x)], inputs=i2zv_combine, outputs=[i2zv_output_gallery, i2zv_output_video] ) i2zv_btn.click( fn=create_zoom_videos, inputs=[i2zv_input_images, i2zv_duration, i2zv_zoom_ratio, i2zv_zoom_dir, i2zv_combine, i2zv_audio], outputs=[i2zv_output_gallery, i2zv_output_video, i2zv_output_zip] ) with gr.TabItem("Batch Background Remover"): gr.Markdown("### Remove the background from a batch of images.") with gr.Row(): with gr.Column(): input_images_bg = gr.File(label="Upload Images", file_count="multiple", file_types=["image"]) remove_bg_btn = gr.Button("✂️ Remove Backgrounds", variant="primary") with gr.Column(): output_gallery_bg = gr.Gallery(label="Images with Transparent Background", show_label=True, columns=4, object_fit="contain", height="auto") output_zip_bg = gr.File(label="Download All as .zip", interactive=False) remove_bg_btn.click(remove_background_batch, input_images_bg, [output_gallery_bg, output_zip_bg]) with gr.TabItem("Batch Watermarker"): gr.Markdown("### Apply a text watermark to a batch of images.") with gr.Row(): with gr.Column(): input_images_wm = gr.File(label="Upload Images", file_count="multiple", file_types=["image"]) watermark_text = gr.Textbox(label="Watermark Text", placeholder="(c) My Awesome Project") watermark_pos = gr.Radio(["Top-Left", "Top-Right", "Bottom-Left", "Bottom-Right", "Center"], value="Bottom-Right", label="Position") watermark_opacity = gr.Slider(0, 100, 50, step=1, label="Opacity (%)") watermark_btn = gr.Button("🖋️ Apply Watermarks", variant="primary") with gr.Column(): output_gallery_wm = gr.Gallery(label="Watermarked Images", show_label=True, columns=4, object_fit="contain", height="auto") output_zip_wm = gr.File(label="Download All as .zip", interactive=False) watermark_btn.click(apply_watermark_batch, [input_images_wm, watermark_text, watermark_pos, watermark_opacity], [output_gallery_wm, output_zip_wm]) with gr.TabItem("Batch Resizer & Converter"): gr.Markdown("### Convert, resize, and compress a batch of images.") with gr.Row(): with gr.Column(): brc_input_images = gr.File(label="Upload Images", file_count="multiple", file_types=["image"]) with gr.Accordion("⚙️ Output Settings", open=True): brc_format = gr.Dropdown(["JPG", "PNG", "WEBP"], value="JPG", label="Output Format") brc_quality = gr.Slider(1, 100, 90, step=1, label="JPG/WEBP Quality", interactive=True) brc_enable_resize = gr.Checkbox(label="Enable Resizing", value=False) with gr.Row(): brc_max_w = gr.Number(label="Max Width", value=1920, interactive=False) brc_max_h = gr.Number(label="Max Height", value=1080, interactive=False) brc_resize_mode = gr.Radio(["Fit (preserve aspect ratio)", "Stretch to Fit"], value="Fit (preserve aspect ratio)", label="Resize Mode", interactive=False) brc_btn = gr.Button("✨ Process Images", variant="primary") with gr.Column(): brc_output_gallery = gr.Gallery(label="Processed Images Preview", show_label=True, columns=4, object_fit="contain", height="auto") brc_output_zip = gr.File(label="Download All as .zip", interactive=False) brc_format.change(lambda f: gr.update(visible=f in ["JPG", "WEBP"]), brc_format, brc_quality) brc_enable_resize.change(lambda x: [gr.update(interactive=x), gr.update(interactive=x), gr.update(interactive=x)], brc_enable_resize, [brc_max_w, brc_max_h, brc_resize_mode]) brc_btn.click( batch_resize_convert_images, [brc_input_images, brc_format, brc_quality, brc_enable_resize, brc_max_w, brc_max_h, brc_resize_mode], [brc_output_gallery, brc_output_zip] ) with gr.TabItem("FLUX.1 API Caller (Experimental)"): gr.Markdown("### Generate an image using `FLUX.1` models via Gradio Client.") gr.Info("Requires a Hugging Face User Access Token.") with gr.Row(): with gr.Column(): hf_token_input = gr.Textbox(label="HF Token", type="password", placeholder="Enter hf_... token") flux_model_dropdown = gr.Dropdown(list(FLUX_MODELS.keys()), value="FLUX.1-schnell (Fast)", label="Select FLUX Model") prompt_input_flux = gr.Textbox(label="Prompt", lines=3, placeholder="A cinematic photo...") with gr.Row(): flux_width_slider = gr.Slider(256, 2048, 1024, step=64, label="Width") flux_height_slider = gr.Slider(256, 2048, 1024, step=64, label="Height") flux_btn = gr.Button("🚀 Generate Image", variant="primary") with gr.Column(): output_image_flux = gr.Image(label="Generated Image", interactive=True) flux_btn.click(call_flux_api, [prompt_input_flux, flux_model_dropdown, flux_width_slider, flux_height_slider, hf_token_input], output_image_flux) with gr.TabItem("Video Utilities"): # This section remains unchanged gr.Markdown("## A collection of useful video tools.") with gr.Tabs(): with gr.TabItem("Frame Tools"): with gr.Tabs(): with gr.TabItem("Extract First & Last"): gr.Markdown("### Extract the very first and very last frames of a video.") with gr.Row(): with gr.Column(): input_video_fl = gr.Video(label="Input Video") extract_fl_btn = gr.Button("🎬 Extract Frames", variant="primary") with gr.Column(): output_gallery_fl = gr.Gallery(label="Output Frames (First, Last)", show_label=True, columns=2, object_fit="contain", height="auto") extract_fl_btn.click(fn=extract_first_last_frame, inputs=input_video_fl, outputs=output_gallery_fl) with gr.TabItem("Extract All Frames"): gr.Markdown("### Extract all individual frames from a video file.") with gr.Row(): with gr.Column(): input_video_v2f = gr.Video(label="Input Video") v2f_fps_display = gr.Textbox(label="Detected FPS", interactive=False, value="N/A") with gr.Accordion("⚙️ Advanced Options", open=False): v2f_skip_rate = gr.Slider(1, 30, 1, step=1, label="Extract Every Nth Frame") v2f_rotation = gr.Dropdown(["None", "90 Degrees Clockwise", "90 Degrees Counter-Clockwise", "180 Degrees"], value="None", label="Rotation") v2f_format = gr.Radio(["PNG", "JPG"], value="PNG", label="Output Format") v2f_jpg_quality = gr.Slider(1, 100, 95, step=1, label="JPG Quality", interactive=False) v2f_resize = gr.Checkbox(label="Resize all extracted frames", value=False) with gr.Row(): v2f_width = gr.Number(label="Output Width", value=1024, interactive=False) v2f_height = gr.Number(label="Output Height", value=576, interactive=False) extract_v2f_btn = gr.Button("🎞️ Extract All Frames", variant="primary") with gr.Column(): output_gallery_v2f = gr.Gallery(label="Extracted Frames Preview (max 100 shown)", show_label=True, columns=8, object_fit="contain", height="auto") output_zip_v2f = gr.File(label="Download All Frames (.zip)", interactive=False) input_video_v2f.upload(lambda v: f"{get_video_fps(v):.2f} FPS", input_video_v2f, v2f_fps_display) v2f_resize.change(lambda x: [gr.update(interactive=x), gr.update(interactive=x)], v2f_resize, [v2f_width, v2f_height]) v2f_format.change(lambda x: gr.update(interactive=(x=="JPG")), v2f_format, v2f_jpg_quality) extract_v2f_btn.click(video_to_frames_extractor, [input_video_v2f, v2f_skip_rate, v2f_rotation, v2f_resize, v2f_width, v2f_height, v2f_format, v2f_jpg_quality], [output_gallery_v2f, output_zip_v2f]) with gr.TabItem("Frames to Video"): gr.Markdown("### Compile a sequence of image frames into a video file.") with gr.Row(): with gr.Column(): input_frames_f2v = gr.File(label="Upload Frames", file_count="multiple", file_types=["image"]) fps_slider_f2v = gr.Slider(1, 60, 24, step=1, label="FPS") with gr.Accordion("⚙️ Advanced Options", open=False): f2v_rotation = gr.Dropdown(["None", "90 Degrees Clockwise", "90 Degrees Counter-Clockwise", "180 Degrees"], value="None", label="Rotation") f2v_resize = gr.Checkbox(label="Resize all frames", value=False) with gr.Row(): f2v_width = gr.Number(label="Output Width", value=1024, interactive=False) f2v_height = gr.Number(label="Output Height", value=576, interactive=False) compile_f2v_btn = gr.Button("📽️ Create Video", variant="primary") with gr.Column(): output_video_f2v = gr.Video(label="Compiled Video", interactive=True, show_download_button=True) f2v_resize.change(lambda x: [gr.update(interactive=x), gr.update(interactive=x)], f2v_resize, [f2v_width, f2v_height]) compile_f2v_btn.click(create_video_from_frames, [input_frames_f2v, fps_slider_f2v, f2v_rotation, f2v_resize, f2v_width, f2v_height], output_video_f2v) with gr.TabItem("Join Videos"): gr.Markdown("### Concatenate multiple video files into one.") gr.Info("Add one or more videos to the queue. You can optionally add a new audio track, which will replace any existing audio.") join_video_list_state = gr.State([]) with gr.Row(): with gr.Column(scale=2): gr.Markdown("#### Video Queue") join_video_df = gr.DataFrame(headers=["Order", "Filename"], datatype=["number", "str"], interactive=False) with gr.Row(): join_up_btn = gr.Button("⬆️ Move Up", interactive=False) join_down_btn = gr.Button("⬇️ Move Down", interactive=False) join_remove_btn = gr.Button("🗑️ Remove Selected", interactive=False) join_selected_index_state = gr.State(-1) with gr.Column(scale=1): gr.Markdown("#### Controls & Preview") input_videos_join = gr.File(label="Upload Videos to Add", file_count="multiple", file_types=["video"]) join_preview_gallery = gr.Gallery(label="Selection Preview (First & Last Frame)", columns=2, height=150, object_fit="contain", interactive=False) input_audio_join = gr.Audio(label="Add Audio Track (Optional)", type="filepath") join_btn = gr.Button("🤝 Join Videos", variant="primary") clear_join_btn = gr.Button("Clear List") output_video_join = gr.Video(label="Joined Video", interactive=True, show_download_button=True) input_videos_join.upload(add_videos_to_join_list, [input_videos_join, join_video_list_state], join_video_list_state) join_video_list_state.change(update_video_queue_df, join_video_list_state, join_video_df) join_video_df.select(get_video_start_end_frames_for_preview, [join_video_list_state], [join_preview_gallery, join_selected_index_state, join_up_btn, join_down_btn, join_remove_btn]) join_up_btn.click(handle_video_list_action, [join_video_list_state, join_selected_index_state, gr.State("up")], [join_video_list_state, join_preview_gallery]) join_down_btn.click(handle_video_list_action, [join_video_list_state, join_selected_index_state, gr.State("down")], [join_video_list_state, join_preview_gallery]) join_remove_btn.click(handle_video_list_action, [join_video_list_state, join_selected_index_state, gr.State("remove")], [join_video_list_state, join_preview_gallery]) clear_join_btn.click(lambda: ([], None, None, None), outputs=[join_video_list_state, join_video_df, join_preview_gallery, input_audio_join]) join_btn.click(join_videos_from_list, [join_video_list_state, input_audio_join], output_video_join) with gr.TabItem("Editing & Effects"): with gr.Tabs(): with gr.TabItem("Manipulate Video"): gr.Markdown("### Simple Video Manipulation") gr.Info("Apply a single transformation like inverting colors, flipping, or rotating to every frame of a video.") with gr.Row(): with gr.Column(): vmanip_input_video = gr.Video(label="Input Video") vmanip_operation_radio = gr.Radio( ["Invert Colors", "Flip Horizontal", "Flip Vertical", "Rotate 90° Right", "Rotate 90° Left"], label="Select Operation", value="Invert Colors" ) vmanip_apply_btn = gr.Button("✨ Apply Manipulation", variant="primary") with gr.Column(): vmanip_output_video = gr.Video(label="Output Video", interactive=True) vmanip_apply_btn.click(fn=manipulate_video, inputs=[vmanip_input_video, vmanip_operation_radio], outputs=vmanip_output_video) with gr.TabItem("Beat Sync Editor"): gr.Markdown("### Automatically edit video clips to the beat of a song.") gr.Info("Add videos, select to see a preview. Choose rhythm and slicing strategy.") beatsync_video_list_state = gr.State([]) with gr.Row(): with gr.Column(scale=2): gr.Markdown("#### Video Source Queue (in order)") bs_video_df = gr.DataFrame(headers=["Order", "Filename"], datatype=["number", "str"], interactive=False) with gr.Row(): bs_up_btn = gr.Button("⬆️ Move Up", interactive=False) bs_down_btn = gr.Button("⬇️ Move Down", interactive=False) bs_remove_btn = gr.Button("🗑️ Remove Selected", interactive=False) bs_selected_index_state = gr.State(-1) with gr.Column(scale=1): gr.Markdown("#### Controls, Settings & Preview") input_videos_bs = gr.File(label="Upload Videos to Add", file_count="multiple", file_types=["video"]) bs_preview_gallery = gr.Gallery(label="Selection Preview (First & Last Frame)", columns=2, height=150, object_fit="contain", interactive=False) input_audio_bs = gr.Audio(label="Upload Music Track", type="filepath") with gr.Accordion("⚙️ Advanced Sync & Slicing Settings", open=True): gr.Markdown("##### Step 1: Choose Rhythm Source") rhythm_source_bs = gr.Radio(["Detect Beats (dynamic)", "Generate Rhythmic Grid (BPM-based)"], value="Detect Beats (dynamic)", label="Rhythm Source") with gr.Group(visible=True) as beat_detect_group: beat_sensitivity_bs = gr.Slider(50, 200, 100, step=10, label="Beat Detection Sensitivity") with gr.Group(visible=False) as rhythmic_grid_group: cuts_per_measure_bs = gr.Dropdown([("1 (Whole Note)", 1), ("2 (Half Notes)", 2), ("3 (Triplets)", 3), ("4 (Quarter Notes/Beat)", 4), ("8 (Eighth Notes)", 8)], value=4, label="Cuts Per Measure") gr.Markdown("##### Step 2: Choose Slicing Strategy") slicing_method_bs = gr.Radio(["Cut to Fit", "Slowdown to Fit"], value="Cut to Fit", label="Clip Slicing Method") max_slowdown_clip_duration_bs = gr.Slider(0.2, 5.0, 1.5, step=0.1, label="Max Original Clip Duration for Slowdown (s)", visible=False) gr.Markdown("##### Step 3: General Options") min_clip_duration_bs = gr.Slider(0.1, 2.0, 0.4, step=0.05, label="Minimum Beat Interval (s)") loop_videos_bs = gr.Radio(["Loop videos", "End when videos run out"], value="Loop videos", label="If Music is Longer") with gr.Row(): bs_generate_btn = gr.Button("🎶 Generate Beat-Synced Video", variant="primary", scale=2) bs_clear_btn = gr.Button("Clear List") output_video_bs = gr.Video(label="Beat-Synced Video", interactive=True, show_download_button=True) input_videos_bs.upload(add_videos_to_join_list, [input_videos_bs, beatsync_video_list_state], beatsync_video_list_state) beatsync_video_list_state.change(update_video_queue_df, beatsync_video_list_state, bs_video_df) bs_video_df.select(get_video_start_end_frames_for_preview, [beatsync_video_list_state], [bs_preview_gallery, bs_selected_index_state, bs_up_btn, bs_down_btn, bs_remove_btn]) bs_up_btn.click(handle_video_list_action, [beatsync_video_list_state, bs_selected_index_state, gr.State("up")], [beatsync_video_list_state, bs_preview_gallery]) bs_down_btn.click(handle_video_list_action, [beatsync_video_list_state, bs_selected_index_state, gr.State("down")], [beatsync_video_list_state, bs_preview_gallery]) bs_remove_btn.click(handle_video_list_action, [beatsync_video_list_state, bs_selected_index_state, gr.State("remove")], [beatsync_video_list_state, bs_preview_gallery]) def toggle_rhythm_ui(c): return gr.update(visible=(c=="Detect Beats (dynamic)")), gr.update(visible=(c!="Detect Beats (dynamic)")) rhythm_source_bs.change(toggle_rhythm_ui, rhythm_source_bs, [beat_detect_group, rhythmic_grid_group]) slicing_method_bs.change(lambda c: gr.update(visible=(c=="Slowdown to Fit")), slicing_method_bs, max_slowdown_clip_duration_bs) bs_clear_btn.click(lambda: ([], None, None), outputs=[beatsync_video_list_state, bs_video_df, bs_preview_gallery]) bs_generate_btn.click(fn=create_beat_sync_video, inputs=[beatsync_video_list_state, input_audio_bs, rhythm_source_bs, beat_sensitivity_bs, cuts_per_measure_bs, min_clip_duration_bs, loop_videos_bs, slicing_method_bs, max_slowdown_clip_duration_bs], outputs=output_video_bs) with gr.TabItem("Ping-Pong Effect"): gr.Markdown("### Create a forward-then-reverse video loop (Boomerang).") with gr.Row(): with gr.Column(): input_video_pingpong = gr.Video(label="Input Video") audio_option_pingpong = gr.Radio(["Remove Audio", "Original Audio Only", "Reverse Audio"], value="Remove Audio", label="Audio Handling") pingpong_btn = gr.Button("🏓 Create Ping-Pong Video", variant="primary") with gr.Column(): output_video_pingpong = gr.Video(label="Ping-Pong Video", interactive=True, show_download_button=True) pingpong_btn.click(fn=ping_pong_video, inputs=[input_video_pingpong, audio_option_pingpong], outputs=output_video_pingpong) with gr.TabItem("Reverse Video"): gr.Markdown("### Reverse a video clip.") with gr.Row(): with gr.Column(): input_video_reverse = gr.Video(label="Input Video") audio_option_reverse = gr.Radio(["Remove Audio", "Reverse Audio"], value="Remove Audio", label="Audio Handling") reverse_btn = gr.Button("🔄 Reverse Video", variant="primary") with gr.Column(): output_video_reverse = gr.Video(label="Reversed Video", interactive=True, show_download_button=True) reverse_btn.click(fn=reverse_video, inputs=[input_video_reverse, audio_option_reverse], outputs=output_video_reverse) with gr.TabItem("Visual Trimmer"): gr.Markdown("### Visually trim a video. Use the player to find a frame, then set it as the start or end point.") gr.Info("Keyboard hotkeys enabled: K = Play/Pause, J = Back 1 Frame, L = Forward 1 Frame") with gr.Row(): with gr.Column(scale=2): input_video_trim = gr.Video(label="Input Video", elem_id="video-trim-input") with gr.Row(): set_start_btn = gr.Button("Set Current Frame as START") set_end_btn = gr.Button("Set Current Frame as END") trim_btn = gr.Button("✂️ Trim Video", variant="primary") with gr.Column(scale=1): gr.Markdown("#### Trim Points") start_frame_img = gr.Image(label="Start Frame", interactive=False) trim_start_time_display = gr.Textbox(label="Start Time (s)", interactive=False) end_frame_img = gr.Image(label="End Frame", interactive=False) trim_end_time_display = gr.Textbox(label="End Time (s)", interactive=False) trim_start_time = gr.Number(value=0, visible=False) trim_end_time = gr.Number(value=0, visible=False) trim_video_fps = gr.Number(value=24.0, visible=False, elem_id="video-trim-fps") with gr.Row(): output_video_trim = gr.Video(label="Trimmed Video", interactive=True, show_download_button=True) get_current_time_js = """()=>{const e=document.querySelector("#video-trim-input video");return e?e.currentTime:0}""" def get_frame_from_time_wrapper(v,t): return get_frame_at_time(v,t), f"{t:.3f}" input_video_trim.upload(fn=get_video_fps, inputs=input_video_trim, outputs=trim_video_fps) set_start_btn.click(fn=None, js=get_current_time_js, outputs=[trim_start_time]) set_end_btn.click(fn=None, js=get_current_time_js, outputs=[trim_end_time]) trim_start_time.change(fn=get_frame_from_time_wrapper, inputs=[input_video_trim, trim_start_time], outputs=[start_frame_img, trim_start_time_display]) trim_end_time.change(fn=get_frame_from_time_wrapper, inputs=[input_video_trim, trim_end_time], outputs=[end_frame_img, trim_end_time_display]) trim_btn.click(fn=trim_video, inputs=[input_video_trim, trim_start_time, trim_end_time], outputs=output_video_trim) input_video_trim.clear(fn=lambda: (None, "0.00", None, "0.00", 0, 0, 24.0), outputs=[start_frame_img, trim_start_time_display, end_frame_img, trim_end_time_display, trim_start_time, trim_end_time, trim_video_fps]) with gr.TabItem("Crop & Resize"): gr.Markdown("### Visually crop a video.") with gr.Row(): with gr.Column(scale=2): crop_input_video = gr.Video(label="Input Video") crop_preview_image = gr.Image(label="Frame Preview", interactive=False) with gr.Column(scale=1): gr.Markdown("#### Crop Settings") with gr.Row(): crop_w = gr.Number(label="Width", value=1280) crop_h = gr.Number(label="Height", value=720) with gr.Row(): crop_x = gr.Number(label="Offset X", value=0) crop_y = gr.Number(label="Offset Y", value=0) gr.Markdown("#### Options") crop_btn = gr.Button("✂️ Crop Video", variant="primary") with gr.Accordion("Optional: Resize after cropping", open=False): crop_do_resize = gr.Checkbox(label="Enable Resizing", value=False) crop_resize_w = gr.Number(label="Output Width", value=1024, interactive=False) crop_resize_h = gr.Number(label="Output Height", value=576, interactive=False) output_video_crop = gr.Video(label="Cropped Video", interactive=True, show_download_button=True) crop_input_video.upload(fn=get_frame_at_time, inputs=crop_input_video, outputs=crop_preview_image) crop_do_resize.change(lambda x: [gr.update(interactive=x), gr.update(interactive=x)], inputs=crop_do_resize, outputs=[crop_resize_w, crop_resize_h]) crop_btn.click(fn=crop_video, inputs=[crop_input_video, crop_x, crop_y, crop_w, crop_h, crop_do_resize, crop_resize_w, crop_resize_h], outputs=output_video_crop) with gr.TabItem("Change Speed"): gr.Markdown("### Create slow-motion or fast-forward videos.") with gr.Row(): with gr.Column(): input_video_speed = gr.Video(label="Input Video") speed_multiplier = gr.Slider(0.1, 10.0, 1.0, step=0.1, label="Speed Multiplier") speed_btn = gr.Button("🏃 Change Speed", variant="primary") with gr.Column(): output_video_speed = gr.Video(label="Modified Video", interactive=True, show_download_button=True) speed_btn.click(fn=change_video_speed, inputs=[input_video_speed, speed_multiplier], outputs=output_video_speed) with gr.TabItem("Effects & Overlays"): with gr.Tabs(): with gr.TabItem("Video Fader"): gr.Markdown("### Apply Fade-In and/or Fade-Out to a Video") with gr.Row(): with gr.Column(): fade_input_video = gr.Video(label="Input Video") with gr.Row(): fade_in_slider = gr.Slider(0.0, 10.0, 1.0, step=0.1, label="Fade-In Duration (s)") fade_out_slider = gr.Slider(0.0, 10.0, 1.0, step=0.1, label="Fade-Out Duration (s)") fade_video_btn = gr.Button("✨ Apply Fade", variant="primary") with gr.Column(): fade_output_video = gr.Video(label="Faded Video", interactive=True) fade_video_btn.click(apply_video_fade, [fade_input_video, fade_in_slider, fade_out_slider], fade_output_video) with gr.TabItem("Background Remover"): gr.Markdown("## Video Background Remover") gr.Warning("This is a very slow process. A short video can take several minutes. Output is a .webm file.") with gr.Row(): with gr.Column(): vbg_input_video = gr.Video(label="Input Video") vbg_btn = gr.Button("✂️ Remove Video Background", variant="primary") with gr.Column(): vbg_output_video = gr.Video(label="Output Video with Transparency (.webm)", interactive=True) vbg_btn.click(remove_video_background, vbg_input_video, vbg_output_video) with gr.TabItem("Text Watermark"): gr.Markdown("### Apply a text watermark to a video.") with gr.Row(): with gr.Column(): wm_input_video = gr.Video(label="Input Video") wm_text = gr.Textbox(label="Watermark Text", placeholder="(c) My Video 2025") wm_pos = gr.Radio(["Top-Left", "Top-Right", "Bottom-Left", "Bottom-Right", "Center"], value="Bottom-Right", label="Position") wm_opacity = gr.Slider(0, 100, 70, step=1, label="Opacity (%)") with gr.Accordion("Advanced Options", open=False): wm_size = gr.Slider(1, 10, 5, step=1, label="Relative Font Size") wm_color = gr.ColorPicker(value="#FFFFFF", label="Font Color") wm_btn = gr.Button("🖋️ Apply Watermark", variant="primary") with gr.Column(): wm_output_video = gr.Video(label="Watermarked Video", interactive=True) wm_btn.click(apply_video_watermark, [wm_input_video, wm_text, wm_pos, wm_opacity, wm_size, wm_color], wm_output_video) with gr.TabItem("Create GIF"): gr.Markdown("### Convert a video clip into a high-quality animated GIF.") with gr.Row(): with gr.Column(): input_video_gif = gr.Video(label="Input Video") with gr.Row(): gif_start_time = gr.Number(value=0, label="Start Time (s)") gif_end_time = gr.Number(value=0, label="End Time (s)", info="Set to 0 for full duration") gif_btn = gr.Button("🖼️ Create GIF", variant="primary") with gr.Column(): output_gif = gr.Image(label="Output GIF", interactive=True) gif_btn.click(create_gif_from_video, [input_video_gif, gif_start_time, gif_end_time], output_gif) with gr.TabItem("Audio & Transcription"): with gr.Tabs(): with gr.TabItem("Add Audio to Video"): gr.Markdown("### Combine a silent video with an audio file.") with gr.Row(): with gr.Column(): input_video_audio = gr.Video(label="Input Video") input_audio = gr.Audio(type="filepath", label="Input Audio") add_audio_btn = gr.Button("🎶 Add Audio", variant="primary") with gr.Column(): output_video_audio = gr.Video(label="Final Video with Audio", interactive=True, show_download_button=True) add_audio_btn.click(add_audio_to_video, [input_video_audio, input_audio], output_video_audio) with gr.TabItem("Extract Audio from Video"): gr.Markdown("### Strip the audio track from a video file.") with gr.Row(): with gr.Column(): extract_audio_input_video = gr.Video(label="Input Video") extract_audio_format = gr.Dropdown(["mp3", "wav", "aac"], value="mp3", label="Output Audio Format") extract_audio_btn = gr.Button("🎵 Extract Audio", variant="primary") with gr.Column(): extract_audio_output = gr.Audio(label="Extracted Audio", type="filepath") extract_audio_btn.click(extract_audio, [extract_audio_input_video, extract_audio_format], extract_audio_output) with gr.TabItem("Audio Trimmer & Fader"): gr.Markdown("### Trim and Apply Fades to an Audio File") gr.Info("Set start/end times to trim the clip, then apply optional fades.") with gr.Row(): with gr.Column(): audio_trim_input = gr.Audio(type="filepath", label="Input Audio") with gr.Row(): audio_start_time = gr.Number(label="Start Time (s)", value=0) audio_end_time = gr.Number(label="End Time (s)", info="Set to 0 for full duration") with gr.Row(): audio_fade_in = gr.Slider(0.0, 10.0, 0.5, step=0.1, label="Fade-In Duration (s)") audio_fade_out = gr.Slider(0.0, 10.0, 1.0, step=0.1, label="Fade-Out Duration (s)") audio_trim_fade_btn = gr.Button("✂️ Process Audio", variant="primary") with gr.Column(): audio_trim_output = gr.Audio(label="Processed Audio", type="filepath") audio_trim_fade_btn.click(trim_and_fade_audio, [audio_trim_input, audio_start_time, audio_end_time, audio_fade_in, audio_fade_out], audio_trim_output) with gr.TabItem("Transcribe Audio/Video", visible=(whisper is not None)): gr.Markdown("## Transcribe Speech and Burn Subtitles") gr.Info("Uses OpenAI's Whisper model. First run will download model files. After transcribing a video, options to burn subtitles will appear.") transcribed_video_path_state = gr.State(None) with gr.Row(): with gr.Column(): transcribe_input = gr.File(label="Upload Video or Audio File", file_types=["video", "audio"]) transcribe_model = gr.Dropdown(["tiny", "base", "small", "medium", "large"], value="base", label="Whisper Model Size") transcribe_btn = gr.Button("🎙️ Transcribe", variant="primary") with gr.Column(): transcribe_text = gr.Textbox(label="Transcription Result", lines=10, interactive=True) transcribe_files = gr.File(label="Download Subtitle Files (.srt, .vtt)", file_count="multiple", interactive=False) with gr.Accordion("🔥 Burn Subtitles onto Video", open=True, visible=False) as burn_accordion: gr.Markdown("Set styling and burn the generated subtitles into the video.") with gr.Row(): burn_font_size = gr.Slider(1, 10, 5, step=1, label="Relative Font Size") burn_font_color = gr.ColorPicker(value="#FFFFFF", label="Font Color") burn_btn = gr.Button("🔥 Burn Subtitles", variant="primary") burn_output_video = gr.Video(label="Video with Burned-in Subtitles", interactive=True) transcribe_btn.click( fn=transcribe_and_prep_burn, inputs=[transcribe_input, transcribe_model], outputs=[transcribe_text, transcribe_files, transcribed_video_path_state, burn_accordion] ) burn_btn.click( fn=burn_subtitles, inputs=[transcribed_video_path_state, transcribe_files, burn_font_size, burn_font_color], outputs=burn_output_video ) with gr.TabItem("ControlNet Tools"): gr.Markdown("## ControlNet Preprocessing") with gr.Tabs(): with gr.TabItem("Process a Video"): gr.Markdown("### Convert a Video into a ControlNet-Ready Map") with gr.Row(): with gr.Column(): input_video_cn = gr.Video(label="Input Video") detector_dropdown_cn = gr.Dropdown(choices=list(DETECTOR_CONFIG.keys()), value="Canny", label="Choose Detector") process_btn_cn = gr.Button("✨ Process Video", variant="primary") with gr.Column(): output_video_cn = gr.Video(label="Output ControlNet Video", interactive=True, show_download_button=True) process_btn_cn.click(fn=process_video_with_detector, inputs=[input_video_cn, detector_dropdown_cn], outputs=output_video_cn) with gr.TabItem("Process Batch Images"): gr.Markdown("### Generate ControlNet Maps from one or more images.") with gr.Row(): with gr.Column(): input_images_cn = gr.File(label="Upload Images or Folder", file_count="multiple", file_types=["image"]) detector_dropdown_img = gr.Dropdown(choices=list(DETECTOR_CONFIG.keys()), value="Canny", label="Choose Detector") process_btn_img = gr.Button("✨ Process Images", variant="primary") with gr.Column(): output_gallery_cn = gr.Gallery(label="Output ControlNet Images", show_label=True, columns=4, object_fit="contain", height="auto") output_zip_cn = gr.File(label="Download All as .zip", interactive=False) process_btn_img.click(fn=process_batch_images_with_detector, inputs=[input_images_cn, detector_dropdown_img], outputs=[output_gallery_cn, output_zip_cn]) with gr.TabItem("Converter & Compressor"): gr.Markdown("## Universal Video Converter & Compressor") gr.Info("Convert your video to a different format, change the codec, reduce the quality to save space, or downscale the resolution.") with gr.Row(): with gr.Column(): conv_input_video = gr.Video(label="Input Video") conv_btn = gr.Button("⚙️ Convert & Compress", variant="primary") conv_output_video = gr.Video(label="Converted Video", interactive=True, show_download_button=True) with gr.Column(): gr.Markdown("#### Output Settings") with gr.Row(): conv_format = gr.Dropdown(["mp4", "mkv", "webm", "mov"], value="mp4", label="Output Format") conv_vcodec = gr.Dropdown(["libx264", "libx265", "vp9"], value="libx264", label="Video Codec") conv_crf = gr.Slider(minimum=18, maximum=30, value=23, step=1, label="Quality (CRF)", info="Lower = higher quality/size, Higher = lower quality/size. 23 is a good default.") conv_scale = gr.Dropdown(["Original", "1080p", "720p", "480p"], value="Original", label="Downscale Resolution (optional)") gr.Markdown("##### Audio Settings") with gr.Row(): conv_acodec = gr.Dropdown(["copy", "aac", "opus"], value="copy", label="Audio Codec", info="'copy' is fastest and preserves quality.") conv_abitrate = gr.Dropdown([96, 128, 192, 256, 320], value=192, label="Audio Bitrate (kbps)", interactive=False) conv_acodec.change(lambda x: gr.update(interactive=(x != "copy")), conv_acodec, conv_abitrate) conv_btn.click(fn=convert_compress_video, inputs=[conv_input_video, conv_format, conv_vcodec, conv_crf, conv_scale, conv_acodec, conv_abitrate], outputs=conv_output_video) with gr.TabItem("Transfer"): gr.Markdown("## Image & Link Transfer Utility") gr.Info("Drop images below, manage URL presets, and open the target application in a new tab.") link_presets = gr.State(DEFAULT_LINK_PRESETS.copy()) with gr.Row(): with gr.Column(scale=1): transfer_gallery = gr.Gallery(label="Drop Images Here", height=300, columns=3, object_fit="contain") with gr.Column(scale=2): gr.Markdown("### Link Preset Management") target_url = gr.Textbox(label="Target URL", value="https://huggingface.co/spaces/bep40/FramePack_rotate_landscape", interactive=True, elem_id="transfer_target_url") search_bar = gr.Textbox(label="Search Presets", placeholder="Type to filter...", interactive=True) with gr.Row(): preset_dropdown = gr.Dropdown( label="Load Link Preset", choices=sorted(list(DEFAULT_LINK_PRESETS.keys())), interactive=True ) delete_preset_btn = gr.Button("🗑️ Delete", variant="stop") with gr.Accordion("Create a new preset", open=False): with gr.Row(): new_preset_name = gr.Textbox(label="New Preset Name", placeholder="e.g., My Favorite App") save_preset_btn = gr.Button("💾 Save") open_link_btn = gr.Button("🔗 Open in New Tab", variant="primary") search_bar.input(fn=filter_presets, inputs=[search_bar, link_presets], outputs=[preset_dropdown]) preset_dropdown.change(fn=load_preset, inputs=[link_presets, preset_dropdown], outputs=[target_url]) save_preset_btn.click( fn=save_preset, inputs=[link_presets, new_preset_name, target_url], outputs=[link_presets, preset_dropdown] ).then(lambda: ("", ""), outputs=[new_preset_name, search_bar]) delete_confirm_js = """(name) => { if (!name) { alert('Please select a preset to delete.'); return false; } return confirm(`Are you sure you want to delete the preset: '` + name + `'?`); }""" delete_preset_btn.click(fn=None, js=delete_confirm_js, inputs=[preset_dropdown]).then( fn=delete_preset, inputs=[link_presets, preset_dropdown], outputs=[link_presets, preset_dropdown, target_url] ).then(lambda: "", outputs=[search_bar]) open_link_btn.click(fn=None, js="()=>{const url=document.getElementById('transfer_target_url').querySelector('textarea').value;if(url){window.open(url,'_blank')}else{alert('Target URL is empty.')}}") gr.HTML('skylinkd production 2025 (c)', elem_id="custom-footer") if __name__ == "__main__": if os.path.exists(TEMP_DIR): try: shutil.rmtree(TEMP_DIR) except OSError as e: print(f"Error removing temp directory {TEMP_DIR}: {e}") os.makedirs(TEMP_DIR, exist_ok=True) demo.launch(inbrowser=True)