import streamlit as st
import cv2
import numpy as np
import torch
from transformers import pipeline
from PIL import Image
import os
import tempfile
import subprocess
from tqdm import tqdm


# Load Depth Anything V2 model (from Hugging Face)
@st.cache_resource
def load_depth_model():
    # The "-hf" checkpoint is the transformers-compatible release of
    # Depth Anything V2 Small; use the GPU when one is available.
    return pipeline(
        "depth-estimation",
        model="depth-anything/Depth-Anything-V2-Small-hf",
        device=0 if torch.cuda.is_available() else -1,
    )


# Simple DIBR: generate left/right views from an image and its depth map
def dibr_stereo(image, depth, baseline=0.1, focal_length=1.0):
    height, width, _ = image.shape
    depth = cv2.resize(depth, (width, height))  # Ensure depth matches image size
    depth = depth / 255.0  # Normalize depth (assuming 0-255 grayscale)

    # Create shift (disparity) map; the small epsilon avoids division by zero
    shift = (baseline * focal_length) / (depth + 1e-6)

    # Left view: shift pixels to the right (per-pixel forward warp; slow but simple)
    left = np.zeros_like(image)
    for y in range(height):
        for x in range(width):
            new_x = int(x + shift[y, x] * width / 2)  # Shift scaled to image width
            if 0 <= new_x < width:
                left[y, new_x] = image[y, x]

    # Right view: shift pixels to the left
    right = np.zeros_like(image)
    for y in range(height):
        for x in range(width):
            new_x = int(x - shift[y, x] * width / 2)
            if 0 <= new_x < width:
                right[y, new_x] = image[y, x]

    # Basic hole filling: inpaint the pixels that received no value
    mask_left = np.all(left == 0, axis=2).astype(np.uint8)
    mask_right = np.all(right == 0, axis=2).astype(np.uint8)
    left = cv2.inpaint(left, mask_left, 3, cv2.INPAINT_TELEA)
    right = cv2.inpaint(right, mask_right, 3, cv2.INPAINT_TELEA)
    return left, right


# Combine left and right views into a side-by-side (SBS) frame
def combine_sbs(left, right):
    return np.hstack((left, right))


# Add VR 180 metadata using Google's spatial-media tool
def add_vr180_metadata(input_path, output_path, spatial_media_path):
    # The spatial-media injector takes -i (inject), a stereo mode of
    # none/top-bottom/left-right, and the input and output files as
    # positional arguments; left-right corresponds to SBS.
    cmd = [
        'python', os.path.join(spatial_media_path, 'spatialmedia'),
        '-i',
        '--stereo=left-right',
        input_path,
        output_path,
    ]
    subprocess.run(cmd, check=True)


# Main app
st.title("2D to 3D VR 180 Converter")
st.write("Upload a 2D video clip (e.g., from Inception) to convert it to immersive 3D VR 180.")

# User input
uploaded_file = st.file_uploader("Upload 2D Video", type=["mp4", "avi", "mov"])
spatial_media_path = st.text_input(
    "Path to spatial-media folder",
    value="C:\\Users\\Administrator\\Downloads\\vr_converter\\spatial-media",
)

if uploaded_file and spatial_media_path:
    if st.button("Convert to VR 180"):
        with tempfile.TemporaryDirectory() as tmpdir:
            # Save uploaded video
            input_path = os.path.join(tmpdir, "input.mp4")
            with open(input_path, "wb") as f:
                f.write(uploaded_file.read())

            # Extract video info
            cap = cv2.VideoCapture(input_path)
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.release()

            # Progress bar
            progress = st.progress(0)
            status = st.empty()

            # Process frames
            depth_model = load_depth_model()
            sbs_video_path = os.path.join(tmpdir, "sbs_output.mp4")
            writer = cv2.VideoWriter(
                sbs_video_path,
                cv2.VideoWriter_fourcc(*'mp4v'),
                fps,
                (width * 2, height),  # SBS frame is twice as wide
            )
            cap = cv2.VideoCapture(input_path)
            for i in tqdm(range(frame_count)):
                ret, frame = cap.read()
                if not ret:
                    break
                # Compute depth (the pipeline returns a grayscale PIL depth image)
                depth_result = depth_model(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
                depth = np.array(depth_result["depth"]).astype(np.uint8)
                # Generate stereo pair
                left, right = dibr_stereo(frame, depth)
                # Combine into SBS and write to video
                sbs_frame = combine_sbs(left, right)
                writer.write(sbs_frame)
                # Update progress
                progress.progress((i + 1) / frame_count)
                status.text(f"Processing frame {i + 1}/{frame_count}")
            writer.release()
            cap.release()

            # Add VR metadata
            vr_output_path = os.path.join(tmpdir, "vr180_output.mp4")
            add_vr180_metadata(sbs_video_path, vr_output_path, spatial_media_path)

            # Preview (basic 2D preview of the SBS video)
            st.video(vr_output_path)

            # Download
            with open(vr_output_path, "rb") as f:
                st.download_button("Download VR 180 Video", f, file_name="vr180_output.mp4")

st.write("Note: For VR viewing, load the downloaded file into a headset. Processing may take a while for long clips.")
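# Usage sketch (assumptions noted): if this script is saved as app.py and the
# dependencies (streamlit, opencv-python, numpy, torch, transformers, pillow,
# tqdm) are installed, the app is launched with:
#   streamlit run app.py
# The "Path to spatial-media folder" box is assumed to point at a local clone of
# https://github.com/google/spatial-media, which provides the metadata injector
# invoked by add_vr180_metadata().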