Spaces:
Configuration error
Configuration error
File size: 5,205 Bytes
a2fcab8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import streamlit as st
import cv2
import numpy as np
import torch
from transformers import pipeline
from PIL import Image
import os
import tempfile
import subprocess
from tqdm import tqdm
import shutil
# Load Depth Anything V2 model (from Hugging Face)
@st.cache_resource
def load_depth_model(model_id="depth-anything/Depth-Anything-V2-Small-hf"):
    """Create the depth-estimation pipeline once and reuse it across reruns.

    ``st.cache_resource`` keeps the returned pipeline alive between Streamlit
    script reruns, so the weights are only loaded on the first call for a
    given ``model_id``.

    Args:
        model_id: Hugging Face Hub id of a Transformers-compatible depth
            checkpoint. The default is the "-hf" export of Depth Anything V2
            Small, which is the variant ``transformers.pipeline`` can load.

    Returns:
        A Transformers ``depth-estimation`` pipeline. The caller in this file
        reads the ``"depth"`` entry of its result.
    """
    return pipeline("depth-estimation", model=model_id)
# Simple DIBR: Generate left/right views from image and depth
def dibr_stereo(image, depth, baseline=0.1, focal_length=1.0):
    """Synthesize a left/right stereo pair from one frame and its depth map.

    Every source pixel is moved horizontally by an amount derived from its
    depth value (depth-image-based rendering). Pixels that land outside the
    frame are dropped, and target positions that receive no pixel are filled
    with ``cv2.inpaint``.

    Args:
        image: Frame as a 3-D array (height, width, channels).
        depth: Single-channel depth map on a 0-255 scale; it is resized to
            the frame size before use.
        baseline: Scale factor of the horizontal shift.
        focal_length: Second scale factor of the horizontal shift.

    Returns:
        Tuple ``(left, right)`` of arrays with the same shape and dtype as
        ``image``.
    """
    height, width, _ = image.shape
    depth = cv2.resize(depth, (width, height))  # cv2 sizes are (width, height)
    depth = depth / 255.0  # assumes an 8-bit (0-255) depth map

    # NOTE(review): the shift is inversely proportional to the depth value,
    # so small depth values yield very large shifts. That is right for metric
    # depth but looks inverted if the model emits "brighter = nearer"
    # relative depth -- confirm against the depth model's output convention.
    shift = (baseline * focal_length) / (depth + 1e-6)  # epsilon avoids /0
    offset = shift * width / 2

    # Left view shifts pixels to the right, right view shifts them to the left.
    left, left_filled = _forward_warp(image, offset)
    right, right_filled = _forward_warp(image, -offset)

    # Inpaint only positions that received no source pixel. Testing the
    # warped image for zeros instead would also "repair" genuinely black
    # scene content.
    left = cv2.inpaint(left, (~left_filled).astype(np.uint8), 3, cv2.INPAINT_TELEA)
    right = cv2.inpaint(right, (~right_filled).astype(np.uint8), 3, cv2.INPAINT_TELEA)
    return left, right


def _forward_warp(image, offset):
    """Move each pixel of ``image`` horizontally by ``offset`` pixels.

    Source columns are processed from left to right, so when several source
    pixels land on the same target the right-most source wins, matching a
    plain raster-order scan. Within one source column every row has its own
    target position, which keeps each vectorized assignment free of
    duplicate indices.

    Returns:
        Tuple ``(warped, filled)`` where ``filled`` is a boolean
        (height, width) mask of target positions that received a pixel.
    """
    height, width = offset.shape
    warped = np.zeros_like(image)
    filled = np.zeros((height, width), dtype=bool)
    rows = np.arange(height)
    for x in range(width):
        # Truncate toward zero like int(); bounds are checked on the float
        # values so extreme shifts cannot overflow the integer cast.
        target = np.trunc(x + offset[:, x])
        inside = (target >= 0) & (target < width)
        ys = rows[inside]
        xs = target[inside].astype(np.intp)
        warped[ys, xs] = image[ys, x]
        filled[ys, xs] = True
    return warped, filled
# Combine left and right into SBS
def combine_sbs(left, right):
    """Return one side-by-side frame with ``left`` placed before ``right``.

    The two views are joined horizontally with ``np.hstack``.
    """
    views = [left, right]
    side_by_side = np.hstack(views)
    return side_by_side
# Add VR 180 metadata using spatial-media
def add_vr180_metadata(input_path, output_path, spatial_media_path):
    """Inject stereo spatial-media metadata into a side-by-side video.

    Runs the ``spatialmedia`` command-line injector from a local checkout of
    the spatial-media repository as a subprocess.

    Args:
        input_path: Side-by-side video to read.
        output_path: Path the injector should write the tagged copy to.
        spatial_media_path: Folder that contains the ``spatialmedia`` package
            (its ``__main__.py`` is executed).

    Raises:
        subprocess.CalledProcessError: The injector exited with a non-zero
            status.
        FileNotFoundError: The injector finished without creating
            ``output_path``.
    """
    import sys  # local import: run the injector with this same interpreter

    injector = os.path.join(spatial_media_path, 'spatialmedia', '__main__.py')
    # The injector's stereo modes are none | top-bottom | left-right, so a
    # side-by-side layout is "left-right". --spatial-audio is a boolean flag
    # (omit it to disable) and --crop expects "w:h:f_w:f_h:x:y", so neither
    # is passed. Both file paths go together after the options because the
    # tool reads them from a single positional list.
    # NOTE(review): the stock injector tags video as spherical; confirm that
    # the installed version produces the 180-degree variant players expect.
    cmd = [
        sys.executable, injector,
        '-i',                   # Inject mode: <input> <output>
        '--stereo=left-right',  # SBS format
        input_path,
        output_path,
    ]
    subprocess.run(cmd, check=True)

    # Fail here with a clear message rather than later when the missing
    # result is opened.
    if not os.path.isfile(output_path):
        raise FileNotFoundError(
            f"spatial-media injector did not create {output_path!r}"
        )
# Main app
st.title("2D to 3D VR 180 Converter")
st.write("Upload a 2D video clip (e.g., from Inception) to convert it to immersive 3D VR 180.")

# User input
uploaded_file = st.file_uploader("Upload 2D Video", type=["mp4", "avi", "mov"])
spatial_media_path = st.text_input(
    "Path to spatial-media folder",
    value="C:\\Users\\Administrator\\Downloads\\vr_converter\\spatial-media",
)

# The convert button is only rendered once both inputs are provided.
if uploaded_file and spatial_media_path and st.button("Convert to VR 180"):
    with tempfile.TemporaryDirectory() as tmpdir:
        # Save the upload to disk: cv2.VideoCapture is given a file path.
        input_path = os.path.join(tmpdir, "input.mp4")
        with open(input_path, "wb") as f:
            f.write(uploaded_file.read())

        sbs_video_path = os.path.join(tmpdir, "sbs_output.mp4")
        cap = cv2.VideoCapture(input_path)
        writer = None
        readable = False
        try:
            # Extract video info
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            readable = (
                cap.isOpened() and width > 0 and height > 0 and frame_count > 0
            )

            if readable:
                if not fps > 0:
                    # The container did not report a usable frame rate; fall
                    # back to a common default so the writer gets a valid one.
                    fps = 30.0

                # Progress bar
                progress = st.progress(0)
                status = st.empty()

                depth_model = load_depth_model()
                # Output frames are twice as wide: left and right side by side.
                writer = cv2.VideoWriter(
                    sbs_video_path,
                    cv2.VideoWriter_fourcc(*'mp4v'),
                    fps,
                    (width * 2, height),
                )

                for i in tqdm(range(frame_count)):
                    ret, frame = cap.read()
                    if not ret:
                        # The reported frame count can exceed what is readable.
                        break

                    # Compute depth (frames are BGR; the image is built as RGB)
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    depth_result = depth_model(Image.fromarray(rgb_frame))
                    depth = np.array(depth_result["depth"]).astype(np.uint8)

                    # Generate stereo views and write them side by side
                    left, right = dibr_stereo(frame, depth)
                    writer.write(combine_sbs(left, right))

                    # Update progress
                    progress.progress((i + 1) / frame_count)
                    status.text(f"Processing frame {i+1}/{frame_count}")
        finally:
            # Release the video handles even if a frame fails to process, so
            # the temporary directory can always be cleaned up.
            if writer is not None:
                writer.release()
            cap.release()

        if not readable:
            st.error("Could not read any frames from the uploaded video.")
        else:
            # Add VR metadata
            vr_output_path = os.path.join(tmpdir, "vr180_output.mp4")
            add_vr180_metadata(sbs_video_path, vr_output_path, spatial_media_path)

            # Read the result once so neither widget depends on the temporary
            # file, which is deleted when this block exits.
            with open(vr_output_path, "rb") as f:
                video_bytes = f.read()

            # Preview
            st.video(video_bytes)  # Basic 2D preview (SBS)
            # Download
            st.download_button(
                "Download VR 180 Video", video_bytes, file_name="vr180_output.mp4"
            )

st.write("Note: For VR viewing, load into a headset. Processing may take time for long clips.")