# vr-converter / app.py
import streamlit as st
import cv2
import numpy as np
import torch
from transformers import pipeline
from PIL import Image
import os
import tempfile
import subprocess
from tqdm import tqdm
import shutil
# Load Depth Anything V2 model (from Hugging Face)
@st.cache_resource
def load_depth_model():
    # If this checkpoint id fails to resolve, the transformers-ready weights are
    # published under the depth-anything org (e.g. depth-anything/Depth-Anything-V2-Small-hf).
    return pipeline("depth-estimation", model="DepthAnything/Depth-Anything-V2-Small")
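
# The depth-estimation pipeline returns a dict whose "depth" entry is a PIL image
# with relative depth rescaled to 0-255 (assumption based on the transformers
# depth-estimation task). The hypothetical helper below is not called by the app;
# it just mirrors how the main loop consumes the model.
def depth_map_u8(model, bgr_frame):
    rgb = Image.fromarray(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB))  # OpenCV frames are BGR
    return np.array(model(rgb)["depth"]).astype(np.uint8)  # 8-bit grayscale depth map
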
# Simple DIBR: generate left/right views from an image and its depth map
def dibr_stereo(image, depth, baseline=0.1, focal_length=1.0):
    height, width, _ = image.shape
    depth = cv2.resize(depth, (width, height))  # Ensure depth matches image size
    depth = depth / 255.0  # Normalize depth (assuming an 8-bit grayscale map)
    # Shift map: smaller depth values get larger shifts
    shift = (baseline * focal_length) / (depth + 1e-6)  # Avoid division by zero
    # Left view: shift pixels to the right
    left = np.zeros_like(image)
    for y in range(height):
        for x in range(width):
            new_x = int(x + shift[y, x] * width / 2)  # Shift scaled to image width
            if 0 <= new_x < width:
                left[y, new_x] = image[y, x]
    # Right view: shift pixels to the left
    right = np.zeros_like(image)
    for y in range(height):
        for x in range(width):
            new_x = int(x - shift[y, x] * width / 2)
            if 0 <= new_x < width:
                right[y, new_x] = image[y, x]
    # Basic hole filling: inpaint disoccluded (still-black) pixels
    mask_left = np.all(left == 0, axis=2).astype(np.uint8)
    mask_right = np.all(right == 0, axis=2).astype(np.uint8)
    left = cv2.inpaint(left, mask_left, 3, cv2.INPAINT_TELEA)
    right = cv2.inpaint(right, mask_right, 3, cv2.INPAINT_TELEA)
    return left, right
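
# Optional: a vectorized sketch of the same pixel-shift scatter (hypothetical helper,
# not called by the app). The per-pixel Python loops above run H*W iterations per
# frame; NumPy fancy indexing performs the equivalent scatter in a few array ops.
def dibr_stereo_vectorized(image, depth, baseline=0.1, focal_length=1.0):
    height, width, _ = image.shape
    depth = cv2.resize(depth, (width, height)).astype(np.float32) / 255.0
    shift = ((baseline * focal_length) / (depth + 1e-6) * width / 2).astype(np.int32)
    ys, xs = np.meshgrid(np.arange(height), np.arange(width), indexing="ij")
    # Left view: scatter source pixels to the right
    left = np.zeros_like(image)
    lx = xs + shift
    valid = (lx >= 0) & (lx < width)
    left[ys[valid], lx[valid]] = image[ys[valid], xs[valid]]
    # Right view: scatter source pixels to the left
    right = np.zeros_like(image)
    rx = xs - shift
    valid = (rx >= 0) & (rx < width)
    right[ys[valid], rx[valid]] = image[ys[valid], xs[valid]]
    # Same hole filling as the loop version
    mask_left = np.all(left == 0, axis=2).astype(np.uint8)
    mask_right = np.all(right == 0, axis=2).astype(np.uint8)
    left = cv2.inpaint(left, mask_left, 3, cv2.INPAINT_TELEA)
    right = cv2.inpaint(right, mask_right, 3, cv2.INPAINT_TELEA)
    return left, right
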
# Combine left and right views into a side-by-side (SBS) frame
def combine_sbs(left, right):
    return np.hstack((left, right))
# Add VR 180 metadata using spatial-media
def add_vr180_metadata(input_path, output_path, spatial_media_path):
    # Inject stereo metadata for a side-by-side layout ("left-right" in
    # spatial-media's terms; "side-by-side" is not an accepted value).
    cmd = [
        'python', os.path.join(spatial_media_path, 'spatialmedia', '__main__.py'),
        '-i',                    # Inject metadata
        '--stereo=left-right',   # SBS layout
        input_path,              # Input file
        output_path,             # Output file
    ]
    subprocess.run(cmd, check=True)
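
# Illustrative call (paths are examples, assuming spatial-media is checked out locally):
#   add_vr180_metadata("sbs_output.mp4", "vr180_output.mp4", "./spatial-media")
# which shells out to roughly:
#   python ./spatial-media/spatialmedia/__main__.py -i --stereo=left-right sbs_output.mp4 vr180_output.mp4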
# Main app
st.title("2D to 3D VR 180 Converter")
st.write("Upload a 2D video clip (e.g., from Inception) to convert it to immersive 3D VR 180.")
# User input
uploaded_file = st.file_uploader("Upload 2D Video", type=["mp4", "avi", "mov"])
spatial_media_path = st.text_input("Path to spatial-media folder", value="C:\\Users\\Administrator\\Downloads\\vr_converter\\spatial-media")
if uploaded_file and spatial_media_path:
    if st.button("Convert to VR 180"):
        with tempfile.TemporaryDirectory() as tmpdir:
            # Save uploaded video
            input_path = os.path.join(tmpdir, "input.mp4")
            with open(input_path, "wb") as f:
                f.write(uploaded_file.read())
            # Extract video info
            cap = cv2.VideoCapture(input_path)
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.release()
            # Progress bar
            progress = st.progress(0)
            status = st.empty()
            # Process frames
            depth_model = load_depth_model()
            temp_frames_dir = os.path.join(tmpdir, "frames")
            os.makedirs(temp_frames_dir)
            sbs_video_path = os.path.join(tmpdir, "sbs_output.mp4")
            writer = cv2.VideoWriter(sbs_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width * 2, height))
            cap = cv2.VideoCapture(input_path)
            for i in tqdm(range(frame_count)):
                ret, frame = cap.read()
                if not ret:
                    break
                # Compute depth (pipeline expects RGB; OpenCV frames are BGR)
                depth_result = depth_model(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
                depth = np.array(depth_result["depth"]).astype(np.uint8)  # 8-bit grayscale depth map
                # Generate stereo views
                left, right = dibr_stereo(frame, depth)
                # Combine into a side-by-side frame
                sbs_frame = combine_sbs(left, right)
                # Write to video
                writer.write(sbs_frame)
                # Update progress (clamp in case OpenCV's frame count is off)
                progress.progress(min((i + 1) / frame_count, 1.0))
                status.text(f"Processing frame {i + 1}/{frame_count}")
            writer.release()
            cap.release()
            # Add VR 180 metadata
            vr_output_path = os.path.join(tmpdir, "vr180_output.mp4")
            add_vr180_metadata(sbs_video_path, vr_output_path, spatial_media_path)
            # Preview (renders as a flat SBS video in the browser)
            st.video(vr_output_path)
            # Download
            with open(vr_output_path, "rb") as f:
                st.download_button("Download VR 180 Video", f, file_name="vr180_output.mp4")

st.write("Note: For VR viewing, load the file into a headset. Processing may take time for long clips.")