# vr-converter / app.py
import streamlit as st
import cv2
import numpy as np
import torch
from transformers import pipeline
from PIL import Image
import os
import tempfile
import subprocess
from tqdm import tqdm
import shutil
# Load Depth Anything V2 model (from Hugging Face)
@st.cache_resource
def load_depth_model():
    # If this checkpoint id fails to resolve, the transformers-ready weights are
    # published under the depth-anything org (e.g. depth-anything/Depth-Anything-V2-Small-hf).
    return pipeline("depth-estimation", model="DepthAnything/Depth-Anything-V2-Small")
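
# The depth-estimation pipeline returns a dict whose "depth" entry is a PIL image
# with relative depth rescaled to 0-255 (assumption based on the transformers
# depth-estimation task). The hypothetical helper below is not called by the app;
# it just mirrors how the main loop consumes the model.
def depth_map_u8(model, bgr_frame):
    rgb = Image.fromarray(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB))  # OpenCV frames are BGR
    return np.array(model(rgb)["depth"]).astype(np.uint8)  # 8-bit grayscale depth map
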
# Simple DIBR: generate left/right views from an image and its depth map
def dibr_stereo(image, depth, baseline=0.1, focal_length=1.0):
    height, width, _ = image.shape
    depth = cv2.resize(depth, (width, height))  # Ensure depth matches image size
    depth = depth / 255.0  # Normalize depth (assuming an 8-bit grayscale map)
    # Shift map: smaller depth values get larger shifts
    shift = (baseline * focal_length) / (depth + 1e-6)  # Avoid division by zero
    # Left view: shift pixels to the right
    left = np.zeros_like(image)
    for y in range(height):
        for x in range(width):
            new_x = int(x + shift[y, x] * width / 2)  # Shift scaled to image width
            if 0 <= new_x < width:
                left[y, new_x] = image[y, x]
    # Right view: shift pixels to the left
    right = np.zeros_like(image)
    for y in range(height):
        for x in range(width):
            new_x = int(x - shift[y, x] * width / 2)
            if 0 <= new_x < width:
                right[y, new_x] = image[y, x]
    # Basic hole filling: inpaint disoccluded (still-black) pixels
    mask_left = np.all(left == 0, axis=2).astype(np.uint8)
    mask_right = np.all(right == 0, axis=2).astype(np.uint8)
    left = cv2.inpaint(left, mask_left, 3, cv2.INPAINT_TELEA)
    right = cv2.inpaint(right, mask_right, 3, cv2.INPAINT_TELEA)
    return left, right
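
# Optional: a vectorized sketch of the same pixel-shift scatter (hypothetical helper,
# not called by the app). The per-pixel Python loops above run H*W iterations per
# frame; NumPy fancy indexing performs the equivalent scatter in a few array ops.
def dibr_stereo_vectorized(image, depth, baseline=0.1, focal_length=1.0):
    height, width, _ = image.shape
    depth = cv2.resize(depth, (width, height)).astype(np.float32) / 255.0
    shift = ((baseline * focal_length) / (depth + 1e-6) * width / 2).astype(np.int32)
    ys, xs = np.meshgrid(np.arange(height), np.arange(width), indexing="ij")
    # Left view: scatter source pixels to the right
    left = np.zeros_like(image)
    lx = xs + shift
    valid = (lx >= 0) & (lx < width)
    left[ys[valid], lx[valid]] = image[ys[valid], xs[valid]]
    # Right view: scatter source pixels to the left
    right = np.zeros_like(image)
    rx = xs - shift
    valid = (rx >= 0) & (rx < width)
    right[ys[valid], rx[valid]] = image[ys[valid], xs[valid]]
    # Same hole filling as the loop version
    mask_left = np.all(left == 0, axis=2).astype(np.uint8)
    mask_right = np.all(right == 0, axis=2).astype(np.uint8)
    left = cv2.inpaint(left, mask_left, 3, cv2.INPAINT_TELEA)
    right = cv2.inpaint(right, mask_right, 3, cv2.INPAINT_TELEA)
    return left, right
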
# Combine left and right views into a side-by-side (SBS) frame
def combine_sbs(left, right):
    return np.hstack((left, right))
# Add VR 180 metadata using spatial-media
def add_vr180_metadata(input_path, output_path, spatial_media_path):
    # Inject stereo metadata for a side-by-side layout ("left-right" in
    # spatial-media's terms; "side-by-side" is not an accepted value).
    cmd = [
        'python', os.path.join(spatial_media_path, 'spatialmedia', '__main__.py'),
        '-i',                    # Inject metadata
        '--stereo=left-right',   # SBS layout
        input_path,              # Input file
        output_path,             # Output file
    ]
    subprocess.run(cmd, check=True)
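
# Illustrative call (paths are examples, assuming spatial-media is checked out locally):
#   add_vr180_metadata("sbs_output.mp4", "vr180_output.mp4", "./spatial-media")
# which shells out to roughly:
#   python ./spatial-media/spatialmedia/__main__.py -i --stereo=left-right sbs_output.mp4 vr180_output.mp4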
# Main app
st.title("2D to 3D VR 180 Converter")
st.write("Upload a 2D video clip (e.g., from Inception) to convert it to immersive 3D VR 180.")
# User input
uploaded_file = st.file_uploader("Upload 2D Video", type=["mp4", "avi", "mov"])
spatial_media_path = st.text_input("Path to spatial-media folder", value="C:\\Users\\Administrator\\Downloads\\vr_converter\\spatial-media")
if uploaded_file and spatial_media_path:
    if st.button("Convert to VR 180"):
        with tempfile.TemporaryDirectory() as tmpdir:
            # Save uploaded video
            input_path = os.path.join(tmpdir, "input.mp4")
            with open(input_path, "wb") as f:
                f.write(uploaded_file.read())
            # Extract video info
            cap = cv2.VideoCapture(input_path)
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.release()
            # Progress bar
            progress = st.progress(0)
            status = st.empty()
            # Process frames
            depth_model = load_depth_model()
            temp_frames_dir = os.path.join(tmpdir, "frames")
            os.makedirs(temp_frames_dir)
            sbs_video_path = os.path.join(tmpdir, "sbs_output.mp4")
            writer = cv2.VideoWriter(sbs_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width * 2, height))
            cap = cv2.VideoCapture(input_path)
            for i in tqdm(range(frame_count)):
                ret, frame = cap.read()
                if not ret:
                    break
                # Compute depth (pipeline expects RGB; OpenCV frames are BGR)
                depth_result = depth_model(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
                depth = np.array(depth_result["depth"]).astype(np.uint8)  # 8-bit grayscale depth map
                # Generate stereo views
                left, right = dibr_stereo(frame, depth)
                # Combine into a side-by-side frame
                sbs_frame = combine_sbs(left, right)
                # Write to video
                writer.write(sbs_frame)
                # Update progress (clamp in case OpenCV's frame count is off)
                progress.progress(min((i + 1) / frame_count, 1.0))
                status.text(f"Processing frame {i + 1}/{frame_count}")
            writer.release()
            cap.release()
            # Add VR 180 metadata
            vr_output_path = os.path.join(tmpdir, "vr180_output.mp4")
            add_vr180_metadata(sbs_video_path, vr_output_path, spatial_media_path)
            # Preview (renders as a flat SBS video in the browser)
            st.video(vr_output_path)
            # Download
            with open(vr_output_path, "rb") as f:
                st.download_button("Download VR 180 Video", f, file_name="vr180_output.mp4")

st.write("Note: For VR viewing, load the file into a headset. Processing may take time for long clips.")