import streamlit as st
import cv2
import numpy as np
import torch
from transformers import pipeline
from PIL import Image
import os
import tempfile
import subprocess
from tqdm import tqdm
import shutil
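
# Processing pipeline:
#   1. Read the uploaded 2D video frame by frame with OpenCV.
#   2. Estimate per-frame depth with Depth Anything V2.
#   3. Warp each frame into left/right views with a simple DIBR pixel shift.
#   4. Stack the two views side by side (SBS) and write a new video.
#   5. Inject stereo/VR metadata with Google's spatial-media tool.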

# Load the Depth Anything V2 model (from Hugging Face)
@st.cache_resource
def load_depth_model():
    # The "-hf" checkpoint is the transformers-compatible release of Depth
    # Anything V2 Small; the pipeline returns a dict whose "depth" entry is a
    # grayscale PIL image (for this relative-depth model, brighter ~ closer).
    return pipeline("depth-estimation", model="depth-anything/Depth-Anything-V2-Small-hf")

# Simple DIBR: Generate left/right views from image and depth
def dibr_stereo(image, depth, baseline=0.1, focal_length=1.0):
    height, width, _ = image.shape
    depth = cv2.resize(depth, (width, height))  # Ensure depth matches image size
    depth = depth / 255.0  # Normalize depth (assuming 0-255 grayscale)

    # Create shift (disparity) map: shift is inversely proportional to depth.
    # Note: the model's output is a relative, inverse-depth-like map, so using
    # it directly here is a deliberate simplification.
    shift = (baseline * focal_length) / (depth + 1e-6)  # Avoid division by zero

    # Left view: Shift pixels to the right
    left = np.zeros_like(image)
    for y in range(height):
        for x in range(width):
            new_x = int(x + shift[y, x] * width / 2)  # Shift scaled
            if 0 <= new_x < width:
                left[y, new_x] = image[y, x]

    # Right view: Shift pixels to the left
    right = np.zeros_like(image)
    for y in range(height):
        for x in range(width):
            new_x = int(x - shift[y, x] * width / 2)
            if 0 <= new_x < width:
                right[y, new_x] = image[y, x]

    # Basic hole filling (inpaint)
    mask_left = np.all(left == 0, axis=2).astype(np.uint8)
    mask_right = np.all(right == 0, axis=2).astype(np.uint8)
    left = cv2.inpaint(left, mask_left, 3, cv2.INPAINT_TELEA)
    right = cv2.inpaint(right, mask_right, 3, cv2.INPAINT_TELEA)

    return left, right
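
# Optional: a vectorized sketch of the same forward warp (illustrative; not
# called by the app, and "dibr_stereo_fast" is a name introduced here, not part
# of the original script). It computes the target column of every pixel at once
# instead of looping per pixel in Python, which is much faster for HD frames;
# where several source pixels land on the same target, the last write wins,
# roughly matching the loop above.
def dibr_stereo_fast(image, depth, baseline=0.1, focal_length=1.0):
    height, width, _ = image.shape
    depth = cv2.resize(depth, (width, height)) / 255.0
    shift_px = ((baseline * focal_length) / (depth + 1e-6) * width / 2).astype(np.int32)

    xs = np.arange(width)[None, :]             # source columns, broadcast to (H, W)
    ys = np.repeat(np.arange(height), width)   # row index of every pixel, shape (H*W,)
    flat_pixels = image.reshape(-1, 3)         # source pixels in row-major order

    left = np.zeros_like(image)
    right = np.zeros_like(image)
    for view, sign in ((left, 1), (right, -1)):
        new_x = (xs + sign * shift_px).ravel()       # target column per pixel
        valid = (new_x >= 0) & (new_x < width)
        view[ys[valid], new_x[valid]] = flat_pixels[valid]

    # Same hole filling as dibr_stereo
    for view in (left, right):
        mask = np.all(view == 0, axis=2).astype(np.uint8)
        view[:] = cv2.inpaint(view, mask, 3, cv2.INPAINT_TELEA)

    return left, right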

# Combine left and right into SBS
def combine_sbs(left, right):
    return np.hstack((left, right))
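
# Quick standalone test of the stereo path on one frame (file names here are
# illustrative, not part of the app):
#   img = cv2.imread("frame.png")
#   rgb = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
#   depth = np.array(load_depth_model()(rgb)["depth"]).astype(np.uint8)
#   left, right = dibr_stereo(img, depth)
#   cv2.imwrite("sbs_frame.png", combine_sbs(left, right))  # full width per eye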

# Add VR180 metadata using Google's spatial-media injector
def add_vr180_metadata(input_path, output_path, spatial_media_path):
    # spatial-media's stereo modes are "top-bottom" and "left-right";
    # "left-right" is the side-by-side layout produced above. The -i flag
    # means "inject", and the input/output files are positional arguments.
    cmd = [
        'python', '-m', 'spatialmedia',
        '-i',                     # inject metadata
        '--stereo=left-right',    # side-by-side (SBS) stereo
        input_path,               # input file
        output_path               # output file
    ]
    # Run from the spatial-media checkout so the package resolves correctly.
    subprocess.run(cmd, cwd=spatial_media_path, check=True)
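
# The call above is equivalent to running, from the spatial-media checkout:
#   python -m spatialmedia -i --stereo=left-right sbs_output.mp4 vr180_output.mp4
# Note: this marks the video as spherical side-by-side stereo; players that
# require full VR180 (cropped 180-degree) metadata may need Google's VR180
# Creator instead.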

# Main app
st.title("2D to 3D VR 180 Converter")
st.write("Upload a 2D video clip (e.g., from Inception) to convert it to immersive 3D VR 180.")

# User input
uploaded_file = st.file_uploader("Upload 2D Video", type=["mp4", "avi", "mov"])
spatial_media_path = st.text_input("Path to spatial-media folder", value="C:\\Users\\Administrator\\Downloads\\vr_converter\\spatial-media")

if uploaded_file and spatial_media_path:
    if st.button("Convert to VR 180"):
        with tempfile.TemporaryDirectory() as tmpdir:
            # Save uploaded video
            input_path = os.path.join(tmpdir, "input.mp4")
            with open(input_path, "wb") as f:
                f.write(uploaded_file.read())

            # Extract video info
            cap = cv2.VideoCapture(input_path)
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            cap.release()

            # Progress bar
            progress = st.progress(0)
            status = st.empty()

            # Process frames
            depth_model = load_depth_model()
            sbs_video_path = os.path.join(tmpdir, "sbs_output.mp4")
            writer = cv2.VideoWriter(sbs_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width * 2, height))
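            # NOTE: the 'mp4v' FourCC yields MPEG-4 Part 2 video, which most
            # browsers cannot decode inline, so the st.video preview further
            # down may show a blank player; re-encoding the result to H.264
            # (e.g. with ffmpeg) is a common workaround if that happens.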

            cap = cv2.VideoCapture(input_path)
            for i in tqdm(range(frame_count)):
                ret, frame = cap.read()
                if not ret:
                    break

                # Compute depth
                depth_result = depth_model(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
                depth = np.array(depth_result["depth"]).astype(np.uint8)  # Grayscale depth

                # Generate stereo
                left, right = dibr_stereo(frame, depth)

                # Combine SBS
                sbs_frame = combine_sbs(left, right)

                # Write to video
                writer.write(sbs_frame)

                # Update progress
                progress.progress(min((i + 1) / frame_count, 1.0))
                status.text(f"Processing frame {i+1}/{frame_count}")

            writer.release()
            cap.release()

            # Add VR metadata
            vr_output_path = os.path.join(tmpdir, "vr180_output.mp4")
            add_vr180_metadata(sbs_video_path, vr_output_path, spatial_media_path)

            # Preview
            st.video(vr_output_path)  # Basic 2D preview (SBS)

            # Download
            with open(vr_output_path, "rb") as f:
                st.download_button("Download VR 180 Video", f, file_name="vr180_output.mp4")

st.write("Note: For VR viewing, load into a headset. Processing may take time for long clips.")