Spaces:

Yuxihenry
/

SpatialTrackerV2

Running on Zero

App Files Files Community

xiaoyuxi committed on Jun 23

Commit

6f5c5a6

1 Parent(s): ca773c4

vggt

Browse files

Files changed (1) hide show

app.py +168 -41

app.py CHANGED Viewed

@@ -10,6 +10,25 @@ from typing import List, Tuple
 BACKEND_SPACE_URL = "Yuxihenry/SpatialTrackerV2_Backend"  # Backend Space ID; loaded via gr.load as "spaces/<this value>"
 hf_token = os.getenv("HF_TOKEN")  # Hugging Face token read from the HF_TOKEN environment variable (None if unset)
 def numpy_to_base64(arr):
     """Encode the raw bytes of ``arr`` as a base64 text string.

     Only the buffer returned by ``arr.tobytes()`` is encoded; nothing else
     about the array (such as its shape or dtype) is written to the result.
     """
     raw_bytes = arr.tobytes()
     encoded = base64.b64encode(raw_bytes)
     return encoded.decode('utf-8')
@@ -40,22 +59,54 @@ def get_video_name(video_path):
     """Extract video name without extension"""
     return os.path.splitext(os.path.basename(video_path))[0]
 def handle_video_upload(video):
     """Handle video upload and extract first frame"""
     if video is None:
-        return None, None, []
     try:
-        # Load backend API
-        backend_api = gr.load(f"spaces/{BACKEND_SPACE_URL}", hf_token=hf_token)
-        # Call backend upload API
-        original_image_state, display_image_b64, selected_points, grid_size_val, vo_points_val, fps_val = backend_api.upload_video_api(video)
-        # Convert base64 image back to numpy array
-        display_image = base64_to_image(display_image_b64)
-        return original_image_state, display_image, selected_points, grid_size_val, vo_points_val, fps_val
     except Exception as e:
         print(f"Error in handle_video_upload: {e}")
@@ -67,18 +118,43 @@ def select_point(original_img: str, sel_pix: list, point_type: str, evt: gr.Sele
         return None, []
     try:
-        # Load backend API
-        backend_api = gr.load(f"spaces/{BACKEND_SPACE_URL}", hf_token=hf_token)
-        # Call backend select point API
-        display_image_b64, new_sel_pix = backend_api.select_point_api(
-            original_img, sel_pix, point_type, evt.index[0], evt.index[1]
-        )
-        # Convert base64 image back to numpy array
-        display_image = base64_to_image(display_image_b64)
-        return display_image, new_sel_pix
     except Exception as e:
         print(f"Error in select_point: {e}")
@@ -90,16 +166,33 @@ def reset_points(original_img: str, sel_pix):
         return None, []
     try:
-        # Load backend API
-        backend_api = gr.load(f"spaces/{BACKEND_SPACE_URL}", hf_token=hf_token)
-        # Call backend reset points API
-        display_image_b64, new_sel_pix = backend_api.reset_points_api(original_img, sel_pix)
-        # Convert base64 image back to numpy array
-        display_image = base64_to_image(display_image_b64)
-        return display_image, new_sel_pix
     except Exception as e:
         print(f"Error in reset_points: {e}")
@@ -111,15 +204,37 @@ def launch_viz(grid_size, vo_points, fps, original_image_state):
         return None, None
     try:
-        # Load backend API
-        backend_api = gr.load(f"spaces/{BACKEND_SPACE_URL}", hf_token=hf_token)
-        # Call backend run tracker API
-        viz_iframe_html, track_video_path = backend_api.run_tracker_api(
-            grid_size, vo_points, fps, original_image_state
-        )
-        return viz_iframe_html, track_video_path
     except Exception as e:
         print(f"Error in launch_viz: {e}")
@@ -143,6 +258,10 @@ def handle_video_change(video):
     return original_image_state, display_image, selected_points, grid_size_val, vo_points_val, fps_val
 # Build UI
 with gr.Blocks(css="""
     #advanced_settings .wrap {
@@ -202,7 +321,12 @@ with gr.Blocks(css="""
     original_image_state = gr.State()  # Store original image in state
     with gr.Row():
-        gr.Markdown("""
         # ✨ SpaTrackV2 Frontend (Client)
         <div style='background-color: #e6f3ff; padding: 20px; border-radius: 10px; margin: 10px 0;'>
         <h2 style='color: #0066cc; margin-bottom: 15px;'>Instructions:</h2>
@@ -212,7 +336,14 @@ with gr.Blocks(css="""
             <li>⚡ Click 'Run Tracker and Visualize' when done</li>
             <li>🔍 Iterative 3D result will be shown in the visualization</li>
         </ol>
-        <p style='font-size: 22px;'>❗ This frontend connects to a private backend Space for processing</p>
         </div>
         """)
@@ -268,14 +399,10 @@ with gr.Blocks(css="""
                 examples_per_page=20  # Show all examples on one page to enable scrolling
             )
-            # Initialize with a placeholder interface instead of static file
             viz_iframe = gr.HTML("""
-                                <div style='border: 3px solid #667eea; border-radius: 10px; overflow: hidden; box-shadow: 0 8px 32px rgba(102, 126, 234, 0.3); background: #f8f9fa; display: flex; align-items: center; justify-content: center; height: 950px;'>
-                                    <div style='text-align: center; color: #666;'>
-                                        <h3 style='margin-bottom: 20px; color: #667eea;'>🎮 Interactive 3D Tracking</h3>
-                                        <p style='font-size: 16px; margin-bottom: 10px;'>Upload a video and select points to start tracking</p>
-                                        <p style='font-size: 14px; color: #999;'>Powered by SpaTrackV2</p>
-                                    </div>
                                 </div>
                                 """)

 BACKEND_SPACE_URL = "Yuxihenry/SpatialTrackerV2_Backend"  # Backend Space ID; loaded via gr.load as "spaces/<this value>"
 hf_token = os.getenv("HF_TOKEN")  # Hugging Face token read from the HF_TOKEN environment variable (None if unset)
+# Flag to track if backend is available
+BACKEND_AVAILABLE = False
+backend_api = None
def initialize_backend():
    """Try to load the backend Space and record whether it is usable.

    Calls ``gr.load`` on ``spaces/{BACKEND_SPACE_URL}`` with ``hf_token`` and
    stores the result in the module-level ``backend_api``. The module-level
    ``BACKEND_AVAILABLE`` flag is updated to match the outcome.

    Returns:
        True when the backend was loaded without raising, False otherwise.
    """
    global backend_api, BACKEND_AVAILABLE
    try:
        print(f"Attempting to connect to backend: {BACKEND_SPACE_URL}")
        backend_api = gr.load(f"spaces/{BACKEND_SPACE_URL}", token=hf_token)
        BACKEND_AVAILABLE = True
        print("✅ Backend connection successful!")
        return True
    except Exception as e:
        # Broad catch is deliberate: any failure here must degrade to
        # standalone mode instead of aborting app start-up.
        print(f"❌ Backend connection failed: {e}")
        print("⚠️  Running in standalone mode (some features may be limited)")
        # Drop any handle left over from an earlier successful call so the
        # handle and the availability flag never disagree.
        backend_api = None
        BACKEND_AVAILABLE = False
        return False
 def numpy_to_base64(arr):
     """Encode the raw bytes of ``arr`` as a base64 text string.

     Only the buffer returned by ``arr.tobytes()`` is encoded; nothing else
     about the array (such as its shape or dtype) is written to the result.
     """
     raw_bytes = arr.tobytes()
     encoded = base64.b64encode(raw_bytes)
     return encoded.decode('utf-8')
     """Extract video name without extension"""
     return os.path.splitext(os.path.basename(video_path))[0]
def extract_first_frame(video_path):
    """Read the first frame of a video file and return it as an RGB image.

    Args:
        video_path: Path of the video to open with ``cv2.VideoCapture``.

    Returns:
        The first frame after BGR-to-RGB conversion, or None when the path is
        empty/None, no frame could be read, or any error occurred.
    """
    # Nothing to open; skip OpenCV entirely rather than relying on it to fail.
    if not video_path:
        return None

    cap = None
    try:
        cap = cv2.VideoCapture(video_path)
        ret, frame = cap.read()
        if not ret or frame is None:
            return None
        # Convert BGR to RGB
        return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    except Exception as e:
        print(f"Error extracting first frame: {e}")
        return None
    finally:
        # Release the capture on every path, including when read() or the
        # colour conversion raises.
        if cap is not None:
            cap.release()
 def handle_video_upload(video):
     """Handle video upload and extract first frame"""
     if video is None:
+        return None, None, [], 50, 756, 3
     try:
+        if BACKEND_AVAILABLE and backend_api:
+            # Try to use backend API
+            try:
+                original_image_state, display_image_b64, selected_points, grid_size_val, vo_points_val, fps_val = backend_api.upload_video_api(video)
+                display_image = base64_to_image(display_image_b64)
+                return original_image_state, display_image, selected_points, grid_size_val, vo_points_val, fps_val
+            except Exception as e:
+                print(f"Backend API call failed: {e}")
+                # Fallback to local processing
+                pass
+        # Fallback: local processing
+        print("Using local video processing...")
+        display_image = extract_first_frame(video)
+        # Create a simple state representation
+        original_image_state = json.dumps({
+            "video_path": video,
+            "frame": "local_processing"
+        })
+        # Default settings
+        grid_size_val, vo_points_val, fps_val = 50, 756, 3
+        return original_image_state, display_image, [], grid_size_val, vo_points_val, fps_val
     except Exception as e:
         print(f"Error in handle_video_upload: {e}")
         return None, []
     try:
+        if BACKEND_AVAILABLE and backend_api:
+            # Try to use backend API
+            try:
+                display_image_b64, new_sel_pix = backend_api.select_point_api(
+                    original_img, sel_pix, point_type, evt.index[0], evt.index[1]
+                )
+                display_image = base64_to_image(display_image_b64)
+                return display_image, new_sel_pix
+            except Exception as e:
+                print(f"Backend API call failed: {e}")
+                # Fallback to local processing
+                pass
+        # Fallback: local processing
+        print("Using local point selection...")
+        # Parse original image state
+        try:
+            state_data = json.loads(original_img)
+            video_path = state_data.get("video_path")
+        except:
+            video_path = None
+        if video_path:
+            # Re-extract frame and add point
+            display_image = extract_first_frame(video_path)
+            if display_image is not None:
+                # Add point to the image (simple visualization)
+                x, y = evt.index[0], evt.index[1]
+                color = (0, 255, 0) if point_type == 'positive_point' else (255, 0, 0)
+                cv2.circle(display_image, (x, y), 5, color, -1)
+                # Update selected points
+                new_sel_pix = sel_pix + [(x, y, point_type)]
+                return display_image, new_sel_pix
+        return None, sel_pix
     except Exception as e:
         print(f"Error in select_point: {e}")
         return None, []
     try:
+        if BACKEND_AVAILABLE and backend_api:
+            # Try to use backend API
+            try:
+                display_image_b64, new_sel_pix = backend_api.reset_points_api(original_img, sel_pix)
+                display_image = base64_to_image(display_image_b64)
+                return display_image, new_sel_pix
+            except Exception as e:
+                print(f"Backend API call failed: {e}")
+                # Fallback to local processing
+                pass
+        # Fallback: local processing
+        print("Using local point reset...")
+        # Parse original image state
+        try:
+            state_data = json.loads(original_img)
+            video_path = state_data.get("video_path")
+        except:
+            video_path = None
+        if video_path:
+            # Re-extract frame without points
+            display_image = extract_first_frame(video_path)
+            return display_image, []
+        return None, []
     except Exception as e:
         print(f"Error in reset_points: {e}")
         return None, None
     try:
+        if BACKEND_AVAILABLE and backend_api:
+            # Try to use backend API
+            try:
+                viz_iframe_html, track_video_path = backend_api.run_tracker_api(
+                    grid_size, vo_points, fps, original_image_state
+                )
+                return viz_iframe_html, track_video_path
+            except Exception as e:
+                print(f"Backend API call failed: {e}")
+                # Fallback to local processing
+                pass
+        # Fallback: show message that backend is required
+        error_message = """
+        <div style='border: 3px solid #ff6b6b; border-radius: 10px; padding: 20px; background-color: #fff5f5;'>
+            <h3 style='color: #d63031; margin-bottom: 15px;'>⚠️ Backend Connection Required</h3>
+            <p style='color: #2d3436; line-height: 1.6;'>
+                The tracking and visualization features require a connection to the backend Space.
+                Please ensure:
+            </p>
+            <ul style='color: #2d3436; line-height: 1.6;'>
+                <li>The backend Space is deployed and running</li>
+                <li>The BACKEND_SPACE_URL is correctly configured</li>
+                <li>You have proper access permissions to the backend Space</li>
+            </ul>
+            <p style='color: #2d3436; font-weight: bold; margin-top: 15px;'>
+                Current Status: Backend unavailable - Running in limited mode
+            </p>
+        </div>
+        """
+        return error_message, None
     except Exception as e:
         print(f"Error in launch_viz: {e}")
     return original_image_state, display_image, selected_points, grid_size_val, vo_points_val, fps_val
+# Initialize backend connection
+print("🔧 Initializing backend connection...")
+initialize_backend()
 # Build UI
 with gr.Blocks(css="""
     #advanced_settings .wrap {
     original_image_state = gr.State()  # Store original image in state
     with gr.Row():
+        # Show backend status
+        status_color = "#28a745" if BACKEND_AVAILABLE else "#dc3545"
+        status_text = "Connected" if BACKEND_AVAILABLE else "Disconnected"
+        status_icon = "✅" if BACKEND_AVAILABLE else "❌"
+        gr.Markdown(f"""
         # ✨ SpaTrackV2 Frontend (Client)
         <div style='background-color: #e6f3ff; padding: 20px; border-radius: 10px; margin: 10px 0;'>
         <h2 style='color: #0066cc; margin-bottom: 15px;'>Instructions:</h2>
             <li>⚡ Click 'Run Tracker and Visualize' when done</li>
             <li>🔍 Iterative 3D result will be shown in the visualization</li>
         </ol>
+        <div style='background-color: {status_color}20; border: 2px solid {status_color}; border-radius: 8px; padding: 10px; margin-top: 15px;'>
+            <p style='font-size: 18px; color: {status_color}; margin: 0;'>
+                {status_icon} Backend Status: {status_text}
+            </p>
+            <p style='font-size: 14px; color: #666; margin: 5px 0 0 0;'>
+                {BACKEND_SPACE_URL}
+            </p>
+        </div>
         </div>
         """)
                 examples_per_page=20  # Show all examples on one page to enable scrolling
             )
+            # Initialize with the template interface showing "Interactive 3D Tracking"
             viz_iframe = gr.HTML("""
+                                <div style='border: 3px solid #667eea; border-radius: 10px; overflow: hidden; box-shadow: 0 8px 32px rgba(102, 126, 234, 0.3);'>
+                                    <iframe id="viz_iframe" src="/gradio_api/file=_viz/viz_template.html" width="100%" height="950px" style="border:none;"></iframe>
                                 </div>
                                 """)