Steven18 committed on
Commit 856fb1f · 1 Parent(s): 4197679

fix image_to_3d with api output

Files changed (1)
  1. app.py +31 -26
app.py CHANGED
@@ -119,26 +119,30 @@ def image_to_3d(
     slat_sampling_steps: int,
     multiimage_algo: Literal["multidiffusion", "stochastic"],
     req: gr.Request,
-) -> Tuple[dict, str]:
+) -> Tuple[dict, dict, str]:
     """
-    Convert an image to a 3D model.
+    Convert an image (or multiple images) into a 3D model and return its state and video.
 
     Args:
-        image (Image.Image): The input image.
-        multiimages (List[Tuple[Image.Image, str]]): The input images in multi-image mode.
-        is_multiimage (bool): Whether is in multi-image mode.
-        seed (int): The random seed.
-        ss_guidance_strength (float): The guidance strength for sparse structure generation.
-        ss_sampling_steps (int): The number of sampling steps for sparse structure generation.
-        slat_guidance_strength (float): The guidance strength for structured latent generation.
-        slat_sampling_steps (int): The number of sampling steps for structured latent generation.
-        multiimage_algo (Literal["multidiffusion", "stochastic"]): The algorithm for multi-image generation.
+        image (Image.Image): The input image for single-image mode.
+        multiimages (List[Tuple[Image.Image, str]]): List of images with captions for multi-image mode.
+        is_multiimage (bool): Whether to use multi-image generation.
+        seed (int): Random seed for reproducibility.
+        ss_guidance_strength (float): Sparse structure guidance strength.
+        ss_sampling_steps (int): Sparse structure sampling steps.
+        slat_guidance_strength (float): SLAT guidance strength.
+        slat_sampling_steps (int): SLAT sampling steps.
+        multiimage_algo (str): Multi-image algorithm to use.
 
     Returns:
-        dict: The information of the generated 3D model.
-        str: The path to the video of the 3D model.
+        dict: Packed state (Gaussian + Mesh) for later usage (e.g., extract_glb).
+        dict: Gradio-compatible video dictionary {"video": ..., "subtitles": None}.
+        str: Path to raw video file (used by Gradio Client or download logic).
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    os.makedirs(user_dir, exist_ok=True)
+
+    # Run pipeline depending on mode
     if not is_multiimage:
         outputs = pipeline.run(
             image,
@@ -156,7 +160,7 @@ def image_to_3d(
         )
     else:
        outputs = pipeline.run_multi_image(
-            [image[0] for image in multiimages],
+            [img[0] for img in multiimages],
             seed=seed,
             formats=["gaussian", "mesh"],
             preprocess_image=False,
@@ -170,25 +174,21 @@ def image_to_3d(
             },
             mode=multiimage_algo,
         )
-    # video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
-    # video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
-    # video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
-    # video_path = os.path.join(user_dir, 'sample.mp4')
-    # imageio.mimsave(video_path, video, fps=15)
-    # state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
-    # torch.cuda.empty_cache()
-    # return state, video_path
+
+    # Render the 3D video combining color and geometry
     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
 
+    # Save the video
     video_path = os.path.join(user_dir, 'sample.mp4')
-    os.makedirs(os.path.dirname(video_path), exist_ok=True)
     imageio.mimsave(video_path, video, fps=15)
 
+    # Pack state for downstream use
     state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
     torch.cuda.empty_cache()
-    return state, video_path
+    return state, {"video": video_path, "subtitles": None}, video_path
+
 
 
 @spaces.GPU(duration=90)
@@ -324,6 +324,7 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
 
     is_multiimage = gr.State(False)
     output_buf = gr.State()
+    video_file_path = gr.Textbox(visible=False, label="Video Path")
 
     # Example images at the bottom of the page
     with gr.Row() as single_image_example:
@@ -378,8 +379,12 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
         outputs=[seed],
     ).then(
         image_to_3d,
-        inputs=[image_prompt, multiimage_prompt, is_multiimage, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps, multiimage_algo],
-        outputs=[output_buf, video_output],
+        inputs=[
+            image_prompt, multiimage_prompt, is_multiimage, seed,
+            ss_guidance_strength, ss_sampling_steps,
+            slat_guidance_strength, slat_sampling_steps, multiimage_algo
+        ],
+        outputs=[output_buf, video_output, video_file_path],  # multi output
     ).then(
         lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
         outputs=[extract_glb_btn, extract_gs_btn],
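
With the hidden video_file_path Textbox wired in as an extra output, API callers receive the raw .mp4 path as a plain string in addition to the gr.Video payload. The sketch below (not part of the commit) shows how a remote caller might consume the updated endpoint with the gradio_client package; the Space id, api_name, and parameter handling are assumptions and should be checked against client.view_api().

from gradio_client import Client, handle_file

# Hypothetical Space id; replace with the actual one.
client = Client("Steven18/your-space-id")
client.view_api()  # prints the real parameter names, order, and defaults

# Illustrative call; parameter names and which parameters are exposed must match
# what view_api() reports for this Space.
result = client.predict(
    image=handle_file("input.png"),
    api_name="/image_to_3d",
)

# With video_file_path added as an output, the raw .mp4 path is returned alongside
# the gr.Video payload, so callers can download the rendered video directly.
print(result)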