Hunyuan3D-2.1

Running on Zero

App Files Files Community

asimfayaz committed 4 days ago

Commit

a4dff59

1 Parent(s): 4f7fdcd

Fixed tensor dimension issues for multi-view processing -- again

Browse files

Files changed (1) hide show

hy3dshape/hy3dshape/pipelines.py +35 -4

hy3dshape/hy3dshape/pipelines.py CHANGED Viewed

@@ -500,10 +500,41 @@ class Hunyuan3DDiTPipeline:
         # Handle dictionary input (multi-view mode)
         if isinstance(image, dict):
-            # Use the multi-view image processor for dictionaries
-            from .preprocessors import MVImageProcessorV2
-            mv_processor = MVImageProcessorV2(size=self.image_processor.size)
-            return mv_processor(image)
         if not isinstance(image, list):
             image = [image]

         # Handle dictionary input (multi-view mode)
         if isinstance(image, dict):
+            # Process each view individually with the single-image processor
+            # and then combine them appropriately
+            processed_views = []
+            view_order = []
+            # Define the standard view order
+            view_mapping = {'front': 0, 'left': 1, 'back': 2, 'right': 3}
+            # Sort views by their standard order
+            sorted_views = sorted(image.items(), key=lambda x: view_mapping.get(x[0], 999))
+            for view_name, view_image in sorted_views:
+                # Process each view individually
+                view_output = self.image_processor(view_image)
+                processed_views.append(view_output)
+                view_order.append(view_mapping.get(view_name, 0))
+            # Combine all views into a single batch
+            # Each view_output has shape [1, 3, H, W], we want to concatenate along batch dimension
+            combined_images = []
+            combined_masks = []
+            for view_output in processed_views:
+                combined_images.append(view_output['image'])
+                combined_masks.append(view_output['mask'])
+            # Concatenate along batch dimension: [num_views, 3, H, W]
+            final_image = torch.cat(combined_images, dim=0)
+            final_mask = torch.cat(combined_masks, dim=0)
+            return {
+                'image': final_image,
+                'mask': final_mask,
+                'view_idxs': view_order
+            }
         if not isinstance(image, list):
             image = [image]