# Project EmbodiedGen
#
# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.
from dataclasses import dataclass, field
from typing import List, Optional, Union

from gsplat.strategy import DefaultStrategy, MCMCStrategy
from typing_extensions import Literal, assert_never
# Public API of this configuration module.
__all__ = ["Pano2MeshSRConfig", "GsplatTrainConfig"]
@dataclass
class Pano2MeshSRConfig:
    """Configuration for panorama-to-mesh super-resolution reconstruction.

    Groups the I/O paths, rendering parameters, and panorama/cubemap
    geometry used by the pano-to-mesh pipeline. Declared as a dataclass
    so individual settings can be overridden at construction time, e.g.
    ``Pano2MeshSRConfig(fov=60)``.
    """

    # Input mesh and Gaussian-splat data files.
    mesh_file: str = "mesh_model.ply"
    gs_data_file: str = "gs_data.pt"
    # Device used for rendering/processing.
    device: str = "cuda"
    # Rasterization settings (pytorch3d-style naming — blur radius and
    # number of faces blended per pixel).
    blur_radius: int = 0
    faces_per_pixel: int = 8
    # Field of view (degrees) for each cubemap face camera.
    fov: int = 90
    # Equirectangular panorama resolution (width x height).
    pano_w: int = 2048
    pano_h: int = 1024
    # Per-face cubemap resolution.
    cubemap_w: int = 512
    cubemap_h: int = 512
    # Global scale applied to camera poses.
    pose_scale: float = 0.6
    # (x, y) offset of the panorama capture center.
    pano_center_offset: tuple = (-0.2, 0.3)
    # Run inpainting every N frames along the trajectory.
    inpaint_frame_stride: int = 20
    # Directory holding the example camera trajectory.
    trajectory_dir: str = "apps/assets/example_scene/camera_trajectory"
    # Enable debug visualization output.
    visualize: bool = False
    # Scale factor applied to predicted depth values.
    # NOTE(review): 3.4092 appears to be an empirically-calibrated
    # constant — confirm against the depth model used upstream.
    depth_scale_factor: float = 3.4092
    # Kernel size (h, w) for smoothing/morphological operations.
    kernel_size: tuple = (9, 9)
    # Super-resolution upscale factor.
    upscale_factor: int = 4
@dataclass
class GsplatTrainConfig:
    """Training configuration for Gaussian-splat optimization (gsplat).

    Must be a dataclass: several attributes use
    ``field(default_factory=...)``, which only produces real default
    values (fresh lists / a fresh strategy instance per config) when the
    class is processed by ``@dataclass``. Without the decorator these
    attributes would be bare ``dataclasses.Field`` objects at runtime.
    """

    # Path to the .pt files. If provided, it will skip training and run evaluation only.
    ckpt: Optional[List[str]] = None
    # Render trajectory path
    render_traj_path: str = "interp"
    # Path to the Mip-NeRF 360 dataset
    data_dir: str = "outputs/bg"
    # Downsample factor for the dataset
    data_factor: int = 4
    # Directory to save results
    result_dir: str = "outputs/bg"
    # Every N images there is a test image
    test_every: int = 8
    # Random crop size for training (experimental)
    patch_size: Optional[int] = None
    # A global scaler that applies to the scene size related parameters
    global_scale: float = 1.0
    # Normalize the world space
    normalize_world_space: bool = True
    # Camera model
    camera_model: Literal["pinhole", "ortho", "fisheye"] = "pinhole"
    # Port for the viewer server
    port: int = 8080
    # Batch size for training. Learning rates are scaled automatically
    batch_size: int = 1
    # A global factor to scale the number of training steps
    steps_scaler: float = 1.0
    # Number of training steps
    max_steps: int = 30_000
    # Steps to evaluate the model
    eval_steps: List[int] = field(default_factory=lambda: [7_000, 30_000])
    # Steps to save the model
    save_steps: List[int] = field(default_factory=lambda: [7_000, 30_000])
    # Whether to save ply file (storage size can be large)
    save_ply: bool = True
    # Steps to save the model as ply
    ply_steps: List[int] = field(default_factory=lambda: [7_000, 30_000])
    # Whether to disable video generation during training and evaluation
    disable_video: bool = False
    # Initial number of GSs. Ignored if using sfm
    init_num_pts: int = 100_000
    # Initial extent of GSs as a multiple of the camera extent. Ignored if using sfm
    init_extent: float = 3.0
    # Degree of spherical harmonics
    sh_degree: int = 1
    # Turn on another SH degree every this steps
    sh_degree_interval: int = 1000
    # Initial opacity of GS
    init_opa: float = 0.1
    # Initial scale of GS
    init_scale: float = 1.0
    # Weight for SSIM loss
    ssim_lambda: float = 0.2
    # Near plane clipping distance
    near_plane: float = 0.01
    # Far plane clipping distance
    far_plane: float = 1e10
    # Strategy for GS densification
    strategy: Union[DefaultStrategy, MCMCStrategy] = field(
        default_factory=DefaultStrategy
    )
    # Use packed mode for rasterization, this leads to less memory usage but slightly slower.
    packed: bool = False
    # Use sparse gradients for optimization. (experimental)
    sparse_grad: bool = False
    # Use visible adam from Taming 3DGS. (experimental)
    visible_adam: bool = False
    # Anti-aliasing in rasterization. Might slightly hurt quantitative metrics.
    antialiased: bool = False
    # Use random background for training to discourage transparency
    random_bkgd: bool = False
    # LR for 3D point positions
    means_lr: float = 1.6e-4
    # LR for Gaussian scale factors
    scales_lr: float = 5e-3
    # LR for alpha blending weights
    opacities_lr: float = 5e-2
    # LR for orientation (quaternions)
    quats_lr: float = 1e-3
    # LR for SH band 0 (brightness)
    sh0_lr: float = 2.5e-3
    # LR for higher-order SH (detail)
    shN_lr: float = 2.5e-3 / 20
    # Opacity regularization
    opacity_reg: float = 0.0
    # Scale regularization
    scale_reg: float = 0.0
    # Enable depth loss. (experimental)
    depth_loss: bool = False
    # Weight for depth loss
    depth_lambda: float = 1e-2
    # Dump information to tensorboard every this steps
    tb_every: int = 200
    # Save training images to tensorboard
    tb_save_image: bool = False
    # Backbone network for the LPIPS perceptual metric
    lpips_net: Literal["vgg", "alex"] = "alex"
    # 3DGUT (unscented transform + eval 3D)
    with_ut: bool = False
    with_eval3d: bool = False
    # Overall scene scale (set after dataset normalization)
    scene_scale: float = 1.0

    def adjust_steps(self, factor: float):
        """Rescale every step-based schedule by ``factor``.

        Keeps evaluation, checkpointing, ply export, SH-degree unlocking,
        and the densification strategy's refine/reset cadence proportional
        when the total number of training steps changes
        (e.g. via ``steps_scaler``). Mutates ``self`` and ``self.strategy``
        in place.
        """
        self.eval_steps = [int(i * factor) for i in self.eval_steps]
        self.save_steps = [int(i * factor) for i in self.save_steps]
        self.ply_steps = [int(i * factor) for i in self.ply_steps]
        self.max_steps = int(self.max_steps * factor)
        self.sh_degree_interval = int(self.sh_degree_interval * factor)

        strategy = self.strategy
        if isinstance(strategy, DefaultStrategy):
            strategy.refine_start_iter = int(
                strategy.refine_start_iter * factor
            )
            strategy.refine_stop_iter = int(strategy.refine_stop_iter * factor)
            # Only DefaultStrategy periodically resets opacities.
            strategy.reset_every = int(strategy.reset_every * factor)
            strategy.refine_every = int(strategy.refine_every * factor)
        elif isinstance(strategy, MCMCStrategy):
            strategy.refine_start_iter = int(
                strategy.refine_start_iter * factor
            )
            strategy.refine_stop_iter = int(strategy.refine_stop_iter * factor)
            strategy.refine_every = int(strategy.refine_every * factor)
        else:
            # Exhaustiveness check: fails loudly if a new strategy type
            # is added to the Union without handling here.
            assert_never(strategy)