mlactimel committed
Commit 31ca7a5 · verified · 1 parent: 714f312

Upload folder using huggingface_hub
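The commit message indicates this upload was made with the `huggingface_hub` library's `upload_folder` API. A minimal sketch of how such a commit is typically produced (the repo ID and paths here are placeholders, not taken from this page):

```python
from huggingface_hub import HfApi

api = HfApi()
# Upload the local working tree to a Space in a single commit.
api.upload_folder(
    folder_path=".",                 # local folder to upload (placeholder)
    repo_id="mlactimel/tr",          # placeholder target repo
    repo_type="space",               # this commit targets a Gradio Space
    commit_message="Upload folder using huggingface_hub",
)
```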

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitattributes +67 -0
  2. .github/workflows/codeql.yml +100 -0
  3. .gitignore +398 -0
  4. .gitmodules +3 -0
  5. CODE_OF_CONDUCT.md +9 -0
  6. DATASET.md +231 -0
  7. LICENSE +21 -0
  8. README.md +327 -8
  9. SECURITY.md +41 -0
  10. SUPPORT.md +25 -0
  11. app.py +405 -0
  12. app1.py +405 -0
  13. app_text.py +266 -0
  14. assets/T.ply +3 -0
  15. assets/example_image/T.png +3 -0
  16. assets/example_image/typical_building_building.png +3 -0
  17. assets/example_image/typical_building_castle.png +3 -0
  18. assets/example_image/typical_building_colorful_cottage.png +3 -0
  19. assets/example_image/typical_building_maya_pyramid.png +3 -0
  20. assets/example_image/typical_building_mushroom.png +3 -0
  21. assets/example_image/typical_building_space_station.png +3 -0
  22. assets/example_image/typical_creature_dragon.png +3 -0
  23. assets/example_image/typical_creature_elephant.png +3 -0
  24. assets/example_image/typical_creature_furry.png +3 -0
  25. assets/example_image/typical_creature_quadruped.png +3 -0
  26. assets/example_image/typical_creature_robot_crab.png +3 -0
  27. assets/example_image/typical_creature_robot_dinosour.png +3 -0
  28. assets/example_image/typical_creature_rock_monster.png +3 -0
  29. assets/example_image/typical_humanoid_block_robot.png +3 -0
  30. assets/example_image/typical_humanoid_dragonborn.png +3 -0
  31. assets/example_image/typical_humanoid_dwarf.png +3 -0
  32. assets/example_image/typical_humanoid_goblin.png +3 -0
  33. assets/example_image/typical_humanoid_mech.png +3 -0
  34. assets/example_image/typical_misc_crate.png +3 -0
  35. assets/example_image/typical_misc_fireplace.png +3 -0
  36. assets/example_image/typical_misc_gate.png +3 -0
  37. assets/example_image/typical_misc_lantern.png +3 -0
  38. assets/example_image/typical_misc_magicbook.png +3 -0
  39. assets/example_image/typical_misc_mailbox.png +3 -0
  40. assets/example_image/typical_misc_monster_chest.png +3 -0
  41. assets/example_image/typical_misc_paper_machine.png +3 -0
  42. assets/example_image/typical_misc_phonograph.png +3 -0
  43. assets/example_image/typical_misc_portal2.png +3 -0
  44. assets/example_image/typical_misc_storage_chest.png +3 -0
  45. assets/example_image/typical_misc_telephone.png +3 -0
  46. assets/example_image/typical_misc_television.png +3 -0
  47. assets/example_image/typical_misc_workbench.png +3 -0
  48. assets/example_image/typical_vehicle_biplane.png +3 -0
  49. assets/example_image/typical_vehicle_bulldozer.png +3 -0
  50. assets/example_image/typical_vehicle_cart.png +3 -0
.gitattributes CHANGED
@@ -33,3 +33,70 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ assets/T.ply filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/T.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_building_building.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_building_castle.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_building_colorful_cottage.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_building_maya_pyramid.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_building_mushroom.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_building_space_station.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_creature_dragon.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_creature_elephant.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_creature_furry.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_creature_quadruped.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_creature_robot_crab.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_creature_robot_dinosour.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_creature_rock_monster.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_humanoid_block_robot.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_humanoid_dragonborn.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_humanoid_dwarf.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_humanoid_goblin.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_humanoid_mech.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_crate.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_fireplace.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_gate.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_lantern.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_magicbook.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_mailbox.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_monster_chest.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_paper_machine.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_phonograph.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_portal2.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_storage_chest.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_telephone.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_television.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_misc_workbench.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_vehicle_biplane.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_vehicle_bulldozer.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_vehicle_cart.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_vehicle_excavator.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_vehicle_helicopter.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_vehicle_locomotive.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/typical_vehicle_pirate_ship.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_image/weatherworn_misc_paper_machine3.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/character_1.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/character_2.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/character_3.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/mushroom_1.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/mushroom_2.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/mushroom_3.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/orangeguy_1.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/orangeguy_2.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/orangeguy_3.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/popmart_1.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/popmart_2.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/popmart_3.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/rabbit_1.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/rabbit_2.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/rabbit_3.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/tiger_1.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/tiger_2.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/tiger_3.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/yoimiya_1.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/yoimiya_2.png filter=lfs diff=lfs merge=lfs -text
+ assets/example_multi_image/yoimiya_3.png filter=lfs diff=lfs merge=lfs -text
+ assets/logo.webp filter=lfs diff=lfs merge=lfs -text
+ assets/teaser.png filter=lfs diff=lfs merge=lfs -text
+ trellis/representations/mesh/flexicubes/images/block_init.png filter=lfs diff=lfs merge=lfs -text
+ trellis/representations/mesh/flexicubes/images/teaser_top.png filter=lfs diff=lfs merge=lfs -text
.github/workflows/codeql.yml ADDED
@@ -0,0 +1,100 @@
+ # For most projects, this workflow file will not need changing; you simply need
+ # to commit it to your repository.
+ #
+ # You may wish to alter this file to override the set of languages analyzed,
+ # or to provide custom queries or build logic.
+ #
+ # ******** NOTE ********
+ # We have attempted to detect the languages in your repository. Please check
+ # the `language` matrix defined below to confirm you have the correct set of
+ # supported CodeQL languages.
+ #
+ name: "CodeQL Advanced"
+
+ on:
+   push:
+     branches: [ "main" ]
+   pull_request:
+     branches: [ "main" ]
+   schedule:
+     - cron: '31 15 * * 6'
+
+ jobs:
+   analyze:
+     name: Analyze (${{ matrix.language }})
+     # Runner size impacts CodeQL analysis time. To learn more, please see:
+     #   - https://gh.io/recommended-hardware-resources-for-running-codeql
+     #   - https://gh.io/supported-runners-and-hardware-resources
+     #   - https://gh.io/using-larger-runners (GitHub.com only)
+     # Consider using larger runners or machines with greater resources for possible analysis time improvements.
+     runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
+     permissions:
+       # required for all workflows
+       security-events: write
+
+       # required to fetch internal or private CodeQL packs
+       packages: read
+
+       # only required for workflows in private repositories
+       actions: read
+       contents: read
+
+     strategy:
+       fail-fast: false
+       matrix:
+         include:
+         - language: c-cpp
+           build-mode: none
+         - language: python
+           build-mode: autobuild
+         # CodeQL supports the following values for 'language': 'actions', 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift'
+         # Use `c-cpp` to analyze code written in C, C++ or both
+         # Use 'java-kotlin' to analyze code written in Java, Kotlin or both
+         # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
+         # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
+         # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
+         # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
+         # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
+     steps:
+     - name: Checkout repository
+       uses: actions/checkout@v4
+
+     # Add any setup steps before running the `github/codeql-action/init` action.
+     # This includes steps like installing compilers or runtimes (`actions/setup-node`
+     # or others). This is typically only required for manual builds.
+     # - name: Setup runtime (example)
+     #   uses: actions/setup-example@v1
+
+     # Initializes the CodeQL tools for scanning.
+     - name: Initialize CodeQL
+       uses: github/codeql-action/init@v3
+       with:
+         languages: ${{ matrix.language }}
+         build-mode: ${{ matrix.build-mode }}
+         # If you wish to specify custom queries, you can do so here or in a config file.
+         # By default, queries listed here will override any specified in a config file.
+         # Prefix the list here with "+" to use these queries and those in the config file.
+
+         # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+         # queries: security-extended,security-and-quality
+
+     # If the analyze step fails for one of the languages you are analyzing with
+     # "We were unable to automatically build your code", modify the matrix above
+     # to set the build mode to "manual" for that language. Then modify this step
+     # to build your code.
+     # ℹ️ Command-line programs to run using the OS shell.
+     # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
+     - if: matrix.build-mode == 'manual'
+       shell: bash
+       run: |
+         echo 'If you are using a "manual" build mode for one or more of the' \
+           'languages you are analyzing, replace this with the commands to build' \
+           'your code, for example:'
+         echo '  make bootstrap'
+         echo '  make release'
+         exit 1
+
+     - name: Perform CodeQL Analysis
+       uses: github/codeql-action/analyze@v3
+       with:
+         category: "/language:${{matrix.language}}"
.gitignore ADDED
@@ -0,0 +1,398 @@
+ ## Ignore Visual Studio temporary files, build results, and
+ ## files generated by popular Visual Studio add-ons.
+ ##
+ ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
+
+ # User-specific files
+ *.rsuser
+ *.suo
+ *.user
+ *.userosscache
+ *.sln.docstates
+
+ # User-specific files (MonoDevelop/Xamarin Studio)
+ *.userprefs
+
+ # Mono auto generated files
+ mono_crash.*
+
+ # Build results
+ [Dd]ebug/
+ [Dd]ebugPublic/
+ [Rr]elease/
+ [Rr]eleases/
+ x64/
+ x86/
+ [Ww][Ii][Nn]32/
+ [Aa][Rr][Mm]/
+ [Aa][Rr][Mm]64/
+ bld/
+ [Bb]in/
+ [Oo]bj/
+ [Ll]og/
+ [Ll]ogs/
+
+ # Visual Studio 2015/2017 cache/options directory
+ .vs/
+ # Uncomment if you have tasks that create the project's static files in wwwroot
+ #wwwroot/
+
+ # Visual Studio 2017 auto generated files
+ Generated\ Files/
+
+ # MSTest test Results
+ [Tt]est[Rr]esult*/
+ [Bb]uild[Ll]og.*
+
+ # NUnit
+ *.VisualState.xml
+ TestResult.xml
+ nunit-*.xml
+
+ # Build Results of an ATL Project
+ [Dd]ebugPS/
+ [Rr]eleasePS/
+ dlldata.c
+
+ # Benchmark Results
+ BenchmarkDotNet.Artifacts/
+
+ # .NET Core
+ project.lock.json
+ project.fragment.lock.json
+ artifacts/
+
+ # ASP.NET Scaffolding
+ ScaffoldingReadMe.txt
+
+ # StyleCop
+ StyleCopReport.xml
+
+ # Files built by Visual Studio
+ *_i.c
+ *_p.c
+ *_h.h
+ *.ilk
+ *.meta
+ *.obj
+ *.iobj
+ *.pch
+ *.pdb
+ *.ipdb
+ *.pgc
+ *.pgd
+ *.rsp
+ *.sbr
+ *.tlb
+ *.tli
+ *.tlh
+ *.tmp
+ *.tmp_proj
+ *_wpftmp.csproj
+ *.log
+ *.tlog
+ *.vspscc
+ *.vssscc
+ .builds
+ *.pidb
+ *.svclog
+ *.scc
+
+ # Chutzpah Test files
+ _Chutzpah*
+
+ # Visual C++ cache files
+ ipch/
+ *.aps
+ *.ncb
+ *.opendb
+ *.opensdf
+ *.sdf
+ *.cachefile
+ *.VC.db
+ *.VC.VC.opendb
+
+ # Visual Studio profiler
+ *.psess
+ *.vsp
+ *.vspx
+ *.sap
+
+ # Visual Studio Trace Files
+ *.e2e
+
+ # TFS 2012 Local Workspace
+ $tf/
+
+ # Guidance Automation Toolkit
+ *.gpState
+
+ # ReSharper is a .NET coding add-in
+ _ReSharper*/
+ *.[Rr]e[Ss]harper
+ *.DotSettings.user
+
+ # TeamCity is a build add-in
+ _TeamCity*
+
+ # DotCover is a Code Coverage Tool
+ *.dotCover
+
+ # AxoCover is a Code Coverage Tool
+ .axoCover/*
+ !.axoCover/settings.json
+
+ # Coverlet is a free, cross platform Code Coverage Tool
+ coverage*.json
+ coverage*.xml
+ coverage*.info
+
+ # Visual Studio code coverage results
+ *.coverage
+ *.coveragexml
+
+ # NCrunch
+ _NCrunch_*
+ .*crunch*.local.xml
+ nCrunchTemp_*
+
+ # MightyMoose
+ *.mm.*
+ AutoTest.Net/
+
+ # Web workbench (sass)
+ .sass-cache/
+
+ # Installshield output folder
+ [Ee]xpress/
+
+ # DocProject is a documentation generator add-in
+ DocProject/buildhelp/
+ DocProject/Help/*.HxT
+ DocProject/Help/*.HxC
+ DocProject/Help/*.hhc
+ DocProject/Help/*.hhk
+ DocProject/Help/*.hhp
+ DocProject/Help/Html2
+ DocProject/Help/html
+
+ # Click-Once directory
+ publish/
+
+ # Publish Web Output
+ *.[Pp]ublish.xml
+ *.azurePubxml
+ # Note: Comment the next line if you want to checkin your web deploy settings,
+ # but database connection strings (with potential passwords) will be unencrypted
+ *.pubxml
+ *.publishproj
+
+ # Microsoft Azure Web App publish settings. Comment the next line if you want to
+ # checkin your Azure Web App publish settings, but sensitive information contained
+ # in these scripts will be unencrypted
+ PublishScripts/
+
+ # NuGet Packages
+ *.nupkg
+ # NuGet Symbol Packages
+ *.snupkg
+ # The packages folder can be ignored because of Package Restore
+ **/[Pp]ackages/*
+ # except build/, which is used as an MSBuild target.
+ !**/[Pp]ackages/build/
+ # Uncomment if necessary however generally it will be regenerated when needed
+ #!**/[Pp]ackages/repositories.config
+ # NuGet v3's project.json files produces more ignorable files
+ *.nuget.props
+ *.nuget.targets
+
+ # Microsoft Azure Build Output
+ csx/
+ *.build.csdef
+
+ # Microsoft Azure Emulator
+ ecf/
+ rcf/
+
+ # Windows Store app package directories and files
+ AppPackages/
+ BundleArtifacts/
+ Package.StoreAssociation.xml
+ _pkginfo.txt
+ *.appx
+ *.appxbundle
+ *.appxupload
+
+ # Visual Studio cache files
+ # files ending in .cache can be ignored
+ *.[Cc]ache
+ # but keep track of directories ending in .cache
+ !?*.[Cc]ache/
+
+ # Others
+ ClientBin/
+ ~$*
+ *~
+ *.dbmdl
+ *.dbproj.schemaview
+ *.jfm
+ *.pfx
+ *.publishsettings
+ orleans.codegen.cs
+
+ # Including strong name files can present a security risk
+ # (https://github.com/github/gitignore/pull/2483#issue-259490424)
+ #*.snk
+
+ # Since there are multiple workflows, uncomment next line to ignore bower_components
+ # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
+ #bower_components/
+
+ # RIA/Silverlight projects
+ Generated_Code/
+
+ # Backup & report files from converting an old project file
+ # to a newer Visual Studio version. Backup files are not needed,
+ # because we have git ;-)
+ _UpgradeReport_Files/
+ Backup*/
+ UpgradeLog*.XML
+ UpgradeLog*.htm
+ ServiceFabricBackup/
+ *.rptproj.bak
+
+ # SQL Server files
+ *.mdf
+ *.ldf
+ *.ndf
+
+ # Business Intelligence projects
+ *.rdl.data
+ *.bim.layout
+ *.bim_*.settings
+ *.rptproj.rsuser
+ *- [Bb]ackup.rdl
+ *- [Bb]ackup ([0-9]).rdl
+ *- [Bb]ackup ([0-9][0-9]).rdl
+
+ # Microsoft Fakes
+ FakesAssemblies/
+
+ # GhostDoc plugin setting file
+ *.GhostDoc.xml
+
+ # Node.js Tools for Visual Studio
+ .ntvs_analysis.dat
+ node_modules/
+
+ # Visual Studio 6 build log
+ *.plg
+
+ # Visual Studio 6 workspace options file
+ *.opt
+
+ # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
+ *.vbw
+
+ # Visual Studio 6 auto-generated project file (contains which files were open etc.)
+ *.vbp
+
+ # Visual Studio 6 workspace and project file (working project files containing files to include in project)
+ *.dsw
+ *.dsp
+
+ # Visual Studio 6 technical files
+ *.ncb
+ *.aps
+
+ # Visual Studio LightSwitch build output
+ **/*.HTMLClient/GeneratedArtifacts
+ **/*.DesktopClient/GeneratedArtifacts
+ **/*.DesktopClient/ModelManifest.xml
+ **/*.Server/GeneratedArtifacts
+ **/*.Server/ModelManifest.xml
+ _Pvt_Extensions
+
+ # Paket dependency manager
+ .paket/paket.exe
+ paket-files/
+
+ # FAKE - F# Make
+ .fake/
+
+ # CodeRush personal settings
+ .cr/personal
+
+ # Python Tools for Visual Studio (PTVS)
+ __pycache__/
+ *.pyc
+
+ # Cake - Uncomment if you are using it
+ # tools/**
+ # !tools/packages.config
+
+ # Tabs Studio
+ *.tss
+
+ # Telerik's JustMock configuration file
+ *.jmconfig
+
+ # BizTalk build output
+ *.btp.cs
+ *.btm.cs
+ *.odx.cs
+ *.xsd.cs
+
+ # OpenCover UI analysis results
+ OpenCover/
+
+ # Azure Stream Analytics local run output
+ ASALocalRun/
+
+ # MSBuild Binary and Structured Log
+ *.binlog
+
+ # NVidia Nsight GPU debugger configuration file
+ *.nvuser
+
+ # MFractors (Xamarin productivity tool) working folder
+ .mfractor/
+
+ # Local History for Visual Studio
+ .localhistory/
+
+ # Visual Studio History (VSHistory) files
+ .vshistory/
+
+ # BeatPulse healthcheck temp database
+ healthchecksdb
+
+ # Backup folder for Package Reference Convert tool in Visual Studio 2017
+ MigrationBackup/
+
+ # Ionide (cross platform F# VS Code tools) working folder
+ .ionide/
+
+ # Fody - auto-generated XML schema
+ FodyWeavers.xsd
+
+ # VS Code files for those working on multiple tools
+ .vscode/*
+ !.vscode/settings.json
+ !.vscode/tasks.json
+ !.vscode/launch.json
+ !.vscode/extensions.json
+ *.code-workspace
+
+ # Local History for Visual Studio Code
+ .history/
+
+ # Windows Installer files from build outputs
+ *.cab
+ *.msi
+ *.msix
+ *.msm
+ *.msp
+
+ # JetBrains Rider
+ *.sln.iml
.gitmodules ADDED
@@ -0,0 +1,3 @@
+ [submodule "trellis/representations/mesh/flexicubes"]
+ 	path = trellis/representations/mesh/flexicubes
+ 	url = https://github.com/MaxtirError/FlexiCubes.git
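If the repository was cloned without its submodules, the FlexiCubes module registered above can be fetched afterwards with the standard Git command:

```sh
# Fetch and check out all registered submodules (including nested ones).
git submodule update --init --recursive
```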
CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,9 @@
+ # Microsoft Open Source Code of Conduct
+
+ This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
+
+ Resources:
+
+ - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
+ - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
+ - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns
DATASET.md ADDED
@@ -0,0 +1,231 @@
+ # TRELLIS-500K
+
+ TRELLIS-500K is a dataset of 500K 3D assets curated from [Objaverse(XL)](https://objaverse.allenai.org/), [ABO](https://amazon-berkeley-objects.s3.amazonaws.com/index.html), [3D-FUTURE](https://tianchi.aliyun.com/specials/promotion/alibaba-3d-future), [HSSD](https://huggingface.co/datasets/hssd/hssd-models), and [Toys4k](https://github.com/rehg-lab/lowshot-shapebias/tree/main/toys4k), filtered based on aesthetic scores.
+ This dataset is intended for 3D generation tasks.
+
+ The dataset is provided as CSV files containing the 3D assets' metadata.
+
+ ## Dataset Statistics
+
+ The following table summarizes the dataset's filtering and composition:
+
+ ***NOTE: Some of the 3D assets lack text captions. Please filter out such assets if captions are required.***
+ | Source | Aesthetic Score Threshold | Filtered Size | With Captions |
+ |:-:|:-:|:-:|:-:|
+ | ObjaverseXL (sketchfab) | 5.5 | 168307 | 167638 |
+ | ObjaverseXL (github) | 5.5 | 311843 | 306790 |
+ | ABO | 4.5 | 4485 | 4390 |
+ | 3D-FUTURE | 4.5 | 9472 | 9291 |
+ | HSSD | 4.5 | 6670 | 6661 |
+ | All (training set) | - | 500777 | 494770 |
+ | Toys4k (evaluation set) | 4.5 | 3229 | 3180 |
+
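Per the note above, caption-dependent tasks need the uncaptioned assets filtered out. A minimal sketch of such a filter with pandas, assuming the downloaded metadata CSV exposes a `captions` column (path and column name are assumptions; the actual schema may differ):

```python
import pandas as pd

# Path and column name are illustrative assumptions.
metadata = pd.read_csv("datasets/ObjaverseXL_sketchfab/metadata.csv")

# Keep only the assets that actually carry a text caption.
captioned = metadata[metadata["captions"].notna()]
print(f"{len(captioned)} / {len(metadata)} assets have captions")
```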
+ ## Dataset Location
+
+ The dataset is hosted on Hugging Face Datasets. You can preview the dataset at
+
+ [https://huggingface.co/datasets/JeffreyXiang/TRELLIS-500K](https://huggingface.co/datasets/JeffreyXiang/TRELLIS-500K)
+
+ There is no need to download the CSV files manually. We provide toolkits to load and prepare the dataset.
+
+ ## Dataset Toolkits
+
+ We provide [toolkits](dataset_toolkits) for data preparation.
+
+ ### Step 1: Install Dependencies
+
+ ```
+ . ./dataset_toolkits/setup.sh
+ ```
+
+ ### Step 2: Load Metadata
+
+ First, we need to load the metadata of the dataset.
+
+ ```
+ python dataset_toolkits/build_metadata.py <SUBSET> --output_dir <OUTPUT_DIR> [--source <SOURCE>]
+ ```
+
+ - `SUBSET`: The subset of the dataset to load. Options are `ObjaverseXL`, `ABO`, `3D-FUTURE`, `HSSD`, and `Toys4k`.
+ - `OUTPUT_DIR`: The directory to save the data.
+ - `SOURCE`: Required if `SUBSET` is `ObjaverseXL`. Options are `sketchfab` and `github`.
+
+ For example, to load the metadata of the ObjaverseXL (sketchfab) subset and save it to `datasets/ObjaverseXL_sketchfab`, we can run:
+
+ ```
+ python dataset_toolkits/build_metadata.py ObjaverseXL --source sketchfab --output_dir datasets/ObjaverseXL_sketchfab
+ ```
+
+ ### Step 3: Download Data
+
+ Next, we need to download the 3D assets.
+
+ ```
+ python dataset_toolkits/download.py <SUBSET> --output_dir <OUTPUT_DIR> [--rank <RANK> --world_size <WORLD_SIZE>]
+ ```
+
+ - `SUBSET`: The subset of the dataset to download. Options are `ObjaverseXL`, `ABO`, `3D-FUTURE`, `HSSD`, and `Toys4k`.
+ - `OUTPUT_DIR`: The directory to save the data.
+
+ You can also specify the `RANK` and `WORLD_SIZE` of the current process if you are using multiple nodes for data preparation; a sketch of a sharded launch follows below.
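For instance, the same flags can shard the download across several worker processes on one machine (four shards here, purely illustrative):

```sh
# Each process downloads the 1/4 of the asset list selected by its rank.
for RANK in 0 1 2 3; do
    python dataset_toolkits/download.py ObjaverseXL \
        --output_dir datasets/ObjaverseXL_sketchfab \
        --rank $RANK --world_size 4 &
done
wait
```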
+
+ For example, to download the ObjaverseXL (sketchfab) subset and save it to `datasets/ObjaverseXL_sketchfab`, we can run:
+
+ ***NOTE: The example command below sets a large `WORLD_SIZE` for demonstration purposes. Only a small portion of the dataset will be downloaded.***
+
+ ```
+ python dataset_toolkits/download.py ObjaverseXL --output_dir datasets/ObjaverseXL_sketchfab --world_size 160000
+ ```
+
+ Some datasets may require interactive login to Hugging Face or manual downloading. Please follow the instructions given by the toolkits.
+
+ After downloading, update the metadata file with:
+
+ ```
+ python dataset_toolkits/build_metadata.py ObjaverseXL --output_dir datasets/ObjaverseXL_sketchfab
+ ```
+
+ ### Step 4: Render Multiview Images
+
+ Multiview images can be rendered with:
+
+ ```
+ python dataset_toolkits/render.py <SUBSET> --output_dir <OUTPUT_DIR> [--num_views <NUM_VIEWS>] [--rank <RANK> --world_size <WORLD_SIZE>]
+ ```
+
+ - `SUBSET`: The subset of the dataset to render. Options are `ObjaverseXL`, `ABO`, `3D-FUTURE`, `HSSD`, and `Toys4k`.
+ - `OUTPUT_DIR`: The directory to save the data.
+ - `NUM_VIEWS`: The number of views to render. Default is 150.
+ - `RANK` and `WORLD_SIZE`: Multi-node configuration.
+
+ For example, to render the ObjaverseXL (sketchfab) subset and save it to `datasets/ObjaverseXL_sketchfab`, we can run:
+
+ ```
+ python dataset_toolkits/render.py ObjaverseXL --output_dir datasets/ObjaverseXL_sketchfab
+ ```
+
+ Don't forget to update the metadata file with:
+
+ ```
+ python dataset_toolkits/build_metadata.py ObjaverseXL --output_dir datasets/ObjaverseXL_sketchfab
+ ```
+
+ ### Step 5: Voxelize 3D Models
+
+ We can voxelize the 3D models with:
+
+ ```
+ python dataset_toolkits/voxelize.py <SUBSET> --output_dir <OUTPUT_DIR> [--rank <RANK> --world_size <WORLD_SIZE>]
+ ```
+
+ - `SUBSET`: The subset of the dataset to voxelize. Options are `ObjaverseXL`, `ABO`, `3D-FUTURE`, `HSSD`, and `Toys4k`.
+ - `OUTPUT_DIR`: The directory to save the data.
+ - `RANK` and `WORLD_SIZE`: Multi-node configuration.
+
+ For example, to voxelize the ObjaverseXL (sketchfab) subset and save it to `datasets/ObjaverseXL_sketchfab`, we can run:
+
+ ```
+ python dataset_toolkits/voxelize.py ObjaverseXL --output_dir datasets/ObjaverseXL_sketchfab
+ ```
+
+ Then update the metadata file with:
+
+ ```
+ python dataset_toolkits/build_metadata.py ObjaverseXL --output_dir datasets/ObjaverseXL_sketchfab
+ ```
+
+ ### Step 6: Extract DINO Features
+
+ To prepare the training data for the SLat VAE, we need to extract DINO features from multiview images and aggregate them into sparse voxel grids.
+
+ ```
+ python dataset_toolkits/extract_features.py --output_dir <OUTPUT_DIR> [--rank <RANK> --world_size <WORLD_SIZE>]
+ ```
+
+ - `OUTPUT_DIR`: The directory to save the data.
+ - `RANK` and `WORLD_SIZE`: Multi-node configuration.
+
+ For example, to extract DINO features from the ObjaverseXL (sketchfab) subset and save them to `datasets/ObjaverseXL_sketchfab`, we can run:
+
+ ```
+ python dataset_toolkits/extract_features.py --output_dir datasets/ObjaverseXL_sketchfab
+ ```
+
+ Then update the metadata file with:
+
+ ```
+ python dataset_toolkits/build_metadata.py ObjaverseXL --output_dir datasets/ObjaverseXL_sketchfab
+ ```
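Conceptually, this aggregation projects each active voxel into every rendered view, samples the DINO patch features at the projected locations, and averages them per voxel. The sketch below illustrates that idea only; all names and shapes are assumptions, not the toolkit's actual API:

```python
import torch
import torch.nn.functional as F

def aggregate_features(feat_maps, uv, mask):
    """Average multiview DINO features into per-voxel features (illustrative).

    feat_maps: (V, C, H, W) DINO feature maps for V views.
    uv:        (V, N, 2) projected voxel centers in [-1, 1] image coordinates.
    mask:      (V, N) visibility of each voxel in each view.
    Returns:   (N, C) averaged feature per voxel.
    """
    # Sample features at the projected voxel locations for every view.
    sampled = F.grid_sample(
        feat_maps, uv.unsqueeze(2), align_corners=False
    ).squeeze(-1).permute(0, 2, 1)            # (V, N, C)
    weights = mask.unsqueeze(-1).float()      # (V, N, 1)
    # Visibility-weighted mean over views (eps avoids division by zero).
    return (sampled * weights).sum(0) / weights.sum(0).clamp(min=1e-6)
```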
+
+ ### Step 7: Encode Sparse Structures
+
+ Encode the sparse structures into latents to train the first-stage generator:
+
+ ```
+ python dataset_toolkits/encode_ss_latent.py --output_dir <OUTPUT_DIR> [--rank <RANK> --world_size <WORLD_SIZE>]
+ ```
+
+ - `OUTPUT_DIR`: The directory to save the data.
+ - `RANK` and `WORLD_SIZE`: Multi-node configuration.
+
+ For example, to encode the sparse structures into latents for the ObjaverseXL (sketchfab) subset and save them to `datasets/ObjaverseXL_sketchfab`, we can run:
+
+ ```
+ python dataset_toolkits/encode_ss_latent.py --output_dir datasets/ObjaverseXL_sketchfab
+ ```
+
+ Then update the metadata file with:
+
+ ```
+ python dataset_toolkits/build_metadata.py ObjaverseXL --output_dir datasets/ObjaverseXL_sketchfab
+ ```
+
+ ### Step 8: Encode SLat
+
+ Encode SLat for second-stage generator training:
+
+ ```
+ python dataset_toolkits/encode_latent.py --output_dir <OUTPUT_DIR> [--rank <RANK> --world_size <WORLD_SIZE>]
+ ```
+
+ - `OUTPUT_DIR`: The directory to save the data.
+ - `RANK` and `WORLD_SIZE`: Multi-node configuration.
+
+ For example, to encode SLat for the ObjaverseXL (sketchfab) subset and save it to `datasets/ObjaverseXL_sketchfab`, we can run:
+
+ ```
+ python dataset_toolkits/encode_latent.py --output_dir datasets/ObjaverseXL_sketchfab
+ ```
+
+ Then update the metadata file with:
+
+ ```
+ python dataset_toolkits/build_metadata.py ObjaverseXL --output_dir datasets/ObjaverseXL_sketchfab
+ ```
+
+ ### Step 9: Render Image Conditions
+
+ To train the image-conditioned generator, we need to render image conditions with augmented views.
+
+ ```
+ python dataset_toolkits/render_cond.py <SUBSET> --output_dir <OUTPUT_DIR> [--num_views <NUM_VIEWS>] [--rank <RANK> --world_size <WORLD_SIZE>]
+ ```
+
+ - `SUBSET`: The subset of the dataset to render. Options are `ObjaverseXL`, `ABO`, `3D-FUTURE`, `HSSD`, and `Toys4k`.
+ - `OUTPUT_DIR`: The directory to save the data.
+ - `NUM_VIEWS`: The number of views to render. Default is 24.
+ - `RANK` and `WORLD_SIZE`: Multi-node configuration.
+
+ For example, to render image conditions for the ObjaverseXL (sketchfab) subset and save them to `datasets/ObjaverseXL_sketchfab`, we can run:
+
+ ```
+ python dataset_toolkits/render_cond.py ObjaverseXL --output_dir datasets/ObjaverseXL_sketchfab
+ ```
+
+ Then update the metadata file with:
+
+ ```
+ python dataset_toolkits/build_metadata.py ObjaverseXL --output_dir datasets/ObjaverseXL_sketchfab
+ ```
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) Microsoft Corporation.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,331 @@
  ---
- title: Tr
- emoji: 📉
- colorFrom: pink
- colorTo: red
+ title: tr
+ app_file: app_text.py
  sdk: gradio
- sdk_version: 5.31.0
+ sdk_version: 4.44.1
- app_file: app.py
- pinned: false
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ <img src="assets/logo.webp" width="100%" align="center">
+ <h1 align="center">Structured 3D Latents<br>for Scalable and Versatile 3D Generation</h1>
+ <p align="center"><a href="https://arxiv.org/abs/2412.01506"><img src='https://img.shields.io/badge/arXiv-Paper-red?logo=arxiv&logoColor=white' alt='arXiv'></a>
+ <a href='https://trellis3d.github.io'><img src='https://img.shields.io/badge/Project_Page-Website-green?logo=googlechrome&logoColor=white' alt='Project Page'></a>
+ <a href='https://huggingface.co/spaces?q=TRELLIS'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Live_Demo-blue'></a>
+ </p>
+ <p align="center"><img src="assets/teaser.png" width="100%"></p>
+
+ <span style="font-size: 16px; font-weight: 600;">T</span><span style="font-size: 12px; font-weight: 700;">RELLIS</span> is a large 3D asset generation model. It takes in text or image prompts and generates high-quality 3D assets in various formats, such as Radiance Fields, 3D Gaussians, and meshes. The cornerstone of <span style="font-size: 16px; font-weight: 600;">T</span><span style="font-size: 12px; font-weight: 700;">RELLIS</span> is a unified Structured LATent (<span style="font-size: 16px; font-weight: 600;">SL</span><span style="font-size: 12px; font-weight: 700;">AT</span>) representation that allows decoding to different output formats, with Rectified Flow Transformers tailored for <span style="font-size: 16px; font-weight: 600;">SL</span><span style="font-size: 12px; font-weight: 700;">AT</span> as the powerful backbones. We provide large-scale pre-trained models with up to 2 billion parameters, trained on a large 3D asset dataset of 500K diverse objects. <span style="font-size: 16px; font-weight: 600;">T</span><span style="font-size: 12px; font-weight: 700;">RELLIS</span> significantly surpasses existing methods, including recent ones at similar scales, and showcases flexible output format selection and local 3D editing capabilities which were not offered by previous models.
+
+ ***Check out our [Project Page](https://trellis3d.github.io) for more videos and interactive demos!***
+
+ <!-- Features -->
+ ## 🌟 Features
+ - **High Quality**: It produces diverse 3D assets of high quality with intricate shape and texture details.
+ - **Versatility**: It takes text or image prompts and can generate various final 3D representations including but not limited to *Radiance Fields*, *3D Gaussians*, and *meshes*, accommodating diverse downstream requirements.
+ - **Flexible Editing**: It allows for easy editing of generated 3D assets, such as generating variants of the same object or local editing of the 3D asset.
+
+ <!-- Updates -->
+ ## ⏩ Updates
+
+ **03/25/2025**
+ - Release training code.
+ - Release **TRELLIS-text** models and asset variants generation.
+   - Examples are provided as [example_text.py](example_text.py) and [example_variant.py](example_variant.py).
+   - Gradio demo is provided as [app_text.py](app_text.py).
+   - *Note: It is always recommended to do text-to-3D generation by first generating images using text-to-image models and then using TRELLIS-image models for 3D generation. Text-conditioned models are less creative and detailed due to data limitations. A sketch of the direct text-conditioned call follows below.*
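For reference, a minimal sketch of the direct text-conditioned path; the pipeline class and checkpoint name here are assumptions inferred from the TRELLIS-text release above, so check [example_text.py](example_text.py) for the authoritative usage:

```python
from trellis.pipelines import TrellisTextTo3DPipeline
from trellis.utils import postprocessing_utils

# Assumed text-to-3D counterpart of TrellisImageTo3DPipeline.
pipeline = TrellisTextTo3DPipeline.from_pretrained("microsoft/TRELLIS-text-xlarge")
pipeline.cuda()

# Same output dictionary as the image pipeline: gaussian / radiance_field / mesh.
outputs = pipeline.run("a weathered wooden lantern", seed=1)

glb = postprocessing_utils.to_glb(outputs['gaussian'][0], outputs['mesh'][0])
glb.export("sample_text.glb")
```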
+
+ **12/26/2024**
+ - Release [**TRELLIS-500K**](https://github.com/microsoft/TRELLIS#-dataset) dataset and toolkits for data preparation.
+
+ **12/18/2024**
+ - Implementation of multi-image conditioning for the **TRELLIS-image** model ([#7](https://github.com/microsoft/TRELLIS/issues/7)). This is based on a tuning-free algorithm without training a specialized model, so it may not give the best results for all input images; see the sketch below.
+ - Add Gaussian export in `app.py` and `example.py`. ([#40](https://github.com/microsoft/TRELLIS/issues/40))
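A minimal sketch of how the multi-image mode is typically invoked; the `run_multi_image` entry point is an assumption based on this update, so treat it as illustrative rather than the definitive API:

```python
from PIL import Image
from trellis.pipelines import TrellisImageTo3DPipeline

pipeline = TrellisImageTo3DPipeline.from_pretrained("microsoft/TRELLIS-image-large")
pipeline.cuda()

# Several views of the same object (these examples ship with the repo).
images = [
    Image.open("assets/example_multi_image/character_1.png"),
    Image.open("assets/example_multi_image/character_2.png"),
    Image.open("assets/example_multi_image/character_3.png"),
]

# Assumed multi-image counterpart of `pipeline.run`; returns the same
# dictionary of 'gaussian' / 'radiance_field' / 'mesh' outputs.
outputs = pipeline.run_multi_image(images, seed=1)
```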
+
+ <!-- Installation -->
+ ## 📦 Installation
+
+ ### Prerequisites
+ - **System**: The code is currently tested only on **Linux**. For Windows setup, you may refer to [#3](https://github.com/microsoft/TRELLIS/issues/3) (not fully tested).
+ - **Hardware**: An NVIDIA GPU with at least 16GB of memory is necessary. The code has been verified on NVIDIA A100 and A6000 GPUs.
+ - **Software**:
+   - The [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit-archive) is needed to compile certain submodules. The code has been tested with CUDA versions 11.8 and 12.2.
+   - [Conda](https://docs.anaconda.com/miniconda/install/#quick-command-line-install) is recommended for managing dependencies.
+   - Python version 3.8 or higher is required.
+
+ ### Installation Steps
+ 1. Clone the repo:
+     ```sh
+     git clone --recurse-submodules https://github.com/microsoft/TRELLIS.git
+     cd TRELLIS
+     ```
+
+ 2. Install the dependencies:
+
+     **Before running the following command there are some things to note:**
+     - By adding `--new-env`, a new conda environment named `trellis` will be created. If you want to use an existing conda environment, please remove this flag.
+     - By default the `trellis` environment will use PyTorch 2.4.0 with CUDA 11.8. If you want to use a different version of CUDA (e.g., if you have CUDA Toolkit 12.2 installed and do not want to install another 11.8 version for submodule compilation), you can remove the `--new-env` flag and manually install the required dependencies. Refer to [PyTorch](https://pytorch.org/get-started/previous-versions/) for the installation command.
+     - If you have multiple CUDA Toolkit versions installed, `PATH` should be set to the correct version before running the command. For example, if you have CUDA Toolkit 11.8 and 12.2 installed, you should run `export PATH=/usr/local/cuda-11.8/bin:$PATH` before running the command.
+     - By default, the code uses the `flash-attn` backend for attention. For GPUs that do not support `flash-attn` (e.g., NVIDIA V100), you can remove the `--flash-attn` flag to install `xformers` only and set the `ATTN_BACKEND` environment variable to `xformers` before running the code. See the [Minimal Example](#minimal-example) for more details.
+     - The installation may take a while due to the large number of dependencies. Please be patient. If you encounter any issues, you can try to install the dependencies one by one, specifying one flag at a time.
+     - If you encounter any issues during the installation, feel free to open an issue or contact us.
+
+     Create a new conda environment named `trellis` and install the dependencies:
+     ```sh
+     . ./setup.sh --new-env --basic --xformers --flash-attn --diffoctreerast --spconv --mipgaussian --kaolin --nvdiffrast
+     ```
+     The detailed usage of `setup.sh` can be found by running `. ./setup.sh --help`.
+     ```sh
+     Usage: setup.sh [OPTIONS]
+     Options:
+         -h, --help              Display this help message
+         --new-env               Create a new conda environment
+         --basic                 Install basic dependencies
+         --train                 Install training dependencies
+         --xformers              Install xformers
+         --flash-attn            Install flash-attn
+         --diffoctreerast        Install diffoctreerast
+         --vox2seq               Install vox2seq
+         --spconv                Install spconv
+         --mipgaussian           Install mip-splatting
+         --kaolin                Install kaolin
+         --nvdiffrast            Install nvdiffrast
+         --demo                  Install all dependencies for demo
+     ```
+
+ <!-- Usage -->
+ ## 💡 Usage
+
+ ### Minimal Example
+
+ Here is an [example](example.py) of how to use the pretrained models for 3D asset generation.
+
+ ```python
+ import os
+ # os.environ['ATTN_BACKEND'] = 'xformers'   # Can be 'flash-attn' or 'xformers', default is 'flash-attn'
+ os.environ['SPCONV_ALGO'] = 'native'        # Can be 'native' or 'auto', default is 'auto'.
+                                             # 'auto' is faster but will do benchmarking at the beginning.
+                                             # Recommended to set to 'native' if run only once.
+
+ import imageio
+ from PIL import Image
+ from trellis.pipelines import TrellisImageTo3DPipeline
+ from trellis.utils import render_utils, postprocessing_utils
+
+ # Load a pipeline from a model folder or a Hugging Face model hub.
+ pipeline = TrellisImageTo3DPipeline.from_pretrained("microsoft/TRELLIS-image-large")
+ pipeline.cuda()
+
+ # Load an image
+ image = Image.open("assets/example_image/T.png")
+
+ # Run the pipeline
+ outputs = pipeline.run(
+     image,
+     seed=1,
+     # Optional parameters
+     # sparse_structure_sampler_params={
+     #     "steps": 12,
+     #     "cfg_strength": 7.5,
+     # },
+     # slat_sampler_params={
+     #     "steps": 12,
+     #     "cfg_strength": 3,
+     # },
+ )
+ # outputs is a dictionary containing generated 3D assets in different formats:
+ # - outputs['gaussian']: a list of 3D Gaussians
+ # - outputs['radiance_field']: a list of radiance fields
+ # - outputs['mesh']: a list of meshes
+
+ # Render the outputs
+ video = render_utils.render_video(outputs['gaussian'][0])['color']
+ imageio.mimsave("sample_gs.mp4", video, fps=30)
+ video = render_utils.render_video(outputs['radiance_field'][0])['color']
+ imageio.mimsave("sample_rf.mp4", video, fps=30)
+ video = render_utils.render_video(outputs['mesh'][0])['normal']
+ imageio.mimsave("sample_mesh.mp4", video, fps=30)
+
+ # GLB files can be extracted from the outputs
+ glb = postprocessing_utils.to_glb(
+     outputs['gaussian'][0],
+     outputs['mesh'][0],
+     # Optional parameters
+     simplify=0.95,          # Ratio of triangles to remove in the simplification process
+     texture_size=1024,      # Size of the texture used for the GLB
+ )
+ glb.export("sample.glb")
+
+ # Save Gaussians as PLY files
+ outputs['gaussian'][0].save_ply("sample.ply")
+ ```
+
+ After running the code, you will get the following files:
+ - `sample_gs.mp4`: a video showing the 3D Gaussian representation
+ - `sample_rf.mp4`: a video showing the Radiance Field representation
+ - `sample_mesh.mp4`: a video showing the mesh representation
+ - `sample.glb`: a GLB file containing the extracted textured mesh
+ - `sample.ply`: a PLY file containing the 3D Gaussian representation
+
+
+ ### Web Demo
+
+ [app.py](app.py) provides a simple web demo for 3D asset generation. Since this demo is based on [Gradio](https://gradio.app/), additional dependencies are required:
+ ```sh
+ . ./setup.sh --demo
+ ```
+
+ After installing the dependencies, you can run the demo with the following command:
+ ```sh
+ python app.py
+ ```
+
+ Then, you can access the demo at the address shown in the terminal.
+
+
+ <!-- Dataset -->
+ ## 📚 Dataset
+
+ We provide **TRELLIS-500K**, a large-scale dataset containing 500K 3D assets curated from [Objaverse(XL)](https://objaverse.allenai.org/), [ABO](https://amazon-berkeley-objects.s3.amazonaws.com/index.html), [3D-FUTURE](https://tianchi.aliyun.com/specials/promotion/alibaba-3d-future), [HSSD](https://huggingface.co/datasets/hssd/hssd-models), and [Toys4k](https://github.com/rehg-lab/lowshot-shapebias/tree/main/toys4k), filtered based on aesthetic scores. Please refer to the [dataset README](DATASET.md) for more details.
+
+
+ <!-- Training -->
+ ## 🏋️‍♂️ Training
+
+ TRELLIS's training framework is organized to provide a flexible and modular approach to building and fine-tuning large-scale 3D generation models. The training code is centered around `train.py` and is structured into several directories to clearly separate dataset handling, model components, training logic, and visualization utilities.
+
+ ### Code Structure
+
+ - **train.py**: Main entry point for training.
+ - **trellis/datasets**: Dataset loading and preprocessing.
+ - **trellis/models**: Different models and their components.
+ - **trellis/modules**: Custom modules for various models.
+ - **trellis/pipelines**: Inference pipelines for different models.
+ - **trellis/renderers**: Renderers for different 3D representations.
+ - **trellis/representations**: Different 3D representations.
+ - **trellis/trainers**: Training logic for different models.
+ - **trellis/utils**: Utility functions for training and visualization.
+
+ ### Training Setup
+
+ 1. **Prepare the Environment:**
+    - Ensure all training dependencies are installed.
+    - Use a Linux system with an NVIDIA GPU (the models are trained on NVIDIA A100 GPUs).
+    - For distributed training, verify that your nodes can communicate through the designated master address and port.
+
+ 2. **Dataset Preparation:**
+    - Organize your dataset similarly to TRELLIS-500K. Specify your dataset path using the `--data_dir` argument when launching training.
+
+ 3. **Configuration Files:**
+    - Training hyperparameters and model architectures are defined in configuration files under the `configs/` directory.
+    - Example configuration files include:
+
+ | Config | Description |
+ | --- | --- |
+ | [`vae/ss_vae_conv3d_16l8_fp16.json`](configs/vae/ss_vae_conv3d_16l8_fp16.json) | Sparse structure VAE |
+ | [`vae/slat_vae_enc_dec_gs_swin8_B_64l8_fp16.json`](configs/vae/slat_vae_enc_dec_gs_swin8_B_64l8_fp16.json) | SLat VAE with Gaussian Decoder |
+ | [`vae/slat_vae_dec_rf_swin8_B_64l8_fp16.json`](configs/vae/slat_vae_dec_rf_swin8_B_64l8_fp16.json) | SLat Radiance Field Decoder |
+ | [`vae/slat_vae_dec_mesh_swin8_B_64l8_fp16.json`](configs/vae/slat_vae_dec_mesh_swin8_B_64l8_fp16.json) | SLat Mesh Decoder |
+ | [`generation/ss_flow_img_dit_L_16l8_fp16.json`](configs/generation/ss_flow_img_dit_L_16l8_fp16.json) | Image-conditioned sparse structure Flow Model |
+ | [`generation/slat_flow_img_dit_L_64l8p2_fp16.json`](configs/generation/slat_flow_img_dit_L_64l8p2_fp16.json) | Image-conditioned SLat Flow Model |
+ | [`generation/ss_flow_txt_dit_B_16l8_fp16.json`](configs/generation/ss_flow_txt_dit_B_16l8_fp16.json) | Base text-conditioned sparse structure Flow Model |
+ | [`generation/slat_flow_txt_dit_B_64l8p2_fp16.json`](configs/generation/slat_flow_txt_dit_B_64l8p2_fp16.json) | Base text-conditioned SLat Flow Model |
+ | [`generation/ss_flow_txt_dit_L_16l8_fp16.json`](configs/generation/ss_flow_txt_dit_L_16l8_fp16.json) | Large text-conditioned sparse structure Flow Model |
+ | [`generation/slat_flow_txt_dit_L_64l8p2_fp16.json`](configs/generation/slat_flow_txt_dit_L_64l8p2_fp16.json) | Large text-conditioned SLat Flow Model |
+ | [`generation/ss_flow_txt_dit_XL_16l8_fp16.json`](configs/generation/ss_flow_txt_dit_XL_16l8_fp16.json) | Extra-large text-conditioned sparse structure Flow Model |
+ | [`generation/slat_flow_txt_dit_XL_64l8p2_fp16.json`](configs/generation/slat_flow_txt_dit_XL_64l8p2_fp16.json) | Extra-large text-conditioned SLat Flow Model |
+
+
+ ### Command-Line Options
+
+ The training script can be run as follows:
+ ```sh
+ usage: train.py [-h] --config CONFIG --output_dir OUTPUT_DIR [--load_dir LOAD_DIR] [--ckpt CKPT] [--data_dir DATA_DIR] [--auto_retry AUTO_RETRY] [--tryrun] [--profile] [--num_nodes NUM_NODES] [--node_rank NODE_RANK] [--num_gpus NUM_GPUS] [--master_addr MASTER_ADDR] [--master_port MASTER_PORT]
+
+ options:
+   -h, --help                  show this help message and exit
+   --config CONFIG             Experiment config file
+   --output_dir OUTPUT_DIR     Output directory
+   --load_dir LOAD_DIR         Load directory, default to output_dir
+   --ckpt CKPT                 Checkpoint step to resume training, default to latest
+   --data_dir DATA_DIR         Data directory
+   --auto_retry AUTO_RETRY     Number of retries on error
+   --tryrun                    Try run without training
+   --profile                   Profile training
+   --num_nodes NUM_NODES       Number of nodes
+   --node_rank NODE_RANK       Node rank
+   --num_gpus NUM_GPUS         Number of GPUs per node, default to all
+   --master_addr MASTER_ADDR   Master address for distributed training
+   --master_port MASTER_PORT   Port for distributed training
+ ```
+
+ ### Example Training Commands
+
+ #### Single-node Training
+
+ To train an image-to-3D model (here, the SLat mesh decoder VAE) on a single machine:
+ ```sh
+ python train.py \
+     --config configs/vae/slat_vae_dec_mesh_swin8_B_64l8_fp16.json \
+     --output_dir outputs/slat_vae_dec_mesh_swin8_B_64l8_fp16_1node \
+     --data_dir /path/to/your/dataset1,/path/to/your/dataset2
+ ```
+ The script will automatically distribute the training across all available GPUs. Specify the number of GPUs with the `--num_gpus` flag if you want to limit the number of GPUs used.
+
+ #### Multi-node Training
+
+ To train an image-to-3D stage 2 model with multiple GPUs across nodes (e.g., 2 nodes):
+ ```sh
+ python train.py \
+     --config configs/generation/slat_flow_img_dit_L_64l8p2_fp16.json \
+     --output_dir outputs/slat_flow_img_dit_L_64l8p2_fp16_2nodes \
+     --data_dir /path/to/your/dataset1,/path/to/your/dataset2 \
+     --num_nodes 2 \
+     --node_rank 0 \
+     --master_addr $MASTER_ADDR \
+     --master_port $MASTER_PORT
+ ```
+ Be sure to adjust `node_rank`, `master_addr`, and `master_port` for each node accordingly.
+
+ #### Resuming Training
+
+ By default, training will resume from the latest saved checkpoint in the same output directory. To resume from a specific checkpoint, use the `--load_dir` and `--ckpt` flags:
+ ```sh
+ python train.py \
+     --config configs/generation/slat_flow_img_dit_L_64l8p2_fp16.json \
+     --output_dir outputs/slat_flow_img_dit_L_64l8p2_fp16_resume \
+     --data_dir /path/to/your/dataset1,/path/to/your/dataset2 \
+     --load_dir /path/to/your/checkpoint \
+     --ckpt [step]
+ ```
+
+ ### Additional Options
+
+ - **Auto Retry:** Use the `--auto_retry` flag to specify the number of retries in case of intermittent errors.
+ - **Dry Run:** The `--tryrun` flag allows you to check your configuration and environment without launching full training.
+ - **Profiling:** Enable profiling with the `--profile` flag to gain insights into training performance and diagnose bottlenecks.
+
+ Adjust the file paths and parameters to match your experimental setup.
+
+
+ <!-- License -->
+ ## ⚖️ License
+
+ TRELLIS models and the majority of the code are licensed under the [MIT License](LICENSE). The following submodules may have different licenses:
+ - [**diffoctreerast**](https://github.com/JeffreyXiang/diffoctreerast): We developed a CUDA-based real-time differentiable octree renderer for rendering radiance fields as part of this project. This renderer is derived from the [diff-gaussian-rasterization](https://github.com/graphdeco-inria/diff-gaussian-rasterization) project and is available under the [LICENSE](https://github.com/JeffreyXiang/diffoctreerast/blob/master/LICENSE).
+
+ - [**Modified Flexicubes**](https://github.com/MaxtirError/FlexiCubes): In this project, we used a modified version of [Flexicubes](https://github.com/nv-tlabs/FlexiCubes) to support vertex attributes. This modified version is licensed under the [LICENSE](https://github.com/nv-tlabs/FlexiCubes/blob/main/LICENSE.txt).
+
+ <!-- Citation -->
+ ## 📜 Citation
+
+ If you find this work helpful, please consider citing our paper:
+
+ ```bibtex
+ @article{xiang2024structured,
+     title   = {Structured 3D Latents for Scalable and Versatile 3D Generation},
+     author  = {Xiang, Jianfeng and Lv, Zelong and Xu, Sicheng and Deng, Yu and Wang, Ruicheng and Zhang, Bowen and Chen, Dong and Tong, Xin and Yang, Jiaolong},
+     journal = {arXiv preprint arXiv:2412.01506},
+     year    = {2024}
+ }
+ ```
SECURITY.md ADDED
@@ -0,0 +1,41 @@
+ <!-- BEGIN MICROSOFT SECURITY.MD V0.0.9 BLOCK -->
+
+ ## Security
+
+ Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin).
+
+ If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below.
+
+ ## Reporting Security Issues
+
+ **Please do not report security vulnerabilities through public GitHub issues.**
+
+ Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report).
+
+ If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp).
+
+ You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
+
+ Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
+
+ * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
+ * Full paths of source file(s) related to the manifestation of the issue
+ * The location of the affected source code (tag/branch/commit or direct URL)
+ * Any special configuration required to reproduce the issue
+ * Step-by-step instructions to reproduce the issue
+ * Proof-of-concept or exploit code (if possible)
+ * Impact of the issue, including how an attacker might exploit the issue
+
+ This information will help us triage your report more quickly.
+
+ If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs.
+
+ ## Preferred Languages
+
+ We prefer all communications to be in English.
+
+ ## Policy
+
+ Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd).
+
+ <!-- END MICROSOFT SECURITY.MD BLOCK -->
SUPPORT.md ADDED
@@ -0,0 +1,25 @@
+ # TODO: The maintainer of this repo has not yet edited this file
+
+ **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project?
+
+ - **No CSS support:** Fill out this template with information about how to file issues and get help.
+ - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps.
+ - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide.
+
+ *Then remove this first heading from this SUPPORT.MD file before publishing your repo.*
+
+ # Support
+
+ ## How to file issues and get help
+
+ This project uses GitHub Issues to track bugs and feature requests. Please search the existing
+ issues before filing new issues to avoid duplicates. For new issues, file your bug or
+ feature request as a new Issue.
+
+ For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE
+ FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER
+ CHANNEL. WHERE WILL YOU HELP PEOPLE?**.
+
+ ## Microsoft Support Policy
+
+ Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
app.py ADDED
@@ -0,0 +1,405 @@
+ import gradio as gr
+ from gradio_litmodel3d import LitModel3D
+
+ import os
+ os.environ['ATTN_BACKEND'] = 'xformers'
+ import shutil
+ from typing import *
+ import torch
+ import numpy as np
+ import imageio
+ from easydict import EasyDict as edict
+ from PIL import Image
+ from trellis.pipelines import TrellisImageTo3DPipeline
+ from trellis.representations import Gaussian, MeshExtractResult
+ from trellis.utils import render_utils, postprocessing_utils
+
+
+ MAX_SEED = np.iinfo(np.int32).max
+ TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
+ os.makedirs(TMP_DIR, exist_ok=True)
+
+
+ def start_session(req: gr.Request):
+     # Create a per-session scratch directory for generated files.
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     os.makedirs(user_dir, exist_ok=True)
+
+
+ def end_session(req: gr.Request):
+     # Clean up the per-session scratch directory.
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     shutil.rmtree(user_dir)
+
+
+ def preprocess_image(image: Image.Image) -> Image.Image:
+     """
+     Preprocess the input image.
+
+     Args:
+         image (Image.Image): The input image.
+
+     Returns:
+         Image.Image: The preprocessed image.
+     """
+     processed_image = pipeline.preprocess_image(image)
+     return processed_image
+
+
+ def preprocess_images(images: List[Tuple[Image.Image, str]]) -> List[Image.Image]:
+     """
+     Preprocess a list of input images.
+
+     Args:
+         images (List[Tuple[Image.Image, str]]): The input images.
+
+     Returns:
+         List[Image.Image]: The preprocessed images.
+     """
+     images = [image[0] for image in images]
+     processed_images = [pipeline.preprocess_image(image) for image in images]
+     return processed_images
+
+
+ def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
+     # Move everything to CPU NumPy arrays so the state can live in a Gradio session.
+     return {
+         'gaussian': {
+             **gs.init_params,
+             '_xyz': gs._xyz.cpu().numpy(),
+             '_features_dc': gs._features_dc.cpu().numpy(),
+             '_scaling': gs._scaling.cpu().numpy(),
+             '_rotation': gs._rotation.cpu().numpy(),
+             '_opacity': gs._opacity.cpu().numpy(),
+         },
+         'mesh': {
+             'vertices': mesh.vertices.cpu().numpy(),
+             'faces': mesh.faces.cpu().numpy(),
+         },
+     }
+
+
+ def unpack_state(state: dict) -> Tuple[Gaussian, edict]:
+     gs = Gaussian(
+         aabb=state['gaussian']['aabb'],
+         sh_degree=state['gaussian']['sh_degree'],
+         mininum_kernel_size=state['gaussian']['mininum_kernel_size'],
+         scaling_bias=state['gaussian']['scaling_bias'],
+         opacity_bias=state['gaussian']['opacity_bias'],
+         scaling_activation=state['gaussian']['scaling_activation'],
+     )
+     gs._xyz = torch.tensor(state['gaussian']['_xyz'], device='cuda')
+     gs._features_dc = torch.tensor(state['gaussian']['_features_dc'], device='cuda')
+     gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
+     gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
+     gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
+
+     mesh = edict(
+         vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
+         faces=torch.tensor(state['mesh']['faces'], device='cuda'),
+     )
+
+     return gs, mesh
+
+
+ def get_seed(randomize_seed: bool, seed: int) -> int:
+     """
+     Get the random seed.
+     """
+     return np.random.randint(0, MAX_SEED) if randomize_seed else seed
+
+
+ def image_to_3d(
+     image: Image.Image,
+     multiimages: List[Tuple[Image.Image, str]],
+     is_multiimage: bool,
+     seed: int,
+     ss_guidance_strength: float,
+     ss_sampling_steps: int,
+     slat_guidance_strength: float,
+     slat_sampling_steps: int,
+     multiimage_algo: Literal["multidiffusion", "stochastic"],
+     req: gr.Request,
+ ) -> Tuple[dict, str]:
+     """
+     Convert an image to a 3D model.
+
+     Args:
+         image (Image.Image): The input image.
+         multiimages (List[Tuple[Image.Image, str]]): The input images in multi-image mode.
+         is_multiimage (bool): Whether the request is in multi-image mode.
+         seed (int): The random seed.
+         ss_guidance_strength (float): The guidance strength for sparse structure generation.
+         ss_sampling_steps (int): The number of sampling steps for sparse structure generation.
+         slat_guidance_strength (float): The guidance strength for structured latent generation.
+         slat_sampling_steps (int): The number of sampling steps for structured latent generation.
+         multiimage_algo (Literal["multidiffusion", "stochastic"]): The algorithm for multi-image generation.
+
+     Returns:
+         dict: The information of the generated 3D model.
+         str: The path to the video of the 3D model.
+     """
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     if not is_multiimage:
+         outputs = pipeline.run(
+             image,
+             seed=seed,
+             formats=["gaussian", "mesh"],
+             preprocess_image=False,
+             sparse_structure_sampler_params={
+                 "steps": ss_sampling_steps,
+                 "cfg_strength": ss_guidance_strength,
+             },
+             slat_sampler_params={
+                 "steps": slat_sampling_steps,
+                 "cfg_strength": slat_guidance_strength,
+             },
+         )
+     else:
+         outputs = pipeline.run_multi_image(
+             [image[0] for image in multiimages],
+             seed=seed,
+             formats=["gaussian", "mesh"],
+             preprocess_image=False,
+             sparse_structure_sampler_params={
+                 "steps": ss_sampling_steps,
+                 "cfg_strength": ss_guidance_strength,
+             },
+             slat_sampler_params={
+                 "steps": slat_sampling_steps,
+                 "cfg_strength": slat_guidance_strength,
+             },
+             mode=multiimage_algo,
+         )
+     # Render a side-by-side preview video: Gaussian color on the left, mesh normals on the right.
+     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
+     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
+     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
+     video_path = os.path.join(user_dir, 'sample.mp4')
+     imageio.mimsave(video_path, video, fps=15)
+     state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
+     torch.cuda.empty_cache()
+     return state, video_path
+
+
+ def extract_glb(
+     state: dict,
+     mesh_simplify: float,
+     texture_size: int,
+     req: gr.Request,
+ ) -> Tuple[str, str]:
+     """
+     Extract a GLB file from the 3D model.
+
+     Args:
+         state (dict): The state of the generated 3D model.
+         mesh_simplify (float): The mesh simplification factor.
+         texture_size (int): The texture resolution.
+
+     Returns:
+         Tuple[str, str]: The path to the extracted GLB file, returned twice (for the 3D viewer and the download button).
+     """
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     gs, mesh = unpack_state(state)
+     glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
+     glb_path = os.path.join(user_dir, 'sample.glb')
+     glb.export(glb_path)
+     torch.cuda.empty_cache()
+     return glb_path, glb_path
+
+
+ def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
+     """
+     Extract a Gaussian file from the 3D model.
+
+     Args:
+         state (dict): The state of the generated 3D model.
+
+     Returns:
+         Tuple[str, str]: The path to the extracted Gaussian file, returned twice (for the 3D viewer and the download button).
+     """
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     gs, _ = unpack_state(state)
+     gaussian_path = os.path.join(user_dir, 'sample.ply')
+     gs.save_ply(gaussian_path)
+     torch.cuda.empty_cache()
+     return gaussian_path, gaussian_path
+
+
+ def prepare_multi_example() -> List[Image.Image]:
+     # Stitch the three views of each multi-image case into one wide example image.
+     multi_case = list(set([i.split('_')[0] for i in os.listdir("assets/example_multi_image")]))
+     images = []
+     for case in multi_case:
+         _images = []
+         for i in range(1, 4):
+             img = Image.open(f'assets/example_multi_image/{case}_{i}.png')
+             W, H = img.size
+             img = img.resize((int(W / H * 512), 512))
+             _images.append(np.array(img))
+         images.append(Image.fromarray(np.concatenate(_images, axis=1)))
+     return images
+
+
+ def split_image(image: Image.Image) -> List[Image.Image]:
+     """
+     Split an image into multiple views.
+     """
+     # Use runs of non-transparent columns in the alpha channel to find view boundaries.
+     image = np.array(image)
+     alpha = image[..., 3]
+     alpha = np.any(alpha > 0, axis=0)
+     start_pos = np.where(~alpha[:-1] & alpha[1:])[0].tolist()
+     end_pos = np.where(alpha[:-1] & ~alpha[1:])[0].tolist()
+     images = []
+     for s, e in zip(start_pos, end_pos):
+         images.append(Image.fromarray(image[:, s:e+1]))
+     return [preprocess_image(image) for image in images]
+
+
+ with gr.Blocks(delete_cache=(600, 600)) as demo:
+     gr.Markdown("""
+     ## Image to 3D Asset with [TRELLIS](https://trellis3d.github.io/)
+     * Upload an image and click "Generate" to create a 3D asset. If the image has an alpha channel, it will be used as the mask. Otherwise, we use `rembg` to remove the background.
+     * If you find the generated 3D asset satisfactory, click "Extract GLB" to extract the GLB file and download it.
+     """)
+
+     with gr.Row():
+         with gr.Column():
+             with gr.Tabs() as input_tabs:
+                 with gr.Tab(label="Single Image", id=0) as single_image_input_tab:
+                     image_prompt = gr.Image(label="Image Prompt", format="png", image_mode="RGBA", type="pil", height=300)
+                 with gr.Tab(label="Multiple Images", id=1) as multiimage_input_tab:
+                     multiimage_prompt = gr.Gallery(label="Image Prompt", format="png", type="pil", height=300, columns=3)
+                     gr.Markdown("""
+                     Input different views of the object in separate images.
+
+                     *NOTE: this is an experimental algorithm without a specially trained model. It may not produce the best results for all images, especially those with different poses or inconsistent details.*
+                     """)
+
+             with gr.Accordion(label="Generation Settings", open=False):
+                 seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
+                 randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                 gr.Markdown("Stage 1: Sparse Structure Generation")
+                 with gr.Row():
+                     ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
+                     ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+                 gr.Markdown("Stage 2: Structured Latent Generation")
+                 with gr.Row():
+                     slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
+                     slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+                 multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Multi-image Algorithm", value="stochastic")
+
+             generate_btn = gr.Button("Generate")
+
+             with gr.Accordion(label="GLB Extraction Settings", open=False):
+                 mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
+                 texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
+
+             with gr.Row():
+                 extract_glb_btn = gr.Button("Extract GLB", interactive=False)
+                 extract_gs_btn = gr.Button("Extract Gaussian", interactive=False)
+             gr.Markdown("""
+             *NOTE: the Gaussian file can be very large (~50 MB), so it will take a while to display and download.*
+             """)
+
+         with gr.Column():
+             video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
+             model_output = LitModel3D(label="Extracted GLB/Gaussian", exposure=10.0, height=300)
+
+             with gr.Row():
+                 download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
+                 download_gs = gr.DownloadButton(label="Download Gaussian", interactive=False)
+
+     is_multiimage = gr.State(False)
+     output_buf = gr.State()
+
+     # Example images at the bottom of the page
+     with gr.Row() as single_image_example:
+         examples = gr.Examples(
+             examples=[
+                 f'assets/example_image/{image}'
+                 for image in os.listdir("assets/example_image")
+             ],
+             inputs=[image_prompt],
+             fn=preprocess_image,
+             outputs=[image_prompt],
+             run_on_click=True,
+             examples_per_page=64,
+         )
+     with gr.Row(visible=False) as multiimage_example:
+         examples_multi = gr.Examples(
+             examples=prepare_multi_example(),
+             inputs=[image_prompt],
+             fn=split_image,
+             outputs=[multiimage_prompt],
+             run_on_click=True,
+             examples_per_page=8,
+         )
+
+     # Handlers
+     demo.load(start_session)
+     demo.unload(end_session)
+
+     single_image_input_tab.select(
+         lambda: tuple([False, gr.Row.update(visible=True), gr.Row.update(visible=False)]),
+         outputs=[is_multiimage, single_image_example, multiimage_example]
+     )
+     multiimage_input_tab.select(
+         lambda: tuple([True, gr.Row.update(visible=False), gr.Row.update(visible=True)]),
+         outputs=[is_multiimage, single_image_example, multiimage_example]
+     )
+
+     image_prompt.upload(
+         preprocess_image,
+         inputs=[image_prompt],
+         outputs=[image_prompt],
+     )
+     multiimage_prompt.upload(
+         preprocess_images,
+         inputs=[multiimage_prompt],
+         outputs=[multiimage_prompt],
+     )
+
+     generate_btn.click(
+         get_seed,
+         inputs=[randomize_seed, seed],
+         outputs=[seed],
+     ).then(
+         image_to_3d,
+         inputs=[image_prompt, multiimage_prompt, is_multiimage, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps, multiimage_algo],
+         outputs=[output_buf, video_output],
+     ).then(
+         lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
+         outputs=[extract_glb_btn, extract_gs_btn],
+     )
+
+     video_output.clear(
+         lambda: tuple([gr.Button(interactive=False), gr.Button(interactive=False)]),
+         outputs=[extract_glb_btn, extract_gs_btn],
+     )
+
+     extract_glb_btn.click(
+         extract_glb,
+         inputs=[output_buf, mesh_simplify, texture_size],
+         outputs=[model_output, download_glb],
+     ).then(
+         lambda: gr.Button(interactive=True),
+         outputs=[download_glb],
+     )
+
+     extract_gs_btn.click(
+         extract_gaussian,
+         inputs=[output_buf],
+         outputs=[model_output, download_gs],
+     ).then(
+         lambda: gr.Button(interactive=True),
+         outputs=[download_gs],
+     )
+
+     model_output.clear(
+         lambda: gr.Button(interactive=False),
+         outputs=[download_glb],
+     )
+
+
+ # Launch the Gradio app
+ if __name__ == "__main__":
+     pipeline = TrellisImageTo3DPipeline.from_pretrained("jetx/trellis-image-large")
+     pipeline.cuda()
+     demo.launch(share=True)
app1.py ADDED
@@ -0,0 +1,405 @@
+ import gradio as gr
+ from gradio_litmodel3d import LitModel3D
+
+ import os
+ os.environ['ATTN_BACKEND'] = 'xformers'
+ import shutil
+ from typing import *
+ import torch
+ import numpy as np
+ import imageio
+ from easydict import EasyDict as edict
+ from PIL import Image
+ from trellis.pipelines import TrellisImageTo3DPipeline
+ from trellis.representations import Gaussian, MeshExtractResult
+ from trellis.utils import render_utils, postprocessing_utils
+
+
+ MAX_SEED = np.iinfo(np.int32).max
+ TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
+ os.makedirs(TMP_DIR, exist_ok=True)
+
+
+ def start_session(req: gr.Request):
+     # Create a per-session scratch directory for generated files.
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     os.makedirs(user_dir, exist_ok=True)
+
+
+ def end_session(req: gr.Request):
+     # Clean up the per-session scratch directory.
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     shutil.rmtree(user_dir)
+
+
+ def preprocess_image(image: Image.Image) -> Image.Image:
+     """
+     Preprocess the input image.
+
+     Args:
+         image (Image.Image): The input image.
+
+     Returns:
+         Image.Image: The preprocessed image.
+     """
+     processed_image = pipeline.preprocess_image(image)
+     return processed_image
+
+
+ def preprocess_images(images: List[Tuple[Image.Image, str]]) -> List[Image.Image]:
+     """
+     Preprocess a list of input images.
+
+     Args:
+         images (List[Tuple[Image.Image, str]]): The input images.
+
+     Returns:
+         List[Image.Image]: The preprocessed images.
+     """
+     images = [image[0] for image in images]
+     processed_images = [pipeline.preprocess_image(image) for image in images]
+     return processed_images
+
+
+ def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
+     # Move everything to CPU NumPy arrays so the state can live in a Gradio session.
+     return {
+         'gaussian': {
+             **gs.init_params,
+             '_xyz': gs._xyz.cpu().numpy(),
+             '_features_dc': gs._features_dc.cpu().numpy(),
+             '_scaling': gs._scaling.cpu().numpy(),
+             '_rotation': gs._rotation.cpu().numpy(),
+             '_opacity': gs._opacity.cpu().numpy(),
+         },
+         'mesh': {
+             'vertices': mesh.vertices.cpu().numpy(),
+             'faces': mesh.faces.cpu().numpy(),
+         },
+     }
+
+
+ def unpack_state(state: dict) -> Tuple[Gaussian, edict]:
+     gs = Gaussian(
+         aabb=state['gaussian']['aabb'],
+         sh_degree=state['gaussian']['sh_degree'],
+         mininum_kernel_size=state['gaussian']['mininum_kernel_size'],
+         scaling_bias=state['gaussian']['scaling_bias'],
+         opacity_bias=state['gaussian']['opacity_bias'],
+         scaling_activation=state['gaussian']['scaling_activation'],
+     )
+     gs._xyz = torch.tensor(state['gaussian']['_xyz'], device='cuda')
+     gs._features_dc = torch.tensor(state['gaussian']['_features_dc'], device='cuda')
+     gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
+     gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
+     gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
+
+     mesh = edict(
+         vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
+         faces=torch.tensor(state['mesh']['faces'], device='cuda'),
+     )
+
+     return gs, mesh
+
+
+ def get_seed(randomize_seed: bool, seed: int) -> int:
+     """
+     Get the random seed.
+     """
+     return np.random.randint(0, MAX_SEED) if randomize_seed else seed
+
+
+ def image_to_3d(
+     image: Image.Image,
+     multiimages: List[Tuple[Image.Image, str]],
+     is_multiimage: bool,
+     seed: int,
+     ss_guidance_strength: float,
+     ss_sampling_steps: int,
+     slat_guidance_strength: float,
+     slat_sampling_steps: int,
+     multiimage_algo: Literal["multidiffusion", "stochastic"],
+     req: gr.Request,
+ ) -> Tuple[dict, str]:
+     """
+     Convert an image to a 3D model.
+
+     Args:
+         image (Image.Image): The input image.
+         multiimages (List[Tuple[Image.Image, str]]): The input images in multi-image mode.
+         is_multiimage (bool): Whether the request is in multi-image mode.
+         seed (int): The random seed.
+         ss_guidance_strength (float): The guidance strength for sparse structure generation.
+         ss_sampling_steps (int): The number of sampling steps for sparse structure generation.
+         slat_guidance_strength (float): The guidance strength for structured latent generation.
+         slat_sampling_steps (int): The number of sampling steps for structured latent generation.
+         multiimage_algo (Literal["multidiffusion", "stochastic"]): The algorithm for multi-image generation.
+
+     Returns:
+         dict: The information of the generated 3D model.
+         str: The path to the video of the 3D model.
+     """
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     if not is_multiimage:
+         outputs = pipeline.run(
+             image,
+             seed=seed,
+             formats=["gaussian", "mesh"],
+             preprocess_image=False,
+             sparse_structure_sampler_params={
+                 "steps": ss_sampling_steps,
+                 "cfg_strength": ss_guidance_strength,
+             },
+             slat_sampler_params={
+                 "steps": slat_sampling_steps,
+                 "cfg_strength": slat_guidance_strength,
+             },
+         )
+     else:
+         outputs = pipeline.run_multi_image(
+             [image[0] for image in multiimages],
+             seed=seed,
+             formats=["gaussian", "mesh"],
+             preprocess_image=False,
+             sparse_structure_sampler_params={
+                 "steps": ss_sampling_steps,
+                 "cfg_strength": ss_guidance_strength,
+             },
+             slat_sampler_params={
+                 "steps": slat_sampling_steps,
+                 "cfg_strength": slat_guidance_strength,
+             },
+             mode=multiimage_algo,
+         )
+     # Render a side-by-side preview video: Gaussian color on the left, mesh normals on the right.
+     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
+     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
+     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
+     video_path = os.path.join(user_dir, 'sample.mp4')
+     imageio.mimsave(video_path, video, fps=15)
+     state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
+     torch.cuda.empty_cache()
+     return state, video_path
+
+
+ def extract_glb(
+     state: dict,
+     mesh_simplify: float,
+     texture_size: int,
+     req: gr.Request,
+ ) -> Tuple[str, str]:
+     """
+     Extract a GLB file from the 3D model.
+
+     Args:
+         state (dict): The state of the generated 3D model.
+         mesh_simplify (float): The mesh simplification factor.
+         texture_size (int): The texture resolution.
+
+     Returns:
+         Tuple[str, str]: The path to the extracted GLB file, returned twice (for the 3D viewer and the download button).
+     """
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     gs, mesh = unpack_state(state)
+     glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
+     glb_path = os.path.join(user_dir, 'sample.glb')
+     glb.export(glb_path)
+     torch.cuda.empty_cache()
+     return glb_path, glb_path
+
+
+ def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
+     """
+     Extract a Gaussian file from the 3D model.
+
+     Args:
+         state (dict): The state of the generated 3D model.
+
+     Returns:
+         Tuple[str, str]: The path to the extracted Gaussian file, returned twice (for the 3D viewer and the download button).
+     """
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     gs, _ = unpack_state(state)
+     gaussian_path = os.path.join(user_dir, 'sample.ply')
+     gs.save_ply(gaussian_path)
+     torch.cuda.empty_cache()
+     return gaussian_path, gaussian_path
+
+
+ def prepare_multi_example() -> List[Image.Image]:
+     # Stitch the three views of each multi-image case into one wide example image.
+     multi_case = list(set([i.split('_')[0] for i in os.listdir("assets/example_multi_image")]))
+     images = []
+     for case in multi_case:
+         _images = []
+         for i in range(1, 4):
+             img = Image.open(f'assets/example_multi_image/{case}_{i}.png')
+             W, H = img.size
+             img = img.resize((int(W / H * 512), 512))
+             _images.append(np.array(img))
+         images.append(Image.fromarray(np.concatenate(_images, axis=1)))
+     return images
+
+
+ def split_image(image: Image.Image) -> List[Image.Image]:
+     """
+     Split an image into multiple views.
+     """
+     # Use runs of non-transparent columns in the alpha channel to find view boundaries.
+     image = np.array(image)
+     alpha = image[..., 3]
+     alpha = np.any(alpha > 0, axis=0)
+     start_pos = np.where(~alpha[:-1] & alpha[1:])[0].tolist()
+     end_pos = np.where(alpha[:-1] & ~alpha[1:])[0].tolist()
+     images = []
+     for s, e in zip(start_pos, end_pos):
+         images.append(Image.fromarray(image[:, s:e+1]))
+     return [preprocess_image(image) for image in images]
+
+
+ with gr.Blocks(delete_cache=(600, 600)) as demo:
+     gr.Markdown("""
+     ## Image to 3D Asset with [TRELLIS](https://trellis3d.github.io/)
+     * Upload an image and click "Generate" to create a 3D asset. If the image has an alpha channel, it will be used as the mask. Otherwise, we use `rembg` to remove the background.
+     * If you find the generated 3D asset satisfactory, click "Extract GLB" to extract the GLB file and download it.
+     """)
+
+     with gr.Row():
+         with gr.Column():
+             with gr.Tabs() as input_tabs:
+                 with gr.Tab(label="Single Image", id=0) as single_image_input_tab:
+                     image_prompt = gr.Image(label="Image Prompt", format="png", image_mode="RGBA", type="pil", height=300)
+                 with gr.Tab(label="Multiple Images", id=1) as multiimage_input_tab:
+                     multiimage_prompt = gr.Gallery(label="Image Prompt", format="png", type="pil", height=300, columns=3)
+                     gr.Markdown("""
+                     Input different views of the object in separate images.
+
+                     *NOTE: this is an experimental algorithm without a specially trained model. It may not produce the best results for all images, especially those with different poses or inconsistent details.*
+                     """)
+
+             with gr.Accordion(label="Generation Settings", open=False):
+                 seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
+                 randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                 gr.Markdown("Stage 1: Sparse Structure Generation")
+                 with gr.Row():
+                     ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
+                     ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+                 gr.Markdown("Stage 2: Structured Latent Generation")
+                 with gr.Row():
+                     slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
+                     slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+                 multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Multi-image Algorithm", value="stochastic")
+
+             generate_btn = gr.Button("Generate")
+
+             with gr.Accordion(label="GLB Extraction Settings", open=False):
+                 mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
+                 texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
+
+             with gr.Row():
+                 extract_glb_btn = gr.Button("Extract GLB", interactive=False)
+                 extract_gs_btn = gr.Button("Extract Gaussian", interactive=False)
+             gr.Markdown("""
+             *NOTE: the Gaussian file can be very large (~50 MB), so it will take a while to display and download.*
+             """)
+
+         with gr.Column():
+             video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
+             model_output = LitModel3D(label="Extracted GLB/Gaussian", exposure=10.0, height=300)
+
+             with gr.Row():
+                 download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
+                 download_gs = gr.DownloadButton(label="Download Gaussian", interactive=False)
+
+     is_multiimage = gr.State(False)
+     output_buf = gr.State()
+
+     # Example images at the bottom of the page
+     with gr.Row() as single_image_example:
+         examples = gr.Examples(
+             examples=[
+                 f'assets/example_image/{image}'
+                 for image in os.listdir("assets/example_image")
+             ],
+             inputs=[image_prompt],
+             fn=preprocess_image,
+             outputs=[image_prompt],
+             run_on_click=True,
+             examples_per_page=64,
+         )
+     with gr.Row(visible=False) as multiimage_example:
+         examples_multi = gr.Examples(
+             examples=prepare_multi_example(),
+             inputs=[image_prompt],
+             fn=split_image,
+             outputs=[multiimage_prompt],
+             run_on_click=True,
+             examples_per_page=8,
+         )
+
+     # Handlers
+     demo.load(start_session)
+     demo.unload(end_session)
+
+     single_image_input_tab.select(
+         lambda: tuple([False, gr.Row.update(visible=True), gr.Row.update(visible=False)]),
+         outputs=[is_multiimage, single_image_example, multiimage_example]
+     )
+     multiimage_input_tab.select(
+         lambda: tuple([True, gr.Row.update(visible=False), gr.Row.update(visible=True)]),
+         outputs=[is_multiimage, single_image_example, multiimage_example]
+     )
+
+     image_prompt.upload(
+         preprocess_image,
+         inputs=[image_prompt],
+         outputs=[image_prompt],
+     )
+     multiimage_prompt.upload(
+         preprocess_images,
+         inputs=[multiimage_prompt],
+         outputs=[multiimage_prompt],
+     )
+
+     generate_btn.click(
+         get_seed,
+         inputs=[randomize_seed, seed],
+         outputs=[seed],
+     ).then(
+         image_to_3d,
+         inputs=[image_prompt, multiimage_prompt, is_multiimage, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps, multiimage_algo],
+         outputs=[output_buf, video_output],
+     ).then(
+         lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
+         outputs=[extract_glb_btn, extract_gs_btn],
+     )
+
+     video_output.clear(
+         lambda: tuple([gr.Button(interactive=False), gr.Button(interactive=False)]),
+         outputs=[extract_glb_btn, extract_gs_btn],
+     )
+
+     extract_glb_btn.click(
+         extract_glb,
+         inputs=[output_buf, mesh_simplify, texture_size],
+         outputs=[model_output, download_glb],
+     ).then(
+         lambda: gr.Button(interactive=True),
+         outputs=[download_glb],
+     )
+
+     extract_gs_btn.click(
+         extract_gaussian,
+         inputs=[output_buf],
+         outputs=[model_output, download_gs],
+     ).then(
+         lambda: gr.Button(interactive=True),
+         outputs=[download_gs],
+     )
+
+     model_output.clear(
+         lambda: gr.Button(interactive=False),
+         outputs=[download_glb],
+     )
+
+
+ # Launch the Gradio app
+ if __name__ == "__main__":
+     pipeline = TrellisImageTo3DPipeline.from_pretrained("microsoft/TRELLIS-image-large")
+     pipeline.cuda()
+     demo.launch(share=True)
app_text.py ADDED
@@ -0,0 +1,266 @@
+ import gradio as gr
+ from gradio_litmodel3d import LitModel3D
+
+ import os
+ import shutil
+ from typing import *
+ import torch
+ import numpy as np
+ import imageio
+ from easydict import EasyDict as edict
+ from trellis.pipelines import TrellisTextTo3DPipeline
+ from trellis.representations import Gaussian, MeshExtractResult
+ from trellis.utils import render_utils, postprocessing_utils
+
+
+ MAX_SEED = np.iinfo(np.int32).max
+ TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
+ os.makedirs(TMP_DIR, exist_ok=True)
+
+
+ def start_session(req: gr.Request):
+     # Create a per-session scratch directory for generated files.
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     os.makedirs(user_dir, exist_ok=True)
+
+
+ def end_session(req: gr.Request):
+     # Clean up the per-session scratch directory.
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     shutil.rmtree(user_dir)
+
+
+ def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
+     # Move everything to CPU NumPy arrays so the state can live in a Gradio session.
+     return {
+         'gaussian': {
+             **gs.init_params,
+             '_xyz': gs._xyz.cpu().numpy(),
+             '_features_dc': gs._features_dc.cpu().numpy(),
+             '_scaling': gs._scaling.cpu().numpy(),
+             '_rotation': gs._rotation.cpu().numpy(),
+             '_opacity': gs._opacity.cpu().numpy(),
+         },
+         'mesh': {
+             'vertices': mesh.vertices.cpu().numpy(),
+             'faces': mesh.faces.cpu().numpy(),
+         },
+     }
+
+
+ def unpack_state(state: dict) -> Tuple[Gaussian, edict]:
+     gs = Gaussian(
+         aabb=state['gaussian']['aabb'],
+         sh_degree=state['gaussian']['sh_degree'],
+         mininum_kernel_size=state['gaussian']['mininum_kernel_size'],
+         scaling_bias=state['gaussian']['scaling_bias'],
+         opacity_bias=state['gaussian']['opacity_bias'],
+         scaling_activation=state['gaussian']['scaling_activation'],
+     )
+     gs._xyz = torch.tensor(state['gaussian']['_xyz'], device='cuda')
+     gs._features_dc = torch.tensor(state['gaussian']['_features_dc'], device='cuda')
+     gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
+     gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
+     gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
+
+     mesh = edict(
+         vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
+         faces=torch.tensor(state['mesh']['faces'], device='cuda'),
+     )
+
+     return gs, mesh
+
+
+ def get_seed(randomize_seed: bool, seed: int) -> int:
+     """
+     Get the random seed.
+     """
+     return np.random.randint(0, MAX_SEED) if randomize_seed else seed
+
+
+ def text_to_3d(
+     prompt: str,
+     seed: int,
+     ss_guidance_strength: float,
+     ss_sampling_steps: int,
+     slat_guidance_strength: float,
+     slat_sampling_steps: int,
+     req: gr.Request,
+ ) -> Tuple[dict, str]:
+     """
+     Convert a text prompt to a 3D model.
+
+     Args:
+         prompt (str): The text prompt.
+         seed (int): The random seed.
+         ss_guidance_strength (float): The guidance strength for sparse structure generation.
+         ss_sampling_steps (int): The number of sampling steps for sparse structure generation.
+         slat_guidance_strength (float): The guidance strength for structured latent generation.
+         slat_sampling_steps (int): The number of sampling steps for structured latent generation.
+
+     Returns:
+         dict: The information of the generated 3D model.
+         str: The path to the video of the 3D model.
+     """
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     outputs = pipeline.run(
+         prompt,
+         seed=seed,
+         formats=["gaussian", "mesh"],
+         sparse_structure_sampler_params={
+             "steps": ss_sampling_steps,
+             "cfg_strength": ss_guidance_strength,
+         },
+         slat_sampler_params={
+             "steps": slat_sampling_steps,
+             "cfg_strength": slat_guidance_strength,
+         },
+     )
+     # Render a side-by-side preview video: Gaussian color on the left, mesh normals on the right.
+     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
+     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
+     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
+     video_path = os.path.join(user_dir, 'sample.mp4')
+     imageio.mimsave(video_path, video, fps=15)
+     state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
+     torch.cuda.empty_cache()
+     return state, video_path
+
+
+ def extract_glb(
+     state: dict,
+     mesh_simplify: float,
+     texture_size: int,
+     req: gr.Request,
+ ) -> Tuple[str, str]:
+     """
+     Extract a GLB file from the 3D model.
+
+     Args:
+         state (dict): The state of the generated 3D model.
+         mesh_simplify (float): The mesh simplification factor.
+         texture_size (int): The texture resolution.
+
+     Returns:
+         Tuple[str, str]: The path to the extracted GLB file, returned twice (for the 3D viewer and the download button).
+     """
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     gs, mesh = unpack_state(state)
+     glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
+     glb_path = os.path.join(user_dir, 'sample.glb')
+     glb.export(glb_path)
+     torch.cuda.empty_cache()
+     return glb_path, glb_path
+
+
+ def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
+     """
+     Extract a Gaussian file from the 3D model.
+
+     Args:
+         state (dict): The state of the generated 3D model.
+
+     Returns:
+         Tuple[str, str]: The path to the extracted Gaussian file, returned twice (for the 3D viewer and the download button).
+     """
+     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+     gs, _ = unpack_state(state)
+     gaussian_path = os.path.join(user_dir, 'sample.ply')
+     gs.save_ply(gaussian_path)
+     torch.cuda.empty_cache()
+     return gaussian_path, gaussian_path
+
+
+ with gr.Blocks(delete_cache=(600, 600)) as demo:
+     gr.Markdown("""
+     ## Text to 3D Asset with [TRELLIS](https://trellis3d.github.io/)
+     * Type a text prompt and click "Generate" to create a 3D asset.
+     * If you find the generated 3D asset satisfactory, click "Extract GLB" to extract the GLB file and download it.
+     """)
+
+     with gr.Row():
+         with gr.Column():
+             text_prompt = gr.Textbox(label="Text Prompt", lines=5)
+
+             with gr.Accordion(label="Generation Settings", open=False):
+                 seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
+                 randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                 gr.Markdown("Stage 1: Sparse Structure Generation")
+                 with gr.Row():
+                     ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
+                     ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=25, step=1)
+                 gr.Markdown("Stage 2: Structured Latent Generation")
+                 with gr.Row():
+                     slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
+                     slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=25, step=1)
+
+             generate_btn = gr.Button("Generate")
+
+             with gr.Accordion(label="GLB Extraction Settings", open=False):
+                 mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
+                 texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
+
+             with gr.Row():
+                 extract_glb_btn = gr.Button("Extract GLB", interactive=False)
+                 extract_gs_btn = gr.Button("Extract Gaussian", interactive=False)
+             gr.Markdown("""
+             *NOTE: the Gaussian file can be very large (~50 MB), so it will take a while to display and download.*
+             """)
+
+         with gr.Column():
+             video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
+             model_output = LitModel3D(label="Extracted GLB/Gaussian", exposure=10.0, height=300)
+
+             with gr.Row():
+                 download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
+                 download_gs = gr.DownloadButton(label="Download Gaussian", interactive=False)
+
+     output_buf = gr.State()
+
+     # Handlers
+     demo.load(start_session)
+     demo.unload(end_session)
+
+     generate_btn.click(
+         get_seed,
+         inputs=[randomize_seed, seed],
+         outputs=[seed],
+     ).then(
+         text_to_3d,
+         inputs=[text_prompt, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
+         outputs=[output_buf, video_output],
+     ).then(
+         lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
+         outputs=[extract_glb_btn, extract_gs_btn],
+     )
+
+     video_output.clear(
+         lambda: tuple([gr.Button(interactive=False), gr.Button(interactive=False)]),
+         outputs=[extract_glb_btn, extract_gs_btn],
+     )
+
+     extract_glb_btn.click(
+         extract_glb,
+         inputs=[output_buf, mesh_simplify, texture_size],
+         outputs=[model_output, download_glb],
+     ).then(
+         lambda: gr.Button(interactive=True),
+         outputs=[download_glb],
+     )
+
+     extract_gs_btn.click(
+         extract_gaussian,
+         inputs=[output_buf],
+         outputs=[model_output, download_gs],
+     ).then(
+         lambda: gr.Button(interactive=True),
+         outputs=[download_gs],
+     )
+
+     model_output.clear(
+         lambda: gr.Button(interactive=False),
+         outputs=[download_glb],
+     )
+
+
+ # Launch the Gradio app
+ if __name__ == "__main__":
+     pipeline = TrellisTextTo3DPipeline.from_pretrained("microsoft/TRELLIS-text-xlarge")
+     pipeline.cuda()
+     demo.launch()
assets/T.ply ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:163e3efe355f4c7fe36eb3b55563d1897ac1384c5ab2eb1acfc68700de2dc31b
+ size 2089367
assets/example_image/T.png ADDED

Git LFS Details

  • SHA256: e29ddc83a5bd3a05fe9b34732169bc4ea7131f7c36527fdc5f626a90a73076d2
  • Pointer size: 131 Bytes
  • Size of remote file: 955 kB
assets/example_image/typical_building_building.png ADDED

Git LFS Details

  • SHA256: 8faa11d557be95c000c475247e61a773d511114c7d1e517c04f8d3d88a6049ec
  • Pointer size: 131 Bytes
  • Size of remote file: 547 kB
assets/example_image/typical_building_castle.png ADDED

Git LFS Details

  • SHA256: 076f0554b087b921863643d2b1ab3e0572a13a347fd66bc29cd9d194034affae
  • Pointer size: 131 Bytes
  • Size of remote file: 426 kB
assets/example_image/typical_building_colorful_cottage.png ADDED

Git LFS Details

  • SHA256: 687305b4e35da759692be0de614d728583a2a9cd2fd3a55593fa753e567d0d47
  • Pointer size: 131 Bytes
  • Size of remote file: 609 kB
assets/example_image/typical_building_maya_pyramid.png ADDED

Git LFS Details

  • SHA256: 4d514f7f4db244ee184af4ddfbc5948d417b4e5bf1c6ee5f5a592679561690df
  • Pointer size: 131 Bytes
  • Size of remote file: 232 kB
assets/example_image/typical_building_mushroom.png ADDED

Git LFS Details

  • SHA256: de9b72d3e13e967e70844ddc54643832a84a1b35ca043a11e7c774371d0ccdab
  • Pointer size: 131 Bytes
  • Size of remote file: 488 kB
assets/example_image/typical_building_space_station.png ADDED

Git LFS Details

  • SHA256: 212c7b4c27ba1e01a7908dbc7f245e7115850eadbc9974aa726327cf35062846
  • Pointer size: 131 Bytes
  • Size of remote file: 620 kB
assets/example_image/typical_creature_dragon.png ADDED

Git LFS Details

  • SHA256: 0e8d6720dfa1e7b332b76e897e617b7f0863187f30879451b4724f482c84185a
  • Pointer size: 131 Bytes
  • Size of remote file: 564 kB
assets/example_image/typical_creature_elephant.png ADDED

Git LFS Details

  • SHA256: 86a171e37a3d781e7215977f565cd63e813341c1f89e2c586fa61937e4ed6916
  • Pointer size: 131 Bytes
  • Size of remote file: 482 kB
assets/example_image/typical_creature_furry.png ADDED

Git LFS Details

  • SHA256: 5b5445b8f1996cf6d72497b2d7564c656f4048e6c1fa626fd7bb3ee582fee671
  • Pointer size: 131 Bytes
  • Size of remote file: 648 kB
assets/example_image/typical_creature_quadruped.png ADDED

Git LFS Details

  • SHA256: 7469f43f58389adec101e9685f60188bd4e7fbede77eef975102f6a8865bc786
  • Pointer size: 131 Bytes
  • Size of remote file: 685 kB
assets/example_image/typical_creature_robot_crab.png ADDED

Git LFS Details

  • SHA256: d7e716abe8f8895080f562d1dc26b14fa0e20a05aa5beb2770c6fb3b87b3476a
  • Pointer size: 131 Bytes
  • Size of remote file: 594 kB
assets/example_image/typical_creature_robot_dinosour.png ADDED

Git LFS Details

  • SHA256: d0986f29557a6fddf9b52b5251a6b6103728c61e201b1cfad1e709b090b72f56
  • Pointer size: 131 Bytes
  • Size of remote file: 632 kB
assets/example_image/typical_creature_rock_monster.png ADDED

Git LFS Details

  • SHA256: e29458a6110bee8374c0d4d12471e7167a6c1c98c18f6e2d7ff4f5f0ca3fa01b
  • Pointer size: 131 Bytes
  • Size of remote file: 648 kB
assets/example_image/typical_humanoid_block_robot.png ADDED

Git LFS Details

  • SHA256: 3a0acbb532668e1bf35f3eef5bcbfdd094c22219ef2d837fa01ccf51cce75ca3
  • Pointer size: 131 Bytes
  • Size of remote file: 441 kB
assets/example_image/typical_humanoid_dragonborn.png ADDED

Git LFS Details

  • SHA256: 5d7c547909a6c12da55dbab1c1c98181ff09e58c9ba943682ca105e71be9548e
  • Pointer size: 131 Bytes
  • Size of remote file: 481 kB
assets/example_image/typical_humanoid_dwarf.png ADDED

Git LFS Details

  • SHA256: a4a7c157d5d8071128c27594e45a7a03e5113b3333b7f1c5ff1379481e3e0264
  • Pointer size: 131 Bytes
  • Size of remote file: 498 kB
assets/example_image/typical_humanoid_goblin.png ADDED

Git LFS Details

  • SHA256: 2b0e9a04ae3e7bef44b7180a70306f95374b60727ffa0f6f01fd6c746595cd77
  • Pointer size: 131 Bytes
  • Size of remote file: 496 kB
assets/example_image/typical_humanoid_mech.png ADDED

Git LFS Details

  • SHA256: a244ec54b7984e646e54d433de6897657081dd5b9cd5ccd3d865328d813beb49
  • Pointer size: 131 Bytes
  • Size of remote file: 850 kB
assets/example_image/typical_misc_crate.png ADDED

Git LFS Details

  • SHA256: 59fd9884301faca93265166d90078e8c31e76c7f93524b1db31975df4b450748
  • Pointer size: 131 Bytes
  • Size of remote file: 642 kB
assets/example_image/typical_misc_fireplace.png ADDED

Git LFS Details

  • SHA256: 2288c034603e289192d63cbc73565107caefd99e81c4b7afa2983c8b13e34440
  • Pointer size: 131 Bytes
  • Size of remote file: 558 kB
assets/example_image/typical_misc_gate.png ADDED

Git LFS Details

  • SHA256: ec8db5389b74fe56b826e3c6d860234541033387350e09268591c46d411cc8e9
  • Pointer size: 131 Bytes
  • Size of remote file: 572 kB
assets/example_image/typical_misc_lantern.png ADDED

Git LFS Details

  • SHA256: e17bd83adf433ebfca17abd220097b2b7f08affc649518bd7822e03797e83d41
  • Pointer size: 131 Bytes
  • Size of remote file: 300 kB
assets/example_image/typical_misc_magicbook.png ADDED

Git LFS Details

  • SHA256: aff9c14589c340e31b61bf82e4506d77d72c511e741260fa1e600cefa4e103e6
  • Pointer size: 131 Bytes
  • Size of remote file: 496 kB
assets/example_image/typical_misc_mailbox.png ADDED

Git LFS Details

  • SHA256: 01e86a5d68edafb7e11d7a86f7e8081f5ed1b02578198a3271554c5fb8fb9fcf
  • Pointer size: 131 Bytes
  • Size of remote file: 631 kB
assets/example_image/typical_misc_monster_chest.png ADDED

Git LFS Details

  • SHA256: c57a598e842225a31b9770bf3bbb9ae86197ec57d0c2883caf8cb5eed4908fbc
  • Pointer size: 131 Bytes
  • Size of remote file: 690 kB
assets/example_image/typical_misc_paper_machine.png ADDED

Git LFS Details

  • SHA256: 2d55400ae5d4df2377258400d800ece75766d5274e80ce07c3b29a4d1fd1fa36
  • Pointer size: 131 Bytes
  • Size of remote file: 614 kB
assets/example_image/typical_misc_phonograph.png ADDED

Git LFS Details

  • SHA256: 14fff9a27ea769d3ca711e9ff55ab3d9385486a5e8b99117f506df326a0a357e
  • Pointer size: 131 Bytes
  • Size of remote file: 517 kB
assets/example_image/typical_misc_portal2.png ADDED

Git LFS Details

  • SHA256: 57aab2bba56bc946523a3fca77ca70651a4ad8c6fbf1b91a1a824418df48faae
  • Pointer size: 131 Bytes
  • Size of remote file: 386 kB
assets/example_image/typical_misc_storage_chest.png ADDED

Git LFS Details

  • SHA256: 0e4ac1c67fdda902ecb709447b8defd949c738954c844c1b8364b8e3f7d9e55a
  • Pointer size: 131 Bytes
  • Size of remote file: 632 kB
assets/example_image/typical_misc_telephone.png ADDED

Git LFS Details

  • SHA256: 00048be46234a2709c12614b04cbad61c6e3c7e63c2a4ef33d999185f5393e36
  • Pointer size: 131 Bytes
  • Size of remote file: 648 kB
assets/example_image/typical_misc_television.png ADDED

Git LFS Details

  • SHA256: 6a1947b737398bf535ec212668a4d78cd38fe84cf9da1ccd6c0c0d838337755e
  • Pointer size: 131 Bytes
  • Size of remote file: 627 kB
assets/example_image/typical_misc_workbench.png ADDED

Git LFS Details

  • SHA256: a6d9ed4d005a5253b8571fd976b0d102e293512d7b5a8ed5e3f7f17c5f4e19da
  • Pointer size: 131 Bytes
  • Size of remote file: 463 kB
assets/example_image/typical_vehicle_biplane.png ADDED

Git LFS Details

  • SHA256: c73e98112eb603b4ba635b8965cad7807d0588f083811bc2faa0c7ab9668a65a
  • Pointer size: 131 Bytes
  • Size of remote file: 574 kB
assets/example_image/typical_vehicle_bulldozer.png ADDED

Git LFS Details

  • SHA256: 23d821b4daea61cbea28cc6ddd3ae46712514dfcdff995c2664f5a70d21f4ef3
  • Pointer size: 131 Bytes
  • Size of remote file: 693 kB
assets/example_image/typical_vehicle_cart.png ADDED

Git LFS Details

  • SHA256: b72c04a2aa5cf57717c05151a2982d6dc31afde130d5e830adf37a84a70616cb
  • Pointer size: 131 Bytes
  • Size of remote file: 693 kB