salma-remyx committed on
Commit
3b9a814
·
verified ·
1 Parent(s): 8bfadbb

adding benchmark breakdowns

Browse files
Files changed (1) hide show
  1. README.md +229 -48
README.md CHANGED
@@ -21,54 +21,235 @@ tags:
21
  - quantitative-spatial-reasoning
22
  new_version: remyxai/SpaceThinker-Qwen2.5VL-3B
23
  model-index:
24
- - name: SpaceQwen2.5-VL-3B-Instruct
25
- results:
26
- - task:
27
- type: visual-question-answering
28
- name: Spatial Reasoning
29
- dataset:
30
- name: 3DSRBench
31
- type: benchmark
32
- metrics:
33
- - type: success_rate
34
- value: 0.515
35
- name: Overall Success Rate
36
- - type: success_rate
37
- value: 0.5
38
- name: Overall Success Rate
39
- - type: success_rate
40
- value: 0.3045
41
- name: Overall Success Rate
42
- - type: success_rate
43
- value: 0.5767
44
- name: Overall Success Rate
45
- - type: success_rate
46
- value: 0.3663
47
- name: Overall Success Rate
48
- - type: success_rate
49
- value: 0.33
50
- name: Overall Success Rate
51
- - type: success_rate
52
- value: 0.4392
53
- name: Overall Success Rate
54
- - type: success_rate
55
- value: 0.6554
56
- name: Overall Success Rate
57
- - type: success_rate
58
- value: 0.2615
59
- name: Overall Success Rate
60
- - type: success_rate
61
- value: 0.2322
62
- name: Overall Success Rate
63
- - type: success_rate
64
- value: 0.7373
65
- name: Overall Success Rate
66
- - type: success_rate
67
- value: 0.5179
68
- name: Overall Success Rate
69
- - type: success_rate
70
- value: 0.4879
71
- name: Overall Success Rate
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  ---
73
 
74
  <img src="https://cdn-uploads.huggingface.co/production/uploads/647777304ae93470ffc28913/v4edJliSy46xBA8g5ZXf8.png" width="500"/>
 
21
  - quantitative-spatial-reasoning
22
  new_version: remyxai/SpaceThinker-Qwen2.5VL-3B
23
  model-index:
24
+ - name: SpaceQwen2.5-VL-3B-Instruct
25
+ results:
26
+ - task:
27
+ type: visual-question-answering
28
+ name: Spatial Reasoning
29
+ dataset:
30
+ name: 3DSRBench
31
+ type: benchmark
32
+ metrics:
33
+ - type: success_rate
34
+ name: Overall Success Rate
35
+ value: 0.515
36
+ results_by_subcategory:
37
+ - name: 3D Positional Relation / Orientation
38
+ success_rate: 0.4706
39
+ - name: Object Localization / 3D Localization
40
+ success_rate: 0.5629
41
+ - name: Object Properties / Size
42
+ success_rate: 0.5116
43
+ - task:
44
+ type: visual-question-answering
45
+ name: Spatial Reasoning
46
+ dataset:
47
+ name: BLINK
48
+ type: benchmark
49
+ metrics:
50
+ - type: success_rate
51
+ name: Overall Success Rate
52
+ value: 0.5
53
+ results_by_subcategory:
54
+ - name: 3D Positional Relation / Orientation
55
+ success_rate: 0.6503
56
+ - name: Counting / Object Counting
57
+ success_rate: 0.6083
58
+ - name: Depth and Distance / Relative
59
+ success_rate: 0.5161
60
+ - name: Object Localization / 2D Localization
61
+ success_rate: 0.4426
62
+ - name: Point and Object Tracking / Point Correspondence
63
+ success_rate: 0.2849
64
+ - task:
65
+ type: visual-question-answering
66
+ name: Spatial Reasoning
67
+ dataset:
68
+ name: MMIU
69
+ type: benchmark
70
+ metrics:
71
+ - type: success_rate
72
+ name: Overall Success Rate
73
+ value: 0.3045
74
+ results_by_subcategory:
75
+ - name: Camera and Image Transformation / 2D Transformation
76
+ success_rate: 0.245
77
+ - name: Camera and Image Transformation / 3D Camera Pose
78
+ success_rate: 0.215
79
+ - name: Camera and Image Transformation / Camera Motion
80
+ success_rate: 0.4436
81
+ - name: Depth and Distance / Absolute
82
+ success_rate: 0.265
83
+ - name: Object Localization / 3D Localization
84
+ success_rate: 0.48
85
+ - name: Point and Object Tracking / 3D Tracking
86
+ success_rate: 0.24
87
+ - name: Point and Object Tracking / Point Correspondence
88
+ success_rate: 0.28
89
+ - task:
90
+ type: visual-question-answering
91
+ name: Spatial Reasoning
92
+ dataset:
93
+ name: MMVP
94
+ type: benchmark
95
+ metrics:
96
+ - type: success_rate
97
+ name: Overall Success Rate
98
+ value: 0.5767
99
+ results_by_subcategory:
100
+ - name: Others / Miscellaneous
101
+ success_rate: 0.5767
102
+ - task:
103
+ type: visual-question-answering
104
+ name: Spatial Reasoning
105
+ dataset:
106
+ name: QSpatialBench-Plus
107
+ type: benchmark
108
+ metrics:
109
+ - type: success_rate
110
+ name: Overall Success Rate
111
+ value: 0.3663
112
+ results_by_subcategory:
113
+ - name: Depth and Distance / Absolute
114
+ success_rate: 0.3663
115
+ - task:
116
+ type: visual-question-answering
117
+ name: Spatial Reasoning
118
+ dataset:
119
+ name: QSpatialBench-ScanNet
120
+ type: benchmark
121
+ metrics:
122
+ - type: success_rate
123
+ name: Overall Success Rate
124
+ value: 0.33
125
+ results_by_subcategory:
126
+ - name: Depth and Distance / Absolute
127
+ success_rate: 0.216
128
+ - name: Object Properties / Size
129
+ success_rate: 0.4444
130
+ - task:
131
+ type: visual-question-answering
132
+ name: Spatial Reasoning
133
+ dataset:
134
+ name: RealWorldQA
135
+ type: benchmark
136
+ metrics:
137
+ - type: success_rate
138
+ name: Overall Success Rate
139
+ value: 0.4392
140
+ results_by_subcategory:
141
+ - name: Others / Miscellaneous
142
+ success_rate: 0.4392
143
+ - task:
144
+ type: visual-question-answering
145
+ name: Spatial Reasoning
146
+ dataset:
147
+ name: SpatialSense
148
+ type: benchmark
149
+ metrics:
150
+ - type: success_rate
151
+ name: Overall Success Rate
152
+ value: 0.6554
153
+ results_by_subcategory:
154
+ - name: 3D Positional Relation / Orientation
155
+ success_rate: 0.6554
156
+ - task:
157
+ type: visual-question-answering
158
+ name: Spatial Reasoning
159
+ dataset:
160
+ name: VGBench
161
+ type: benchmark
162
+ metrics:
163
+ - type: success_rate
164
+ name: Overall Success Rate
165
+ value: 0.2615
166
+ results_by_subcategory:
167
+ - name: Camera and Image Transformation / 2D Transformation
168
+ success_rate: 0.2277
169
+ - name: Camera and Image Transformation / 3D Camera Pose
170
+ success_rate: 0.2438
171
+ - name: Depth and Distance / Absolute
172
+ success_rate: 0.2696
173
+ - name: Depth and Distance / Relative
174
+ success_rate: 0.1945
175
+ - name: Object Localization / 3D Localization
176
+ success_rate: 0.3733
177
+ - name: Point and Object Tracking / 3D Tracking
178
+ success_rate: 0.2655
179
+ - task:
180
+ type: visual-question-answering
181
+ name: Spatial Reasoning
182
+ dataset:
183
+ name: VSI-Bench_8
184
+ type: benchmark
185
+ metrics:
186
+ - type: success_rate
187
+ name: Overall Success Rate
188
+ value: 0.2322
189
+ results_by_subcategory:
190
+ - name: 3D Positional Relation / Orientation
191
+ success_rate: 0.3843
192
+ - name: Counting / Object Counting
193
+ success_rate: 0.1715
194
+ - name: Depth and Distance / Absolute
195
+ success_rate: 0.0299
196
+ - name: Depth and Distance / Relative
197
+ success_rate: 0.3521
198
+ - name: Object Properties / Size
199
+ success_rate: 0.2323
200
+ - name: Others / Miscellaneous
201
+ success_rate: 0.2525
202
+ - task:
203
+ type: visual-question-answering
204
+ name: Spatial Reasoning
205
+ dataset:
206
+ name: VSR-ZeroShot
207
+ type: benchmark
208
+ metrics:
209
+ - type: success_rate
210
+ name: Overall Success Rate
211
+ value: 0.7373
212
+ results_by_subcategory:
213
+ - name: 3D Positional Relation / Orientation
214
+ success_rate: 0.7373
215
+ - task:
216
+ type: visual-question-answering
217
+ name: Spatial Reasoning
218
+ dataset:
219
+ name: cvbench
220
+ type: benchmark
221
+ metrics:
222
+ - type: success_rate
223
+ name: Overall Success Rate
224
+ value: 0.5179
225
+ results_by_subcategory:
226
+ - name: Counting / Object Counting
227
+ success_rate: 0.6168
228
+ - name: Depth and Distance / Relative
229
+ success_rate: 0.4925
230
+ - name: Object Localization / 3D Localization
231
+ success_rate: 0.4446
232
+ - task:
233
+ type: visual-question-answering
234
+ name: Spatial Reasoning
235
+ dataset:
236
+ name: spatialbench
237
+ type: benchmark
238
+ metrics:
239
+ - type: success_rate
240
+ name: Overall Success Rate
241
+ value: 0.4879
242
+ results_by_subcategory:
243
+ - name: 3D Positional Relation / Orientation
244
+ success_rate: 0.5294
245
+ - name: Counting / Object Counting
246
+ success_rate: 0.7
247
+ - name: Object Properties / Existence
248
+ success_rate: 0.45
249
+ - name: Object Properties / Reachability
250
+ success_rate: 0.5
251
+ - name: Object Properties / Size
252
+ success_rate: 0.25
253
  ---
254
 
255
  <img src="https://cdn-uploads.huggingface.co/production/uploads/647777304ae93470ffc28913/v4edJliSy46xBA8g5ZXf8.png" width="500"/>