Xenova HF Staff committed on
Commit
0dfccf0
·
verified ·
1 Parent(s): 82b15e5

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +243 -17
index.html CHANGED
@@ -1,19 +1,245 @@
1
- <!doctype html>
2
  <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <!-- Restores the viewport declaration so the page scales correctly on mobile. -->
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>SmolVLM Benchmark Demo</title>
    <style>
      body { font-family: Arial, sans-serif; margin: 20px; }
      fieldset { margin-bottom: 20px; padding: 10px; }
      legend { font-weight: bold; }
      label { display: block; margin-top: 5px; }
      input, select { margin-bottom: 5px; width: 100%; max-width: 400px; }
      /* The full-width rule above is meant for text-like controls, not the checkbox. */
      input[type="checkbox"] { width: auto; }
      table { border-collapse: collapse; margin-top: 20px; width: 100%; max-width: 600px; }
      th, td { border: 1px solid #ccc; padding: 8px; text-align: left; }
      button { padding: 10px 20px; }
    </style>
  </head>
17
+ <body>
18
<h1>SmolVLM Benchmark Demo</h1>

<!-- Model Options: which checkpoint to load and the dtype of each sub-model. -->
<fieldset id="model-options">
  <legend>Model Options</legend>
  <label for="model-id">Select Model ID:</label>
  <select id="model-id">
    <option value="hf-internal-testing/tiny-random-Idefics3ForConditionalGeneration">hf-internal-testing/tiny-random-Idefics3ForConditionalGeneration</option>
    <option value="HuggingFaceTB/SmolVLM-256M-Instruct" selected>HuggingFaceTB/SmolVLM-256M-Instruct</option>
    <option value="HuggingFaceTB/SmolVLM-500M-Instruct">HuggingFaceTB/SmolVLM-500M-Instruct</option>
    <option value="HuggingFaceTB/SmolVLM-Instruct">HuggingFaceTB/SmolVLM-Instruct</option>
  </select>

  <label for="decoder-dtype">Decoder (decoder_model_merged) dtype:</label>
  <select id="decoder-dtype">
    <option value="fp32">fp32</option>
    <option value="fp16">fp16</option>
    <option value="q8">q8</option>
    <option value="q4">q4</option>
    <option value="q4f16">q4f16</option>
  </select>

  <label for="embed-dtype">Embed Tokens dtype:</label>
  <select id="embed-dtype">
    <option value="fp32">fp32</option>
    <option value="fp16">fp16</option>
    <option value="q8">q8</option>
    <option value="q4">q4</option>
    <option value="q4f16">q4f16</option>
  </select>

  <label for="vision-dtype">Vision Encoder dtype:</label>
  <select id="vision-dtype">
    <option value="fp32">fp32</option>
    <option value="fp16">fp16</option>
    <option value="q8">q8</option>
    <option value="q4">q4</option>
    <option value="q4f16">q4f16</option>
  </select>
</fieldset>

<!-- Hardware Options: execution backend passed to the model loader. -->
<fieldset id="hardware-options">
  <legend>Hardware Options</legend>
  <label for="device">Select Device:</label>
  <select id="device">
    <option value="wasm">wasm</option>
    <option value="webgpu" selected>webgpu</option>
  </select>
</fieldset>

<!-- Benchmark Options: input image and generation/run counts read by the script. -->
<fieldset id="benchmark-options">
  <legend>Benchmark Options</legend>
  <label for="image-url">Image URL:</label>
  <input type="url" id="image-url" value="https://huggingface.co/spaces/merve/chameleon-7b/resolve/main/bee.jpg">

  <label for="do-split">Do Image Splitting (do_image_splitting)</label>
  <input type="checkbox" id="do-split" checked>

  <!-- min/step keep the spinner within values the script can use (positive integers). -->
  <label for="max-tokens">Number of Tokens to Generate:</label>
  <input type="number" id="max-tokens" value="128" min="1" step="1">

  <label for="num-runs">Number of Runs:</label>
  <input type="number" id="num-runs" value="5" min="1" step="1">
</fieldset>

<!-- Explicit type: this button triggers a script action and must never submit a form. -->
<button type="button" id="start-benchmark">Start Benchmark</button>

<!-- Results are injected asynchronously; the live region lets assistive tech announce them. -->
<div id="results" aria-live="polite"></div>
88
+
89
+ <script type="module">
90
+ import {
91
+ AutoProcessor,
92
+ AutoModelForVision2Seq,
93
+ load_image,
94
+ TextStreamer,
95
+ } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.4.2";
96
+
97
/**
 * Lazily loads and caches a single processor/model pair.
 *
 * The cache is keyed on the requested configuration: asking for a different
 * model id, dtype combination, or device reloads the affected parts instead of
 * silently returning whatever was loaded first.
 */
class SmolVLM {
  static model = null;
  static processor = null;
  // Model id the cached `processor` was loaded for.
  static model_id = null;
  // Serialized (modelId, dtypes, device) the cached `model` was loaded with.
  static model_config_key = null;

  /**
   * Returns the processor and model for the given configuration, loading them
   * on first use or when the configuration differs from the cached one.
   *
   * @param {string} modelId - Hub repository id passed to `from_pretrained`.
   * @param {{embed: string, vision: string, decoder: string}} dtypeSettings -
   *   dtype for the `embed_tokens`, `vision_encoder` and `decoder_model_merged` parts.
   * @param {string} device - Execution device passed to `from_pretrained`.
   * @returns {Promise<[object, object]>} `[processor, model]`.
   */
  static async getInstance(modelId, dtypeSettings, device) {
    const { embed, vision, decoder } = dtypeSettings;
    const configKey = JSON.stringify([modelId, embed, vision, decoder, device]);

    // The processor only depends on the model id.
    if (this.model_id !== modelId) {
      this.processor = null;
    }

    // The model depends on every setting; drop a stale instance before reloading.
    if (this.model !== null && this.model_config_key !== configKey) {
      const staleModel = this.model;
      this.model = null;
      try {
        // Best-effort release of the old model's resources, if the library exposes it.
        await staleModel.dispose?.();
      } catch (e) {
        console.warn("Failed to dispose previous model:", e);
      }
    }

    this.processor ??= await AutoProcessor.from_pretrained(modelId);
    this.model_id = modelId;

    this.model ??= await AutoModelForVision2Seq.from_pretrained(modelId, {
      dtype: {
        embed_tokens: embed,
        vision_encoder: vision,
        decoder_model_merged: decoder,
      },
      device: device,
    });
    // Recorded only after a successful load so a failed load is retried next call.
    this.model_config_key = configKey;

    return [this.processor, this.model];
  }
}
115
+
116
/**
 * Replaces the contents of `container` with a single paragraph holding
 * `prefix` followed by the error's string form. Uses `textContent` so the
 * error text is never interpreted as markup.
 */
function showBenchmarkError(container, prefix, error) {
  const paragraph = document.createElement("p");
  paragraph.textContent = prefix + String(error);
  container.replaceChildren(paragraph);
}

/**
 * Reads the benchmark settings from the page, loads the image and model,
 * performs a one-token warmup generation, then runs the configured number of
 * timed generations and renders a per-run and average results table into
 * the `#results` element.
 *
 * Reported per run:
 *  - Execution time: wall-clock milliseconds of the whole `model.generate` call.
 *  - Tokens per second: tokens emitted after the first one, divided by the time
 *    elapsed since the first token callback.
 *
 * Averages are taken over the runs that completed; failed runs are listed as
 * "Error" and logged to the console.
 *
 * @returns {Promise<void>}
 */
async function runBenchmark() {
  const startButton = document.getElementById("start-benchmark");
  const resultsDiv = document.getElementById("results");

  // The model and hardware fieldsets are locked once a benchmark has started
  // and are not re-enabled by this function.
  document.getElementById("model-options").disabled = true;
  document.getElementById("hardware-options").disabled = true;
  // Prevent overlapping benchmark runs from repeated clicks.
  startButton.disabled = true;

  try {
    resultsDiv.innerHTML = "<p>Loading model and running benchmark...</p>";

    const modelId = document.getElementById("model-id").value;
    const dtypeSettings = {
      decoder: document.getElementById("decoder-dtype").value || "fp32",
      embed: document.getElementById("embed-dtype").value || "fp32",
      vision: document.getElementById("vision-dtype").value || "fp32",
    };
    const device = document.getElementById("device").value;
    const imageUrl = document.getElementById("image-url").value;
    // Empty, non-numeric, or zero input falls back to the defaults; negatives are clamped to 1.
    const maxTokens = Math.max(1, parseInt(document.getElementById("max-tokens").value, 10) || 128);
    const numRuns = Math.max(1, parseInt(document.getElementById("num-runs").value, 10) || 5);
    const doImageSplitting = document.getElementById("do-split").checked;

    let image;
    try {
      image = await load_image(imageUrl);
    } catch (e) {
      showBenchmarkError(resultsDiv, "Error loading image: ", e);
      return;
    }

    const messages = [{
      role: "user",
      content: [
        { type: "image" },
        { type: "text", text: "Can you describe this image?" },
      ],
    }];

    // Pre-run warmup (compiling shaders, initialization) with max_new_tokens: 1.
    let processor, model, text;
    try {
      [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
      text = processor.apply_chat_template(messages, { add_generation_prompt: true });
      const warmupInputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
      await model.generate({
        ...warmupInputs,
        max_new_tokens: 1,
      });
    } catch (e) {
      showBenchmarkError(resultsDiv, "Error during warmup: ", e);
      return;
    }

    // Benchmark runs using streaming generation.
    let totalTime = 0;
    let totalTps = 0;
    let completedRuns = 0;
    const runsResults = [];

    for (let i = 0; i < numRuns; ++i) {
      try {
        // Inputs are rebuilt for every run; this happens before the timer starts.
        const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });

        // Streaming measurement state for this run.
        let firstTokenTime = null;
        let numTokens = 0;
        let tps = 0;
        const token_callback_function = () => {
          const now = performance.now();
          firstTokenTime ??= now;
          const sinceFirstToken = now - firstTokenTime;
          // `numTokens` counts tokens seen before this one, i.e. tokens generated
          // since the first token. Skip the update while no time has elapsed
          // (always true on the first token) to avoid a 0/0 division.
          if (sinceFirstToken > 0) {
            tps = (numTokens / sinceFirstToken) * 1000;
          }
          numTokens++;
        };
        // No-op text callback: intermediate decoded text is not used.
        const callback_function = (output) => {};

        const streamer = new TextStreamer(processor.tokenizer, {
          skip_prompt: true,
          skip_special_tokens: true,
          callback_function,
          token_callback_function,
        });

        // Time the whole generate call; min_new_tokens forces exactly maxTokens tokens.
        const generateStartTime = performance.now();
        await model.generate({
          ...inputs,
          max_new_tokens: maxTokens,
          min_new_tokens: maxTokens,
          streamer,
        });
        const elapsed = performance.now() - generateStartTime;

        totalTime += elapsed;
        totalTps += tps;
        completedRuns++;
        runsResults.push({
          run: i + 1,
          time: elapsed.toFixed(2),
          tps: tps.toFixed(2),
        });
      } catch (e) {
        // Keep going so the remaining runs still produce data, but do not hide the failure.
        console.error(`Benchmark run ${i + 1} failed:`, e);
        runsResults.push({ run: i + 1, time: "Error", tps: "Error" });
      }
    }

    // Average only over runs that actually completed.
    const avgTime = completedRuns > 0 ? (totalTime / completedRuns).toFixed(2) : "N/A";
    const avgTps = completedRuns > 0 ? (totalTps / completedRuns).toFixed(2) : "N/A";

    let tableHtml = "<h2>Benchmark Results</h2>";
    tableHtml += "<table>";
    tableHtml += "<tr><th>Run</th><th>Execution Time (ms)</th><th>Tokens per Second</th></tr>";
    runsResults.forEach(r => {
      tableHtml += `<tr><td>${r.run}</td><td>${r.time}</td><td>${r.tps}</td></tr>`;
    });
    tableHtml += `<tr><td><strong>Average</strong></td><td><strong>${avgTime}</strong></td><td><strong>${avgTps}</strong></td></tr>`;
    tableHtml += "</table>";

    resultsDiv.innerHTML = tableHtml;
  } finally {
    startButton.disabled = false;
  }
}
241
+
242
// Kick off a benchmark whenever the "Start Benchmark" button is clicked.
const startBenchmarkButton = document.querySelector("#start-benchmark");
startBenchmarkButton.addEventListener("click", runBenchmark);
243
+ </script>
244
+ </body>
245
  </html>