Update index.html
Browse files- index.html +243 -17
index.html
CHANGED
@@ -1,19 +1,245 @@
|
|
1 |
-
<!
|
2 |
<html>
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
</html>
|
|
|
1 |
+
<!DOCTYPE html>
<html>
<head>
  <meta charset="UTF-8">
  <title>SmolVLM Benchmark Demo</title>
  <style>
    body { font-family: Arial, sans-serif; margin: 20px; }
    fieldset { margin-bottom: 20px; padding: 10px; }
    legend { font-weight: bold; }
    label { display: block; margin-top: 5px; }
    input, select { margin-bottom: 5px; width: 100%; max-width: 400px; }
    table { border-collapse: collapse; margin-top: 20px; width: 100%; max-width: 600px; }
    th, td { border: 1px solid #ccc; padding: 8px; text-align: left; }
    button { padding: 10px 20px; }
  </style>
</head>
<body>
  <h1>SmolVLM Benchmark Demo</h1>

  <!-- Model Options: model id plus per-component ONNX dtype selection. -->
  <fieldset id="model-options">
    <legend>Model Options</legend>
    <label for="model-id">Select Model ID:</label>
    <select id="model-id">
      <option value="hf-internal-testing/tiny-random-Idefics3ForConditionalGeneration">hf-internal-testing/tiny-random-Idefics3ForConditionalGeneration</option>
      <option value="HuggingFaceTB/SmolVLM-256M-Instruct" selected>HuggingFaceTB/SmolVLM-256M-Instruct</option>
      <option value="HuggingFaceTB/SmolVLM-500M-Instruct">HuggingFaceTB/SmolVLM-500M-Instruct</option>
      <option value="HuggingFaceTB/SmolVLM-Instruct">HuggingFaceTB/SmolVLM-Instruct</option>
    </select>

    <label for="decoder-dtype">Decoder (decoder_model_merged) dtype:</label>
    <select id="decoder-dtype">
      <option value="fp32">fp32</option>
      <option value="fp16">fp16</option>
      <option value="q8">q8</option>
      <option value="q4">q4</option>
      <option value="q4f16">q4f16</option>
    </select>

    <label for="embed-dtype">Embed Tokens dtype:</label>
    <select id="embed-dtype">
      <option value="fp32">fp32</option>
      <option value="fp16">fp16</option>
      <option value="q8">q8</option>
      <option value="q4">q4</option>
      <option value="q4f16">q4f16</option>
    </select>

    <label for="vision-dtype">Vision Encoder dtype:</label>
    <select id="vision-dtype">
      <option value="fp32">fp32</option>
      <option value="fp16">fp16</option>
      <option value="q8">q8</option>
      <option value="q4">q4</option>
      <option value="q4f16">q4f16</option>
    </select>
  </fieldset>

  <!-- Hardware Options: ONNX Runtime execution backend. -->
  <fieldset id="hardware-options">
    <legend>Hardware Options</legend>
    <label for="device">Select Device:</label>
    <select id="device">
      <option value="wasm">wasm</option>
      <option value="webgpu" selected>webgpu</option>
    </select>
  </fieldset>

  <!-- Benchmark Options: input image and run parameters. -->
  <fieldset id="benchmark-options">
    <legend>Benchmark Options</legend>
    <label for="image-url">Image URL:</label>
    <input type="text" id="image-url" value="https://huggingface.co/spaces/merve/chameleon-7b/resolve/main/bee.jpg">

    <label for="do-split">Do Image Splitting (do_image_splitting)</label>
    <input type="checkbox" id="do-split" checked>

    <label for="max-tokens">Number of Tokens to Generate:</label>
    <input type="number" id="max-tokens" value="128">

    <label for="num-runs">Number of Runs:</label>
    <input type="number" id="num-runs" value="5">
  </fieldset>

  <button id="start-benchmark">Start Benchmark</button>

  <!-- Populated by the benchmark script with a results table. -->
  <div id="results"></div>
|
88 |
+
|
89 |
+
<script type="module">
|
90 |
+
import {
|
91 |
+
AutoProcessor,
|
92 |
+
AutoModelForVision2Seq,
|
93 |
+
load_image,
|
94 |
+
TextStreamer,
|
95 |
+
} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.4.2";
|
96 |
+
|
97 |
+
class SmolVLM {
|
98 |
+
static model = null;
|
99 |
+
static processor = null;
|
100 |
+
static model_id = null;
|
101 |
+
|
102 |
+
static async getInstance(modelId, dtypeSettings, device) {
|
103 |
+
this.processor ??= await AutoProcessor.from_pretrained(modelId);
|
104 |
+
this.model ??= await AutoModelForVision2Seq.from_pretrained(modelId, {
|
105 |
+
dtype: {
|
106 |
+
embed_tokens: dtypeSettings.embed,
|
107 |
+
vision_encoder: dtypeSettings.vision,
|
108 |
+
decoder_model_merged: dtypeSettings.decoder,
|
109 |
+
},
|
110 |
+
device: device,
|
111 |
+
});
|
112 |
+
return [this.processor, this.model];
|
113 |
+
}
|
114 |
+
}
|
115 |
+
|
116 |
+
async function runBenchmark() {
|
117 |
+
document.getElementById("model-options").disabled = true;
|
118 |
+
document.getElementById("hardware-options").disabled = true;
|
119 |
+
|
120 |
+
const resultsDiv = document.getElementById("results");
|
121 |
+
resultsDiv.innerHTML = "<p>Loading model and running benchmark...</p>";
|
122 |
+
|
123 |
+
const modelId = document.getElementById("model-id").value;
|
124 |
+
let decoderDefault = "fp32", embedDefault = "fp32", visionDefault = "fp32";
|
125 |
+
const decoder_dtype = document.getElementById("decoder-dtype").value || decoderDefault;
|
126 |
+
const embed_dtype = document.getElementById("embed-dtype").value || embedDefault;
|
127 |
+
const vision_dtype = document.getElementById("vision-dtype").value || visionDefault;
|
128 |
+
const device = document.getElementById("device").value;
|
129 |
+
const imageUrl = document.getElementById("image-url").value;
|
130 |
+
const maxTokens = parseInt(document.getElementById("max-tokens").value) || 128;
|
131 |
+
const numRuns = parseInt(document.getElementById("num-runs").value) || 5;
|
132 |
+
const doImageSplitting = document.getElementById("do-split").checked;
|
133 |
+
|
134 |
+
document.getElementById("decoder-dtype").value = decoder_dtype;
|
135 |
+
document.getElementById("embed-dtype").value = embed_dtype;
|
136 |
+
document.getElementById("vision-dtype").value = vision_dtype;
|
137 |
+
|
138 |
+
const image = await load_image(imageUrl);
|
139 |
+
const dtypeSettings = {
|
140 |
+
decoder: decoder_dtype,
|
141 |
+
embed: embed_dtype,
|
142 |
+
vision: vision_dtype,
|
143 |
+
};
|
144 |
+
|
145 |
+
// Pre-run warmup (compiling shaders, initialization) with max_new_tokens: 1.
|
146 |
+
try {
|
147 |
+
const [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
|
148 |
+
const messages = [{
|
149 |
+
role: "user",
|
150 |
+
content: [
|
151 |
+
{ type: "image" },
|
152 |
+
{ type: "text", text: "Can you describe this image?" },
|
153 |
+
],
|
154 |
+
}];
|
155 |
+
const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
|
156 |
+
const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
|
157 |
+
await model.generate({
|
158 |
+
...inputs,
|
159 |
+
max_new_tokens: 1,
|
160 |
+
});
|
161 |
+
} catch (e) {
|
162 |
+
resultsDiv.innerHTML = "<p>Error during warmup: " + e.toString() + "</p>";
|
163 |
+
return;
|
164 |
+
}
|
165 |
+
|
166 |
+
// Benchmark runs using streaming generation.
|
167 |
+
let totalTime = 0;
|
168 |
+
let totalTps = 0;
|
169 |
+
let runsResults = [];
|
170 |
+
|
171 |
+
for (let i = 0; i < numRuns; ++i) {
|
172 |
+
try {
|
173 |
+
const [processor, model] = await SmolVLM.getInstance(modelId, dtypeSettings, device);
|
174 |
+
const messages = [{
|
175 |
+
role: "user",
|
176 |
+
content: [
|
177 |
+
{ type: "image" },
|
178 |
+
{ type: "text", text: "Can you describe this image?" },
|
179 |
+
],
|
180 |
+
}];
|
181 |
+
const text = processor.apply_chat_template(messages, { add_generation_prompt: true });
|
182 |
+
const inputs = await processor(text, [image], { do_image_splitting: doImageSplitting });
|
183 |
+
|
184 |
+
// Initialize streaming measurement variables.
|
185 |
+
let startTime, numTokens = 0, tps = 0;
|
186 |
+
const token_callback_function = () => {
|
187 |
+
// Initialize startTime on the first token.
|
188 |
+
startTime = startTime || performance.now();
|
189 |
+
tps = (numTokens++ / (performance.now() - startTime)) * 1000;
|
190 |
+
};
|
191 |
+
// Optional callback function (here we do nothing with intermediate outputs).
|
192 |
+
const callback_function = (output) => {};
|
193 |
+
|
194 |
+
// Create a new streamer with the callbacks.
|
195 |
+
const streamer = new TextStreamer(processor.tokenizer, {
|
196 |
+
skip_prompt: true,
|
197 |
+
skip_special_tokens: true,
|
198 |
+
callback_function,
|
199 |
+
token_callback_function,
|
200 |
+
});
|
201 |
+
|
202 |
+
// Run generation with the streamer.
|
203 |
+
const generateStartTime = performance.now();
|
204 |
+
await model.generate({
|
205 |
+
...inputs,
|
206 |
+
max_new_tokens: maxTokens,
|
207 |
+
min_new_tokens: maxTokens,
|
208 |
+
streamer,
|
209 |
+
});
|
210 |
+
|
211 |
+
// Calculate elapsed time from when the first token arrived.
|
212 |
+
const endTime = performance.now();
|
213 |
+
const elapsed = endTime - generateStartTime;
|
214 |
+
|
215 |
+
totalTime += elapsed;
|
216 |
+
totalTps += tps;
|
217 |
+
runsResults.push({
|
218 |
+
run: i + 1,
|
219 |
+
time: elapsed.toFixed(2),
|
220 |
+
tps: tps.toFixed(2)
|
221 |
+
});
|
222 |
+
} catch (e) {
|
223 |
+
runsResults.push({ run: i + 1, time: "Error", tps: "Error" });
|
224 |
+
}
|
225 |
+
}
|
226 |
+
|
227 |
+
const avgTime = (totalTime / numRuns).toFixed(2);
|
228 |
+
const avgTps = (totalTps / numRuns).toFixed(2);
|
229 |
+
|
230 |
+
let tableHtml = "<h2>Benchmark Results</h2>";
|
231 |
+
tableHtml += "<table>";
|
232 |
+
tableHtml += "<tr><th>Run</th><th>Execution Time (ms)</th><th>Tokens per Second</th></tr>";
|
233 |
+
runsResults.forEach(r => {
|
234 |
+
tableHtml += `<tr><td>${r.run}</td><td>${r.time}</td><td>${r.tps}</td></tr>`;
|
235 |
+
});
|
236 |
+
tableHtml += `<tr><td><strong>Average</strong></td><td><strong>${avgTime}</strong></td><td><strong>${avgTps}</strong></td></tr>`;
|
237 |
+
tableHtml += "</table>";
|
238 |
+
|
239 |
+
resultsDiv.innerHTML = tableHtml;
|
240 |
+
}
|
241 |
+
|
242 |
+
document.getElementById("start-benchmark").addEventListener("click", runBenchmark);
|
243 |
+
</script>
|
244 |
+
</body>
|
245 |
</html>
|