Spaces:

smolagents
/

smolagents-leaderboard

Running

App Files Files Community

m-ric committed on Feb 27

Commit

f6dd71f

verified ·

1 Parent(s): f19fba4

Update app.py

Browse files

Files changed (1) hide show

app.py +176 -75

app.py CHANGED Viewed

@@ -1,84 +1,185 @@
-from fastapi import FastAPI
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
-from fastapi.staticfiles import StaticFiles
-import numpy as np
-import argparse
-import os
-from datasets import load_dataset
-# Bind address and port are read from the environment; note that the host
-# comes from the API_URL variable. Fallbacks are "0.0.0.0" and 7860.
-HOST = os.environ.get("API_URL", "0.0.0.0")
-PORT = os.environ.get("PORT", 7860)
-# Command-line flags use the environment-derived values above as defaults.
-parser = argparse.ArgumentParser()
-parser.add_argument("--host", default=HOST)
-parser.add_argument("--port", type=int, default=PORT)
-# NOTE(review): with action="store_true" and default=True the parsed value is
-# True whether or not --reload is passed, so reload cannot be switched off here.
-parser.add_argument("--reload", action="store_true", default=True)
-parser.add_argument("--ssl_certfile")
-parser.add_argument("--ssl_keyfile")
-# Arguments are parsed at import time, not only when run as a script.
-args = parser.parse_args()
-app = FastAPI()
-# CORS is configured with wildcard origins, methods and headers, and with
-# credentials allowed.
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-@app.get("/api/results")
-async def get_results():
-    try:
-        # Load the dataset
-        dataset = load_dataset("smolagents/results")
-        # Convert to list for processing
-        data = dataset["train"].to_pandas()
-        # Log some info to help debug
-        print("Dataset loaded, shape:", data.shape)
-        print("Columns:", data.columns)
-        print("First row:", data.iloc[0])
-        # Process the data to group by model and calculate scores
-        processed_data = []
-        grouped = data.groupby('model_id')
-        for model_id, group in grouped:
-            model_data = {
-                'model_id': model_id,
-                'scores': {}
-            }
-            # Calculate scores for each source
-            for source in group['source'].unique():
-                source_data = group[group['source'] == source]
-                avg_acc = source_data['acc'].mean()
-                model_data['scores'][source] = float(avg_acc)
-            model_data['scores']["Average"] = group["acc"].mean()
-            processed_data.append(model_data)
-        return processed_data
-    except Exception as e:
-        # Print the full error traceback to your logs
-        print("Error occurred:", str(e))
-        raise HTTPException(status_code=500, detail=str(e))
-# Mount the "static" directory at the root path; this is registered after the
-# /api/results route has been declared above.
-app.mount("/", StaticFiles(directory="static", html=True), name="static")
-# When executed as a script, print the parsed arguments and start uvicorn on
-# the "app:app" import string using the host, port, reload and SSL options
-# taken from the command line.
-if __name__ == "__main__":
-    import uvicorn
-    print(args)
-    uvicorn.run(
-        "app:app",
-        host=args.host,
-        port=args.port,
-        reload=args.reload,
-        ssl_certfile=args.ssl_certfile,
-        ssl_keyfile=args.ssl_keyfile,
     )

+import React, { useState, useEffect } from 'react';
+import { chain } from 'lodash';
+import './App.css';
+const ScoreBar = ({ score }) => {
+  if (score === undefined || score === null) return null;
+  const percentage = score <= 1 ? score * 100 : score;
+  const hue = Math.min(percentage * 1.2, 120); // Maps 0-100% to 0-120 (red to green)
+  const backgroundColor = `hsl(${hue}, 80%, 50%)`;
+  return (
+    <div className="score-bar">
+      <div
+        className="score-fill"
+        style={{
+          width: `${percentage}%`,
+          backgroundColor
+        }}
+      />
+      <span className="score-text">
+        {percentage.toFixed(1)}%
+      </span>
+    </div>
+  );
+};
+const App = () => {
+  const [allData, setAllData] = useState([]);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState(null);
+  const [sortConfig, setSortConfig] = useState({ key: 'Average', direction: 'desc' });
+  const [searchQuery, setSearchQuery] = useState('');
+  const [showVanilla, setShowVanilla] = useState(true);
+  const [showToolCalling, setShowToolCalling] = useState(false);
+  useEffect(() => {
+    const fetchData = async () => {
+      try {
+        setLoading(true);
+        // Fetch all data from API
+        const response = await fetch('https://smolagents-smolagents-llm-leaderboard.hf.space/api/results');
+        if (!response.ok) {
+          throw new Error(`HTTP error! status: ${response.status}`);
+        }
+        const jsonData = await response.json();
+        setAllData(jsonData);
+      } catch (err) {
+        console.error('Error fetching data:', err);
+        setError(err.message);
+      } finally {
+        setLoading(false);
+      }
+    };
+    fetchData();
+  }, []);
+  const handleSort = (key) => {
+    const direction = sortConfig.key === key && sortConfig.direction === 'desc' ? 'asc' : 'desc';
+    setSortConfig({ key, direction });
+  };
+  // Filter data based on selected action type
+  const getFilteredData = () => {
+    const actionType = showToolCalling ? 'tool-calling' : 'code';
+    return allData.filter(item => item.source === actionType);
+  };
+  // Get vanilla score for a model
+  const getVanillaScore = (modelId, metric) => {
+    const vanillaEntry = allData.find(item =>
+      item.model_id === modelId && item.source === 'vanilla'
+    );
+    return vanillaEntry?.scores[metric];
+  };
+  const filteredAndSortedData = chain(getFilteredData())
+    .filter(item => item.model_id.toLowerCase().includes(searchQuery.toLowerCase()))
+    .orderBy(
+      [item => {
+        if (sortConfig.key === 'model') {
+          return item.model_id;
+        }
+        return item.scores[sortConfig.key] || 0;
+      }],
+      [sortConfig.direction]
     )
+    .value();
+  if (loading) return <div className="container">Loading benchmark results...</div>;
+  if (error) return <div className="container" style={{color: 'red'}}>Error: {error}</div>;
+  return (
+    <div className="container">
+      <div className="header">
+        <h1 className="title">Smolagents Leaderboard</h1>
+        <p className="subtitle">How do different LLMs compare for powering agents?</p>
+        <p className="subtitle">Uses <a target="_blank" href="https://github.com/huggingface/smolagents">smolagents</a> with <a target="_blank" href="https://huggingface.co/datasets/smolagents/benchmark-v1">smolagents benchmark</a>.</p>
+      </div>
+      <div className="search-container">
+        <div className="search-with-options">
+          <input
+            type="text"
+            className="search-input"
+            placeholder="Search models..."
+            value={searchQuery}
+            onChange={(e) => setSearchQuery(e.target.value)}
+          />
+          <div className="options-container">
+            <label className="option-label">
+              <input
+                type="checkbox"
+                checked={showVanilla}
+                onChange={() => setShowVanilla(!showVanilla)}
+              />
+              Show Vanilla Scores
+            </label>
+            <label className="option-label">
+              <input
+                type="checkbox"
+                checked={showToolCalling}
+                onChange={() => setShowToolCalling(!showToolCalling)}
+              />
+              Show Tool-Calling Scores
+            </label>
+          </div>
+        </div>
+      </div>
+      <div className="table-container">
+        <table>
+          <thead>
+            <tr>
+              <th onClick={() => handleSort('model')}>
+                Model {sortConfig.key === 'model' && (
+                  sortConfig.direction === 'desc' ? '↓' : '↑'
+                )}
+              </th>
+              {["Average", "GAIA", "MATH", "SimpleQA"].map(benchmark => (
+                <th key={benchmark} onClick={() => handleSort(benchmark)}>
+                  {benchmark} {sortConfig.key === benchmark && (
+                    sortConfig.direction === 'desc' ? '↓' : '↑'
+                  )}
+                </th>
+              ))}
+            </tr>
+          </thead>
+          <tbody>
+            {filteredAndSortedData.map((item, index) => (
+              <tr key={index}>
+                <td className="model-cell">
+                  <div className="model-name">{item.model_id}</div>
+                  {showVanilla && (
+                    <div className="vanilla-name">
+                      {`vanilla: ${getVanillaScore(item.model_id, 'Average')?.toFixed(1) || 'N/A'}%`}
+                    </div>
+                  )}
+                </td>
+                {["Average", "GAIA", "MATH", "SimpleQA"].map(metric => (
+                  <td key={metric}>
+                    <ScoreBar score={item.scores[metric]} />
+                    {showVanilla && getVanillaScore(item.model_id, metric) !== undefined && (
+                      <ScoreBar score={getVanillaScore(item.model_id, metric)} />
+                    )}
+                  </td>
+                ))}
+              </tr>
+            ))}
+          </tbody>
+        </table>
+      </div>
+      <div className="legend">
+        <p><strong>Agent type:</strong> {showToolCalling ? 'Tool-Calling' : 'Code'}{showVanilla ? ' (with Vanilla comparison)' : ''}</p>
+      </div>
+    </div>
+  );
+};
+export default App;