m-ric committed on
Commit
f6dd71f
·
verified ·
1 Parent(s): f19fba4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -75
app.py CHANGED
@@ -1,84 +1,185 @@
1
- from fastapi import FastAPI
2
- from fastapi.middleware.cors import CORSMiddleware
3
- from fastapi.responses import JSONResponse
4
- from fastapi.staticfiles import StaticFiles
5
- import numpy as np
6
- import argparse
7
- import os
8
- from datasets import load_dataset
9
 
10
- HOST = os.environ.get("API_URL", "0.0.0.0")
11
- PORT = os.environ.get("PORT", 7860)
12
- parser = argparse.ArgumentParser()
13
- parser.add_argument("--host", default=HOST)
14
- parser.add_argument("--port", type=int, default=PORT)
15
- parser.add_argument("--reload", action="store_true", default=True)
16
- parser.add_argument("--ssl_certfile")
17
- parser.add_argument("--ssl_keyfile")
18
- args = parser.parse_args()
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- app = FastAPI()
21
- app.add_middleware(
22
- CORSMiddleware,
23
- allow_origins=["*"],
24
- allow_credentials=True,
25
- allow_methods=["*"],
26
- allow_headers=["*"],
27
- )
28
 
29
-
30
- @app.get("/api/results")
31
- async def get_results():
32
- try:
33
- # Load the dataset
34
- dataset = load_dataset("smolagents/results")
35
-
36
- # Convert to list for processing
37
- data = dataset["train"].to_pandas()
38
-
39
- # Log some info to help debug
40
- print("Dataset loaded, shape:", data.shape)
41
- print("Columns:", data.columns)
42
- print("First row:", data.iloc[0])
43
-
44
- # Process the data to group by model and calculate scores
45
- processed_data = []
46
- grouped = data.groupby('model_id')
47
-
48
- for model_id, group in grouped:
49
- model_data = {
50
- 'model_id': model_id,
51
- 'scores': {}
52
- }
53
-
54
- # Calculate scores for each source
55
- for source in group['source'].unique():
56
- source_data = group[group['source'] == source]
57
- avg_acc = source_data['acc'].mean()
58
- model_data['scores'][source] = float(avg_acc)
59
- model_data['scores']["Average"] = group["acc"].mean()
60
-
61
- processed_data.append(model_data)
62
 
63
- return processed_data
64
-
65
- except Exception as e:
66
- # Print the full error traceback to your logs
67
- print("Error occurred:", str(e))
68
- raise HTTPException(status_code=500, detail=str(e))
 
 
 
 
 
 
 
 
 
 
 
69
 
 
 
 
 
70
 
71
- app.mount("/", StaticFiles(directory="static", html=True), name="static")
 
 
 
 
72
 
73
- if __name__ == "__main__":
74
- import uvicorn
 
 
 
 
 
75
 
76
- print(args)
77
- uvicorn.run(
78
- "app:app",
79
- host=args.host,
80
- port=args.port,
81
- reload=args.reload,
82
- ssl_certfile=args.ssl_certfile,
83
- ssl_keyfile=args.ssl_keyfile,
 
 
84
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useEffect } from 'react';
2
+ import { chain } from 'lodash';
3
+ import './App.css';
 
 
 
 
 
4
 
5
+ const ScoreBar = ({ score }) => {
6
+ if (score === undefined || score === null) return null;
7
+
8
+ const percentage = score <= 1 ? score * 100 : score;
9
+ const hue = Math.min(percentage * 1.2, 120); // Maps 0-100% to 0-120 (red to green)
10
+ const backgroundColor = `hsl(${hue}, 80%, 50%)`;
11
+
12
+ return (
13
+ <div className="score-bar">
14
+ <div
15
+ className="score-fill"
16
+ style={{
17
+ width: `${percentage}%`,
18
+ backgroundColor
19
+ }}
20
+ />
21
+ <span className="score-text">
22
+ {percentage.toFixed(1)}%
23
+ </span>
24
+ </div>
25
+ );
26
+ };
27
 
28
+ const App = () => {
29
+ const [allData, setAllData] = useState([]);
30
+ const [loading, setLoading] = useState(true);
31
+ const [error, setError] = useState(null);
32
+ const [sortConfig, setSortConfig] = useState({ key: 'Average', direction: 'desc' });
33
+ const [searchQuery, setSearchQuery] = useState('');
34
+ const [showVanilla, setShowVanilla] = useState(true);
35
+ const [showToolCalling, setShowToolCalling] = useState(false);
36
 
37
+ useEffect(() => {
38
+ const fetchData = async () => {
39
+ try {
40
+ setLoading(true);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
+ // Fetch all data from API
43
+ const response = await fetch('https://smolagents-smolagents-llm-leaderboard.hf.space/api/results');
44
+ if (!response.ok) {
45
+ throw new Error(`HTTP error! status: ${response.status}`);
46
+ }
47
+ const jsonData = await response.json();
48
+ setAllData(jsonData);
49
+ } catch (err) {
50
+ console.error('Error fetching data:', err);
51
+ setError(err.message);
52
+ } finally {
53
+ setLoading(false);
54
+ }
55
+ };
56
+
57
+ fetchData();
58
+ }, []);
59
 
60
+ const handleSort = (key) => {
61
+ const direction = sortConfig.key === key && sortConfig.direction === 'desc' ? 'asc' : 'desc';
62
+ setSortConfig({ key, direction });
63
+ };
64
 
65
+ // Filter data based on selected action type
66
+ const getFilteredData = () => {
67
+ const actionType = showToolCalling ? 'tool-calling' : 'code';
68
+ return allData.filter(item => item.source === actionType);
69
+ };
70
 
71
+ // Get vanilla score for a model
72
+ const getVanillaScore = (modelId, metric) => {
73
+ const vanillaEntry = allData.find(item =>
74
+ item.model_id === modelId && item.source === 'vanilla'
75
+ );
76
+ return vanillaEntry?.scores[metric];
77
+ };
78
 
79
+ const filteredAndSortedData = chain(getFilteredData())
80
+ .filter(item => item.model_id.toLowerCase().includes(searchQuery.toLowerCase()))
81
+ .orderBy(
82
+ [item => {
83
+ if (sortConfig.key === 'model') {
84
+ return item.model_id;
85
+ }
86
+ return item.scores[sortConfig.key] || 0;
87
+ }],
88
+ [sortConfig.direction]
89
  )
90
+ .value();
91
+
92
+ if (loading) return <div className="container">Loading benchmark results...</div>;
93
+ if (error) return <div className="container" style={{color: 'red'}}>Error: {error}</div>;
94
+
95
+ return (
96
+ <div className="container">
97
+ <div className="header">
98
+ <h1 className="title">Smolagents Leaderboard</h1>
99
+ <p className="subtitle">How do different LLMs compare for powering agents?</p>
100
+ <p className="subtitle">Uses <a target="_blank" href="https://github.com/huggingface/smolagents">smolagents</a> with <a target="_blank" href="https://huggingface.co/datasets/smolagents/benchmark-v1">smolagents benchmark</a>.</p>
101
+ </div>
102
+
103
+ <div className="search-container">
104
+ <div className="search-with-options">
105
+ <input
106
+ type="text"
107
+ className="search-input"
108
+ placeholder="Search models..."
109
+ value={searchQuery}
110
+ onChange={(e) => setSearchQuery(e.target.value)}
111
+ />
112
+
113
+ <div className="options-container">
114
+ <label className="option-label">
115
+ <input
116
+ type="checkbox"
117
+ checked={showVanilla}
118
+ onChange={() => setShowVanilla(!showVanilla)}
119
+ />
120
+ Show Vanilla Scores
121
+ </label>
122
+
123
+ <label className="option-label">
124
+ <input
125
+ type="checkbox"
126
+ checked={showToolCalling}
127
+ onChange={() => setShowToolCalling(!showToolCalling)}
128
+ />
129
+ Show Tool-Calling Scores
130
+ </label>
131
+ </div>
132
+ </div>
133
+ </div>
134
+
135
+ <div className="table-container">
136
+ <table>
137
+ <thead>
138
+ <tr>
139
+ <th onClick={() => handleSort('model')}>
140
+ Model {sortConfig.key === 'model' && (
141
+ sortConfig.direction === 'desc' ? '↓' : '↑'
142
+ )}
143
+ </th>
144
+ {["Average", "GAIA", "MATH", "SimpleQA"].map(benchmark => (
145
+ <th key={benchmark} onClick={() => handleSort(benchmark)}>
146
+ {benchmark} {sortConfig.key === benchmark && (
147
+ sortConfig.direction === 'desc' ? '↓' : '↑'
148
+ )}
149
+ </th>
150
+ ))}
151
+ </tr>
152
+ </thead>
153
+ <tbody>
154
+ {filteredAndSortedData.map((item, index) => (
155
+ <tr key={index}>
156
+ <td className="model-cell">
157
+ <div className="model-name">{item.model_id}</div>
158
+ {showVanilla && (
159
+ <div className="vanilla-name">
160
+ {`vanilla: ${getVanillaScore(item.model_id, 'Average')?.toFixed(1) || 'N/A'}%`}
161
+ </div>
162
+ )}
163
+ </td>
164
+ {["Average", "GAIA", "MATH", "SimpleQA"].map(metric => (
165
+ <td key={metric}>
166
+ <ScoreBar score={item.scores[metric]} />
167
+ {showVanilla && getVanillaScore(item.model_id, metric) !== undefined && (
168
+ <ScoreBar score={getVanillaScore(item.model_id, metric)} />
169
+ )}
170
+ </td>
171
+ ))}
172
+ </tr>
173
+ ))}
174
+ </tbody>
175
+ </table>
176
+ </div>
177
+
178
+ <div className="legend">
179
+ <p><strong>Agent type:</strong> {showToolCalling ? 'Tool-Calling' : 'Code'}{showVanilla ? ' (with Vanilla comparison)' : ''}</p>
180
+ </div>
181
+ </div>
182
+ );
183
+ };
184
+
185
+ export default App;