Spaces:
Running
Running
Commit
·
96e55d5
1
Parent(s):
88c98d4
hard reset repo
Browse files
- .gitignore +5 -1
- .pre-commit-config.yaml +0 -53
- README.md +0 -34
- app.py +7 -7
- {src → leaderboard}/constants.py +0 -0
- {src → leaderboard}/css.py +0 -0
- {src → leaderboard}/logo.png +0 -0
- {src → leaderboard}/md.py +0 -0
- {src → leaderboard}/plt.py +0 -0
- {src → leaderboard}/utils.py +0 -0
- requirements.txt +2 -15
.gitignore
CHANGED
@@ -11,5 +11,9 @@ eval-results/
|
|
11 |
eval-queue-bk/
|
12 |
eval-results-bk/
|
13 |
logs/
|
|
|
|
|
14 |
.gradio/
|
15 |
-
.evals/
|
|
|
|
|
|
11 |
eval-queue-bk/
|
12 |
eval-results-bk/
|
13 |
logs/
|
14 |
+
evals/
|
15 |
+
|
16 |
.gradio/
|
17 |
+
.evals/
|
18 |
+
__pycache__/*
|
19 |
+
*.pyc
|
.pre-commit-config.yaml
DELETED
@@ -1,53 +0,0 @@
|
|
1 |
-
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
2 |
-
#
|
3 |
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
-
# you may not use this file except in compliance with the License.
|
5 |
-
# You may obtain a copy of the License at
|
6 |
-
#
|
7 |
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
-
#
|
9 |
-
# Unless required by applicable law or agreed to in writing, software
|
10 |
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
-
# See the License for the specific language governing permissions and
|
13 |
-
# limitations under the License.
|
14 |
-
|
15 |
-
default_language_version:
|
16 |
-
python: python3
|
17 |
-
|
18 |
-
ci:
|
19 |
-
autofix_prs: true
|
20 |
-
autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
|
21 |
-
autoupdate_schedule: quarterly
|
22 |
-
|
23 |
-
repos:
|
24 |
-
- repo: https://github.com/pre-commit/pre-commit-hooks
|
25 |
-
rev: v4.3.0
|
26 |
-
hooks:
|
27 |
-
- id: check-yaml
|
28 |
-
- id: check-case-conflict
|
29 |
-
- id: detect-private-key
|
30 |
-
- id: check-added-large-files
|
31 |
-
args: ['--maxkb=1000']
|
32 |
-
- id: requirements-txt-fixer
|
33 |
-
- id: end-of-file-fixer
|
34 |
-
- id: trailing-whitespace
|
35 |
-
|
36 |
-
- repo: https://github.com/PyCQA/isort
|
37 |
-
rev: 5.12.0
|
38 |
-
hooks:
|
39 |
-
- id: isort
|
40 |
-
name: Format imports
|
41 |
-
|
42 |
-
- repo: https://github.com/psf/black
|
43 |
-
rev: 22.12.0
|
44 |
-
hooks:
|
45 |
-
- id: black
|
46 |
-
name: Format code
|
47 |
-
additional_dependencies: ['click==8.0.2']
|
48 |
-
|
49 |
-
- repo: https://github.com/charliermarsh/ruff-pre-commit
|
50 |
-
# Ruff version.
|
51 |
-
rev: 'v0.0.267'
|
52 |
-
hooks:
|
53 |
-
- id: ruff
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
@@ -11,37 +11,3 @@ license: apache-2.0
|
|
11 |
tags:
|
12 |
- leaderboard
|
13 |
---
|
14 |
-
|
15 |
-
# Start the configuration
|
16 |
-
|
17 |
-
Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
|
18 |
-
|
19 |
-
Results files should have the following format and be stored as json files:
|
20 |
-
```json
|
21 |
-
{
|
22 |
-
"config": {
|
23 |
-
"model_dtype": "torch.float16", # or torch.bfloat16 or 8bit or 4bit
|
24 |
-
"model_name": "path of the model on the hub: org/model",
|
25 |
-
"model_sha": "revision on the hub",
|
26 |
-
},
|
27 |
-
"results": {
|
28 |
-
"task_name": {
|
29 |
-
"metric_name": score,
|
30 |
-
},
|
31 |
-
"task_name2": {
|
32 |
-
"metric_name": score,
|
33 |
-
}
|
34 |
-
}
|
35 |
-
}
|
36 |
-
```
|
37 |
-
|
38 |
-
Request files are created automatically by this tool.
|
39 |
-
|
40 |
-
If you encounter problem on the space, don't hesitate to restart it to remove the create eval-queue, eval-queue-bk, eval-results and eval-results-bk created folder.
|
41 |
-
|
42 |
-
# Code logic for more complex edits
|
43 |
-
|
44 |
-
You'll find
|
45 |
-
- the main table' columns names and properties in `src/display/utils.py`
|
46 |
-
- the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
|
47 |
-
- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
|
|
|
11 |
tags:
|
12 |
- leaderboard
|
13 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -6,10 +6,10 @@ import numpy as np
|
|
6 |
from datasets import load_dataset
|
7 |
from huggingface_hub import HfApi, snapshot_download
|
8 |
|
9 |
-
from
|
10 |
-
from
|
11 |
-
from
|
12 |
-
from
|
13 |
|
14 |
api = HfApi()
|
15 |
|
@@ -329,7 +329,7 @@ total_models = len(
|
|
329 |
rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"], style=False
|
330 |
).values
|
331 |
)
|
332 |
-
assets = Path("
|
333 |
|
334 |
# Using a string for a predefined color
|
335 |
theme = gr.themes.Default(primary_hue="blue")
|
@@ -344,7 +344,7 @@ with gr.Blocks(theme=theme, css=custom_css) as app:
|
|
344 |
# # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
|
345 |
# # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
|
346 |
# gr.Markdown("""
|
347 |
-
# 
|
349 |
|
350 |
with gr.Tabs(elem_id="outer-tabs", elem_classes="tabs-big") as tabs_big:
|
@@ -358,7 +358,7 @@ with gr.Blocks(theme=theme, css=custom_css) as app:
|
|
358 |
# img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
|
359 |
gr.Markdown(
|
360 |
"""
|
361 |
-

|
364 |
with gr.Tabs(elem_id="inner-tabs", elem_classes="tabs-small") as tabs:
|
|
|
6 |
from datasets import load_dataset
|
7 |
from huggingface_hub import HfApi, snapshot_download
|
8 |
|
9 |
+
from leaderboard.constants import example_counts, length_categories, subset_mapping
|
10 |
+
from leaderboard.css import custom_css
|
11 |
+
from leaderboard.md import *
|
12 |
+
from leaderboard.utils import load_all_data
|
13 |
|
14 |
api = HfApi()
|
15 |
|
|
|
329 |
rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"], style=False
|
330 |
).values
|
331 |
)
|
332 |
+
assets = Path("leaderboard").resolve() # absolute dir with the image
|
333 |
|
334 |
# Using a string for a predefined color
|
335 |
theme = gr.themes.Default(primary_hue="blue")
|
|
|
344 |
# # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
|
345 |
# # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
|
346 |
# gr.Markdown("""
|
347 |
+
# 
|
348 |
# """)
|
349 |
|
350 |
with gr.Tabs(elem_id="outer-tabs", elem_classes="tabs-big") as tabs_big:
|
|
|
358 |
# img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
|
359 |
gr.Markdown(
|
360 |
"""
|
361 |
+

|
362 |
"""
|
363 |
)
|
364 |
with gr.Tabs(elem_id="inner-tabs", elem_classes="tabs-small") as tabs:
|
{src → leaderboard}/constants.py
RENAMED
File without changes
|
{src → leaderboard}/css.py
RENAMED
File without changes
|
{src → leaderboard}/logo.png
RENAMED
File without changes
|
{src → leaderboard}/md.py
RENAMED
File without changes
|
{src → leaderboard}/plt.py
RENAMED
File without changes
|
{src → leaderboard}/utils.py
RENAMED
File without changes
|
requirements.txt
CHANGED
@@ -1,16 +1,3 @@
|
|
1 |
-
|
2 |
-
black
|
3 |
-
datasets
|
4 |
-
gradio
|
5 |
-
gradio[oauth]
|
6 |
-
gradio_leaderboard==0.0.13
|
7 |
-
gradio_client
|
8 |
-
huggingface-hub>=0.18.0
|
9 |
-
matplotlib
|
10 |
-
numpy
|
11 |
pandas
|
12 |
-
|
13 |
-
tqdm
|
14 |
-
transformers
|
15 |
-
tokenizers>=0.15.0
|
16 |
-
sentencepiece
|
|
|
1 |
+
apscheduler
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
pandas
|
3 |
+
datasets==2.21.0
|
|
|
|
|
|
|
|