Upload folder using huggingface_hub

#1
by ZiyiXia - opened
Files changed (11)
  1. .gitignore +15 -0
  2. Makefile +13 -0
  3. app.py +79 -0
  4. pyproject.toml +13 -0
  5. requirements.txt +16 -0
  6. results.csv +12 -0
  7. src/about.py +102 -0
  8. src/display/css_html_js.py +105 -0
  9. src/display/formatting.py +27 -0
  10. src/display/utils.py +103 -0
  11. src/envs.py +25 -0
.gitignore ADDED
@@ -0,0 +1,15 @@
+ auto_evals/
+ venv/
+ __pycache__/
+ .env
+ .ipynb_checkpoints
+ *ipynb
+ .vscode/
+
+ eval-queue/
+ eval-results/
+ eval-queue-bk/
+ eval-results-bk/
+ logs/
+
+ construct_df.py
Makefile ADDED
@@ -0,0 +1,13 @@
+ .PHONY: style quality
+
+
+ style:
+ 	python -m black --line-length 119 .
+ 	python -m isort .
+ 	ruff check --fix .
+
+
+ quality:
+ 	python -m black --check --line-length 119 .
+ 	python -m isort --check-only .
+ 	ruff check .
app.py ADDED
@@ -0,0 +1,79 @@
+ import gradio as gr
+ from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+ import pandas as pd
+ from apscheduler.schedulers.background import BackgroundScheduler
+ from huggingface_hub import snapshot_download
+
+ from src.about import (
+     CITATION_BUTTON_LABEL,
+     CITATION_BUTTON_TEXT,
+     EVALUATION_QUEUE_TEXT,
+     INTRODUCTION_TEXT,
+     LLM_BENCHMARKS_TEXT,
+     SUBMIT_FORM,
+     TITLE,
+ )
+ from src.display.css_html_js import custom_css
+ from src.display.utils import (
+     BENCHMARK_COLS,
+     COLS,
+     EVAL_COLS,
+     EVAL_TYPES,
+     AutoEvalColumn,
+     ModelType,
+     fields,
+     WeightType,
+     Precision,
+ )
+ from src.envs import API, REPO_ID
+
+
+ def restart_space():
+     API.restart_space(repo_id=REPO_ID)
+
+
+ LEADERBOARD_DF = pd.read_csv("results.csv")
+
+ demo = gr.Blocks(css=custom_css)
+ with demo:
+     gr.HTML(TITLE)
+     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+         with gr.TabItem("🏅 MVRB", elem_id="llm-benchmark-tab-table", id=0):
+             Leaderboard(
+                 value=LEADERBOARD_DF,
+                 datatype=[c.type for c in fields(AutoEvalColumn)],
+                 select_columns=SelectColumns(
+                     default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
+                     cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
+                     label="Select Columns to Display:",
+                 ),
+                 search_columns=[AutoEvalColumn.model.name],
+                 hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
+                 filter_columns=[
+                     ColumnFilter("#Params (B)", type="slider", default=[0.428, 8.0], label="Number of parameters (B)"),
+                 ],
+                 interactive=True,
+             )
+
+         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
+             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
+             gr.Markdown(SUBMIT_FORM, elem_classes="markdown-text")
+
+     with gr.Row():
+         with gr.Accordion("📙 Citation", open=False):
+             citation_button = gr.Textbox(
+                 value=CITATION_BUTTON_TEXT,
+                 label=CITATION_BUTTON_LABEL,
+                 lines=8,
+                 elem_id="citation-button",
+                 show_copy_button=True,
+             )
+
+ scheduler = BackgroundScheduler()
+ scheduler.add_job(restart_space, "interval", seconds=1800)
+ scheduler.start()
+ demo.queue(default_concurrency_limit=40).launch()
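
app.py loads `results.csv` straight into the `Leaderboard` component, so the CSV header has to match the column names declared in `src/display/utils.py`. Below is a minimal local sanity check, run from the repository root; it is an illustration of that relationship, not part of the Space itself.

```python
# Hypothetical sanity check (not part of the Space): confirm that results.csv
# exposes every column the Leaderboard is configured to display.
import pandas as pd

from src.display.utils import AutoEvalColumn, fields

df = pd.read_csv("results.csv")
expected = {c.name for c in fields(AutoEvalColumn) if not c.hidden}
missing = expected - set(df.columns)
if missing:
    raise ValueError(f"results.csv is missing expected columns: {missing}")
print("results.csv matches the leaderboard schema.")
```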
pyproject.toml ADDED
@@ -0,0 +1,13 @@
+ [tool.ruff]
+ # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
+ select = ["E", "F"]
+ ignore = ["E501"]  # line too long (black is taking care of this)
+ line-length = 119
+ fixable = ["A", "B", "C", "D", "E", "F", "G", "I", "N", "Q", "S", "T", "W", "ANN", "ARG", "BLE", "COM", "DJ", "DTZ", "EM", "ERA", "EXE", "FBT", "ICN", "INP", "ISC", "NPY", "PD", "PGH", "PIE", "PL", "PT", "PTH", "PYI", "RET", "RSE", "RUF", "SIM", "SLF", "TCH", "TID", "TRY", "UP", "YTT"]
+
+ [tool.isort]
+ profile = "black"
+ line_length = 119
+
+ [tool.black]
+ line-length = 119
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ APScheduler
+ black
+ datasets
+ gradio
+ gradio[oauth]
+ gradio_leaderboard==0.0.13
+ gradio_client
+ huggingface-hub>=0.18.0
+ matplotlib
+ numpy
+ pandas
+ python-dateutil
+ tqdm
+ transformers
+ tokenizers>=0.15.0
+ sentencepiece
results.csv ADDED
@@ -0,0 +1,12 @@
+ Rank,Model,#Params (B),Overall,SR,CSR,SQA,OVC
+ 1,BGE-VL-Screenshot,3.75,60.61,70.09,59.58,53.1,54.46
+ 2,"<a href=""https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct"">GME</a>",2.21,48.14,61.62,37.68,37.78,47.98
+ 3,"<a href=""https://huggingface.co/Tevatron/dse-phi3-v1.0"">DSE</a>",4.15,45.21,61.54,37.78,39.24,31.51
+ 4,"<a href=""https://huggingface.co/vidore/colpali"">ColPali</a>",2.92,43.64,61.73,35.0,35.32,31.04
+ 5,"<a href=""https://huggingface.co/nvidia/MM-Embed"">MM-Embed</a>",7.57,34.48,25.86,40.93,42.83,32.67
+ 6,"<a href=""https://huggingface.co/google/siglip-so400m-patch14-384"">SigLIP</a>",0.878,33.34,38.33,34.48,19.6,40.64
+ 7,"<a href=""https://huggingface.co/TIGER-Lab/VLM2Vec-Full"">VLM2Vec</a>",4.15,32.19,15.93,48.05,49.42,23.24
+ 8,"<a href=""https://huggingface.co/royokong/e5-v"">E5-V</a>",8.35,25.13,34.11,26.59,5.23,32.85
+ 9,"<a href=""https://huggingface.co/openai/clip-vit-large-patch14"">CLIP</a>",0.428,23.75,18.89,25.39,23.9,30.4
+ 10,"<a href=""https://huggingface.co/TIGER-Lab/UniIR"">Uni-IR</a>",0.428,19.63,12.35,35.92,29.68,20.06
+ 11,"<a href=""https://huggingface.co/OpenDriveLab/Vista"">VISTA</a>",0.196,13.85,5.21,11.29,25.78,16.61
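
The `Model` cells are raw HTML anchors, which the leaderboard's `markdown` column type renders as links. Below is a hedged sketch of how a new row could be appended with pandas, reusing `model_hyperlink` from `src/display/formatting.py`; the model name, URL, rank, and scores are placeholders.

```python
# Illustrative only: append a placeholder entry to results.csv.
import pandas as pd

from src.display.formatting import model_hyperlink

new_row = {
    "Rank": 12,  # ranks would be recomputed after re-sorting by Overall
    "Model": model_hyperlink("https://huggingface.co/org/my-retriever", "MyRetriever"),
    "#Params (B)": 1.23,
    "Overall": 30.00,
    "SR": 30.00,
    "CSR": 30.00,
    "SQA": 30.00,
    "OVC": 30.00,
}

df = pd.read_csv("results.csv")
df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
df.to_csv("results.csv", index=False)
```

The existing rows use plain `<a href="...">` anchors; `model_hyperlink` merely adds a target and dotted-underline styling, and either form renders in the markdown column.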
src/about.py ADDED
@@ -0,0 +1,102 @@
+ from dataclasses import dataclass
+ from enum import Enum
+
+ @dataclass
+ class Task:
+     benchmark: str
+     metric: str
+     col_name: str
+
+
+ # Select your tasks here
+ # ---------------------------------------------------
+ class Tasks(Enum):
+     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+     task0 = Task("anli_r1", "acc", "ANLI")
+     task1 = Task("logiqa", "acc_norm", "LogiQA")
+
+ NUM_FEWSHOT = 0  # Change with your few-shot setting
+ # ---------------------------------------------------
+
+
+
+ # Your leaderboard name
+ TITLE = """<h1 align="center" id="space-title">MVRB Leaderboard</h1>"""
+
+ # What does your leaderboard evaluate?
+ INTRODUCTION_TEXT = """
+ **MVRB (Massive Visualized IR Benchmark)** evaluates multimodal retrievers' performance on general **Vis-IR** tasks. The benchmark includes various task types, such as screenshot-based multimodal retrieval (screenshot to anything, anything to screenshot) and screenshot-conditioned retrieval (e.g., searching for documents using queries conditioned on screenshots). It also covers a variety of important domains, including news, products, papers, and charts.
+
+ More details can be found at:
+ - Paper: https://arxiv.org/pdf/2502.11431
+ - Repo: https://github.com/VectorSpaceLab/Vis-IR
+ """
+
+ # Which evaluations are you running? How can people reproduce what you have?
+ LLM_BENCHMARKS_TEXT = f"""
+ ## Tasks
+ - **Screenshot Retrieval (SR)** consists of evaluation samples, each comprising a textual query *q* and its relevant screenshot *s*: *(q, s)*. The retrieval model needs to precisely retrieve the relevant screenshot for a test query from a given corpus *S*. Each evaluation sample is created in two steps: 1) sample a screenshot *s*, 2) prompt the LLM to generate a search query based on the caption of the screenshot. We consider seven tasks under this category: product retrieval, paper retrieval, repo retrieval, news retrieval, chart retrieval, document retrieval, and slide retrieval.
+
+ - **Composed Screenshot Retrieval (CSR)** is made up of sq2s triplets. Given a screenshot *s1* and a query *q* conditioned on *s1*, the retrieval model needs to retrieve the relevant screenshot *s2* from the corpus *S*. We define four tasks for this category: product discovery, news-to-Wiki, knowledge relation, and Wiki-to-product. All tasks in this category are created by human annotators. For each task, annotators are instructed to identify relevant screenshot pairs and write queries to retrieve *s2* based on *s1*.
+
+ - **Screenshot Question Answering (SQA)** comprises sq2a triplets. Given a screenshot *s* and a question *q* conditioned on *s*, the retrieval model needs to retrieve the correct answer *a* from a candidate corpus *A*. Each evaluation sample is created in three steps: 1) sample a screenshot *s*, 2) prompt the MLLM to generate a question *q*, 3) prompt the MLLM to generate the answer *a* for *q* based on *s*. The following tasks are included in this category: product-QA, news-QA, Wiki-QA, paper-QA, and repo-QA.
+
+ - **Open-Vocab Classification (OVC)** is performed using evaluation samples of screenshots and their textual class labels. Given a screenshot *s* and the label class *C*, the retrieval model needs to discriminate the correct label *c* from *C* based on embedding similarity. We include the following tasks in this category: product classification, news-topic classification, academic-field classification, and knowledge classification. For each task, we employ human labelers to create the label class and assign each screenshot its correct label.
+ """
+
+ EVALUATION_QUEUE_TEXT = """
+ ## Some good practices before submitting a model
+
+ ### 1) Make sure you can load your model and tokenizer using AutoClasses:
+ ```python
+ from transformers import AutoConfig, AutoModel, AutoTokenizer
+ config = AutoConfig.from_pretrained("your model name", revision=revision)
+ model = AutoModel.from_pretrained("your model name", revision=revision)
+ tokenizer = AutoTokenizer.from_pretrained("your model name", revision=revision)
+ ```
+ If this step fails, follow the error messages to debug your model before submitting it. It's likely your model has been improperly uploaded.
+
+ Note: make sure your model is public!
+ Note: if your model needs `trust_remote_code=True`, we do not support this option yet, but we are working on adding it, stay posted!
+
+ ### 2) Convert your model weights to [safetensors](https://huggingface.co/docs/safetensors/index)
+ It's a new format for storing weights which is safer and faster to load and use. It will also allow us to add the number of parameters of your model to the `Extended Viewer`!
+
+ ### 3) Make sure your model has an open license!
+ This is a leaderboard for open models, and we'd love for as many people as possible to know they can use your model 🤗
+
+ ### 4) Fill out your model card
+ When we add extra information about models to the leaderboard, it will be automatically taken from the model card.
+
+ ## In case of model failure
+ If your model is displayed in the `FAILED` category, its execution stopped.
+ Make sure you have followed the above steps first.
+ If everything is done, check that you can launch the EleutherAI Harness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
+ """
+
+ SUBMIT_FORM = """
+ ## Submit your evaluation results as a JSON file in the following format:
+ ```json
+ {
+     "Model": "<Model Name>",
+     "URL (optional)": "<Model/Repo/Paper URL>",
+     "#params": "7.11B",
+     "Overall": 30.00,
+     "SR": 30.00,
+     "CSR": 30.00,
+     "SQA": 30.00,
+     "OVC": 30.00
+ }
+ ```
+ Then send an email to xxx@xx.xx with the JSON file attached. We will review your submission and add it to the leaderboard.
+ """
+
+ CITATION_BUTTON_LABEL = "Copy the following snippet to cite MVRB:"
+ CITATION_BUTTON_TEXT = """
+ @article{liu2025any,
+   title={Any Information Is Just Worth One Single Screenshot: Unifying Search With Visualized Information Retrieval},
+   author={Liu, Ze and Liang, Zhengyang and Zhou, Junjie and Liu, Zheng and Lian, Defu},
+   journal={arXiv preprint arXiv:2502.11431},
+   year={2025}
+ }
+ """
src/display/css_html_js.py ADDED
@@ -0,0 +1,105 @@
+ custom_css = """
+
+ .markdown-text {
+     font-size: 16px !important;
+ }
+
+ #models-to-add-text {
+     font-size: 18px !important;
+ }
+
+ #citation-button span {
+     font-size: 16px !important;
+ }
+
+ #citation-button textarea {
+     font-size: 16px !important;
+ }
+
+ #citation-button > label > button {
+     margin: 6px;
+     transform: scale(1.3);
+ }
+
+ #leaderboard-table {
+     margin-top: 15px
+ }
+
+ #leaderboard-table-lite {
+     margin-top: 15px
+ }
+
+ #search-bar-table-box > div:first-child {
+     background: none;
+     border: none;
+ }
+
+ #search-bar {
+     padding: 0px;
+ }
+
+ /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
+ #leaderboard-table td:nth-child(2),
+ #leaderboard-table th:nth-child(2) {
+     max-width: 400px;
+     overflow: auto;
+     white-space: nowrap;
+ }
+
+ .tab-buttons button {
+     font-size: 20px;
+ }
+
+ #scale-logo {
+     border-style: none !important;
+     box-shadow: none;
+     display: block;
+     margin-left: auto;
+     margin-right: auto;
+     max-width: 600px;
+ }
+
+ #scale-logo .download {
+     display: none;
+ }
+ #filter_type {
+     border: 0;
+     padding-left: 0;
+     padding-top: 0;
+ }
+ #filter_type label {
+     display: flex;
+ }
+ #filter_type label > span {
+     margin-top: var(--spacing-lg);
+     margin-right: 0.5em;
+ }
+ #filter_type label > .wrap {
+     width: 103px;
+ }
+ #filter_type label > .wrap .wrap-inner {
+     padding: 2px;
+ }
+ #filter_type label > .wrap .wrap-inner input {
+     width: 1px
+ }
+ #filter-columns-type {
+     border: 0;
+     padding: 0.5;
+ }
+ #filter-columns-size {
+     border: 0;
+     padding: 0.5;
+ }
+ #box-filter > .form {
+     border: 0
+ }
+ """
+
+ get_window_url_params = """
+ function(url_params) {
+     const params = new URLSearchParams(window.location.search);
+     url_params = Object.fromEntries(params);
+     return url_params;
+ }
+ """
src/display/formatting.py ADDED
@@ -0,0 +1,27 @@
+ def model_hyperlink(link, model_name):
+     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+
+
+ def make_clickable_model(model_name):
+     link = f"https://huggingface.co/{model_name}"
+     return model_hyperlink(link, model_name)
+
+
+ def styled_error(error):
+     return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
+
+
+ def styled_warning(warn):
+     return f"<p style='color: orange; font-size: 20px; text-align: center;'>{warn}</p>"
+
+
+ def styled_message(message):
+     return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"
+
+
+ def has_no_nan_values(df, columns):
+     return df[columns].notna().all(axis=1)
+
+
+ def has_nan_values(df, columns):
+     return df[columns].isna().any(axis=1)
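
These helpers return HTML strings for the leaderboard's markdown cells and status messages, while the NaN checks return boolean masks over a DataFrame. A small usage sketch follows; filtering incomplete rows this way is an assumption for illustration, not something app.py currently does.

```python
# Illustrative usage of the formatting helpers.
import pandas as pd

from src.display.formatting import has_no_nan_values, make_clickable_model

df = pd.read_csv("results.csv")
score_cols = ["Overall", "SR", "CSR", "SQA", "OVC"]
df = df[has_no_nan_values(df, score_cols)]  # keep only rows with complete scores

cell = make_clickable_model("openai/clip-vit-large-patch14")  # anchor pointing at the HF repo
print(cell)
```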
src/display/utils.py ADDED
@@ -0,0 +1,103 @@
+ from dataclasses import dataclass, make_dataclass
+ from enum import Enum
+
+ import pandas as pd
+
+ from src.about import Tasks
+
+ def fields(raw_class):
+     return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
+
+
+ # These classes are for user-facing column names,
+ # to avoid having to change them all around the code
+ # when a modification is needed
+ @dataclass
+ class ColumnContent:
+     name: str
+     type: str
+     displayed_by_default: bool
+     hidden: bool = False
+     never_hidden: bool = False
+
+ ## Leaderboard columns
+ auto_eval_column_dict = []
+ # Init
+ auto_eval_column_dict.append(["rank", ColumnContent, ColumnContent("Rank", "number", True, never_hidden=True)])
+ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+ auto_eval_column_dict.append(["num_params", ColumnContent, ColumnContent("#Params (B)", "number", True)])
+ # Scores
+ auto_eval_column_dict.append(["overall", ColumnContent, ColumnContent("Overall", "number", True, never_hidden=True)])
+ auto_eval_column_dict.append(["SR", ColumnContent, ColumnContent("SR", "number", True)])
+ auto_eval_column_dict.append(["CSR", ColumnContent, ColumnContent("CSR", "number", True)])
+ auto_eval_column_dict.append(["SQA", ColumnContent, ColumnContent("SQA", "number", True)])
+ auto_eval_column_dict.append(["OVC", ColumnContent, ColumnContent("OVC", "number", True)])
+
+ # We use make_dataclass to build the leaderboard column dataclass dynamically
+ AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
+
+ ## For the queue columns in the submission tab
+ @dataclass(frozen=True)
+ class EvalQueueColumn:  # Queue column
+     model = ColumnContent("model", "markdown", True)
+     revision = ColumnContent("revision", "str", True)
+     private = ColumnContent("private", "bool", True)
+     precision = ColumnContent("precision", "str", True)
+     weight_type = ColumnContent("weight_type", "str", "Original")
+     status = ColumnContent("status", "str", True)
+
+ ## All the model information that we might need
+ @dataclass
+ class ModelDetails:
+     name: str
+     display_name: str = ""
+     symbol: str = ""  # emoji
+
+
+ class ModelType(Enum):
+     PT = ModelDetails(name="pretrained", symbol="🟢")
+     FT = ModelDetails(name="fine-tuned", symbol="🔶")
+     IFT = ModelDetails(name="instruction-tuned", symbol="⭕")
+     RL = ModelDetails(name="RL-tuned", symbol="🟦")
+     Unknown = ModelDetails(name="", symbol="?")
+
+     def to_str(self, separator=" "):
+         return f"{self.value.symbol}{separator}{self.value.name}"
+
+     @staticmethod
+     def from_str(type):
+         if "fine-tuned" in type or "🔶" in type:
+             return ModelType.FT
+         if "pretrained" in type or "🟢" in type:
+             return ModelType.PT
+         if "RL-tuned" in type or "🟦" in type:
+             return ModelType.RL
+         if "instruction-tuned" in type or "⭕" in type:
+             return ModelType.IFT
+         return ModelType.Unknown
+
+ class WeightType(Enum):
+     Adapter = ModelDetails("Adapter")
+     Original = ModelDetails("Original")
+     Delta = ModelDetails("Delta")
+
+ class Precision(Enum):
+     float16 = ModelDetails("float16")
+     bfloat16 = ModelDetails("bfloat16")
+     Unknown = ModelDetails("?")
+
+     @staticmethod
+     def from_str(precision):
+         if precision in ["torch.float16", "float16"]:
+             return Precision.float16
+         if precision in ["torch.bfloat16", "bfloat16"]:
+             return Precision.bfloat16
+         return Precision.Unknown
+
+ # Column selection
+ COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
+
+ EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
+ EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
+
+ BENCHMARK_COLS = [t.value.col_name for t in Tasks]
+
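
`AutoEvalColumn` is built with `make_dataclass`, so each attribute's default is a `ColumnContent` describing one leaderboard column, and the custom `fields()` helper walks the class dict to recover them. A quick inspection sketch, run from the repository root; the list in the comment is what the definitions above should produce.

```python
# Illustrative inspection of the dynamically built column metadata.
from src.display.utils import COLS, AutoEvalColumn, fields

for col in fields(AutoEvalColumn):
    print(col.name, col.type, col.displayed_by_default, col.never_hidden)

# COLS keeps only the non-hidden column names, in declaration order:
# ['Rank', 'Model', '#Params (B)', 'Overall', 'SR', 'CSR', 'SQA', 'OVC']
print(COLS)
```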
src/envs.py ADDED
@@ -0,0 +1,25 @@
+ import os
+
+ from huggingface_hub import HfApi
+
+ # Info to change for your repository
+ # ----------------------------------
+ TOKEN = os.environ.get("HF_TOKEN")  # A read/write token for your org
+
+ OWNER = "demo-leaderboard-backend"  # Change to your org - don't forget to create a results and request dataset, with the correct format!
+ # ----------------------------------
+
+ REPO_ID = f"{OWNER}/leaderboard"
+ QUEUE_REPO = f"{OWNER}/requests"
+ RESULTS_REPO = f"{OWNER}/results"
+
+ # If you set up a cache later, just change HF_HOME
+ CACHE_PATH = os.getenv("HF_HOME", ".")
+
+ # Local caches
+ EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
+ EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
+ EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
+ EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")
+
+ API = HfApi(token=TOKEN)
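
The scheduled `restart_space` job in app.py uses `API` and `REPO_ID` from this module, so it only works when `HF_TOKEN` is set to a token with write access to the Space. A minimal local check, for illustration only and not part of the Space:

```python
# Illustrative check: warn if the token app.py needs for restart_space is missing.
import os

from src.envs import REPO_ID

if os.environ.get("HF_TOKEN") is None:
    print(f"HF_TOKEN is unset; the periodic restart of {REPO_ID} would fail to authenticate.")
else:
    print(f"Token found; app.py can periodically restart {REPO_ID}.")
```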