eval_playground / get_popular_eval_datasets.py
Narsil's picture
Narsil HF Staff
Push.
e2152af unverified
#!/usr/bin/env python3
"""
Script to fetch the 10 most used evaluation datasets from Hugging Face.
"""
import requests
from typing import List, Dict
def get_popular_eval_datasets(limit: int = 10) -> List[Dict]:
    """
    Fetch popular evaluation datasets from Hugging Face Hub API.

    The 100 most-downloaded datasets are requested from the Hub and kept when
    their id or one of their tags contains an evaluation-related keyword, or
    when their id contains the name of a well-known benchmark.

    Matching is plain substring containment, so it can over-match (for
    example, the keyword "eval" is also contained in "retrieval").

    Args:
        limit: Number of datasets to return. Because only 100 candidates are
            inspected, fewer than ``limit`` entries may be returned. A value
            of zero or less yields an empty list without any network call.

    Returns:
        List of dataset information dictionaries with the keys ``name``,
        ``downloads``, ``likes``, ``tags`` (at most 5) and ``description``
        (at most 200 characters), sorted by downloads in descending order.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an error status.
    """
    # A non-positive limit can never select anything; a negative value would
    # otherwise turn the final slice into "everything but the last N".
    if limit <= 0:
        return []

    # Common evaluation dataset tags and keywords
    eval_keywords = (
        "evaluation", "benchmark", "eval", "test-set", "validation",
        "leaderboard", "assessment", "metric",
    )

    # Well-known evaluation datasets, matched against the dataset id only.
    # Built once here instead of once per dataset inside the loop.
    known_eval_datasets = (
        "glue", "superglue", "squad", "xnli", "hellaswag", "winogrande",
        "arc", "mmlu", "gsm8k", "humaneval", "mbpp", "truthfulqa",
        "bigbench", "c4", "piqa", "siqa", "boolq", "copa", "multirc",
        "record", "rte", "wic", "wsc", "cb", "axb", "axg", "swag",
        "race", "qnli", "wnli", "sst", "cola", "stsb", "mrpc", "qqp",
    )

    # Search for datasets with evaluation-related tags
    base_url = "https://huggingface.co/api/datasets"
    params = {
        "sort": "downloads",  # Sort by most downloaded
        "direction": "-1",    # Descending order
        "limit": 100,         # Get more to filter
        "full": "true",
    }

    # Without a timeout, requests waits forever on an unresponsive server.
    # Timeout errors are RequestException subclasses, like the other failures.
    response = requests.get(base_url, params=params, timeout=30)
    response.raise_for_status()
    datasets = response.json()

    # Filter for evaluation datasets
    eval_datasets = []
    for dataset in datasets:
        # "or" fallbacks also cover fields that are present but null, which
        # a plain .get(key, default) would pass through as None.
        tags = dataset.get("tags") or []
        name = dataset.get("id") or ""
        dataset_id = name.lower()

        # Check for eval keywords in tags or dataset name
        is_eval = any(
            any(keyword in str(tag).lower() for keyword in eval_keywords)
            for tag in tags
        ) or any(keyword in dataset_id for keyword in eval_keywords)

        # Also include well-known evaluation datasets
        if not is_eval:
            is_eval = any(known in dataset_id for known in known_eval_datasets)

        if not is_eval:
            continue

        eval_datasets.append({
            "name": name,
            "downloads": dataset.get("downloads") or 0,
            "likes": dataset.get("likes") or 0,
            "tags": [tag for tag in tags if isinstance(tag, str)][:5],  # First 5 tags
            "description": (dataset.get("description") or "")[:200],  # First 200 chars
        })

    # Sort by downloads and return top N
    eval_datasets.sort(key=lambda x: x["downloads"], reverse=True)
    return eval_datasets[:limit]
def main():
    """Print a ranked report of the most-downloaded evaluation datasets.

    Request failures and any other exception are reported with a printed
    message instead of being propagated to the caller.
    """
    print("Fetching the 10 most used evaluation datasets from Hugging Face...\n")

    try:
        ranked = get_popular_eval_datasets(10)
        for position, entry in enumerate(ranked, start=1):
            print(f"{position}. {entry['name']}")
            print(f" Downloads: {entry['downloads']:,}")
            print(f" Likes: {entry['likes']}")

            # Optional lines: shown only when the field is non-empty.
            tag_list = entry['tags']
            if tag_list:
                print(f" Tags: {', '.join(tag_list)}")
            blurb = entry['description']
            if blurb:
                print(f" Description: {blurb}...")

            # Blank separator line between entries.
            print()
    except requests.exceptions.RequestException as err:
        print(f"Error fetching data from Hugging Face: {err}")
    except Exception as err:
        print(f"An error occurred: {err}")
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()