Spaces:

Narsil
/

eval_playground

Running

File size: 3,627 Bytes

e2152af

#!/usr/bin/env python3
"""
Script to fetch the 10 most used evaluation datasets from Hugging Face.
"""

import requests
from typing import List, Dict

# Substrings that mark a tag or dataset id as evaluation-related.
_EVAL_KEYWORDS = (
    "evaluation", "benchmark", "eval", "test-set", "validation",
    "leaderboard", "assessment", "metric",
)

# Names of well-known evaluation datasets, matched as substrings of the id.
_KNOWN_EVAL_DATASETS = (
    "glue", "superglue", "squad", "xnli", "hellaswag", "winogrande",
    "arc", "mmlu", "gsm8k", "humaneval", "mbpp", "truthfulqa",
    "bigbench", "c4", "piqa", "siqa", "boolq", "copa", "multirc",
    "record", "rte", "wic", "wsc", "cb", "axb", "axg", "swag",
    "race", "qnli", "wnli", "sst", "cola", "stsb", "mrpc", "qqp",
)

# Seconds to wait on the Hub before giving up. Without an explicit timeout
# requests.get() can block indefinitely on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 30


def _is_eval_dataset(dataset_id: str, tags: List) -> bool:
    """Return True if the lowercased id or any tag looks evaluation-related."""
    # NOTE: this is plain substring matching, so short needles over-match
    # (e.g. "eval" is found inside "retrieval", "arc" inside "research").
    if any(keyword in dataset_id for keyword in _EVAL_KEYWORDS):
        return True
    if any(known in dataset_id for known in _KNOWN_EVAL_DATASETS):
        return True
    return any(
        keyword in str(tag).lower()
        for tag in tags
        for keyword in _EVAL_KEYWORDS
    )


def get_popular_eval_datasets(limit: int = 10) -> List[Dict]:
    """
    Fetch popular evaluation datasets from Hugging Face Hub API.

    Asks the Hub for up to 100 datasets sorted by downloads (descending),
    keeps those whose id or tags contain an evaluation keyword or whose id
    contains a well-known benchmark name, and returns the top matches.

    Args:
        limit: Number of datasets to return. A non-positive value yields an
            empty list without contacting the Hub.

    Returns:
        List of dataset information dictionaries with the keys "name",
        "downloads", "likes", "tags" (at most 5 string tags) and
        "description" (at most 200 characters), ordered by downloads,
        highest first.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the Hub answers with an HTTP error status.
    """
    if limit <= 0:
        # Nothing to return, so skip the network round trip entirely.
        return []

    # Search for datasets with evaluation-related tags
    base_url = "https://huggingface.co/api/datasets"
    params = {
        "sort": "downloads",  # Sort by most downloaded
        "direction": "-1",    # Descending order
        "limit": 100,         # Get more to filter
        "full": "true"
    }

    response = requests.get(
        base_url, params=params, timeout=_REQUEST_TIMEOUT_SECONDS
    )
    response.raise_for_status()

    eval_datasets = []
    for dataset in response.json():
        # `.get(key) or default` also covers keys that are present but null,
        # which `.get(key, default)` would pass through as None.
        name = dataset.get("id") or ""
        tags = dataset.get("tags") or []

        if not _is_eval_dataset(name.lower(), tags):
            continue

        eval_datasets.append({
            "name": name,
            "downloads": dataset.get("downloads") or 0,
            "likes": dataset.get("likes") or 0,
            "tags": [tag for tag in tags if isinstance(tag, str)][:5],  # First 5 tags
            "description": (dataset.get("description") or "")[:200]  # First 200 chars
        })

    # Sort by downloads and return top N
    eval_datasets.sort(key=lambda x: x["downloads"], reverse=True)
    return eval_datasets[:limit]

def main():
    """Print a ranked summary of the top evaluation datasets on Hugging Face.

    Fetches the ten most-downloaded matches and writes name, download count,
    likes, and (when non-empty) tags and description for each to stdout.
    Any failure is reported as a message on stdout rather than raised.
    """
    print("Fetching the 10 most used evaluation datasets from Hugging Face...\n")

    try:
        top_datasets = get_popular_eval_datasets(10)

        for rank, entry in enumerate(top_datasets, start=1):
            print(f"{rank}. {entry['name']}")
            print(f"   Downloads: {entry['downloads']:,}")
            print(f"   Likes: {entry['likes']}")

            # Tags and description are optional lines: shown only if non-empty.
            tag_list = entry['tags']
            if tag_list:
                print(f"   Tags: {', '.join(tag_list)}")

            summary = entry['description']
            if summary:
                print(f"   Description: {summary}...")

            # Blank line separates consecutive entries.
            print()

    except requests.exceptions.RequestException as request_error:
        # Errors raised by requests get their own, more specific message.
        print(f"Error fetching data from Hugging Face: {request_error}")
    except Exception as unexpected_error:
        # Top-level boundary of the script: report anything else and finish.
        print(f"An error occurred: {unexpected_error}")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()