import os

import gradio as gr
import pandas as pd
from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns
from huggingface_hub import HfApi, create_repo, hf_hub_download, upload_file
from huggingface_hub.utils import EntryNotFoundError

# Hugging Face API setup
HF_TOKEN = os.getenv("HF_TOKEN")  # This should be set as an environment variable

api = HfApi(token=HF_TOKEN)
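
# Fail fast when no token is available. (Defensive sketch: the leaderboard
# renders read-only without a token, but the submission flow below needs
# write access to the submissions dataset.)
if not HF_TOKEN:
    print("Warning: HF_TOKEN is not set; detector submissions will fail to upload.")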

# Deepfake detector data
data_avg_performance = {
    "Detector": ["NPR", "UCF", "CAMO"],
    "Accuracy": [0.7169, 0.7229, 0.7555],
    "Precision": [0.9193, 0.9436, 0.9442],
    "Recall": [0.5996, 0.592, 0.647],
    "F1-Score": [0.7258, 0.7275, 0.7679],
    "MCC": [0.5044, 0.5285, 0.5707],
}

data_dataset_accuracy = {
    "Detector": ["NPR", "UCF", "CAMO"],
    "CelebA-HQ": [0.987, 0.995, 0.999],
    "Flickr30k": [0.916, 0.981, 0.979],
    "ImageNet": [0.834, 0.847, 0.831],
    "DiffusionDB": [0.876, 0.85, 0.961],
    "CelebA-HQ-SDXL": [0.386, 0.484, 0.682],
    "CelebA-HQ-Flux": [0.846, 0.794, 0.722],
    "Flickr30k-SDXL": [0.302, 0.256, 0.28],
    "MS-COCO-Flux": [0.588, 0.576, 0.59],
}

# Convert data to DataFrames
df_avg_performance = pd.DataFrame(data_avg_performance)
df_dataset_accuracy = pd.DataFrame(data_dataset_accuracy)

# Ensure correct data types
df_avg_performance['Detector'] = df_avg_performance['Detector'].astype(str)
df_avg_performance[['Accuracy', 'Precision', 'Recall', 'F1-Score', 'MCC']] = df_avg_performance[
    ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'MCC']
].apply(pd.to_numeric)

def init_leaderboard():
    if df_avg_performance.empty:
        raise ValueError("Leaderboard DataFrame is empty.")
    
    # Display average performance leaderboard only
    leaderboard = Leaderboard(
        value=df_avg_performance,
        datatype=['str', 'number', 'number', 'number', 'number', 'number'],
        select_columns=SelectColumns(
            default_selection=["Detector", "Accuracy", "Precision", "Recall", "F1-Score", "MCC"],
            label="Select Columns to Display:"
        ),
        search_columns=["Detector"],
        filter_columns=[
            ColumnFilter("Detector", type="checkboxgroup", label="Detectors"),
        ],
        bool_checkboxgroup_label="Hide detectors",
        interactive=False,
    )
    return leaderboard

# Function to validate if a file exists in the Hugging Face repo
def check_file_in_repo(repo_id, file_path):
    try:
        # List the repo contents and check if the file exists
        contents = api.list_repo_files(repo_id)
        return file_path in contents
    except Exception:
        return False
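
# Example usage (assumes the sample submission repo referenced in the UI below
# still exists and contains this file):
#   check_file_in_repo("caliangandrew/submit_test", "test_detector.py")  # -> True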

# Function to handle form submission and validate inputs
def submit_detector(detector_name, model_repo, detector_file_path, configs_file_path):
    # Check if the Hugging Face repository exists
    try:
        api.repo_info(model_repo)  # raises if the repo doesn't exist or isn't accessible
    except Exception as e:
        return f"Repository '{model_repo}' not found. Error: {e}"

    # Validate that the detector .py file and the config file exist in the repo
    if not check_file_in_repo(model_repo, detector_file_path):
        return f"The file '{detector_file_path}' was not found in the repository."
    
    if not check_file_in_repo(model_repo, configs_file_path):
        return f"The configs file '{configs_file_path}' was not found in the repository."

    # Prepare the data to push to the Hugging Face dataset
    submission_data = {
        "detector_name": detector_name,
        "model_repo": model_repo,
        "detector_file_path": detector_file_path,
        "configs_file_path": configs_file_path,
        "passed_invocation_test": "Pending",
        "evaluation_status": "Under Review"  # Set evaluated to False by default
    }
    df_submission = pd.DataFrame([submission_data])
    
    dataset_repo_id = "bitmind/dfd-arena-detectors"
    # Push to the dataset or create it if it doesn't exist
    try:
        # Create the dataset if it doesn't exist
        create_repo(repo_id=dataset_repo_id,
                    token=HF_TOKEN,
                    repo_type="dataset",
                    exist_ok=True)

        # Try to download the existing dataset
        existing_dataset_path = hf_hub_download(
            repo_id=dataset_repo_id,
            filename="submissions/submission.csv",
            token=HF_TOKEN,
            repo_type="dataset"
        )
        # If the file is found, load the existing dataset into a DataFrame
        df_existing = pd.read_csv(existing_dataset_path)

        # Reject exact duplicates (a row where every column matches the new submission)
        is_duplicate = (df_existing == df_submission.iloc[0]).all(axis=1).any()
        if is_duplicate:
            return "Duplicate submission: This submission already exists in the dataset."

        # Append the new submission to the existing records
        df_updated = pd.concat([df_existing, df_submission], ignore_index=True)
    except EntryNotFoundError:
        # No submissions file exists yet: start a fresh DataFrame. (Catching only
        # this error avoids silently overwriting history on unrelated failures.)
        df_updated = pd.DataFrame([submission_data])
    
    # Save the updated (or new) DataFrame as a CSV file
    submission_file = "submission.csv"
    df_updated.to_csv(submission_file, index=False)
    
    # Upload the updated (or new) file to the Hugging Face repository
    try:
        upload_file(
            path_or_fileobj=submission_file,
            path_in_repo="submissions/submission.csv",  # Location in the repo
            repo_id=dataset_repo_id,
            token=HF_TOKEN,
            repo_type="dataset",
            commit_message="Submitted for review: " + detector_name
        )
        return "Submission successful!"
    except Exception as e:
        return f"Failed to push submission: {str(e)}"

# Gradio demo
with gr.Blocks(theme=gr.themes.Base()) as demo:
    # Title
    gr.HTML("""
        <div style="text-align:center;">
            <h1>Deepfake Detection Arena (DFD) Leaderboard</h1>
        </div>
    """)
    
    # Description/Intro Section
    gr.Markdown("""
    ## 🎯 The Open Benchmark for Detecting AI-Generated Images
    [DFD-Arena](https://github.com/BitMind-AI/dfd-arena) is the first benchmark to address the open-source computer vision community's need for a *comprehensive evaluation framework* for state-of-the-art (SOTA) detection of AI-generated images.
    
    While [previous studies](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9721302) have benchmarked the SOTA on content-specific subsets of the deepfake detection problem (e.g., human-face deepfakes via [DeepfakeBench](https://github.com/SCLBD/DeepfakeBench)), those benchmarks do not adequately cover the broad spectrum of real and generated image types seen in everyday scenarios.
    
    ### 🔍 Explore DFD-Arena
    
    Learn how the framework evaluates detectors on diverse, content-rich images with semantic balance between real and generated data:
    
    - 📂 [Code: DFD-Arena GitHub Repository](https://github.com/BitMind-AI/dfd-arena)
    
    - πŸ“ [Blog: Technical Write-Up](https://bitmindlabs.notion.site/BitMind-Deepfake-Detection-Arena-106af85402838007830ece5a6f3f35a8?pvs=25)
    ### ✍️ Authorship
    
    Both DFD-Arena and the novel synthetic image datasets used for evaluation were created by [BitMind](https://www.bitmind.ca/).
    
    - 🐦 [X/Twitter: @BitMindAI](https://x.com/BitMindAI)
    """)
    
    with gr.Tabs():
        with gr.TabItem("πŸ… Deepfake Detection Arena"):
            # Add text for Average Performance Metrics
            gr.Markdown("## Average Performance Metrics")
            leaderboard = init_leaderboard()

            # Add dataset-specific accuracy
            gr.Markdown("## Dataset-specific Accuracy")
            gr.DataFrame(value=df_dataset_accuracy)

        # Submit Detector Results Form
        with gr.TabItem("πŸš€ Submit Detector Results"):
            # Add submission instructions
            gr.Markdown("""
                ## Submission Instructions
                
                <h3>πŸ—οΈ 1. Ensure that your detector code follows the DFD Arena detectors format.</h3>
                
                The best way to guarantee compatibility is to develop and test your detector within a local copy of the [DFD Arena repository](https://github.com/BitMind-AI/dfd-arena/tree/main), with your dependencies, detector file, and configs in the same relative locations as our implementations of the UCF, NPR, and CAMO detectors.
                   
                   **✅ Check list:**
                   - Your code should work with detector dependencies (architecture and training code) imported from a dependencies directory a level above the detector directory.
                     - e.g., `arena/detectors/deepfake_detectors/ucf_detector.py` relies on a dependency folder at `arena/detectors/UCF/`
                     - Our automated benchmarking pipeline will reconstruct the required directory at evaluation time
                   - Implement a `.py` file in `arena/detectors/deepfake_detectors/` containing a `DeepfakeDetector` subclass (PascalCase class name), registered as a module in the dfd-arena `DETECTOR_REGISTRY`.
                     - e.g., in `my_custom_detector.py`,
                     ```python
                     @DETECTOR_REGISTRY.register_module(module_name='MyCustomModuleName')
                     class MyCustomDetector(DeepfakeDetector):
                         # implementation
                     ```
                     - The module name should match the detector name you want to appear on the leaderboard.
                   - Create a config YAML file in `arena/detectors/deepfake_detectors/configs/` for your `DeepfakeDetector` to load. A generic sketch of the registry pattern follows this list.
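
                Since registry compatibility is the main failure mode, here is a generic, self-contained sketch of the register-by-decorator pattern described above. This is an illustration only, not dfd-arena's actual code; see the repository for the real `DETECTOR_REGISTRY` and `DeepfakeDetector` base class.

                ```python
                # Generic sketch of a decorator-based module registry (illustrative only)
                class Registry:
                    def __init__(self):
                        self._modules = {}

                    def register_module(self, module_name):
                        # Decorator factory: stores the class under the given name
                        def decorator(cls):
                            self._modules[module_name] = cls
                            return cls
                        return decorator

                    def get(self, module_name):
                        return self._modules[module_name]

                DETECTOR_REGISTRY = Registry()

                @DETECTOR_REGISTRY.register_module(module_name='MyCustomModuleName')
                class MyCustomDetector:  # in dfd-arena this would subclass DeepfakeDetector
                    pass

                # The registered name is what resolves back to your class (and
                # what appears on the leaderboard):
                assert DETECTOR_REGISTRY.get('MyCustomModuleName') is MyCustomDetector
                ```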

                
                <h3>🤗 2. Create a HuggingFace model repo with the detector `.py` file, config `.yaml`, and dependencies at the repo root.</h3>
                
                [Check out our Sample Leaderboard Submission Repo for Reference!](https://huggingface.co/caliangandrew/submit_test/tree/main)
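
                As a sketch, these files can be pushed with `huggingface_hub` (the repo id, file names, and token below are placeholders):

                ```python
                from huggingface_hub import HfApi

                api = HfApi(token="hf_...")  # a token with write access
                api.create_repo("your-username/my-detector", repo_type="model", exist_ok=True)

                # Upload the detector, config, and any dependencies to the repo root
                for filename in ["my_custom_detector.py", "my_custom_config.yaml", "weights.pth"]:
                    api.upload_file(
                        path_or_fileobj=filename,
                        path_in_repo=filename,
                        repo_id="your-username/my-detector",
                        repo_type="model",
                    )
                ```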

                
                <h3>📤 3. Fill out the form below with the correct paths and submit!</h3>
                
                Results are processed after a code review by the BitMind team and an automated test and benchmarking stage.

                
                **⚠️ Note:** The Detector Name must match the name of the detector module registered in the dfd-arena `DETECTOR_REGISTRY`; it will be the name shown for your detector on our leaderboard.
                   - For example, using the [Sample Submission Repo](https://huggingface.co/caliangandrew/submit_test/tree/main) provided, you would submit:  
                     - Detector Name: `test`  
                     - Hugging Face Model Repo: `caliangandrew/submit_test`  
                     - Path to detector `.py`: `test_detector.py`  
                     - Path to config `.YAML`: `test_config.yaml`
                
                You can monitor your submission via the companion [detectors](https://huggingface.co/datasets/bitmind/dfd-arena-detectors) and [results](https://huggingface.co/datasets/bitmind/dfd-arena-results) datasets.
                """)
            
            with gr.Row():
                detector_name = gr.Textbox(label="Detector Name", placeholder="e.g., MyDetector")
                model_repo = gr.Textbox(label="Hugging Face Model Repo", placeholder="e.g., username/repo-name")
                detector_file_path = gr.Textbox(label="Path to detector .py", placeholder="e.g., my_detector.py")
                configs_file_path = gr.Textbox(label="Path to config .YAML", placeholder="e.g., my_config.yaml")
            
            submit_button = gr.Button("Submit", interactive=True)
            output = gr.Textbox(label="Submission Result", interactive=False)
            
            submit_button.click(
                fn=submit_detector,
                inputs=[detector_name, model_repo, detector_file_path, configs_file_path],
                outputs=output,
            )

# Launch the Gradio app
demo.queue(default_concurrency_limit=40).launch()