Spaces:
Sleeping
Sleeping
File size: 9,075 Bytes
ad46aa7 6bdb067 2fe384d 890118b 7fcdfda 6bdb067 890118b 72e8fda 890118b 24c5f61 890118b 28ae92d 6bdb067 b278efb 6bdb067 7fcdfda 6bdb067 9d52324 6bdb067 7fcdfda 6bdb067 e217684 6bdb067 9d52324 6bdb067 9d52324 6bdb067 890118b 28ae92d 5d16259 6bdb067 9d52324 8137b49 4c2d3f3 b278efb 74d2f39 6bdb067 6142c5e 74d2f39 d4e0904 74d2f39 2fe384d 74d2f39 d4e0904 2fe384d d4e0904 d0c293d 74d2f39 d0c293d 74d2f39 d4e0904 28ae92d d4e0904 2fe384d d5fd406 a49b47c d4e0904 28ae92d 7fcdfda 6bdb067 52a76a3 6bdb067 94dde65 7e9d6e7 94dde65 6bdb067 24c5f61 6bdb067 24c5f61 6bdb067 24c5f61 6bdb067 24c5f61 6bdb067 28ae92d d9c4f46 6bdb067 28ae92d 6bdb067 28ae92d 7fcdfda a87f246 6475a5c 6bdb067 6475a5c d5fd406 6bdb067 6475a5c 6bdb067 6475a5c 6bdb067 6475a5c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
import gradio as gr
import pandas as pd
import requests
from huggingface_hub import HfApi, create_repo, upload_file, hf_hub_download
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import os
# Hugging Face API setup
# NOTE(review): if HF_TOKEN is unset, HfApi runs unauthenticated and the
# write operations below (create_repo / upload_file) will fail — confirm
# the Space always has this secret configured.
HF_TOKEN = os.getenv("HF_TOKEN") # This should be set as an environment variable
api = HfApi(token=HF_TOKEN)
# Benchmark results for each deepfake detector.
# Metrics averaged across all evaluation datasets.
data_avg_performance = {
    "Detector": ["NPR", "UCF", "CAMO"],
    "Accuracy": [0.7169, 0.7229, 0.7555],
    "Precision": [0.9193, 0.9436, 0.9442],
    "Recall": [0.5996, 0.592, 0.647],
    "F1-Score": [0.7258, 0.7275, 0.7679],
    "MCC": [0.5044, 0.5285, 0.5707],
}

# Per-dataset accuracy for the same three detectors.
data_dataset_accuracy = {
    "Detector": ["NPR", "UCF", "CAMO"],
    "CelebA-HQ": [0.987, 0.995, 0.999],
    "Flickr30k": [0.916, 0.981, 0.979],
    "ImageNet": [0.834, 0.847, 0.831],
    "DiffusionDB": [0.876, 0.85, 0.961],
    "CelebA-HQ-SDXL": [0.386, 0.484, 0.682],
    "CelebA-HQ-Flux": [0.846, 0.794, 0.722],
    "Flickr30k-SDXL": [0.302, 0.256, 0.28],
    "MS-COCO-Flux": [0.588, 0.576, 0.59],
}

# Materialize both tables as DataFrames for the UI components below.
df_avg_performance = pd.DataFrame(data_avg_performance)
df_dataset_accuracy = pd.DataFrame(data_dataset_accuracy)

# Normalize dtypes: detector names as strings, every metric numeric.
_METRIC_COLS = ['Accuracy', 'Precision', 'Recall', 'F1-Score', 'MCC']
df_avg_performance['Detector'] = df_avg_performance['Detector'].astype(str)
df_avg_performance[_METRIC_COLS] = df_avg_performance[_METRIC_COLS].apply(pd.to_numeric)
def init_leaderboard():
    """Build the average-performance Leaderboard component.

    Raises ValueError if the backing DataFrame is empty; otherwise returns
    a read-only Leaderboard over df_avg_performance with column selection,
    a detector search box, and a detector checkbox filter.
    """
    if df_avg_performance.empty:
        raise ValueError("Leaderboard DataFrame is empty.")
    shown_columns = ["Detector", "Accuracy", "Precision", "Recall", "F1-Score", "MCC"]
    return Leaderboard(
        value=df_avg_performance,
        # one string column (Detector) followed by five numeric metrics
        datatype=['str'] + ['number'] * 5,
        select_columns=SelectColumns(
            default_selection=shown_columns,
            label="Select Columns to Display:",
        ),
        search_columns=["Detector"],
        filter_columns=[ColumnFilter("Detector", type="checkboxgroup", label="Detectors")],
        bool_checkboxgroup_label="Hide detectors",
        interactive=False,
    )
# Function to validate if a file exists in the Hugging Face repo
def check_file_in_repo(repo_id, file_path):
try:
# List the repo contents and check if the file exists
contents = api.list_repo_files(repo_id)
return file_path in contents
except Exception as e:
return False
# Function to handle form submission and validate inputs
def submit_detector(detector_name, model_repo, detector_file_path, configs_file_path):
    """Validate a detector submission and record it in the submissions dataset.

    Verifies that `model_repo` exists and contains both the detector .py file
    and the config file, then appends one row to submissions/submission.csv in
    the `caliangandrew/dfd-arena-detectors` dataset (creating the dataset or
    the CSV if absent). Returns a human-readable status string for the UI.
    """
    # Check that the Hugging Face model repository exists.
    try:
        api.repo_info(model_repo)
    except Exception as e:
        return f"Repository '{model_repo}' not found. Error: {e}"

    # Validate the existence of the .py file and configs file in the repo.
    if not check_file_in_repo(model_repo, detector_file_path):
        return f"The file '{detector_file_path}' was not found in the repository."
    if not check_file_in_repo(model_repo, configs_file_path):
        return f"The configs file '{configs_file_path}' was not found in the repository."

    # One row describing this submission; evaluation happens out of band.
    submission_data = {
        "detector_name": detector_name,
        "model_repo": model_repo,
        "detector_file_path": detector_file_path,
        "configs_file_path": configs_file_path,
        "passed_invocation_test": "Pending",
        "evaluation_status": "Under Review"  # reviewed before evaluation runs
    }
    df_submission = pd.DataFrame([submission_data])
    dataset_repo_id = "caliangandrew/dfd-arena-detectors"

    try:
        # Create the dataset repo if it doesn't exist yet.
        create_repo(repo_id=dataset_repo_id,
                    token=HF_TOKEN,
                    repo_type="dataset",
                    exist_ok=True)
        # Load the current submissions CSV, if one has been uploaded before.
        existing_dataset_path = hf_hub_download(
            repo_id=dataset_repo_id,
            filename="submissions/submission.csv",
            token=HF_TOKEN,
            repo_type="dataset"
        )
        df_existing = pd.read_csv(existing_dataset_path)
        # Reject exact duplicates: some existing row equal on every column.
        if (df_existing == df_submission.iloc[0]).all(axis=1).any():
            return "Duplicate submission: This submission already exists in the dataset."
        # BUG FIX: the original code concatenated the new row a second time
        # after this branch, so every accepted submission was appended twice.
        # Append exactly once.
        df_updated = pd.concat([df_existing, df_submission], ignore_index=True)
    except Exception:
        # No existing submissions file: start a fresh dataset with this row.
        # NOTE(review): this also masks transient download/auth errors as
        # "first submission" — confirm that is acceptable.
        df_updated = df_submission

    # Save the updated (or new) DataFrame locally, then push it.
    submission_file = "submission.csv"
    df_updated.to_csv(submission_file, index=False)
    try:
        upload_file(
            path_or_fileobj=submission_file,
            path_in_repo="submissions/submission.csv",  # Location in the repo
            repo_id=dataset_repo_id,
            token=HF_TOKEN,
            repo_type="dataset",
            commit_message="Submitted for review: " + detector_name
        )
        return "Submission successful!"
    except Exception as e:
        return f"Failed to push submission: {str(e)}"
# Gradio demo: leaderboard tab + submission-form tab.
with gr.Blocks(theme=gr.themes.Base()) as demo:
    # Title
    gr.HTML("""
<div style="text-align:center;">
<h1> Deepfake Detection Arena (DFD) Leaderboard</h1>
</div>
""")
    # Description/Intro Section
    gr.Markdown("""
## π― The Open Benchmark for Detecting AI-Generated Images
[DFD-Arena](https://github.com/BitMind-AI/dfd-arena) is the first benchmark to address the open-source computer vision community's need for a *comprehensive evaluation framework* for state-of-the-art (SOTA) detection of AI-generated images.
While [previous studies](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9721302) have focused on benchmarking the SOTA on content-specific subsets of the deepfake detection problem, e.g. human face deepfake benchmarking via [DeepfakeBench](https://github.com/SCLBD/DeepfakeBench), these benchmarks do not adequately account for the broad spectrum of real and generated image types seen in everyday scenarios.
### π Explore DFD-Arena
Learn how the framework evaluates on diverse, content-rich images with semantic balance between real and generated data:
- π [Code: DFD-Arena GitHub Repository](https://github.com/BitMind-AI/dfd-arena)
- π [Blog: Technical Write-Up](https://bitmindlabs.notion.site/BitMind-Deepfake-Detection-Arena-106af85402838007830ece5a6f3f35a8?pvs=25)
### βοΈ Authorship
Both DFD-Arena and novel synthetic image datasets used for evaluation are created by [BitMind](https://www.bitmind.ca/).
- π¦ [X/Twitter: @BitMindAI](https://x.com/BitMindAI)
""")
    with gr.Tabs():
        # BUG FIX: the tab-label string literal was split across two physical
        # source lines (a Python syntax error, presumably a mangled emoji);
        # rejoined onto one line here.
        with gr.TabItem("π Deepfake Detection Arena"):
            # Average Performance Metrics leaderboard
            gr.Markdown("## Average Performance Metrics")
            leaderboard = init_leaderboard()
            # Dataset-specific accuracy table
            gr.Markdown("## Dataset-specific Accuracy")
            gr.DataFrame(value=df_dataset_accuracy)
        # Submit Detector Results Form
        with gr.TabItem("π Submit Detector Results"):
            gr.Markdown("Submit your detector results for evaluation.")
            with gr.Row():
                detector_name = gr.Textbox(label="Detector Name", placeholder="e.g., MyDetector")
                model_repo = gr.Textbox(label="Hugging Face Model Repo", placeholder="e.g., username/repo-name")
                detector_file_path = gr.Textbox(label="Path to detector .py", placeholder="e.g., my_detector.py")
                configs_file_path = gr.Textbox(label="Path to config .YAML", placeholder="e.g., my_config.yaml")
            submit_button = gr.Button("Submit", interactive=True)
            output = gr.Textbox(label="Submission Result", interactive=True)
            submit_button.click(fn=submit_detector,
                                inputs=[detector_name, model_repo, detector_file_path, configs_file_path],
                                outputs=output)

# Launch the Gradio app.
# BUG FIX: a stray " |" page-extraction artifact trailing this line removed.
demo.queue(default_concurrency_limit=40).launch()