omnisealbench / backend /config.py
valeriulacatusu's picture
tuantran/local_leaderboard_run (#2)
6a879a9 verified
raw
history blame
7.62 kB
# Change these values to match your dataset structure if loading locally or from a different source.
# IMPORTANT: When running from Docker, more setup is required (e.g. on Hugging Face).
import os
from collections import defaultdict
from copy import deepcopy
from typing import Any, Dict
# Root location that hosts the published benchmark files.
ABS_DATASET_DOMAIN = "https://dl.fbaipublicfiles.com"
# To load from local files instead, point the domain at a local directory.
# Some more configuration may be required to load examples from local files.
# ABS_DATASET_DOMAIN = "./data"

# Base path (with trailing slash) under which the datasets are looked up.
ABS_DATASET_PATH = ABS_DATASET_DOMAIN + "/omnisealbench/"
# Per-modality constants merged into every dataset config of that modality.
# Each modality entry holds:
#   "first_cols"              - metric column names listed first
#   "attack_scores"           - score names reported per attack
#   "categories"              - attack name -> category label
#   "attacks_with_variations" - attack names flagged as having variations
MODALITY_CONFIG_CONSTANTS = {
    # ------------------------------------------------------------------ audio
    "audio": {
        "first_cols": ["snr", "sisnr", "stoi", "pesq"],
        "attack_scores": ["bit_acc", "log10_p_value", "TPR", "FPR"],
        "categories": {
            # Time
            "speed": "Time",
            "updownresample": "Time",
            "echo": "Time",
            # Amplitude
            "random_noise": "Amplitude",
            "lowpass_filter": "Amplitude",
            "highpass_filter": "Amplitude",
            "bandpass_filter": "Amplitude",
            "smooth": "Amplitude",
            "boost_audio": "Amplitude",
            "duck_audio": "Amplitude",
            "shush": "Amplitude",
            "pink_noise": "Amplitude",
            # Compression
            "aac_compression": "Compression",
            "mp3_compression": "Compression",
        },
        "attacks_with_variations": [
            "random_noise",
            "lowpass_filter",
            "highpass_filter",
            "boost_audio",
            "duck_audio",
            "shush",
        ],
    },
    # ------------------------------------------------------------------ image
    "image": {
        "first_cols": [
            "psnr",
            "ssim",
            "lpips",
            "decoder_time",
        ],
        "attack_scores": [
            "bit_acc",
            "log10_p_value",
            "TPR",
            "FPR",
        ],
        "categories": {
            "proportion": "Geometric",
            "collage": "Inpainting",
            "center_crop": "Geometric",
            "rotate": "Geometric",
            "jpeg": "Compression",
            "brightness": "Visual",
            "contrast": "Visual",
            "saturation": "Visual",
            "sharpness": "Visual",
            "resize": "Geometric",
            "overlay_text": "Inpainting",
            "hflip": "Geometric",
            "perspective": "Geometric",
            "median_filter": "Visual",
            "hue": "Visual",
            "gaussian_blur": "Visual",
            "comb": "Mixed",
            "avg": "Averages",
            "none": "Baseline",
        },
        "attacks_with_variations": [
            "center_crop",
            "jpeg",
            "brightness",
            "contrast",
            "saturation",
            "sharpness",
            "resize",
            "perspective",
            "median_filter",
            "hue",
            "gaussian_blur",
        ],
    },
    # ------------------------------------------------------------------ video
    "video": {
        "first_cols": [
            "psnr",
            "ssim",
            "msssim",
            "lpips",
            "vmaf",
            "decoder_time",
        ],
        "attack_scores": [
            "bit_acc",
            "log10_p_value",
            "TPR",
            "FPR",
        ],
        "categories": {
            "HorizontalFlip": "Geometric",
            "Rotate": "Geometric",
            "Resize": "Geometric",
            "Crop": "Geometric",
            "Perspective": "Geometric",
            "Brightness": "Visual",
            "Contrast": "Visual",
            "Saturation": "Visual",
            "Grayscale": "Visual",
            "Hue": "Visual",
            "JPEG": "Compression",
            "GaussianBlur": "Visual",
            "MedianFilter": "Visual",
            "H264": "Compression",
            "H264rgb": "Compression",
            "H265": "Compression",
            "VP9": "Compression",
            "H264_Crop_Brightness0": "Mixed",
            "H264_Crop_Brightness1": "Mixed",
            "H264_Crop_Brightness2": "Mixed",
            "H264_Crop_Brightness3": "Mixed",
        },
        "attacks_with_variations": [
            "Rotate",
            "Resize",
            "Crop",
            "Brightness",
            "Contrast",
            "Saturation",
            "H264",
            "H264rgb",
            "H265",
        ],
    },
}
# Built-in datasets, keyed by "<dataset>/<modality>". Every entry records its
# modality under "type" and the base location of its files under "path".
DATASET_CONFIGS = {
    dataset_id: {"type": modality, "path": ABS_DATASET_PATH}
    for dataset_id, modality in (
        ("voxpopuli_1k/audio", "audio"),
        ("ravdess_1k/audio", "audio"),
        ("val2014_1k_v2/image", "image"),
        ("sa_1b_val_1k/image", "image"),
        ("sav_val_full_v2/video", "video"),
    )
}
# Default example source for each modality: the dataset name, the base path of
# its files, and the "db_key" associated with it.
EXAMPLE_CONFIGS = {
    "audio": dict(
        dataset_name="voxpopuli_1k",
        path=ABS_DATASET_PATH,
        db_key="voxpopuli",
    ),
    # Alternative image example source, currently disabled:
    # "image": dict(
    #     dataset_name="val2014_1k_v2",
    #     path=ABS_DATASET_PATH,
    #     db_key="local_val2014",
    # ),
    "image": dict(
        dataset_name="sa_1b_val_1k",
        path=ABS_DATASET_PATH,
        db_key="local_valid",
    ),
    "video": dict(
        dataset_name="sav_val_full_v2",
        path=ABS_DATASET_PATH,
        db_key="sa-v_sav_val_videos",
    ),
}
def get_user_dataset():
    """Discover user-provided datasets in the local leaderboard data directory.

    The directory is read from the ``OMNISEAL_LEADERBOARD_DATA`` environment
    variable and defaults to ``./data``. Every sub-directory that contains a
    non-empty ``examples`` folder counts as one dataset; the first entry (in
    sorted order) of that folder is used as the dataset's modality.

    Returns:
        A ``defaultdict(list)`` mapping modality -> list of
        ``"<dataset>/<modality>"`` identifiers, sorted by dataset name. It is
        empty when the data directory is unset, does not exist, or holds no
        usable dataset.
    """
    datasets = defaultdict(list)
    user_data_dir = os.getenv("OMNISEAL_LEADERBOARD_DATA", "./data")

    # User data is optional: the default "./data" may simply not exist, which
    # means "no user datasets" rather than an error.
    if not user_data_dir or not os.path.isdir(user_data_dir):
        return datasets

    # os.listdir() returns entries in arbitrary order; sort so the result (and
    # any "first dataset" choice made by callers) is deterministic.
    for user_data in sorted(os.listdir(user_data_dir)):
        examples_dir = os.path.join(user_data_dir, user_data, "examples")
        # Skips plain files as well as directories without an examples folder.
        if not os.path.isdir(examples_dir):
            continue
        modalities = sorted(os.listdir(examples_dir))
        if not modalities:
            continue
        user_dtype = modalities[0]
        datasets[user_dtype].append(f"{user_data}/{user_dtype}")

    return datasets
def get_datasets():
    """Return all known dataset identifiers grouped by modality.

    Built-in datasets from ``DATASET_CONFIGS`` come first, followed by the
    user datasets reported by ``get_user_dataset()`` that are not already
    listed. Only the "audio", "image" and "video" modalities are reported.

    Returns:
        A dict with the keys "audio", "image" and "video", each mapping to a
        list of dataset identifiers.
    """
    by_modality = {"audio": [], "image": [], "video": []}

    # Built-in datasets.
    for dataset_id, dataset_cfg in DATASET_CONFIGS.items():
        bucket = by_modality.get(dataset_cfg.get("type"))
        if bucket is not None:
            bucket.append(dataset_id)

    # User datasets: add the ones not already present for a known modality.
    for modality, user_ids in get_user_dataset().items():
        if modality not in by_modality:
            continue
        known = by_modality[modality]
        # Build the list of additions before extending, so the membership
        # test only looks at what was present beforehand.
        additions = [user_id for user_id in user_ids if user_id not in known]
        known.extend(additions)

    return by_modality
def get_example_config(type):
    """Return the example configuration for the given modality.

    When ``get_user_dataset()`` reports at least one dataset for the
    modality, the first one is used and served from the directory named by
    ``OMNISEAL_LEADERBOARD_DATA`` (default ``./data``). Otherwise a copy of
    the built-in entry from ``EXAMPLE_CONFIGS`` is returned.

    Args:
        type: Modality name; must be a key of ``EXAMPLE_CONFIGS``.

    Returns:
        A dict with the keys "dataset_name", "path" and "db_key".

    Raises:
        ValueError: If ``type`` is not a key of ``EXAMPLE_CONFIGS``.
    """
    if type not in EXAMPLE_CONFIGS:
        raise ValueError(f"Unknown example type: {type}")

    user_datasets = get_user_dataset()
    user_data_dir = os.getenv("OMNISEAL_LEADERBOARD_DATA", "./data")
    if user_datasets:
        assert user_data_dir, f"OMNISEAL_LEADERBOARD_DATA is reset during loading the examples for {type}. Please set it correctly"

    user_names = user_datasets.get(type)
    if user_names is None:
        # No user dataset for this modality: fall back to a private copy of
        # the built-in example so callers cannot alter EXAMPLE_CONFIGS.
        builtin_config: Dict[str, Any] = deepcopy(EXAMPLE_CONFIGS[type])
        return builtin_config

    # Identifiers look like "<dataset>/<modality>"; keep the dataset part.
    first_dataset = user_names[0].split("/")[0]
    return {
        "dataset_name": first_dataset,
        "path": user_data_dir + "/",
        "db_key": first_dataset,
    }
def get_dataset_config(dataset_name):
    """Return the full configuration for a built-in or user dataset.

    The result combines the dataset's own "type" and "path" with the
    constants of its modality from ``MODALITY_CONFIG_CONSTANTS``. A new dict
    is built on every call and the modality constants are deep-copied, so
    neither ``DATASET_CONFIGS`` nor ``MODALITY_CONFIG_CONSTANTS`` is modified
    by this function or by callers that edit the returned value.

    Args:
        dataset_name: Identifier of the form ``"<dataset>/<modality>"``.

    Returns:
        A dict with "type", "path" and the modality constants for that type.

    Raises:
        ValueError: If the name is neither a key of ``DATASET_CONFIGS`` nor a
            dataset reported by ``get_user_dataset()``.
    """
    if dataset_name in DATASET_CONFIGS:
        base_cfg = DATASET_CONFIGS[dataset_name]
    else:
        # User datasets carry their modality as the last path component.
        modality = dataset_name.split("/")[-1]
        if dataset_name not in get_user_dataset().get(modality, []):
            raise ValueError(f"Unknown dataset: {dataset_name}")
        base_cfg = {
            "type": modality,
            # NOTE(review): unlike the built-in paths this has no trailing
            # "/" - confirm consumers join it with os.path.join.
            "path": os.getenv("OMNISEAL_LEADERBOARD_DATA", "./data"),
        }

    modality_constants = MODALITY_CONFIG_CONSTANTS.get(base_cfg["type"], {})
    # Modality constants take precedence over the base keys, as before.
    return {**base_cfg, **deepcopy(modality_constants)}