# Source: ControlNet-Image-Generator / scripts/download_datasets.py
# Uploaded by danhtran2mind in commit f56ede2 ("Upload 68 files", verified).
import argparse
import yaml
from datasets import load_dataset
def load_config(config_path):
    """Load the dataset configuration from a YAML file.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        The parsed YAML content exactly as ``yaml.safe_load`` produces it.
        Callers should be prepared for ``None`` when the file is empty.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default locale encoding (which is not UTF-8 on every system).
    with open(config_path, 'r', encoding='utf-8') as file:
        # safe_load only builds plain Python objects, never arbitrary ones.
        return yaml.safe_load(file)
def download_huggingface_dataset(config):
    """Download one Hugging Face dataset described by a config entry.

    Args:
        config: Mapping with the keys ``dataset_name`` (the Hub repository
            id, either ``"user/name"`` or a bare ``"name"``) and
            ``local_dir`` (directory handed to ``load_dataset`` as
            ``cache_dir``).

    Raises:
        KeyError: If ``dataset_name`` or ``local_dir`` is missing.
    """
    # Get dataset details from config
    dataset_name = config['dataset_name']
    local_dir = config['local_dir']

    # Split the repository id into its namespace and its name. rpartition is
    # used instead of a two-way unpack of split('/') so that ids without a
    # namespace (no '/') yield an empty user name rather than raising
    # ValueError before the download even starts.
    user_name, _, model_hub_name = dataset_name.rpartition('/')

    # Login using e.g. `huggingface-cli login` to access this dataset
    ds = load_dataset(dataset_name, cache_dir=local_dir)

    # Print information for verification
    print(f"User Name: {user_name}")
    print(f"Model Hub Name: {model_hub_name}")
    print(f"Dataset saved to: {local_dir}")
    print(f"Dataset info: {ds}")
if __name__ == "__main__":
    # Set up argument parser
    parser = argparse.ArgumentParser(description="Download dataset from Hugging Face")
    parser.add_argument('--config_path',
                        type=str,
                        default='configs/datasets_info.yaml',
                        help='Path to the dataset configuration YAML file')
    args = parser.parse_args()

    # Load configuration from YAML file
    configs = load_config(args.config_path)

    # Normalise the parsed document to a list of entries: an empty YAML file
    # parses to None (nothing to download), and a single top-level mapping is
    # treated as a one-entry list instead of being iterated key by key.
    if configs is None:
        configs = []
    elif isinstance(configs, dict):
        configs = [configs]

    # Iterate through the list of configurations
    for config in configs:
        # .get() lets an entry without a 'platform' key fall through to the
        # "unsupported" report below instead of aborting the run with KeyError.
        platform = config.get('platform')
        # Download dataset if platform is HuggingFace
        if platform == 'HuggingFace':
            download_huggingface_dataset(config)
        else:
            print(f"Unsupported platform: {platform}")