import json | |
import os | |
from huggingface_hub import HfApi, hf_hub_download | |
import utils.helpers as helpers | |
def update_dataset(data: list[dict], dataset_id: str, filename: str, hf_api_key: str) -> None:
    """
    Serialize ``data`` to a local JSON file and upload it to a HF dataset repo.

    Args:
        data: Records to serialize with ``json.dump``.
        dataset_id: Repo id of the target dataset on the Hugging Face Hub.
        filename: Used both as the local file that is written and as the
            path of the uploaded file inside the repo.
        hf_api_key: Token passed to ``upload_file`` for authentication.

    Any exception raised while writing the file or uploading it propagates
    to the caller; the success message is only logged after the upload call
    returns.
    """
    # Dump the data into the local file. The encoding is pinned so the write
    # does not depend on the platform's default locale.
    with open(filename, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)

    # Upload the freshly written file to the HF dataset
    api = HfApi()
    api.upload_file(
        path_or_fileobj=filename,  # the file to upload
        path_in_repo=filename,  # where to upload it to
        repo_id=dataset_id,
        repo_type="dataset",
        # Name the file that was actually uploaded instead of a fixed
        # "data.json"; identical to the old message when filename is data.json.
        commit_message=f"Update {filename} 🤖",
        token=hf_api_key,
    )
    helpers.log("Database updated!")
def load_dataset(dataset_id: str, filename: str, hf_api_key: str | None = None) -> list[dict]:
    """
    Download a JSON file from a HF dataset repo and return its parsed content.

    Args:
        dataset_id: Repo id of the dataset on the Hugging Face Hub.
        filename: Name of the file to fetch from the repo; any local file at
            this path is deleted before the download is attempted.
        hf_api_key: Optional token passed to ``hf_hub_download``.

    Returns:
        The object parsed from the downloaded JSON file, or an empty list if
        downloading, opening or parsing fails for any reason (the failure is
        logged as a warning, not raised).
    """
    # Remove any existing local copy first so that a failed download cannot
    # be masked by a stale file from a previous run.
    try:
        os.remove(filename)
    except OSError:
        pass  # Nothing to delete (or not deletable); the download decides.

    # Try to download and load the file
    try:
        local_path = hf_hub_download(
            filename=filename,  # The file to download
            local_dir="",  # Where to download it to
            repo_id=dataset_id,
            repo_type="dataset",
            token=hf_api_key,
        )
        # Open the path reported by hf_hub_download rather than assuming the
        # file landed at `filename` relative to the current directory, and
        # decode as UTF-8 instead of the platform default encoding.
        with open(local_path, encoding="utf-8") as file:
            return json.load(file)
    except Exception as e:
        # Deliberately best-effort: any failure is treated as "no data yet".
        helpers.log("WARNING: dataset is empty or does not exist(?):", e)
        return []