Flat-Notifs / utils /datasets.py
xarical's picture
Handle API and server errors more gracefully, improve maintainability of codebase, bug fixes
8622a0e
import json
import os
from huggingface_hub import HfApi, hf_hub_download
import utils.helpers as helpers
def update_dataset(data: list[dict], dataset_id: str, filename: str, hf_api_key: str) -> None:
    """
    Write ``data`` to a local JSON file and upload that file to a HF dataset repo.

    data: records to serialize (written with a 4-space indent).
    dataset_id: id of the dataset repository that receives the file.
    filename: local path to write; the same value is used as the path in the repo.
    hf_api_key: token handed to the upload call.
    """
    # Serialize to disk first; the upload below reads from this file
    with open(filename, "w") as out:
        json.dump(data, out, indent=4)

    # Same local path and in-repo path, so the repo mirrors the local layout
    upload_args = dict(
        path_or_fileobj=filename,
        path_in_repo=filename,
        repo_id=dataset_id,
        repo_type="dataset",
        commit_message="Update data.json 🤖",
        token=hf_api_key,
    )
    HfApi().upload_file(**upload_args)

    helpers.log("Database updated!")
def load_dataset(dataset_id: str, filename: str, hf_api_key: str | None = None) -> list[dict]:
    """
    Load a HF dataset.

    Downloads ``filename`` from the dataset repo ``dataset_id`` and parses it
    as JSON.

    dataset_id: id of the dataset repository to read from.
    filename: path of the file inside the repo; also the local copy's name.
    hf_api_key: optional token passed to the download call.

    Returns the parsed JSON content. If the download, read, or parse step
    fails for any reason, the failure is logged and an empty list is returned
    instead of raising.
    """
    # Remove any stale local copy so a failed download cannot be mistaken
    # for fresh data (hf_hub_download must then raise on failure)
    try:
        os.remove(filename)
    except OSError:
        pass  # no previous copy to remove

    try:
        # Use the path hf_hub_download reports rather than assuming where the
        # file landed relative to the current working directory
        local_path = hf_hub_download(
            filename=filename,  # The file to download
            local_dir="",  # Where to download it to
            repo_id=dataset_id,
            repo_type="dataset",
            token=hf_api_key,
        )
        # JSON is UTF-8; do not depend on the platform's default encoding
        with open(local_path, encoding="utf-8") as file:
            return json.load(file)
    except Exception as e:
        # Deliberate best-effort: callers get an empty dataset, not a crash
        helpers.log("WARNING: dataset is empty or does not exist(?):", e)
        return []