Spaces:
Sleeping
Sleeping
""" | |
Media-specific commit scheduler for handling media files in Hugging Face Spaces.

This module provides a specialized commit scheduler for media files that are stored
in the space's app directory (/home/user/app/media) rather than the persistent
storage directory.
""" | |
import os | |
from pathlib import Path | |
from typing import Optional | |
try: # absolute imports when installed | |
from trackio.commit_scheduler import CommitScheduler | |
from trackio.dummy_commit_scheduler import DummyCommitScheduler | |
from trackio.utils import get_media_path | |
except ImportError: # relative imports for local execution on Spaces | |
from commit_scheduler import CommitScheduler | |
from dummy_commit_scheduler import DummyCommitScheduler | |
from utils import get_media_path | |
class MediaCommitScheduler:
    """
    Manage the commit scheduler used for media files in Hugging Face Spaces.

    Media files are stored in the space's app directory rather than the
    persistent storage directory, so they get their own scheduler. The class
    is used as a namespace: a single scheduler is kept in the class attribute
    ``_current_scheduler`` and both methods are static, so they can be called
    either on the class or on an instance.
    """

    # The one scheduler held by this class; ``None`` until ``initialize()``
    # has run. The annotation is quoted so the ``|`` union is never evaluated
    # when the class body executes (that evaluation fails on Python < 3.10).
    _current_scheduler: "CommitScheduler | DummyCommitScheduler | None" = None

    @staticmethod
    def initialize() -> None:
        """
        Initialize the media commit scheduler if not already initialized.

        Reads ``HF_TOKEN``, ``TRACKIO_DATASET_ID`` and ``SPACE_REPO_NAME``
        from the environment. When the dataset id or the space repo name is
        missing or empty, a ``DummyCommitScheduler`` is installed; otherwise
        a real ``CommitScheduler`` targeting the dataset repo is created.

        It is safe to call this multiple times: once a scheduler is stored,
        subsequent calls return immediately.
        """
        if MediaCommitScheduler._current_scheduler is not None:
            return

        hf_token = os.environ.get("HF_TOKEN")
        dataset_id = os.environ.get("TRACKIO_DATASET_ID")
        space_repo_name = os.environ.get("SPACE_REPO_NAME")

        # An empty value is as unusable as an absent one (it would become
        # ``repo_id=""``), so both cases fall back to the dummy scheduler
        # used for local development.
        if not dataset_id or not space_repo_name:
            scheduler = DummyCommitScheduler()
        else:
            scheduler = CommitScheduler(
                repo_id=dataset_id,
                repo_type="dataset",
                folder_path=get_media_path(),
                path_in_repo="media",  # Store media files under "media/" in the dataset
                private=True,
                allow_patterns=["**/*"],  # Allow all media file types
                squash_history=True,
                token=hf_token,
                every=5,  # Upload every 5 minutes
            )

        MediaCommitScheduler._current_scheduler = scheduler

    @staticmethod
    def get_upload_status() -> dict:
        """
        Get the status of the last media upload.

        Returns:
            dict with the keys:

            - ``"last_upload"``: the scheduler's ``last_push_time`` value, or
              ``None`` when there is no scheduler or no recorded push.
            - ``"minutes_since_upload"``: whole minutes between
              ``last_push_time`` and ``time.time()``, or ``None``.
            - ``"is_dummy"``: ``True`` when no scheduler exists yet or the
              scheduler is a ``DummyCommitScheduler``.
            - ``"is_initialized"``: whether a scheduler has been stored.
        """
        scheduler = MediaCommitScheduler._current_scheduler

        status = {
            "last_upload": None,
            "minutes_since_upload": None,
            "is_dummy": True,
            "is_initialized": False,
        }
        if scheduler is None:
            return status

        status["is_dummy"] = isinstance(scheduler, DummyCommitScheduler)
        status["is_initialized"] = True

        # Not every scheduler type necessarily exposes ``last_push_time``;
        # a missing or falsy value means "no upload recorded yet".
        last_push_time = getattr(scheduler, "last_push_time", None)
        if last_push_time:
            import time

            # ``last_push_time`` is compared against ``time.time()``, so it
            # is expected to be an epoch timestamp in seconds.
            seconds_since_upload = time.time() - last_push_time
            status["last_upload"] = last_push_time
            status["minutes_since_upload"] = int(seconds_since_upload / 60)

        return status