GeorgeBredis commited on
Commit
36e35f1
·
verified ·
1 Parent(s): 76130f6

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ trackio_logo.png filter=lfs diff=lfs merge=lfs -text
__init__.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import webbrowser
3
+ from pathlib import Path
4
+
5
+ from gradio_client import Client
6
+
7
+ from trackio import context_vars, deploy, utils
8
+ from trackio.imports import import_csv
9
+ from trackio.run import Run
10
+ from trackio.sqlite_storage import SQLiteStorage
11
+ from trackio.ui import demo
12
+ from trackio.utils import TRACKIO_DIR, TRACKIO_LOGO_PATH
13
+
14
+ __version__ = Path(__file__).parent.joinpath("version.txt").read_text().strip()
15
+
16
+ __all__ = ["init", "log", "finish", "show", "import_csv"]
17
+
18
+
19
+ config = {}
20
+
21
+
22
+ def init(
23
+ project: str,
24
+ name: str | None = None,
25
+ space_id: str | None = None,
26
+ dataset_id: str | None = None,
27
+ config: dict | None = None,
28
+ resume: str = "never",
29
+ ) -> Run:
30
+ """
31
+ Creates a new Trackio project and returns a Run object.
32
+
33
+ Args:
34
+ project: The name of the project (can be an existing project to continue tracking or a new project to start tracking from scratch).
35
+ name: The name of the run (if not provided, a default name will be generated).
36
+ space_id: If provided, the project will be logged to a Hugging Face Space instead of a local directory. Should be a complete Space name like "username/reponame" or "orgname/reponame", or just "reponame" in which case the Space will be created in the currently-logged-in Hugging Face user's namespace. If the Space does not exist, it will be created. If the Space already exists, the project will be logged to it.
37
+ dataset_id: If a space_id is provided, a persistent Hugging Face Dataset will be created and the metrics will be synced to it every 5 minutes. Specify a Dataset with name like "username/datasetname" or "orgname/datasetname", or "datasetname" (uses currently-logged-in Hugging Face user's namespace), or None (uses the same name as the Space but with the "_dataset" suffix). If the Dataset does not exist, it will be created. If the Dataset already exists, the project will be appended to it.
38
+ config: A dictionary of configuration options. Provided for compatibility with wandb.init()
39
+ resume: Controls how to handle resuming a run. Can be one of:
40
+ - "must": Must resume the run with the given name, raises error if run doesn't exist
41
+ - "allow": Resume the run if it exists, otherwise create a new run
42
+ - "never": Never resume a run, always create a new one
43
+ """
44
+ if space_id is None and dataset_id is not None:
45
+ raise ValueError("Must provide a `space_id` when `dataset_id` is provided.")
46
+ space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)
47
+ url = context_vars.current_server.get()
48
+
49
+ if url is None:
50
+ if space_id is None:
51
+ _, url, _ = demo.launch(
52
+ show_api=False,
53
+ inline=False,
54
+ quiet=True,
55
+ prevent_thread_lock=True,
56
+ show_error=True,
57
+ )
58
+ else:
59
+ url = space_id
60
+ context_vars.current_server.set(url)
61
+
62
+ if (
63
+ context_vars.current_project.get() is None
64
+ or context_vars.current_project.get() != project
65
+ ):
66
+ print(f"* Trackio project initialized: {project}")
67
+
68
+ if dataset_id is not None:
69
+ os.environ["TRACKIO_DATASET_ID"] = dataset_id
70
+ print(
71
+ f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}"
72
+ )
73
+ if space_id is None:
74
+ print(f"* Trackio metrics logged to: {TRACKIO_DIR}")
75
+ utils.print_dashboard_instructions(project)
76
+ else:
77
+ deploy.create_space_if_not_exists(space_id, dataset_id)
78
+ print(
79
+ f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
80
+ )
81
+ context_vars.current_project.set(project)
82
+
83
+ client = None
84
+ if not space_id:
85
+ client = Client(url, verbose=False)
86
+
87
+ if resume == "must":
88
+ if name is None:
89
+ raise ValueError("Must provide a run name when resume='must'")
90
+ if name not in SQLiteStorage.get_runs(project):
91
+ raise ValueError(f"Run '{name}' does not exist in project '{project}'")
92
+ elif resume == "allow":
93
+ if name is not None and name in SQLiteStorage.get_runs(project):
94
+ print(f"* Resuming existing run: {name}")
95
+ elif resume == "never":
96
+ if name is not None and name in SQLiteStorage.get_runs(project):
97
+ name = None
98
+ else:
99
+ raise ValueError("resume must be one of: 'must', 'allow', or 'never'")
100
+
101
+ run = Run(
102
+ url=url,
103
+ project=project,
104
+ client=client,
105
+ name=name,
106
+ config=config,
107
+ )
108
+ context_vars.current_run.set(run)
109
+ globals()["config"] = run.config
110
+ return run
111
+
112
+
113
+ def log(metrics: dict) -> None:
114
+ """
115
+ Logs metrics to the current run.
116
+
117
+ Args:
118
+ metrics: A dictionary of metrics to log.
119
+ """
120
+ if context_vars.current_run.get() is None:
121
+ raise RuntimeError("Call trackio.init() before log().")
122
+ context_vars.current_run.get().log(metrics)
123
+
124
+
125
+ def finish():
126
+ """
127
+ Finishes the current run.
128
+ """
129
+ if context_vars.current_run.get() is None:
130
+ raise RuntimeError("Call trackio.init() before finish().")
131
+ context_vars.current_run.get().finish()
132
+
133
+
134
+ def show(project: str | None = None):
135
+ """
136
+ Launches the Trackio dashboard.
137
+
138
+ Args:
139
+ project: The name of the project whose runs to show. If not provided, all projects will be shown and the user can select one.
140
+ """
141
+ _, url, share_url = demo.launch(
142
+ show_api=False,
143
+ quiet=True,
144
+ inline=False,
145
+ prevent_thread_lock=True,
146
+ favicon_path=TRACKIO_LOGO_PATH,
147
+ allowed_paths=[TRACKIO_LOGO_PATH],
148
+ )
149
+ base_url = share_url + "/" if share_url else url
150
+ dashboard_url = base_url + f"?project={project}" if project else base_url
151
+ print(f"* Trackio UI launched at: {dashboard_url}")
152
+ webbrowser.open(dashboard_url)
153
+ utils.block_except_in_notebook()
__pycache__/__init__.cpython-310.pyc ADDED
Binary file (5.35 kB). View file
 
__pycache__/cli.cpython-310.pyc ADDED
Binary file (785 Bytes). View file
 
__pycache__/commit_scheduler.cpython-310.pyc ADDED
Binary file (13.8 kB). View file
 
__pycache__/context_vars.cpython-310.pyc ADDED
Binary file (513 Bytes). View file
 
__pycache__/deploy.cpython-310.pyc ADDED
Binary file (4.2 kB). View file
 
__pycache__/dummy_commit_scheduler.cpython-310.pyc ADDED
Binary file (963 Bytes). View file
 
__pycache__/imports.cpython-310.pyc ADDED
Binary file (4.23 kB). View file
 
__pycache__/run.cpython-310.pyc ADDED
Binary file (2.42 kB). View file
 
__pycache__/sqlite_storage.cpython-310.pyc ADDED
Binary file (9.06 kB). View file
 
__pycache__/ui.cpython-310.pyc ADDED
Binary file (11.2 kB). View file
 
__pycache__/utils.cpython-310.pyc ADDED
Binary file (6.53 kB). View file
 
cli.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+ from trackio import show
4
+
5
+
6
+ def main():
7
+ parser = argparse.ArgumentParser(description="Trackio CLI")
8
+ subparsers = parser.add_subparsers(dest="command")
9
+
10
+ ui_parser = subparsers.add_parser(
11
+ "show", help="Show the Trackio dashboard UI for a project"
12
+ )
13
+ ui_parser.add_argument(
14
+ "--project", required=False, help="Project name to show in the dashboard"
15
+ )
16
+
17
+ args = parser.parse_args()
18
+
19
+ if args.command == "show":
20
+ show(args.project)
21
+ else:
22
+ parser.print_help()
23
+
24
+
25
+ if __name__ == "__main__":
26
+ main()
commit_scheduler.py ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Originally copied from https://github.com/huggingface/huggingface_hub/blob/d0a948fc2a32ed6e557042a95ef3e4af97ec4a7c/src/huggingface_hub/_commit_scheduler.py
2
+
3
+ import atexit
4
+ import logging
5
+ import os
6
+ import time
7
+ from concurrent.futures import Future
8
+ from dataclasses import dataclass
9
+ from io import SEEK_END, SEEK_SET, BytesIO
10
+ from pathlib import Path
11
+ from threading import Lock, Thread
12
+ from typing import Dict, List, Optional, Union
13
+
14
+ from huggingface_hub.hf_api import (
15
+ DEFAULT_IGNORE_PATTERNS,
16
+ CommitInfo,
17
+ CommitOperationAdd,
18
+ HfApi,
19
+ )
20
+ from huggingface_hub.utils import filter_repo_objects
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ @dataclass(frozen=True)
26
+ class _FileToUpload:
27
+ """Temporary dataclass to store info about files to upload. Not meant to be used directly."""
28
+
29
+ local_path: Path
30
+ path_in_repo: str
31
+ size_limit: int
32
+ last_modified: float
33
+
34
+
35
+ class CommitScheduler:
36
+ """
37
+ Scheduler to upload a local folder to the Hub at regular intervals (e.g. push to hub every 5 minutes).
38
+
39
+ The recommended way to use the scheduler is to use it as a context manager. This ensures that the scheduler is
40
+ properly stopped and the last commit is triggered when the script ends. The scheduler can also be stopped manually
41
+ with the `stop` method. Checkout the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#scheduled-uploads)
42
+ to learn more about how to use it.
43
+
44
+ Args:
45
+ repo_id (`str`):
46
+ The id of the repo to commit to.
47
+ folder_path (`str` or `Path`):
48
+ Path to the local folder to upload regularly.
49
+ every (`int` or `float`, *optional*):
50
+ The number of minutes between each commit. Defaults to 5 minutes.
51
+ path_in_repo (`str`, *optional*):
52
+ Relative path of the directory in the repo, for example: `"checkpoints/"`. Defaults to the root folder
53
+ of the repository.
54
+ repo_type (`str`, *optional*):
55
+ The type of the repo to commit to. Defaults to `model`.
56
+ revision (`str`, *optional*):
57
+ The revision of the repo to commit to. Defaults to `main`.
58
+ private (`bool`, *optional*):
59
+ Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
60
+ token (`str`, *optional*):
61
+ The token to use to commit to the repo. Defaults to the token saved on the machine.
62
+ allow_patterns (`List[str]` or `str`, *optional*):
63
+ If provided, only files matching at least one pattern are uploaded.
64
+ ignore_patterns (`List[str]` or `str`, *optional*):
65
+ If provided, files matching any of the patterns are not uploaded.
66
+ squash_history (`bool`, *optional*):
67
+ Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
68
+ useful to avoid degraded performances on the repo when it grows too large.
69
+ hf_api (`HfApi`, *optional*):
70
+ The [`HfApi`] client to use to commit to the Hub. Can be set with custom settings (user agent, token,...).
71
+
72
+ Example:
73
+ ```py
74
+ >>> from pathlib import Path
75
+ >>> from huggingface_hub import CommitScheduler
76
+
77
+ # Scheduler uploads every 10 minutes
78
+ >>> csv_path = Path("watched_folder/data.csv")
79
+ >>> CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path=csv_path.parent, every=10)
80
+
81
+ >>> with csv_path.open("a") as f:
82
+ ... f.write("first line")
83
+
84
+ # Some time later (...)
85
+ >>> with csv_path.open("a") as f:
86
+ ... f.write("second line")
87
+ ```
88
+
89
+ Example using a context manager:
90
+ ```py
91
+ >>> from pathlib import Path
92
+ >>> from huggingface_hub import CommitScheduler
93
+
94
+ >>> with CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path="watched_folder", every=10) as scheduler:
95
+ ... csv_path = Path("watched_folder/data.csv")
96
+ ... with csv_path.open("a") as f:
97
+ ... f.write("first line")
98
+ ... (...)
99
+ ... with csv_path.open("a") as f:
100
+ ... f.write("second line")
101
+
102
+ # Scheduler is now stopped and last commit have been triggered
103
+ ```
104
+ """
105
+
106
+ def __init__(
107
+ self,
108
+ *,
109
+ repo_id: str,
110
+ folder_path: Union[str, Path],
111
+ every: Union[int, float] = 5,
112
+ path_in_repo: Optional[str] = None,
113
+ repo_type: Optional[str] = None,
114
+ revision: Optional[str] = None,
115
+ private: Optional[bool] = None,
116
+ token: Optional[str] = None,
117
+ allow_patterns: Optional[Union[List[str], str]] = None,
118
+ ignore_patterns: Optional[Union[List[str], str]] = None,
119
+ squash_history: bool = False,
120
+ hf_api: Optional["HfApi"] = None,
121
+ ) -> None:
122
+ self.api = hf_api or HfApi(token=token)
123
+
124
+ # Folder
125
+ self.folder_path = Path(folder_path).expanduser().resolve()
126
+ self.path_in_repo = path_in_repo or ""
127
+ self.allow_patterns = allow_patterns
128
+
129
+ if ignore_patterns is None:
130
+ ignore_patterns = []
131
+ elif isinstance(ignore_patterns, str):
132
+ ignore_patterns = [ignore_patterns]
133
+ self.ignore_patterns = ignore_patterns + DEFAULT_IGNORE_PATTERNS
134
+
135
+ if self.folder_path.is_file():
136
+ raise ValueError(
137
+ f"'folder_path' must be a directory, not a file: '{self.folder_path}'."
138
+ )
139
+ self.folder_path.mkdir(parents=True, exist_ok=True)
140
+
141
+ # Repository
142
+ repo_url = self.api.create_repo(
143
+ repo_id=repo_id, private=private, repo_type=repo_type, exist_ok=True
144
+ )
145
+ self.repo_id = repo_url.repo_id
146
+ self.repo_type = repo_type
147
+ self.revision = revision
148
+ self.token = token
149
+
150
+ # Keep track of already uploaded files
151
+ self.last_uploaded: Dict[
152
+ Path, float
153
+ ] = {} # key is local path, value is timestamp
154
+
155
+ # Scheduler
156
+ if not every > 0:
157
+ raise ValueError(f"'every' must be a positive integer, not '{every}'.")
158
+ self.lock = Lock()
159
+ self.every = every
160
+ self.squash_history = squash_history
161
+
162
+ logger.info(
163
+ f"Scheduled job to push '{self.folder_path}' to '{self.repo_id}' every {self.every} minutes."
164
+ )
165
+ self._scheduler_thread = Thread(target=self._run_scheduler, daemon=True)
166
+ self._scheduler_thread.start()
167
+ atexit.register(self._push_to_hub)
168
+
169
+ self.__stopped = False
170
+
171
+ def stop(self) -> None:
172
+ """Stop the scheduler.
173
+
174
+ A stopped scheduler cannot be restarted. Mostly for tests purposes.
175
+ """
176
+ self.__stopped = True
177
+
178
+ def __enter__(self) -> "CommitScheduler":
179
+ return self
180
+
181
+ def __exit__(self, exc_type, exc_value, traceback) -> None:
182
+ # Upload last changes before exiting
183
+ self.trigger().result()
184
+ self.stop()
185
+ return
186
+
187
+ def _run_scheduler(self) -> None:
188
+ """Dumb thread waiting between each scheduled push to Hub."""
189
+ while True:
190
+ self.last_future = self.trigger()
191
+ time.sleep(self.every * 60)
192
+ if self.__stopped:
193
+ break
194
+
195
+ def trigger(self) -> Future:
196
+ """Trigger a `push_to_hub` and return a future.
197
+
198
+ This method is automatically called every `every` minutes. You can also call it manually to trigger a commit
199
+ immediately, without waiting for the next scheduled commit.
200
+ """
201
+ return self.api.run_as_future(self._push_to_hub)
202
+
203
+ def _push_to_hub(self) -> Optional[CommitInfo]:
204
+ if self.__stopped: # If stopped, already scheduled commits are ignored
205
+ return None
206
+
207
+ logger.info("(Background) scheduled commit triggered.")
208
+ try:
209
+ value = self.push_to_hub()
210
+ if self.squash_history:
211
+ logger.info("(Background) squashing repo history.")
212
+ self.api.super_squash_history(
213
+ repo_id=self.repo_id, repo_type=self.repo_type, branch=self.revision
214
+ )
215
+ return value
216
+ except Exception as e:
217
+ logger.error(
218
+ f"Error while pushing to Hub: {e}"
219
+ ) # Depending on the setup, error might be silenced
220
+ raise
221
+
222
+ def push_to_hub(self) -> Optional[CommitInfo]:
223
+ """
224
+ Push folder to the Hub and return the commit info.
225
+
226
+ <Tip warning={true}>
227
+
228
+ This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
229
+ queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
230
+ issues.
231
+
232
+ </Tip>
233
+
234
+ The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder and
235
+ uploads only changed files. If no changes are found, the method returns without committing anything. If you want
236
+ to change this behavior, you can inherit from [`CommitScheduler`] and override this method. This can be useful
237
+ for example to compress data together in a single file before committing. For more details and examples, check
238
+ out our [integration guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads).
239
+ """
240
+ # Check files to upload (with lock)
241
+ with self.lock:
242
+ logger.debug("Listing files to upload for scheduled commit.")
243
+
244
+ # List files from folder (taken from `_prepare_upload_folder_additions`)
245
+ relpath_to_abspath = {
246
+ path.relative_to(self.folder_path).as_posix(): path
247
+ for path in sorted(
248
+ self.folder_path.glob("**/*")
249
+ ) # sorted to be deterministic
250
+ if path.is_file()
251
+ }
252
+ prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else ""
253
+
254
+ # Filter with pattern + filter out unchanged files + retrieve current file size
255
+ files_to_upload: List[_FileToUpload] = []
256
+ for relpath in filter_repo_objects(
257
+ relpath_to_abspath.keys(),
258
+ allow_patterns=self.allow_patterns,
259
+ ignore_patterns=self.ignore_patterns,
260
+ ):
261
+ local_path = relpath_to_abspath[relpath]
262
+ stat = local_path.stat()
263
+ if (
264
+ self.last_uploaded.get(local_path) is None
265
+ or self.last_uploaded[local_path] != stat.st_mtime
266
+ ):
267
+ files_to_upload.append(
268
+ _FileToUpload(
269
+ local_path=local_path,
270
+ path_in_repo=prefix + relpath,
271
+ size_limit=stat.st_size,
272
+ last_modified=stat.st_mtime,
273
+ )
274
+ )
275
+
276
+ # Return if nothing to upload
277
+ if len(files_to_upload) == 0:
278
+ logger.debug("Dropping schedule commit: no changed file to upload.")
279
+ return None
280
+
281
+ # Convert `_FileToUpload` as `CommitOperationAdd` (=> compute file shas + limit to file size)
282
+ logger.debug("Removing unchanged files since previous scheduled commit.")
283
+ add_operations = [
284
+ CommitOperationAdd(
285
+ # Cap the file to its current size, even if the user append data to it while a scheduled commit is happening
286
+ path_or_fileobj=PartialFileIO(
287
+ file_to_upload.local_path, size_limit=file_to_upload.size_limit
288
+ ),
289
+ path_in_repo=file_to_upload.path_in_repo,
290
+ )
291
+ for file_to_upload in files_to_upload
292
+ ]
293
+
294
+ # Upload files (append mode expected - no need for lock)
295
+ logger.debug("Uploading files for scheduled commit.")
296
+ commit_info = self.api.create_commit(
297
+ repo_id=self.repo_id,
298
+ repo_type=self.repo_type,
299
+ operations=add_operations,
300
+ commit_message="Scheduled Commit",
301
+ revision=self.revision,
302
+ )
303
+
304
+ # Successful commit: keep track of the latest "last_modified" for each file
305
+ for file in files_to_upload:
306
+ self.last_uploaded[file.local_path] = file.last_modified
307
+ return commit_info
308
+
309
+
310
+ class PartialFileIO(BytesIO):
311
+ """A file-like object that reads only the first part of a file.
312
+
313
+ Useful to upload a file to the Hub when the user might still be appending data to it. Only the first part of the
314
+ file is uploaded (i.e. the part that was available when the filesystem was first scanned).
315
+
316
+ In practice, only used internally by the CommitScheduler to regularly push a folder to the Hub with minimal
317
+ disturbance for the user. The object is passed to `CommitOperationAdd`.
318
+
319
+ Only supports `read`, `tell` and `seek` methods.
320
+
321
+ Args:
322
+ file_path (`str` or `Path`):
323
+ Path to the file to read.
324
+ size_limit (`int`):
325
+ The maximum number of bytes to read from the file. If the file is larger than this, only the first part
326
+ will be read (and uploaded).
327
+ """
328
+
329
+ def __init__(self, file_path: Union[str, Path], size_limit: int) -> None:
330
+ self._file_path = Path(file_path)
331
+ self._file = self._file_path.open("rb")
332
+ self._size_limit = min(size_limit, os.fstat(self._file.fileno()).st_size)
333
+
334
+ def __del__(self) -> None:
335
+ self._file.close()
336
+ return super().__del__()
337
+
338
+ def __repr__(self) -> str:
339
+ return (
340
+ f"<PartialFileIO file_path={self._file_path} size_limit={self._size_limit}>"
341
+ )
342
+
343
+ def __len__(self) -> int:
344
+ return self._size_limit
345
+
346
+ def __getattribute__(self, name: str):
347
+ if name.startswith("_") or name in (
348
+ "read",
349
+ "tell",
350
+ "seek",
351
+ ): # only 3 public methods supported
352
+ return super().__getattribute__(name)
353
+ raise NotImplementedError(f"PartialFileIO does not support '{name}'.")
354
+
355
+ def tell(self) -> int:
356
+ """Return the current file position."""
357
+ return self._file.tell()
358
+
359
+ def seek(self, __offset: int, __whence: int = SEEK_SET) -> int:
360
+ """Change the stream position to the given offset.
361
+
362
+ Behavior is the same as a regular file, except that the position is capped to the size limit.
363
+ """
364
+ if __whence == SEEK_END:
365
+ # SEEK_END => set from the truncated end
366
+ __offset = len(self) + __offset
367
+ __whence = SEEK_SET
368
+
369
+ pos = self._file.seek(__offset, __whence)
370
+ if pos > self._size_limit:
371
+ return self._file.seek(self._size_limit)
372
+ return pos
373
+
374
+ def read(self, __size: Optional[int] = -1) -> bytes:
375
+ """Read at most `__size` bytes from the file.
376
+
377
+ Behavior is the same as a regular file, except that it is capped to the size limit.
378
+ """
379
+ current = self._file.tell()
380
+ if __size is None or __size < 0:
381
+ # Read until file limit
382
+ truncated_size = self._size_limit - current
383
+ else:
384
+ # Read until file limit or __size
385
+ truncated_size = min(__size, self._size_limit - current)
386
+ return self._file.read(truncated_size)
context_vars.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import contextvars
2
+ from typing import TYPE_CHECKING
3
+
4
+ if TYPE_CHECKING:
5
+ from trackio.run import Run
6
+
7
+ current_run: contextvars.ContextVar["Run | None"] = contextvars.ContextVar(
8
+ "current_run", default=None
9
+ )
10
+ current_project: contextvars.ContextVar[str | None] = contextvars.ContextVar(
11
+ "current_project", default=None
12
+ )
13
+ current_server: contextvars.ContextVar[str | None] = contextvars.ContextVar(
14
+ "current_server", default=None
15
+ )
deploy.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import os
3
+ import time
4
+ from importlib.resources import files
5
+ from pathlib import Path
6
+
7
+ import gradio
8
+ import huggingface_hub
9
+ from gradio_client import Client, handle_file
10
+ from httpx import ReadTimeout
11
+ from huggingface_hub.errors import RepositoryNotFoundError
12
+ from requests import HTTPError
13
+
14
+ from trackio.sqlite_storage import SQLiteStorage
15
+
16
+ SPACE_URL = "https://huggingface.co/spaces/{space_id}"
17
+
18
+
19
+ def deploy_as_space(
20
+ space_id: str,
21
+ dataset_id: str | None = None,
22
+ ):
23
+ if (
24
+ os.getenv("SYSTEM") == "spaces"
25
+ ): # in case a repo with this function is uploaded to spaces
26
+ return
27
+
28
+ trackio_path = files("trackio")
29
+
30
+ hf_api = huggingface_hub.HfApi()
31
+
32
+ try:
33
+ huggingface_hub.create_repo(
34
+ space_id,
35
+ space_sdk="gradio",
36
+ repo_type="space",
37
+ exist_ok=True,
38
+ )
39
+ except HTTPError as e:
40
+ if e.response.status_code in [401, 403]: # unauthorized or forbidden
41
+ print("Need 'write' access token to create a Spaces repo.")
42
+ huggingface_hub.login(add_to_git_credential=False)
43
+ huggingface_hub.create_repo(
44
+ space_id,
45
+ space_sdk="gradio",
46
+ repo_type="space",
47
+ exist_ok=True,
48
+ )
49
+ else:
50
+ raise ValueError(f"Failed to create Space: {e}")
51
+
52
+ with open(Path(trackio_path, "README.md"), "r") as f:
53
+ readme_content = f.read()
54
+ readme_content = readme_content.replace("{GRADIO_VERSION}", gradio.__version__)
55
+ readme_buffer = io.BytesIO(readme_content.encode("utf-8"))
56
+ hf_api.upload_file(
57
+ path_or_fileobj=readme_buffer,
58
+ path_in_repo="README.md",
59
+ repo_id=space_id,
60
+ repo_type="space",
61
+ )
62
+
63
+ huggingface_hub.utils.disable_progress_bars()
64
+ hf_api.upload_folder(
65
+ repo_id=space_id,
66
+ repo_type="space",
67
+ folder_path=trackio_path,
68
+ ignore_patterns=["README.md"],
69
+ )
70
+
71
+ hf_token = huggingface_hub.utils.get_token()
72
+ if hf_token is not None:
73
+ huggingface_hub.add_space_secret(space_id, "HF_TOKEN", hf_token)
74
+ if dataset_id is not None:
75
+ huggingface_hub.add_space_variable(space_id, "TRACKIO_DATASET_ID", dataset_id)
76
+
77
+
78
+ def create_space_if_not_exists(
79
+ space_id: str,
80
+ dataset_id: str | None = None,
81
+ ) -> None:
82
+ """
83
+ Creates a new Hugging Face Space if it does not exist. If a dataset_id is provided, it will be added as a space variable.
84
+
85
+ Args:
86
+ space_id: The ID of the Space to create.
87
+ dataset_id: The ID of the Dataset to add to the Space.
88
+ """
89
+ if "/" not in space_id:
90
+ raise ValueError(
91
+ f"Invalid space ID: {space_id}. Must be in the format: username/reponame or orgname/reponame."
92
+ )
93
+ if dataset_id is not None and "/" not in dataset_id:
94
+ raise ValueError(
95
+ f"Invalid dataset ID: {dataset_id}. Must be in the format: username/datasetname or orgname/datasetname."
96
+ )
97
+ try:
98
+ huggingface_hub.repo_info(space_id, repo_type="space")
99
+ print(f"* Found existing space: {SPACE_URL.format(space_id=space_id)}")
100
+ if dataset_id is not None:
101
+ huggingface_hub.add_space_variable(
102
+ space_id, "TRACKIO_DATASET_ID", dataset_id
103
+ )
104
+ return
105
+ except RepositoryNotFoundError:
106
+ pass
107
+ except HTTPError as e:
108
+ if e.response.status_code in [401, 403]: # unauthorized or forbidden
109
+ print("Need 'write' access token to create a Spaces repo.")
110
+ huggingface_hub.login(add_to_git_credential=False)
111
+ huggingface_hub.add_space_variable(
112
+ space_id, "TRACKIO_DATASET_ID", dataset_id
113
+ )
114
+ else:
115
+ raise ValueError(f"Failed to create Space: {e}")
116
+
117
+ print(f"* Creating new space: {SPACE_URL.format(space_id=space_id)}")
118
+ deploy_as_space(space_id, dataset_id)
119
+
120
+
121
+ def wait_until_space_exists(
122
+ space_id: str,
123
+ ) -> None:
124
+ """
125
+ Blocks the current thread until the space exists.
126
+ May raise a TimeoutError if this takes quite a while.
127
+
128
+ Args:
129
+ space_id: The ID of the Space to wait for.
130
+ """
131
+ delay = 1
132
+ for _ in range(10):
133
+ try:
134
+ Client(space_id, verbose=False)
135
+ return
136
+ except (ReadTimeout, ValueError):
137
+ time.sleep(delay)
138
+ delay = min(delay * 2, 30)
139
+ raise TimeoutError("Waiting for space to exist took longer than expected")
140
+
141
+
142
+ def upload_db_to_space(project: str, space_id: str) -> None:
143
+ """
144
+ Uploads the database of a local Trackio project to a Hugging Face Space.
145
+
146
+ Args:
147
+ project: The name of the project to upload.
148
+ space_id: The ID of the Space to upload to.
149
+ """
150
+ db_path = SQLiteStorage.get_project_db_path(project)
151
+ client = Client(space_id, verbose=False)
152
+ client.predict(
153
+ api_name="/upload_db_to_space",
154
+ project=project,
155
+ uploaded_db=handle_file(db_path),
156
+ hf_token=huggingface_hub.utils.get_token(),
157
+ )
dummy_commit_scheduler.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # A dummy object to fit the interface of huggingface_hub's CommitScheduler
2
+ class DummyCommitSchedulerLock:
3
+ def __enter__(self):
4
+ return None
5
+
6
+ def __exit__(self, exception_type, exception_value, exception_traceback):
7
+ pass
8
+
9
+
10
+ class DummyCommitScheduler:
11
+ def __init__(self):
12
+ self.lock = DummyCommitSchedulerLock()
imports.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+
6
+ from trackio import deploy, utils
7
+ from trackio.sqlite_storage import SQLiteStorage
8
+
9
+
10
+ def import_csv(
11
+ csv_path: str,
12
+ project: str,
13
+ name: str | None = None,
14
+ space_id: str | None = None,
15
+ dataset_id: str | None = None,
16
+ ) -> None:
17
+ """
18
+ Imports a CSV file into a Trackio project. The CSV file must contain a "step" column, may optionally
19
+ contain a "timestamp" column, and any other columns will be treated as metrics. It should also include
20
+ a header row with the column names.
21
+
22
+ TODO: call init() and return a Run object so that the user can continue to log metrics to it.
23
+
24
+ Args:
25
+ csv_path: The str or Path to the CSV file to import.
26
+ project: The name of the project to import the CSV file into. Must not be an existing project.
27
+ name: The name of the Run to import the CSV file into. If not provided, a default name will be generated.
28
+ name: The name of the run (if not provided, a default name will be generated).
29
+ space_id: If provided, the project will be logged to a Hugging Face Space instead of a local directory. Should be a complete Space name like "username/reponame" or "orgname/reponame", or just "reponame" in which case the Space will be created in the currently-logged-in Hugging Face user's namespace. If the Space does not exist, it will be created. If the Space already exists, the project will be logged to it.
30
+ dataset_id: If provided, a persistent Hugging Face Dataset will be created and the metrics will be synced to it every 5 minutes. Should be a complete Dataset name like "username/datasetname" or "orgname/datasetname", or just "datasetname" in which case the Dataset will be created in the currently-logged-in Hugging Face user's namespace. If the Dataset does not exist, it will be created. If the Dataset already exists, the project will be appended to it. If not provided, the metrics will be logged to a local SQLite database, unless a `space_id` is provided, in which case a Dataset will be automatically created with the same name as the Space but with the "_dataset" suffix.
31
+ """
32
+ if SQLiteStorage.get_runs(project):
33
+ raise ValueError(
34
+ f"Project '{project}' already exists. Cannot import CSV into existing project."
35
+ )
36
+
37
+ csv_path = Path(csv_path)
38
+ if not csv_path.exists():
39
+ raise FileNotFoundError(f"CSV file not found: {csv_path}")
40
+
41
+ df = pd.read_csv(csv_path)
42
+ if df.empty:
43
+ raise ValueError("CSV file is empty")
44
+
45
+ column_mapping = utils.simplify_column_names(df.columns.tolist())
46
+ df = df.rename(columns=column_mapping)
47
+
48
+ step_column = None
49
+ for col in df.columns:
50
+ if col.lower() == "step":
51
+ step_column = col
52
+ break
53
+
54
+ if step_column is None:
55
+ raise ValueError("CSV file must contain a 'step' or 'Step' column")
56
+
57
+ if name is None:
58
+ name = csv_path.stem
59
+
60
+ metrics_list = []
61
+ steps = []
62
+ timestamps = []
63
+
64
+ numeric_columns = []
65
+ for column in df.columns:
66
+ if column == step_column:
67
+ continue
68
+ if column == "timestamp":
69
+ continue
70
+
71
+ try:
72
+ pd.to_numeric(df[column], errors="raise")
73
+ numeric_columns.append(column)
74
+ except (ValueError, TypeError):
75
+ continue
76
+
77
+ for _, row in df.iterrows():
78
+ metrics = {}
79
+ for column in numeric_columns:
80
+ if pd.notna(row[column]):
81
+ metrics[column] = float(row[column])
82
+
83
+ if metrics:
84
+ metrics_list.append(metrics)
85
+ steps.append(int(row[step_column]))
86
+
87
+ if "timestamp" in df.columns and pd.notna(row["timestamp"]):
88
+ timestamps.append(str(row["timestamp"]))
89
+ else:
90
+ timestamps.append("")
91
+
92
+ if metrics_list:
93
+ SQLiteStorage.bulk_log(
94
+ project=project,
95
+ run=name,
96
+ metrics_list=metrics_list,
97
+ steps=steps,
98
+ timestamps=timestamps,
99
+ )
100
+
101
+ print(
102
+ f"* Imported {len(metrics_list)} rows from {csv_path} into project '{project}' as run '{name}'"
103
+ )
104
+ print(f"* Metrics found: {', '.join(metrics_list[0].keys())}")
105
+
106
+ space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)
107
+ if dataset_id is not None:
108
+ os.environ["TRACKIO_DATASET_ID"] = dataset_id
109
+ print(f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}")
110
+
111
+ if space_id is None:
112
+ utils.print_dashboard_instructions(project)
113
+ else:
114
+ deploy.create_space_if_not_exists(space_id, dataset_id)
115
+ deploy.wait_until_space_exists(space_id)
116
+ deploy.upload_db_to_space(project, space_id)
117
+ print(
118
+ f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
119
+ )
run.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading
2
+ import time
3
+ from collections import deque
4
+
5
+ import huggingface_hub
6
+ from gradio_client import Client
7
+
8
+ from trackio.utils import RESERVED_KEYS, fibo, generate_readable_name
9
+
10
+
11
+ class Run:
12
+ def __init__(
13
+ self,
14
+ url: str,
15
+ project: str,
16
+ client: Client,
17
+ name: str | None = None,
18
+ config: dict | None = None,
19
+ ):
20
+ self.url = url
21
+ self.project = project
22
+ self._client_lock = threading.Lock()
23
+ self._client_thread = None
24
+ self._client = client
25
+ self.name = name or generate_readable_name()
26
+ self.config = config or {}
27
+ self._queued_logs = deque()
28
+
29
+ if client is None:
30
+ self._client_thread = threading.Thread(target=self._init_client_background)
31
+ self._client_thread.start()
32
+
33
+ def _init_client_background(self):
34
+ fib = fibo()
35
+ for sleep_coefficient in fib:
36
+ try:
37
+ client = Client(self.url, verbose=False)
38
+ with self._client_lock:
39
+ self._client = client
40
+ if len(self._queued_logs) > 0:
41
+ for queued_log in self._queued_logs:
42
+ self._client.predict(**queued_log)
43
+ self._queued_logs.clear()
44
+ break
45
+ except Exception:
46
+ pass
47
+ if sleep_coefficient is not None:
48
+ time.sleep(0.1 * sleep_coefficient)
49
+
50
+ def log(self, metrics: dict):
51
+ for k in metrics.keys():
52
+ if k in RESERVED_KEYS or k.startswith("__"):
53
+ raise ValueError(
54
+ f"Please do not use this reserved key as a metric: {k}"
55
+ )
56
+ with self._client_lock:
57
+ if self._client is None:
58
+ # client can still be None for a Space while the Space is still initializing.
59
+ # queue up log items for when the client is not None.
60
+ self._queued_logs.append(
61
+ dict(
62
+ api_name="/log",
63
+ project=self.project,
64
+ run=self.name,
65
+ metrics=metrics,
66
+ hf_token=huggingface_hub.utils.get_token(),
67
+ )
68
+ )
69
+ else:
70
+ assert (
71
+ len(self._queued_logs) == 0
72
+ ) # queue should have been flushed on client init
73
+ # write the current log item
74
+ self._client.predict(
75
+ api_name="/log",
76
+ project=self.project,
77
+ run=self.name,
78
+ metrics=metrics,
79
+ hf_token=huggingface_hub.utils.get_token(),
80
+ )
81
+
82
+ def finish(self):
83
+ """Cleanup when run is finished."""
84
+ # wait for background client thread, in case it has a queue of logs to flush.
85
+ if self._client_thread is not None:
86
+ print(f"* Uploading logs to Trackio Space: {self.url} (please wait...)")
87
+ self._client_thread.join()
sqlite_storage.py ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import shutil
4
+ import sqlite3
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from threading import Lock
8
+
9
+ from huggingface_hub import hf_hub_download
10
+ from huggingface_hub.errors import EntryNotFoundError
11
+
12
+ try: # absolute imports when installed
13
+ from trackio.commit_scheduler import CommitScheduler
14
+ from trackio.dummy_commit_scheduler import DummyCommitScheduler
15
+ from trackio.utils import TRACKIO_DIR
16
+ except Exception: # relative imports for local execution on Spaces
17
+ from commit_scheduler import CommitScheduler
18
+ from dummy_commit_scheduler import DummyCommitScheduler
19
+ from utils import TRACKIO_DIR
20
+
21
+
22
+ class SQLiteStorage:
23
+ _dataset_import_attempted = False
24
+ _current_scheduler: CommitScheduler | DummyCommitScheduler | None = None
25
+ _scheduler_lock = Lock()
26
+
27
+ @staticmethod
28
+ def _get_connection(db_path: Path) -> sqlite3.Connection:
29
+ conn = sqlite3.connect(str(db_path))
30
+ conn.row_factory = sqlite3.Row
31
+ return conn
32
+
33
+ @staticmethod
34
+ def get_project_db_filename(project: str) -> Path:
35
+ """Get the database filename for a specific project."""
36
+ safe_project_name = "".join(
37
+ c for c in project if c.isalnum() or c in ("-", "_")
38
+ ).rstrip()
39
+ if not safe_project_name:
40
+ safe_project_name = "default"
41
+ return f"{safe_project_name}.db"
42
+
43
+ @staticmethod
44
+ def get_project_db_path(project: str) -> Path:
45
+ """Get the database path for a specific project."""
46
+ filename = SQLiteStorage.get_project_db_filename(project)
47
+ return TRACKIO_DIR / filename
48
+
49
+ @staticmethod
50
+ def init_db(project: str) -> Path:
51
+ """
52
+ Initialize the SQLite database with required tables.
53
+ If there is a dataset ID provided, copies from that dataset instead.
54
+ Returns the database path.
55
+ """
56
+ db_path = SQLiteStorage.get_project_db_path(project)
57
+ db_path.parent.mkdir(parents=True, exist_ok=True)
58
+ with SQLiteStorage.get_scheduler().lock:
59
+ dataset_id = os.environ.get("TRACKIO_DATASET_ID")
60
+ if dataset_id is not None and not SQLiteStorage._dataset_import_attempted:
61
+ filename = SQLiteStorage.get_project_db_filename(project)
62
+ try:
63
+ downloaded_path = hf_hub_download(
64
+ dataset_id, filename, repo_type="dataset"
65
+ )
66
+ shutil.copy(downloaded_path, db_path)
67
+ except EntryNotFoundError:
68
+ pass
69
+ SQLiteStorage._dataset_import_attempted = True
70
+
71
+ with sqlite3.connect(db_path) as conn:
72
+ cursor = conn.cursor()
73
+ cursor.execute("""
74
+ CREATE TABLE IF NOT EXISTS metrics (
75
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
76
+ timestamp TEXT NOT NULL,
77
+ run_name TEXT NOT NULL,
78
+ step INTEGER NOT NULL,
79
+ metrics TEXT NOT NULL
80
+ )
81
+ """)
82
+ cursor.execute(
83
+ """
84
+ CREATE INDEX IF NOT EXISTS idx_metrics_run_step
85
+ ON metrics(run_name, step)
86
+ """
87
+ )
88
+ conn.commit()
89
+ return db_path
90
+
91
+ @staticmethod
92
+ def get_scheduler():
93
+ """
94
+ Get the scheduler for the database based on the environment variables.
95
+ This applies to both local and Spaces.
96
+ """
97
+ with SQLiteStorage._scheduler_lock:
98
+ if SQLiteStorage._current_scheduler is not None:
99
+ return SQLiteStorage._current_scheduler
100
+ hf_token = os.environ.get("HF_TOKEN")
101
+ dataset_id = os.environ.get("TRACKIO_DATASET_ID")
102
+ space_repo_name = os.environ.get("SPACE_REPO_NAME")
103
+ if dataset_id is None or space_repo_name is None:
104
+ scheduler = DummyCommitScheduler()
105
+ else:
106
+ scheduler = CommitScheduler(
107
+ repo_id=dataset_id,
108
+ repo_type="dataset",
109
+ folder_path=TRACKIO_DIR,
110
+ private=True,
111
+ squash_history=True,
112
+ token=hf_token,
113
+ )
114
+ SQLiteStorage._current_scheduler = scheduler
115
+ return scheduler
116
+
117
+ @staticmethod
118
+ def log(project: str, run: str, metrics: dict):
119
+ """
120
+ Safely log metrics to the database. Before logging, this method will ensure the database exists
121
+ and is set up with the correct tables. It also uses the scheduler to lock the database so
122
+ that there is no race condition when logging / syncing to the Hugging Face Dataset.
123
+ """
124
+ db_path = SQLiteStorage.init_db(project)
125
+
126
+ with SQLiteStorage.get_scheduler().lock:
127
+ with SQLiteStorage._get_connection(db_path) as conn:
128
+ cursor = conn.cursor()
129
+
130
+ cursor.execute(
131
+ """
132
+ SELECT MAX(step)
133
+ FROM metrics
134
+ WHERE run_name = ?
135
+ """,
136
+ (run,),
137
+ )
138
+ last_step = cursor.fetchone()[0]
139
+ current_step = 0 if last_step is None else last_step + 1
140
+
141
+ current_timestamp = datetime.now().isoformat()
142
+
143
+ cursor.execute(
144
+ """
145
+ INSERT INTO metrics
146
+ (timestamp, run_name, step, metrics)
147
+ VALUES (?, ?, ?, ?)
148
+ """,
149
+ (
150
+ current_timestamp,
151
+ run,
152
+ current_step,
153
+ json.dumps(metrics),
154
+ ),
155
+ )
156
+ conn.commit()
157
+
158
+ @staticmethod
159
+ def bulk_log(
160
+ project: str,
161
+ run: str,
162
+ metrics_list: list[dict],
163
+ steps: list[int] | None = None,
164
+ timestamps: list[str] | None = None,
165
+ ):
166
+ """Bulk log metrics to the database with specified steps and timestamps."""
167
+ if not metrics_list:
168
+ return
169
+
170
+ if steps is None:
171
+ steps = list(range(len(metrics_list)))
172
+
173
+ if timestamps is None:
174
+ timestamps = [datetime.now().isoformat()] * len(metrics_list)
175
+
176
+ if len(metrics_list) != len(steps) or len(metrics_list) != len(timestamps):
177
+ raise ValueError(
178
+ "metrics_list, steps, and timestamps must have the same length"
179
+ )
180
+
181
+ db_path = SQLiteStorage.init_db(project)
182
+ with SQLiteStorage.get_scheduler().lock:
183
+ with SQLiteStorage._get_connection(db_path) as conn:
184
+ cursor = conn.cursor()
185
+
186
+ data = []
187
+ for i, metrics in enumerate(metrics_list):
188
+ data.append(
189
+ (
190
+ timestamps[i],
191
+ run,
192
+ steps[i],
193
+ json.dumps(metrics),
194
+ )
195
+ )
196
+
197
+ cursor.executemany(
198
+ """
199
+ INSERT INTO metrics
200
+ (timestamp, run_name, step, metrics)
201
+ VALUES (?, ?, ?, ?)
202
+ """,
203
+ data,
204
+ )
205
+ conn.commit()
206
+
207
+ @staticmethod
208
+ def get_metrics(project: str, run: str) -> list[dict]:
209
+ """Retrieve metrics for a specific run. The metrics also include the step count (int) and the timestamp (datetime object)."""
210
+ db_path = SQLiteStorage.get_project_db_path(project)
211
+ if not db_path.exists():
212
+ return []
213
+
214
+ with SQLiteStorage._get_connection(db_path) as conn:
215
+ cursor = conn.cursor()
216
+ cursor.execute(
217
+ """
218
+ SELECT timestamp, step, metrics
219
+ FROM metrics
220
+ WHERE run_name = ?
221
+ ORDER BY timestamp
222
+ """,
223
+ (run,),
224
+ )
225
+
226
+ rows = cursor.fetchall()
227
+ results = []
228
+ for row in rows:
229
+ metrics = json.loads(row["metrics"])
230
+ metrics["timestamp"] = row["timestamp"]
231
+ metrics["step"] = row["step"]
232
+ results.append(metrics)
233
+
234
+ return results
235
+
236
+ @staticmethod
237
+ def get_projects() -> list[str]:
238
+ """
239
+ Get list of all projects by scanning the database files in the trackio directory.
240
+ """
241
+ projects: set[str] = set()
242
+ if not TRACKIO_DIR.exists():
243
+ return []
244
+
245
+ for db_file in TRACKIO_DIR.glob("*.db"):
246
+ project_name = db_file.stem
247
+ projects.add(project_name)
248
+ return sorted(projects)
249
+
250
+ @staticmethod
251
+ def get_runs(project: str) -> list[str]:
252
+ """Get list of all runs for a project."""
253
+ db_path = SQLiteStorage.get_project_db_path(project)
254
+ if not db_path.exists():
255
+ return []
256
+
257
+ with SQLiteStorage._get_connection(db_path) as conn:
258
+ cursor = conn.cursor()
259
+ cursor.execute(
260
+ "SELECT DISTINCT run_name FROM metrics",
261
+ )
262
+ return [row[0] for row in cursor.fetchall()]
263
+
264
+ @staticmethod
265
+ def get_max_steps_for_runs(project: str, runs: list[str]) -> dict[str, int]:
266
+ """Efficiently get the maximum step for multiple runs in a single query."""
267
+ db_path = SQLiteStorage.get_project_db_path(project)
268
+ if not db_path.exists():
269
+ return {run: 0 for run in runs}
270
+
271
+ with SQLiteStorage._get_connection(db_path) as conn:
272
+ cursor = conn.cursor()
273
+ placeholders = ",".join("?" * len(runs))
274
+ cursor.execute(
275
+ f"""
276
+ SELECT run_name, MAX(step) as max_step
277
+ FROM metrics
278
+ WHERE run_name IN ({placeholders})
279
+ GROUP BY run_name
280
+ """,
281
+ runs,
282
+ )
283
+
284
+ results = {run: 0 for run in runs} # Default to 0 for runs with no data
285
+ for row in cursor.fetchall():
286
+ results[row["run_name"]] = row["max_step"]
287
+
288
+ return results
289
+
290
+ def finish(self):
291
+ """Cleanup when run is finished."""
292
+ pass
trackio_logo.png ADDED

Git LFS Details

  • SHA256: 3922c4d1e465270ad4d8abb12023f3beed5d9f7f338528a4c0ac21dcf358a1c8
  • Pointer size: 131 Bytes
  • Size of remote file: 487 kB
ui.py ADDED
@@ -0,0 +1,464 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from typing import Any
4
+
5
+ import gradio as gr
6
+ import huggingface_hub as hf
7
+ import pandas as pd
8
+
9
+ HfApi = hf.HfApi()
10
+
11
+ try:
12
+ from trackio.sqlite_storage import SQLiteStorage
13
+ from trackio.utils import (
14
+ RESERVED_KEYS,
15
+ TRACKIO_LOGO_PATH,
16
+ downsample,
17
+ get_color_mapping,
18
+ )
19
+ except: # noqa: E722
20
+ from sqlite_storage import SQLiteStorage
21
+ from utils import RESERVED_KEYS, TRACKIO_LOGO_PATH, downsample, get_color_mapping
22
+
23
+ css = """
24
+ #run-cb .wrap {
25
+ gap: 2px;
26
+ }
27
+ #run-cb .wrap label {
28
+ line-height: 1;
29
+ padding: 6px;
30
+ }
31
+ """
32
+
33
+
34
+ def get_projects(request: gr.Request):
35
+ dataset_id = os.environ.get("TRACKIO_DATASET_ID")
36
+ projects = SQLiteStorage.get_projects()
37
+ if project := request.query_params.get("project"):
38
+ interactive = False
39
+ else:
40
+ interactive = True
41
+ project = projects[0] if projects else None
42
+ return gr.Dropdown(
43
+ label="Project",
44
+ choices=projects,
45
+ value=project,
46
+ allow_custom_value=True,
47
+ interactive=interactive,
48
+ info=f"&#x21bb; Synced to <a href='https://huggingface.co/datasets/{dataset_id}' target='_blank'>{dataset_id}</a> every 5 min"
49
+ if dataset_id
50
+ else None,
51
+ )
52
+
53
+
54
+ def get_runs(project) -> list[str]:
55
+ if not project:
56
+ return []
57
+ return SQLiteStorage.get_runs(project)
58
+
59
+
60
+ def get_available_metrics(project: str, runs: list[str]) -> list[str]:
61
+ """Get all available metrics across all runs for x-axis selection."""
62
+ if not project or not runs:
63
+ return ["step", "time"]
64
+
65
+ all_metrics = set()
66
+ for run in runs:
67
+ metrics = SQLiteStorage.get_metrics(project, run)
68
+ if metrics:
69
+ df = pd.DataFrame(metrics)
70
+ numeric_cols = df.select_dtypes(include="number").columns
71
+ numeric_cols = [c for c in numeric_cols if c not in RESERVED_KEYS]
72
+ all_metrics.update(numeric_cols)
73
+
74
+ # Always include step and time as options
75
+ all_metrics.add("step")
76
+ all_metrics.add("time")
77
+
78
+ # Sort metrics by prefix
79
+ sorted_metrics = sort_metrics_by_prefix(list(all_metrics))
80
+
81
+ # Put step and time at the beginning
82
+ result = ["step", "time"]
83
+ for metric in sorted_metrics:
84
+ if metric not in result:
85
+ result.append(metric)
86
+
87
+ return result
88
+
89
+
90
+ def load_run_data(project: str | None, run: str | None, smoothing: bool, x_axis: str):
91
+ if not project or not run:
92
+ return None
93
+ metrics = SQLiteStorage.get_metrics(project, run)
94
+ if not metrics:
95
+ return None
96
+ df = pd.DataFrame(metrics)
97
+
98
+ if "step" not in df.columns:
99
+ df["step"] = range(len(df))
100
+
101
+ if x_axis == "time" and "timestamp" in df.columns:
102
+ df["timestamp"] = pd.to_datetime(df["timestamp"])
103
+ first_timestamp = df["timestamp"].min()
104
+ df["time"] = (df["timestamp"] - first_timestamp).dt.total_seconds()
105
+ x_column = "time"
106
+ elif x_axis == "step":
107
+ x_column = "step"
108
+ else:
109
+ x_column = x_axis
110
+
111
+ if smoothing:
112
+ numeric_cols = df.select_dtypes(include="number").columns
113
+ numeric_cols = [c for c in numeric_cols if c not in RESERVED_KEYS]
114
+
115
+ df_original = df.copy()
116
+ df_original["run"] = f"{run}_original"
117
+ df_original["data_type"] = "original"
118
+
119
+ df_smoothed = df.copy()
120
+ window_size = max(3, min(10, len(df) // 10)) # Adaptive window size
121
+ df_smoothed[numeric_cols] = (
122
+ df_smoothed[numeric_cols]
123
+ .rolling(window=window_size, center=True, min_periods=1)
124
+ .mean()
125
+ )
126
+ df_smoothed["run"] = f"{run}_smoothed"
127
+ df_smoothed["data_type"] = "smoothed"
128
+
129
+ combined_df = pd.concat([df_original, df_smoothed], ignore_index=True)
130
+ combined_df["x_axis"] = x_column
131
+ return combined_df
132
+ else:
133
+ df["run"] = run
134
+ df["data_type"] = "original"
135
+ df["x_axis"] = x_column
136
+ return df
137
+
138
+
139
+ def update_runs(project, filter_text, user_interacted_with_runs=False):
140
+ if project is None:
141
+ runs = []
142
+ num_runs = 0
143
+ else:
144
+ runs = get_runs(project)
145
+ num_runs = len(runs)
146
+ if filter_text:
147
+ runs = [r for r in runs if filter_text in r]
148
+ if not user_interacted_with_runs:
149
+ return gr.CheckboxGroup(choices=runs, value=runs), gr.Textbox(
150
+ label=f"Runs ({num_runs})"
151
+ )
152
+ else:
153
+ return gr.CheckboxGroup(choices=runs), gr.Textbox(label=f"Runs ({num_runs})")
154
+
155
+
156
+ def filter_runs(project, filter_text):
157
+ runs = get_runs(project)
158
+ runs = [r for r in runs if filter_text in r]
159
+ return gr.CheckboxGroup(choices=runs, value=runs)
160
+
161
+
162
+ def update_x_axis_choices(project, runs):
163
+ """Update x-axis dropdown choices based on available metrics."""
164
+ available_metrics = get_available_metrics(project, runs)
165
+ return gr.Dropdown(
166
+ label="X-axis",
167
+ choices=available_metrics,
168
+ value="step",
169
+ )
170
+
171
+
172
+ def toggle_timer(cb_value):
173
+ if cb_value:
174
+ return gr.Timer(active=True)
175
+ else:
176
+ return gr.Timer(active=False)
177
+
178
+
179
+ def check_auth(hf_token: str | None) -> None:
180
+ if os.getenv("SYSTEM") == "spaces": # if we are running in Spaces
181
+ # check auth token passed in
182
+ if hf_token is None:
183
+ raise PermissionError(
184
+ "Expected a HF_TOKEN to be provided when logging to a Space"
185
+ )
186
+ who = HfApi.whoami(hf_token)
187
+ access_token = who["auth"]["accessToken"]
188
+ owner_name = os.getenv("SPACE_AUTHOR_NAME")
189
+ repo_name = os.getenv("SPACE_REPO_NAME")
190
+ # make sure the token user is either the author of the space,
191
+ # or is a member of an org that is the author.
192
+ orgs = [o["name"] for o in who["orgs"]]
193
+ if owner_name != who["name"] and owner_name not in orgs:
194
+ raise PermissionError(
195
+ "Expected the provided hf_token to be the user owner of the space, or be a member of the org owner of the space"
196
+ )
197
+ # reject fine-grained tokens without specific repo access
198
+ if access_token["role"] == "fineGrained":
199
+ matched = False
200
+ for item in access_token["fineGrained"]["scoped"]:
201
+ if (
202
+ item["entity"]["type"] == "space"
203
+ and item["entity"]["name"] == f"{owner_name}/{repo_name}"
204
+ and "repo.write" in item["permissions"]
205
+ ):
206
+ matched = True
207
+ break
208
+ if (
209
+ item["entity"]["type"] == "user"
210
+ and item["entity"]["name"] == owner_name
211
+ and "repo.write" in item["permissions"]
212
+ ):
213
+ matched = True
214
+ break
215
+ if not matched:
216
+ raise PermissionError(
217
+ "Expected the provided hf_token with fine grained permissions to provide write access to the space"
218
+ )
219
+ # reject read-only tokens
220
+ elif access_token["role"] != "write":
221
+ raise PermissionError(
222
+ "Expected the provided hf_token to provide write permissions"
223
+ )
224
+
225
+
226
+ def upload_db_to_space(
227
+ project: str, uploaded_db: gr.FileData, hf_token: str | None
228
+ ) -> None:
229
+ check_auth(hf_token)
230
+ db_project_path = SQLiteStorage.get_project_db_path(project)
231
+ if os.path.exists(db_project_path):
232
+ raise gr.Error(
233
+ f"Trackio database file already exists for project {project}, cannot overwrite."
234
+ )
235
+ os.makedirs(os.path.dirname(db_project_path), exist_ok=True)
236
+ shutil.copy(uploaded_db["path"], db_project_path)
237
+
238
+
239
+ def log(
240
+ project: str,
241
+ run: str,
242
+ metrics: dict[str, Any],
243
+ hf_token: str | None,
244
+ ) -> None:
245
+ check_auth(hf_token)
246
+ SQLiteStorage.log(project=project, run=run, metrics=metrics)
247
+
248
+
249
+ def sort_metrics_by_prefix(metrics: list[str]) -> list[str]:
250
+ """
251
+ Sort metrics by grouping prefixes together.
252
+ Metrics without prefixes come first, then grouped by prefix.
253
+
254
+ Example:
255
+ Input: ["train/loss", "loss", "train/acc", "val/loss"]
256
+ Output: ["loss", "train/acc", "train/loss", "val/loss"]
257
+ """
258
+ no_prefix = []
259
+ with_prefix = []
260
+
261
+ for metric in metrics:
262
+ if "/" in metric:
263
+ with_prefix.append(metric)
264
+ else:
265
+ no_prefix.append(metric)
266
+
267
+ no_prefix.sort()
268
+
269
+ prefix_groups = {}
270
+ for metric in with_prefix:
271
+ prefix = metric.split("/")[0]
272
+ if prefix not in prefix_groups:
273
+ prefix_groups[prefix] = []
274
+ prefix_groups[prefix].append(metric)
275
+
276
+ sorted_with_prefix = []
277
+ for prefix in sorted(prefix_groups.keys()):
278
+ sorted_with_prefix.extend(sorted(prefix_groups[prefix]))
279
+
280
+ return no_prefix + sorted_with_prefix
281
+
282
+
283
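A quick sanity check of the grouping rule described in the docstring above (unprefixed metrics first, then prefix groups in alphabetical order):

# Matches the docstring example.
assert sort_metrics_by_prefix(["train/loss", "loss", "train/acc", "val/loss"]) == [
    "loss",
    "train/acc",
    "train/loss",
    "val/loss",
]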
+ def configure(request: gr.Request):
284
+ sidebar_param = request.query_params.get("sidebar")
285
+ match sidebar_param:
286
+ case "collapsed":
287
+ sidebar = gr.Sidebar(open=False, visible=True)
288
+ case "hidden":
289
+ sidebar = gr.Sidebar(open=False, visible=False)
290
+ case _:
291
+ sidebar = gr.Sidebar(open=True, visible=True)
292
+
293
+ if metrics := request.query_params.get("metrics"):
294
+ return metrics.split(","), sidebar
295
+ else:
296
+ return [], sidebar
297
+
298
+
299
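Because configure() reads its settings from the request's query string, the dashboard view can be pre-set from the URL. A couple of hypothetical examples; the host and port are placeholders for wherever the dashboard is served:

# Show only two metrics and start with the sidebar collapsed:
#   http://127.0.0.1:7860/?metrics=train/loss,val/loss&sidebar=collapsed
# Hide the sidebar entirely:
#   http://127.0.0.1:7860/?sidebar=hidden
# Any other value (or no value) for "sidebar" falls back to an open, visible sidebar.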
+ with gr.Blocks(theme="citrus", title="Trackio Dashboard", css=css) as demo:
300
+ with gr.Sidebar(open=False) as sidebar:
301
+ gr.Markdown(
302
+ f"<div style='display: flex; align-items: center; gap: 8px;'><img src='/gradio_api/file={TRACKIO_LOGO_PATH}' width='32' height='32'><span style='font-size: 2em; font-weight: bold;'>Trackio</span></div>"
303
+ )
304
+ project_dd = gr.Dropdown(label="Project", allow_custom_value=True)
305
+ run_tb = gr.Textbox(label="Runs", placeholder="Type to filter...")
306
+ run_cb = gr.CheckboxGroup(
307
+ label="Runs", choices=[], interactive=True, elem_id="run-cb"
308
+ )
309
+ gr.HTML("<hr>")
310
+ realtime_cb = gr.Checkbox(label="Refresh metrics realtime", value=True)
311
+ smoothing_cb = gr.Checkbox(label="Smooth metrics", value=True)
312
+ x_axis_dd = gr.Dropdown(
313
+ label="X-axis",
314
+ choices=["step", "time"],
315
+ value="step",
316
+ )
317
+
318
+ timer = gr.Timer(value=1)
319
+ metrics_subset = gr.State([])
320
+ user_interacted_with_run_cb = gr.State(False)
321
+
322
+ gr.on([demo.load], fn=configure, outputs=[metrics_subset, sidebar])
323
+ gr.on(
324
+ [demo.load],
325
+ fn=get_projects,
326
+ outputs=project_dd,
327
+ show_progress="hidden",
328
+ )
329
+ gr.on(
330
+ [timer.tick],
331
+ fn=update_runs,
332
+ inputs=[project_dd, run_tb, user_interacted_with_run_cb],
333
+ outputs=[run_cb, run_tb],
334
+ show_progress="hidden",
335
+ )
336
+ gr.on(
337
+ [demo.load, project_dd.change],
338
+ fn=update_runs,
339
+ inputs=[project_dd, run_tb],
340
+ outputs=[run_cb, run_tb],
341
+ show_progress="hidden",
342
+ )
343
+ gr.on(
344
+ [demo.load, project_dd.change, run_cb.change],
345
+ fn=update_x_axis_choices,
346
+ inputs=[project_dd, run_cb],
347
+ outputs=x_axis_dd,
348
+ show_progress="hidden",
349
+ )
350
+
351
+ realtime_cb.change(
352
+ fn=toggle_timer,
353
+ inputs=realtime_cb,
354
+ outputs=timer,
355
+ api_name="toggle_timer",
356
+ )
357
+ run_cb.input(
358
+ fn=lambda: True,
359
+ outputs=user_interacted_with_run_cb,
360
+ )
361
+ run_tb.input(
362
+ fn=filter_runs,
363
+ inputs=[project_dd, run_tb],
364
+ outputs=run_cb,
365
+ )
366
+
367
+ gr.api(
368
+ fn=upload_db_to_space,
369
+ api_name="upload_db_to_space",
370
+ )
371
+ gr.api(
372
+ fn=log,
373
+ api_name="log",
374
+ )
375
+
376
+ x_lim = gr.State(None)
377
+ last_steps = gr.State({})
378
+
379
+ def update_x_lim(select_data: gr.SelectData):
380
+ return select_data.index
381
+
382
+ def update_last_steps(project, runs):
383
+ """Update the last step from all runs to detect when new data is available."""
384
+ if not project or not runs:
385
+ return {}
386
+
387
+ return SQLiteStorage.get_max_steps_for_runs(project, runs)
388
+
389
+ timer.tick(
390
+ fn=update_last_steps,
391
+ inputs=[project_dd, run_cb],
392
+ outputs=last_steps,
393
+ show_progress="hidden",
394
+ )
395
+
396
+ @gr.render(
397
+ triggers=[
398
+ demo.load,
399
+ run_cb.change,
400
+ last_steps.change,
401
+ smoothing_cb.change,
402
+ x_lim.change,
403
+ x_axis_dd.change,
404
+ ],
405
+ inputs=[project_dd, run_cb, smoothing_cb, metrics_subset, x_lim, x_axis_dd],
406
+ show_progress="hidden",
407
+ )
408
+ def update_dashboard(project, runs, smoothing, metrics_subset, x_lim_value, x_axis):
409
+ dfs = []
410
+ original_runs = runs.copy()
411
+
412
+ for run in runs:
413
+ df = load_run_data(project, run, smoothing, x_axis)
414
+ if df is not None:
415
+ dfs.append(df)
416
+
417
+ if dfs:
418
+ master_df = pd.concat(dfs, ignore_index=True)
419
+ else:
420
+ master_df = pd.DataFrame()
421
+
422
+ if master_df.empty:
423
+ return
424
+
425
+ x_column = "step"
426
+ if dfs and not dfs[0].empty and "x_axis" in dfs[0].columns:
427
+ x_column = dfs[0]["x_axis"].iloc[0]
428
+
429
+ numeric_cols = master_df.select_dtypes(include="number").columns
430
+ numeric_cols = [c for c in numeric_cols if c not in RESERVED_KEYS]
431
+ if metrics_subset:
432
+ numeric_cols = [c for c in numeric_cols if c in metrics_subset]
433
+
434
+ numeric_cols = sort_metrics_by_prefix(list(numeric_cols))
435
+ color_map = get_color_mapping(original_runs, smoothing)
436
+
437
+ with gr.Row(key="row"):
438
+ for metric_idx, metric_name in enumerate(numeric_cols):
439
+ metric_df = master_df.dropna(subset=[metric_name])
440
+ color = "run" if "run" in metric_df.columns else None
441
+ if not metric_df.empty:
442
+ plot = gr.LinePlot(
443
+ downsample(
444
+ metric_df, x_column, metric_name, color, x_lim_value
445
+ ),
446
+ x=x_column,
447
+ y=metric_name,
448
+ color=color,
449
+ color_map=color_map,
450
+ title=metric_name,
451
+ key=f"plot-{metric_idx}",
452
+ preserved_by_key=None,
453
+ x_lim=x_lim_value,
454
+ show_fullscreen_button=True,
455
+ min_width=400,
456
+ )
457
+ plot.select(update_x_lim, outputs=x_lim, key=f"select-{metric_idx}")
458
+ plot.double_click(
459
+ lambda: None, outputs=x_lim, key=f"double-{metric_idx}"
460
+ )
461
+
462
+
463
+ if __name__ == "__main__":
464
+ demo.launch(allowed_paths=[TRACKIO_LOGO_PATH], show_api=False, show_error=True)
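Since upload_db_to_space and log are registered through gr.api above, a running dashboard exposes them as named endpoints that can be called remotely. A rough sketch with gradio_client follows; the URL is a placeholder, the keyword-argument call style may vary with the gradio_client version, and hf_token is only needed when the dashboard runs as a Space:

from gradio_client import Client

# Placeholder URL: point this at a locally running dashboard or a Trackio Space.
client = Client("http://127.0.0.1:7860/")

# Log one set of metrics to a project/run via the "/log" endpoint defined above.
client.predict(
    project="my-project",
    run="dainty-sunset-1",
    metrics={"train/loss": 0.42, "train/acc": 0.81},
    hf_token=None,
    api_name="/log",
)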
utils.py ADDED
@@ -0,0 +1,404 @@
1
+ import random
2
+ import re
3
+ import sys
4
+ import time
5
+ from pathlib import Path
6
+
7
+ import huggingface_hub
8
+ import numpy as np
9
+ import pandas as pd
10
+ from huggingface_hub.constants import HF_HOME
11
+
12
+ RESERVED_KEYS = ["project", "run", "timestamp", "step", "time"]
13
+ TRACKIO_DIR = Path(HF_HOME) / "trackio"
14
+
15
+ TRACKIO_LOGO_PATH = str(Path(__file__).parent.joinpath("trackio_logo.png"))
16
+
17
+
18
+ def generate_readable_name():
19
+ """
20
+ Generates a random, readable name like "dainty-sunset-1"
21
+ """
22
+ adjectives = [
23
+ "dainty",
24
+ "brave",
25
+ "calm",
26
+ "eager",
27
+ "fancy",
28
+ "gentle",
29
+ "happy",
30
+ "jolly",
31
+ "kind",
32
+ "lively",
33
+ "merry",
34
+ "nice",
35
+ "proud",
36
+ "quick",
37
+ "silly",
38
+ "tidy",
39
+ "witty",
40
+ "zealous",
41
+ "bright",
42
+ "shy",
43
+ "bold",
44
+ "clever",
45
+ "daring",
46
+ "elegant",
47
+ "faithful",
48
+ "graceful",
49
+ "honest",
50
+ "inventive",
51
+ "jovial",
52
+ "keen",
53
+ "lucky",
54
+ "modest",
55
+ "noble",
56
+ "optimistic",
57
+ "patient",
58
+ "quirky",
59
+ "resourceful",
60
+ "sincere",
61
+ "thoughtful",
62
+ "upbeat",
63
+ "valiant",
64
+ "warm",
65
+ "youthful",
66
+ "zesty",
67
+ "adventurous",
68
+ "breezy",
69
+ "cheerful",
70
+ "delightful",
71
+ "energetic",
72
+ "fearless",
73
+ "glad",
74
+ "hopeful",
75
+ "imaginative",
76
+ "joyful",
77
+ "kindly",
78
+ "luminous",
79
+ "mysterious",
80
+ "neat",
81
+ "outgoing",
82
+ "playful",
83
+ "radiant",
84
+ "spirited",
85
+ "tranquil",
86
+ "unique",
87
+ "vivid",
88
+ "wise",
89
+ "zany",
90
+ "artful",
91
+ "bubbly",
92
+ "charming",
93
+ "dazzling",
94
+ "earnest",
95
+ "festive",
96
+ "gentlemanly",
97
+ "hearty",
98
+ "intrepid",
99
+ "jubilant",
100
+ "knightly",
101
+ "lively",
102
+ "magnetic",
103
+ "nimble",
104
+ "orderly",
105
+ "peaceful",
106
+ "quick-witted",
107
+ "robust",
108
+ "sturdy",
109
+ "trusty",
110
+ "upstanding",
111
+ "vibrant",
112
+ "whimsical",
113
+ ]
114
+ nouns = [
115
+ "sunset",
116
+ "forest",
117
+ "river",
118
+ "mountain",
119
+ "breeze",
120
+ "meadow",
121
+ "ocean",
122
+ "valley",
123
+ "sky",
124
+ "field",
125
+ "cloud",
126
+ "star",
127
+ "rain",
128
+ "leaf",
129
+ "stone",
130
+ "flower",
131
+ "bird",
132
+ "tree",
133
+ "wave",
134
+ "trail",
135
+ "island",
136
+ "desert",
137
+ "hill",
138
+ "lake",
139
+ "pond",
140
+ "grove",
141
+ "canyon",
142
+ "reef",
143
+ "bay",
144
+ "peak",
145
+ "glade",
146
+ "marsh",
147
+ "cliff",
148
+ "dune",
149
+ "spring",
150
+ "brook",
151
+ "cave",
152
+ "plain",
153
+ "ridge",
154
+ "wood",
155
+ "blossom",
156
+ "petal",
157
+ "root",
158
+ "branch",
159
+ "seed",
160
+ "acorn",
161
+ "pine",
162
+ "willow",
163
+ "cedar",
164
+ "elm",
165
+ "falcon",
166
+ "eagle",
167
+ "sparrow",
168
+ "robin",
169
+ "owl",
170
+ "finch",
171
+ "heron",
172
+ "crane",
173
+ "duck",
174
+ "swan",
175
+ "fox",
176
+ "wolf",
177
+ "bear",
178
+ "deer",
179
+ "moose",
180
+ "otter",
181
+ "beaver",
182
+ "lynx",
183
+ "hare",
184
+ "badger",
185
+ "butterfly",
186
+ "bee",
187
+ "ant",
188
+ "beetle",
189
+ "dragonfly",
190
+ "firefly",
191
+ "ladybug",
192
+ "moth",
193
+ "spider",
194
+ "worm",
195
+ "coral",
196
+ "kelp",
197
+ "shell",
198
+ "pebble",
199
+ "boulder",
200
+ "cobble",
201
+ "sand",
202
+ "wavelet",
203
+ "tide",
204
+ "current",
205
+ ]
206
+ adjective = random.choice(adjectives)
207
+ noun = random.choice(nouns)
208
+ number = random.randint(1, 99)
209
+ return f"{adjective}-{noun}-{number}"
210
+
211
+
212
+ def block_except_in_notebook():
213
+ in_notebook = bool(getattr(sys, "ps1", sys.flags.interactive))
214
+ if in_notebook:
215
+ return
216
+ try:
217
+ while True:
218
+ time.sleep(0.1)
219
+ except (KeyboardInterrupt, OSError):
220
+ print("Keyboard interruption in main thread... closing dashboard.")
221
+
222
+
223
+ def simplify_column_names(columns: list[str]) -> dict[str, str]:
224
+ """
225
+ Simplifies column names to first 10 alphanumeric or "/" characters with unique suffixes.
226
+
227
+ Args:
228
+ columns: List of original column names
229
+
230
+ Returns:
231
+ Dictionary mapping original column names to simplified names
232
+ """
233
+ simplified_names = {}
234
+ used_names = set()
235
+
236
+ for col in columns:
237
+ alphanumeric = re.sub(r"[^a-zA-Z0-9/]", "", col)
238
+ base_name = alphanumeric[:10] if alphanumeric else f"col_{len(used_names)}"
239
+
240
+ final_name = base_name
241
+ suffix = 1
242
+ while final_name in used_names:
243
+ final_name = f"{base_name}_{suffix}"
244
+ suffix += 1
245
+
246
+ simplified_names[col] = final_name
247
+ used_names.add(final_name)
248
+
249
+ return simplified_names
250
+
251
+
252
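An illustrative call showing the truncate-to-10-characters-plus-suffix behavior described in the docstring above:

# Both names reduce to the same 10-character base, so the second gets a suffix.
simplify_column_names(["train/loss", "train/loss_extra"])
# -> {"train/loss": "train/loss", "train/loss_extra": "train/loss_1"}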
+ def print_dashboard_instructions(project: str) -> None:
253
+ """
254
+ Prints instructions for viewing the Trackio dashboard.
255
+
256
+ Args:
257
+ project: The name of the project to show dashboard for.
258
+ """
259
+ YELLOW = "\033[93m"
260
+ BOLD = "\033[1m"
261
+ RESET = "\033[0m"
262
+
263
+ print("* View dashboard by running in your terminal:")
264
+ print(f'{BOLD}{YELLOW}trackio show --project "{project}"{RESET}')
265
+ print(f'* or by running in Python: trackio.show(project="{project}")')
266
+
267
+
268
+ def preprocess_space_and_dataset_ids(
269
+ space_id: str | None, dataset_id: str | None
270
+ ) -> tuple[str | None, str | None]:
271
+ if space_id is not None and "/" not in space_id:
272
+ username = huggingface_hub.whoami()["name"]
273
+ space_id = f"{username}/{space_id}"
274
+ if dataset_id is not None and "/" not in dataset_id:
275
+ username = huggingface_hub.whoami()["name"]
276
+ dataset_id = f"{username}/{dataset_id}"
277
+ if space_id is not None and dataset_id is None:
278
+ dataset_id = f"{space_id}_dataset"
279
+ return space_id, dataset_id
280
+
281
+
282
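For example, assuming the currently logged-in Hugging Face user is named "alice" (the shorthand forms call whoami(), so a valid login is required):

preprocess_space_and_dataset_ids("my-trackio", None)
# -> ("alice/my-trackio", "alice/my-trackio_dataset")
preprocess_space_and_dataset_ids("org/my-trackio", "org/metrics")
# -> ("org/my-trackio", "org/metrics")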
+ def fibo():
283
+ """Generator for Fibonacci backoff: 1, 1, 2, 3, 5, 8, ..."""
284
+ a, b = 1, 1
285
+ while True:
286
+ yield a
287
+ a, b = b, a + b
288
+
289
+
290
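A minimal sketch of how fibo() could drive a retry loop with Fibonacci backoff; retry_with_fibo_backoff and do_request are hypothetical names used only for illustration, nothing in this file defines them:

import time

def retry_with_fibo_backoff(do_request, max_attempts=5):
    """Call do_request, sleeping 1, 1, 2, 3, ... seconds between failed attempts."""
    delays = fibo()
    for attempt in range(max_attempts):
        try:
            return do_request()
        except Exception:
            if attempt == max_attempts - 1:
                raise
            time.sleep(next(delays))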
+ COLOR_PALETTE = [
291
+ "#3B82F6",
292
+ "#EF4444",
293
+ "#10B981",
294
+ "#F59E0B",
295
+ "#8B5CF6",
296
+ "#EC4899",
297
+ "#06B6D4",
298
+ "#84CC16",
299
+ "#F97316",
300
+ "#6366F1",
301
+ ]
302
+
303
+
304
+ def get_color_mapping(runs: list[str], smoothing: bool) -> dict[str, str]:
305
+ """Generate color mapping for runs, with transparency for original data when smoothing is enabled."""
306
+ color_map = {}
307
+
308
+ for i, run in enumerate(runs):
309
+ base_color = COLOR_PALETTE[i % len(COLOR_PALETTE)]
310
+
311
+ if smoothing:
312
+ color_map[f"{run}_smoothed"] = base_color
313
+ color_map[f"{run}_original"] = base_color + "4D"
314
+ else:
315
+ color_map[run] = base_color
316
+
317
+ return color_map
318
+
319
+
320
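With smoothing enabled, each run gets two entries: the palette color for the smoothed series, and the same color with a "4D" alpha suffix (roughly 30% opacity) for the original series. For example:

get_color_mapping(["run-a", "run-b"], smoothing=True)
# -> {"run-a_smoothed": "#3B82F6", "run-a_original": "#3B82F64D",
#     "run-b_smoothed": "#EF4444", "run-b_original": "#EF44444D"}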
+ def downsample(
321
+ df: pd.DataFrame,
322
+ x: str,
323
+ y: str,
324
+ color: str | None,
325
+ x_lim: tuple[float, float] | None = None,
326
+ ) -> pd.DataFrame:
327
+ if df.empty:
328
+ return df
329
+
330
+ columns_to_keep = [x, y]
331
+ if color is not None and color in df.columns:
332
+ columns_to_keep.append(color)
333
+ df = df[columns_to_keep].copy()
334
+
335
+ n_bins = 100
336
+
337
+ if color is not None and color in df.columns:
338
+ groups = df.groupby(color)
339
+ else:
340
+ groups = [(None, df)]
341
+
342
+ downsampled_indices = []
343
+
344
+ for _, group_df in groups:
345
+ if group_df.empty:
346
+ continue
347
+
348
+ group_df = group_df.sort_values(x)
349
+
350
+ if x_lim is not None:
351
+ x_min, x_max = x_lim
352
+ before_point = group_df[group_df[x] < x_min].tail(1)
353
+ after_point = group_df[group_df[x] > x_max].head(1)
354
+ group_df = group_df[(group_df[x] >= x_min) & (group_df[x] <= x_max)]
355
+ else:
356
+ before_point = after_point = None
357
+ x_min = group_df[x].min()
358
+ x_max = group_df[x].max()
359
+
360
+ if before_point is not None and not before_point.empty:
361
+ downsampled_indices.extend(before_point.index.tolist())
362
+ if after_point is not None and not after_point.empty:
363
+ downsampled_indices.extend(after_point.index.tolist())
364
+
365
+ if group_df.empty:
366
+ continue
367
+
368
+ if x_min == x_max:
369
+ min_y_idx = group_df[y].idxmin()
370
+ max_y_idx = group_df[y].idxmax()
371
+ if min_y_idx != max_y_idx:
372
+ downsampled_indices.extend([min_y_idx, max_y_idx])
373
+ else:
374
+ downsampled_indices.append(min_y_idx)
375
+ continue
376
+
377
+ if len(group_df) < 500:
378
+ downsampled_indices.extend(group_df.index.tolist())
379
+ continue
380
+
381
+ bins = np.linspace(x_min, x_max, n_bins + 1)
382
+ group_df["bin"] = pd.cut(
383
+ group_df[x], bins=bins, labels=False, include_lowest=True
384
+ )
385
+
386
+ for bin_idx in group_df["bin"].dropna().unique():
387
+ bin_data = group_df[group_df["bin"] == bin_idx]
388
+ if bin_data.empty:
389
+ continue
390
+
391
+ min_y_idx = bin_data[y].idxmin()
392
+ max_y_idx = bin_data[y].idxmax()
393
+
394
+ downsampled_indices.append(min_y_idx)
395
+ if min_y_idx != max_y_idx:
396
+ downsampled_indices.append(max_y_idx)
397
+
398
+ unique_indices = list(set(downsampled_indices))
399
+
400
+ downsampled_df = df.loc[unique_indices].copy()
401
+ downsampled_df = downsampled_df.sort_values(x).reset_index(drop=True)
402
+ downsampled_df = downsampled_df.drop(columns=["bin"], errors="ignore")
403
+
404
+ return downsampled_df
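A small synthetic example of the min/max-per-bin downsampling above: with no x_lim it splits the x range into about 100 bins and keeps at most the per-bin minimum and maximum of y, plus the nearest points outside a zoom window when x_lim is given. The data below is made up:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame(
    {
        "step": np.arange(2000),
        "train/loss": rng.normal(size=2000),
        "run": ["run-a"] * 2000,
    }
)

small = downsample(df, x="step", y="train/loss", color="run")
# At most ~2 points survive per bin, so len(small) is on the order of 200 rows, not 2000.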
version.txt ADDED
@@ -0,0 +1 @@
1
+ 0.1.0