Spaces:

abidlabs
/

histogram-test-208105

Sleeping

App Files Files

xet

Community

abidlabs HF Staff committed 2 days ago

Commit

82b0cb3

verified ·

1 Parent(s): eea39aa

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
CHANGELOG.md +24 -0
__init__.py +337 -0
__pycache__/__init__.cpython-311.pyc +0 -0
__pycache__/__init__.cpython-312.pyc +0 -0
__pycache__/cli.cpython-311.pyc +0 -0
__pycache__/cli.cpython-312.pyc +0 -0
__pycache__/commit_scheduler.cpython-311.pyc +0 -0
__pycache__/commit_scheduler.cpython-312.pyc +0 -0
__pycache__/context_vars.cpython-311.pyc +0 -0
__pycache__/context_vars.cpython-312.pyc +0 -0
__pycache__/deploy.cpython-311.pyc +0 -0
__pycache__/deploy.cpython-312.pyc +0 -0
__pycache__/dummy_commit_scheduler.cpython-311.pyc +0 -0
__pycache__/dummy_commit_scheduler.cpython-312.pyc +0 -0
__pycache__/file_storage.cpython-311.pyc +0 -0
__pycache__/file_storage.cpython-312.pyc +0 -0
__pycache__/histogram.cpython-311.pyc +0 -0
__pycache__/histogram.cpython-312.pyc +0 -0
__pycache__/imports.cpython-311.pyc +0 -0
__pycache__/imports.cpython-312.pyc +0 -0
__pycache__/media.cpython-311.pyc +0 -0
__pycache__/media.cpython-312.pyc +0 -0
__pycache__/run.cpython-311.pyc +0 -0
__pycache__/run.cpython-312.pyc +0 -0
__pycache__/sqlite_storage.cpython-311.pyc +0 -0
__pycache__/sqlite_storage.cpython-312.pyc +0 -0
__pycache__/table.cpython-311.pyc +0 -0
__pycache__/table.cpython-312.pyc +0 -0
__pycache__/typehints.cpython-311.pyc +0 -0
__pycache__/typehints.cpython-312.pyc +0 -0
__pycache__/utils.cpython-311.pyc +0 -0
__pycache__/utils.cpython-312.pyc +0 -0
__pycache__/video_writer.cpython-311.pyc +0 -0
__pycache__/video_writer.cpython-312.pyc +0 -0
assets/trackio_logo_dark.png +0 -0
assets/trackio_logo_light.png +0 -0
assets/trackio_logo_old.png +3 -0
assets/trackio_logo_type_dark.png +0 -0
assets/trackio_logo_type_dark_transparent.png +0 -0
assets/trackio_logo_type_light.png +0 -0
assets/trackio_logo_type_light_transparent.png +0 -0
cli.py +37 -0
commit_scheduler.py +391 -0
context_vars.py +18 -0
deploy.py +258 -0
dummy_commit_scheduler.py +12 -0
file_storage.py +37 -0
histogram.py +68 -0
imports.py +302 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/trackio_logo_old.png filter=lfs diff=lfs merge=lfs -text

CHANGELOG.md ADDED Viewed

	@@ -0,0 +1,24 @@

+# trackio
+## 0.5.3
+### Features
+- [#300](https://github.com/gradio-app/trackio/pull/300) [`5e4cacf`](https://github.com/gradio-app/trackio/commit/5e4cacf2e7ce527b4ce60de3a5bc05d2c02c77fb) - Adds more environment variables to allow customization of Trackio dashboard.  Thanks @abidlabs!
+## 0.5.2
+### Features
+- [#293](https://github.com/gradio-app/trackio/pull/293) [`64afc28`](https://github.com/gradio-app/trackio/commit/64afc28d3ea1dfd821472dc6bf0b8ed35a9b74be) - Ensures that the TRACKIO_DIR environment variable is respected.  Thanks @abidlabs!
+- [#287](https://github.com/gradio-app/trackio/pull/287) [`cd3e929`](https://github.com/gradio-app/trackio/commit/cd3e9294320949e6b8b829239069a43d5d7ff4c1) - fix(sqlite): unify .sqlite extension, allow export when DBs exist, clean WAL sidecars on import.  Thanks @vaibhav-research!
+### Fixes
+- [#291](https://github.com/gradio-app/trackio/pull/291) [`3b5adc3`](https://github.com/gradio-app/trackio/commit/3b5adc3d1f452dbab7a714d235f4974782f93730) - Fix the wheel build.  Thanks @pngwn!
+## 0.5.1
+### Fixes
+- [#278](https://github.com/gradio-app/trackio/pull/278) [`314c054`](https://github.com/gradio-app/trackio/commit/314c05438007ddfea3383e06fd19143e27468e2d) - Fix row orientation of metrics plots.  Thanks @abidlabs!

__init__.py ADDED Viewed

	@@ -0,0 +1,337 @@

+import hashlib
+import json
+import logging
+import os
+import warnings
+import webbrowser
+from pathlib import Path
+from typing import Any
+from gradio.blocks import BUILT_IN_THEMES
+from gradio.themes import Default as DefaultTheme
+from gradio.themes import ThemeClass
+from gradio_client import Client
+from huggingface_hub import SpaceStorage
+from trackio import context_vars, deploy, utils
+from trackio.histogram import Histogram
+from trackio.imports import import_csv, import_tf_events
+from trackio.media import TrackioImage, TrackioVideo
+from trackio.run import Run
+from trackio.sqlite_storage import SQLiteStorage
+from trackio.table import Table
+from trackio.ui.main import demo
+from trackio.utils import TRACKIO_DIR, TRACKIO_LOGO_DIR
+logging.getLogger("httpx").setLevel(logging.WARNING)
+warnings.filterwarnings(
+    "ignore",
+    message="Empty session being created. Install gradio\\[oauth\\]",
+    category=UserWarning,
+    module="gradio.helpers",
+)
+__version__ = json.loads(Path(__file__).parent.joinpath("package.json").read_text())[
+    "version"
+]
+__all__ = [
+    "init",
+    "log",
+    "finish",
+    "show",
+    "import_csv",
+    "import_tf_events",
+    "Image",
+    "Video",
+    "Table",
+    "Histogram",
+]
+Image = TrackioImage
+Video = TrackioVideo
+config = {}
+DEFAULT_THEME = "default"
+def init(
+    project: str,
+    name: str | None = None,
+    group: str | None = None,
+    space_id: str | None = None,
+    space_storage: SpaceStorage | None = None,
+    dataset_id: str | None = None,
+    config: dict | None = None,
+    resume: str = "never",
+    settings: Any = None,
+    private: bool | None = None,
+    embed: bool = True,
+) -> Run:
+    """
+    Creates a new Trackio project and returns a [`Run`] object.
+    Args:
+        project (`str`):
+            The name of the project (can be an existing project to continue tracking or
+            a new project to start tracking from scratch).
+        name (`str`, *optional*):
+            The name of the run (if not provided, a default name will be generated).
+        group (`str`, *optional*):
+            The name of the group which this run belongs to in order to help organize
+            related runs together. You can toggle the entire group's visibilitiy in the
+            dashboard.
+        space_id (`str`, *optional*):
+            If provided, the project will be logged to a Hugging Face Space instead of
+            a local directory. Should be a complete Space name like
+            `"username/reponame"` or `"orgname/reponame"`, or just `"reponame"` in which
+            case the Space will be created in the currently-logged-in Hugging Face
+            user's namespace. If the Space does not exist, it will be created. If the
+            Space already exists, the project will be logged to it.
+        space_storage ([`~huggingface_hub.SpaceStorage`], *optional*):
+            Choice of persistent storage tier.
+        dataset_id (`str`, *optional*):
+            If a `space_id` is provided, a persistent Hugging Face Dataset will be
+            created and the metrics will be synced to it every 5 minutes. Specify a
+            Dataset with name like `"username/datasetname"` or `"orgname/datasetname"`,
+            or `"datasetname"` (uses currently-logged-in Hugging Face user's namespace),
+            or `None` (uses the same name as the Space but with the `"_dataset"`
+            suffix). If the Dataset does not exist, it will be created. If the Dataset
+            already exists, the project will be appended to it.
+        config (`dict`, *optional*):
+            A dictionary of configuration options. Provided for compatibility with
+            `wandb.init()`.
+        resume (`str`, *optional*, defaults to `"never"`):
+            Controls how to handle resuming a run. Can be one of:
+            - `"must"`: Must resume the run with the given name, raises error if run
+              doesn't exist
+            - `"allow"`: Resume the run if it exists, otherwise create a new run
+            - `"never"`: Never resume a run, always create a new one
+        private (`bool`, *optional*):
+            Whether to make the Space private. If None (default), the repo will be
+            public unless the organization's default is private. This value is ignored
+            if the repo already exists.
+        settings (`Any`, *optional*):
+            Not used. Provided for compatibility with `wandb.init()`.
+        embed (`bool`, *optional*, defaults to `True`):
+            If running inside a jupyter/Colab notebook, whether the dashboard should
+            automatically be embedded in the cell when trackio.init() is called.
+    Returns:
+        `Run`: A [`Run`] object that can be used to log metrics and finish the run.
+    """
+    if settings is not None:
+        warnings.warn(
+            "* Warning: settings is not used. Provided for compatibility with wandb.init(). Please create an issue at: https://github.com/gradio-app/trackio/issues if you need a specific feature implemented."
+        )
+    if space_id is None and dataset_id is not None:
+        raise ValueError("Must provide a `space_id` when `dataset_id` is provided.")
+    space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)
+    url = context_vars.current_server.get()
+    share_url = context_vars.current_share_server.get()
+    if url is None:
+        if space_id is None:
+            _, url, share_url = demo.launch(
+                show_api=False,
+                inline=False,
+                quiet=True,
+                prevent_thread_lock=True,
+                show_error=True,
+                favicon_path=TRACKIO_LOGO_DIR / "trackio_logo_light.png",
+                allowed_paths=[TRACKIO_LOGO_DIR, TRACKIO_DIR],
+            )
+        else:
+            url = space_id
+            share_url = None
+        context_vars.current_server.set(url)
+        context_vars.current_share_server.set(share_url)
+    if (
+        context_vars.current_project.get() is None
+        or context_vars.current_project.get() != project
+    ):
+        print(f"* Trackio project initialized: {project}")
+        if dataset_id is not None:
+            os.environ["TRACKIO_DATASET_ID"] = dataset_id
+            print(
+                f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}"
+            )
+        if space_id is None:
+            print(f"* Trackio metrics logged to: {TRACKIO_DIR}")
+            if utils.is_in_notebook() and embed:
+                base_url = share_url + "/" if share_url else url
+                full_url = utils.get_full_url(
+                    base_url, project=project, write_token=demo.write_token
+                )
+                utils.embed_url_in_notebook(full_url)
+            else:
+                utils.print_dashboard_instructions(project)
+        else:
+            deploy.create_space_if_not_exists(
+                space_id, space_storage, dataset_id, private
+            )
+            user_name, space_name = space_id.split("/")
+            space_url = deploy.SPACE_HOST_URL.format(
+                user_name=user_name, space_name=space_name
+            )
+            print(f"* View dashboard by going to: {space_url}")
+            if utils.is_in_notebook() and embed:
+                utils.embed_url_in_notebook(space_url)
+    context_vars.current_project.set(project)
+    client = None
+    if not space_id:
+        client = Client(url, verbose=False)
+    if resume == "must":
+        if name is None:
+            raise ValueError("Must provide a run name when resume='must'")
+        if name not in SQLiteStorage.get_runs(project):
+            raise ValueError(f"Run '{name}' does not exist in project '{project}'")
+        resumed = True
+    elif resume == "allow":
+        resumed = name is not None and name in SQLiteStorage.get_runs(project)
+    elif resume == "never":
+        if name is not None and name in SQLiteStorage.get_runs(project):
+            warnings.warn(
+                f"* Warning: resume='never' but a run '{name}' already exists in "
+                f"project '{project}'. Generating a new name and instead. If you want "
+                "to resume this run, call init() with resume='must' or resume='allow'."
+            )
+            name = None
+        resumed = False
+    else:
+        raise ValueError("resume must be one of: 'must', 'allow', or 'never'")
+    run = Run(
+        url=url,
+        project=project,
+        client=client,
+        name=name,
+        group=group,
+        config=config,
+        space_id=space_id,
+    )
+    if resumed:
+        print(f"* Resumed existing run: {run.name}")
+    else:
+        print(f"* Created new run: {run.name}")
+    context_vars.current_run.set(run)
+    globals()["config"] = run.config
+    return run
+def log(metrics: dict, step: int | None = None) -> None:
+    """
+    Logs metrics to the current run.
+    Args:
+        metrics (`dict`):
+            A dictionary of metrics to log.
+        step (`int`, *optional*):
+            The step number. If not provided, the step will be incremented
+            automatically.
+    """
+    run = context_vars.current_run.get()
+    if run is None:
+        raise RuntimeError("Call trackio.init() before trackio.log().")
+    run.log(
+        metrics=metrics,
+        step=step,
+    )
+def finish():
+    """
+    Finishes the current run.
+    """
+    run = context_vars.current_run.get()
+    if run is None:
+        raise RuntimeError("Call trackio.init() before trackio.finish().")
+    run.finish()
+def show(
+    project: str | None = None,
+    theme: str | ThemeClass | None = None,
+    mcp_server: bool | None = None,
+):
+    """
+    Launches the Trackio dashboard.
+    Args:
+        project (`str`, *optional*):
+            The name of the project whose runs to show. If not provided, all projects
+            will be shown and the user can select one.
+        theme (`str` or `ThemeClass`, *optional*):
+            A Gradio Theme to use for the dashboard instead of the default Gradio theme,
+            can be a built-in theme (e.g. `'soft'`, `'citrus'`), a theme from the Hub
+            (e.g. `"gstaff/xkcd"`), or a custom Theme class. If not provided, the
+            `TRACKIO_THEME` environment variable will be used, or if that is not set, the
+            default Gradio theme will be used.
+        mcp_server (`bool`, *optional*):
+            If `True`, the Trackio dashboard will be set up as an MCP server and certain
+            functions will be added as MCP tools. If `None` (default behavior), then the
+            `GRADIO_MCP_SERVER` environment variable will be used to determine if the
+            MCP server should be enabled (which is `"True"` on Hugging Face Spaces).
+    """
+    theme = theme or os.environ.get("TRACKIO_THEME", DEFAULT_THEME)
+    if theme != DEFAULT_THEME:
+        # TODO: It's a little hacky to reproduce this theme-setting logic from Gradio Blocks,
+        # but in Gradio 6.0, the theme will be set in `launch()` instead, which means that we
+        # will be able to remove this code.
+        if isinstance(theme, str):
+            if theme.lower() in BUILT_IN_THEMES:
+                theme = BUILT_IN_THEMES[theme.lower()]
+            else:
+                try:
+                    theme = ThemeClass.from_hub(theme)
+                except Exception as e:
+                    warnings.warn(f"Cannot load {theme}. Caught Exception: {str(e)}")
+                    theme = DefaultTheme()
+        if not isinstance(theme, ThemeClass):
+            warnings.warn("Theme should be a class loaded from gradio.themes")
+            theme = DefaultTheme()
+        demo.theme: ThemeClass = theme
+        demo.theme_css = theme._get_theme_css()
+        demo.stylesheets = theme._stylesheets
+        theme_hasher = hashlib.sha256()
+        theme_hasher.update(demo.theme_css.encode("utf-8"))
+        demo.theme_hash = theme_hasher.hexdigest()
+    _mcp_server = (
+        mcp_server
+        if mcp_server is not None
+        else os.environ.get("GRADIO_MCP_SERVER", "False") == "True"
+    )
+    _, url, share_url = demo.launch(
+        show_api=_mcp_server,
+        quiet=True,
+        inline=False,
+        prevent_thread_lock=True,
+        favicon_path=TRACKIO_LOGO_DIR / "trackio_logo_light.png",
+        allowed_paths=[TRACKIO_LOGO_DIR, TRACKIO_DIR],
+        mcp_server=_mcp_server,
+    )
+    base_url = share_url + "/" if share_url else url
+    full_url = utils.get_full_url(
+        base_url, project=project, write_token=demo.write_token
+    )
+    if not utils.is_in_notebook():
+        print(f"* Trackio UI launched at: {full_url}")
+        webbrowser.open(full_url)
+        utils.block_main_thread_until_keyboard_interrupt()
+    else:
+        utils.embed_url_in_notebook(full_url)

__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (15.7 kB). View file

__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (14.7 kB). View file

__pycache__/cli.cpython-311.pyc ADDED Viewed

Binary file (1.87 kB). View file

__pycache__/cli.cpython-312.pyc ADDED Viewed

Binary file (1.72 kB). View file

__pycache__/commit_scheduler.cpython-311.pyc ADDED Viewed

Binary file (20.2 kB). View file

__pycache__/commit_scheduler.cpython-312.pyc ADDED Viewed

Binary file (18.8 kB). View file

__pycache__/context_vars.cpython-311.pyc ADDED Viewed

Binary file (1 kB). View file

__pycache__/context_vars.cpython-312.pyc ADDED Viewed

Binary file (921 Bytes). View file

__pycache__/deploy.cpython-311.pyc ADDED Viewed

Binary file (11.3 kB). View file

__pycache__/deploy.cpython-312.pyc ADDED Viewed

Binary file (10.3 kB). View file

__pycache__/dummy_commit_scheduler.cpython-311.pyc ADDED Viewed

Binary file (1.19 kB). View file

__pycache__/dummy_commit_scheduler.cpython-312.pyc ADDED Viewed

Binary file (1.01 kB). View file

__pycache__/file_storage.cpython-311.pyc ADDED Viewed

Binary file (1.88 kB). View file

__pycache__/file_storage.cpython-312.pyc ADDED Viewed

Binary file (1.63 kB). View file

__pycache__/histogram.cpython-311.pyc ADDED Viewed

Binary file (3.15 kB). View file

__pycache__/histogram.cpython-312.pyc ADDED Viewed

Binary file (2.97 kB). View file

__pycache__/imports.cpython-311.pyc ADDED Viewed

Binary file (14.3 kB). View file

__pycache__/imports.cpython-312.pyc ADDED Viewed

Binary file (13.2 kB). View file

__pycache__/media.cpython-311.pyc ADDED Viewed

Binary file (16 kB). View file

__pycache__/media.cpython-312.pyc ADDED Viewed

Binary file (15 kB). View file

__pycache__/run.cpython-311.pyc ADDED Viewed

Binary file (10.2 kB). View file

__pycache__/run.cpython-312.pyc ADDED Viewed

Binary file (9.35 kB). View file

__pycache__/sqlite_storage.cpython-311.pyc ADDED Viewed

Binary file (37.5 kB). View file

__pycache__/sqlite_storage.cpython-312.pyc ADDED Viewed

Binary file (31.7 kB). View file

__pycache__/table.cpython-311.pyc ADDED Viewed

Binary file (2.52 kB). View file

__pycache__/table.cpython-312.pyc ADDED Viewed

Binary file (2.33 kB). View file

__pycache__/typehints.cpython-311.pyc ADDED Viewed

Binary file (1.12 kB). View file

__pycache__/typehints.cpython-312.pyc ADDED Viewed

Binary file (908 Bytes). View file

__pycache__/utils.cpython-311.pyc ADDED Viewed

Binary file (30.7 kB). View file

__pycache__/utils.cpython-312.pyc ADDED Viewed

Binary file (26.9 kB). View file

__pycache__/video_writer.cpython-311.pyc ADDED Viewed

Binary file (5.72 kB). View file

__pycache__/video_writer.cpython-312.pyc ADDED Viewed

Binary file (5.33 kB). View file

assets/trackio_logo_dark.png ADDED Viewed

assets/trackio_logo_light.png ADDED Viewed

assets/trackio_logo_old.png ADDED Viewed

Git LFS Details

SHA256: 3922c4d1e465270ad4d8abb12023f3beed5d9f7f338528a4c0ac21dcf358a1c8
Pointer size: 131 Bytes
Size of remote file: 487 kB

assets/trackio_logo_type_dark.png ADDED Viewed

assets/trackio_logo_type_dark_transparent.png ADDED Viewed

assets/trackio_logo_type_light.png ADDED Viewed

assets/trackio_logo_type_light_transparent.png ADDED Viewed

cli.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import argparse
+from trackio import show
+def main():
+    parser = argparse.ArgumentParser(description="Trackio CLI")
+    subparsers = parser.add_subparsers(dest="command")
+    ui_parser = subparsers.add_parser(
+        "show", help="Show the Trackio dashboard UI for a project"
+    )
+    ui_parser.add_argument(
+        "--project", required=False, help="Project name to show in the dashboard"
+    )
+    ui_parser.add_argument(
+        "--theme",
+        required=False,
+        default="citrus",
+        help="A Gradio Theme to use for the dashboard instead of the default, can be a built-in theme (e.g. 'soft', 'citrus'), or a theme from the Hub (e.g. 'gstaff/xkcd').",
+    )
+    ui_parser.add_argument(
+        "--mcp-server",
+        action="store_true",
+        help="Enable MCP server functionality. The Trackio dashboard will be set up as an MCP server and certain functions will be exposed as MCP tools.",
+    )
+    args = parser.parse_args()
+    if args.command == "show":
+        show(args.project, args.theme, args.mcp_server)
+    else:
+        parser.print_help()
+if __name__ == "__main__":
+    main()

commit_scheduler.py ADDED Viewed

	@@ -0,0 +1,391 @@

+# Originally copied from https://github.com/huggingface/huggingface_hub/blob/d0a948fc2a32ed6e557042a95ef3e4af97ec4a7c/src/huggingface_hub/_commit_scheduler.py
+import atexit
+import logging
+import os
+import time
+from concurrent.futures import Future
+from dataclasses import dataclass
+from io import SEEK_END, SEEK_SET, BytesIO
+from pathlib import Path
+from threading import Lock, Thread
+from typing import Callable, Dict, List, Union
+from huggingface_hub.hf_api import (
+    DEFAULT_IGNORE_PATTERNS,
+    CommitInfo,
+    CommitOperationAdd,
+    HfApi,
+)
+from huggingface_hub.utils import filter_repo_objects
+logger = logging.getLogger(__name__)
+@dataclass(frozen=True)
+class _FileToUpload:
+    """Temporary dataclass to store info about files to upload. Not meant to be used directly."""
+    # Path of the file inside the watched local folder.
+    local_path: Path
+    # Destination path in the repo: the `path_in_repo` prefix plus the file's
+    # POSIX path relative to the watched folder.
+    path_in_repo: str
+    # File size (`st_size`) at the time the file was listed for upload.
+    size_limit: int
+    # Modification time (`st_mtime`) at the time the file was listed for upload.
+    last_modified: float
+class CommitScheduler:
+    """
+    Scheduler to upload a local folder to the Hub at regular intervals (e.g. push to hub every 5 minutes).
+    The recommended way to use the scheduler is to use it as a context manager. This ensures that the scheduler is
+    properly stopped and the last commit is triggered when the script ends. The scheduler can also be stopped manually
+    with the `stop` method. Checkout the [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#scheduled-uploads)
+    to learn more about how to use it.
+    Args:
+        repo_id (`str`):
+            The id of the repo to commit to.
+        folder_path (`str` or `Path`):
+            Path to the local folder to upload regularly.
+        every (`int` or `float`, *optional*):
+            The number of minutes between each commit. Defaults to 5 minutes.
+        path_in_repo (`str`, *optional*):
+            Relative path of the directory in the repo, for example: `"checkpoints/"`. Defaults to the root folder
+            of the repository.
+        repo_type (`str`, *optional*):
+            The type of the repo to commit to. Defaults to `model`.
+        revision (`str`, *optional*):
+            The revision of the repo to commit to. Defaults to `main`.
+        private (`bool`, *optional*):
+            Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
+        token (`str`, *optional*):
+            The token to use to commit to the repo. Defaults to the token saved on the machine.
+        allow_patterns (`List[str]` or `str`, *optional*):
+            If provided, only files matching at least one pattern are uploaded.
+        ignore_patterns (`List[str]` or `str`, *optional*):
+            If provided, files matching any of the patterns are not uploaded.
+        squash_history (`bool`, *optional*):
+            Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
+            useful to avoid degraded performances on the repo when it grows too large.
+        hf_api (`HfApi`, *optional*):
+            The [`HfApi`] client to use to commit to the Hub. Can be set with custom settings (user agent, token,...).
+        on_before_commit (`Callable[[], None]`, *optional*):
+            If specified, a function that will be called before the CommitScheduler lists files to create a commit.
+    Example:
+    ```py
+    >>> from pathlib import Path
+    >>> from huggingface_hub import CommitScheduler
+    # Scheduler uploads every 10 minutes
+    >>> csv_path = Path("watched_folder/data.csv")
+    >>> CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path=csv_path.parent, every=10)
+    >>> with csv_path.open("a") as f:
+    ...     f.write("first line")
+    # Some time later (...)
+    >>> with csv_path.open("a") as f:
+    ...     f.write("second line")
+    ```
+    Example using a context manager:
+    ```py
+    >>> from pathlib import Path
+    >>> from huggingface_hub import CommitScheduler
+    >>> with CommitScheduler(repo_id="test_scheduler", repo_type="dataset", folder_path="watched_folder", every=10) as scheduler:
+    ...     csv_path = Path("watched_folder/data.csv")
+    ...     with csv_path.open("a") as f:
+    ...         f.write("first line")
+    ...     (...)
+    ...     with csv_path.open("a") as f:
+    ...         f.write("second line")
+    # Scheduler is now stopped and last commit have been triggered
+    ```
+    """
+    def __init__(
+        self,
+        *,
+        repo_id: str,
+        folder_path: Union[str, Path],
+        every: Union[int, float] = 5,
+        path_in_repo: str | None = None,
+        repo_type: str | None = None,
+        revision: str | None = None,
+        private: bool | None = None,
+        token: str | None = None,
+        allow_patterns: list[str] | str | None = None,
+        ignore_patterns: list[str] | str | None = None,
+        squash_history: bool = False,
+        hf_api: HfApi | None = None,
+        on_before_commit: Callable[[], None] | None = None,
+    ) -> None:
+        self.api = hf_api or HfApi(token=token)
+        self.on_before_commit = on_before_commit
+        # Folder
+        self.folder_path = Path(folder_path).expanduser().resolve()
+        self.path_in_repo = path_in_repo or ""
+        self.allow_patterns = allow_patterns
+        if ignore_patterns is None:
+            ignore_patterns = []
+        elif isinstance(ignore_patterns, str):
+            ignore_patterns = [ignore_patterns]
+        self.ignore_patterns = ignore_patterns + DEFAULT_IGNORE_PATTERNS
+        if self.folder_path.is_file():
+            raise ValueError(
+                f"'folder_path' must be a directory, not a file: '{self.folder_path}'."
+            )
+        self.folder_path.mkdir(parents=True, exist_ok=True)
+        # Repository
+        repo_url = self.api.create_repo(
+            repo_id=repo_id, private=private, repo_type=repo_type, exist_ok=True
+        )
+        self.repo_id = repo_url.repo_id
+        self.repo_type = repo_type
+        self.revision = revision
+        self.token = token
+        self.last_uploaded: Dict[Path, float] = {}
+        self.last_push_time: float | None = None
+        if not every > 0:
+            raise ValueError(f"'every' must be a positive integer, not '{every}'.")
+        self.lock = Lock()
+        self.every = every
+        self.squash_history = squash_history
+        logger.info(
+            f"Scheduled job to push '{self.folder_path}' to '{self.repo_id}' every {self.every} minutes."
+        )
+        self._scheduler_thread = Thread(target=self._run_scheduler, daemon=True)
+        self._scheduler_thread.start()
+        atexit.register(self._push_to_hub)
+        self.__stopped = False
+    def stop(self) -> None:
+        """Stop the scheduler.
+        A stopped scheduler cannot be restarted. Mostly for test purposes.
+        Only sets the internal stopped flag: the scheduler loop exits after its
+        current sleep, and later `_push_to_hub` calls return without committing.
+        """
+        self.__stopped = True
+    def __enter__(self) -> "CommitScheduler":
+        """Return the scheduler itself so it can be used in a `with` statement."""
+        return self
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        # Upload last changes before exiting
+        self.trigger().result()
+        self.stop()
+        return
+    def _run_scheduler(self) -> None:
+        """Dumb thread waiting between each scheduled push to Hub."""
+        while True:
+            self.last_future = self.trigger()
+            time.sleep(self.every * 60)
+            if self.__stopped:
+                break
+    def trigger(self) -> Future:
+        """Trigger a `push_to_hub` and return a future.
+        This method is automatically called every `every` minutes. You can also call it manually to trigger a commit
+        immediately, without waiting for the next scheduled commit.
+        """
+        return self.api.run_as_future(self._push_to_hub)
+    def _push_to_hub(self) -> CommitInfo | None:
+        if self.__stopped:  # If stopped, already scheduled commits are ignored
+            return None
+        logger.info("(Background) scheduled commit triggered.")
+        try:
+            value = self.push_to_hub()
+            if self.squash_history:
+                logger.info("(Background) squashing repo history.")
+                self.api.super_squash_history(
+                    repo_id=self.repo_id, repo_type=self.repo_type, branch=self.revision
+                )
+            return value
+        except Exception as e:
+            logger.error(
+                f"Error while pushing to Hub: {e}"
+            )  # Depending on the setup, error might be silenced
+            raise
+    def push_to_hub(self) -> CommitInfo | None:
+        """
+        Push folder to the Hub and return the commit info.
+        <Tip warning={true}>
+        This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
+        queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
+        issues.
+        </Tip>
+        The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder and
+        uploads only changed files. If no changes are found, the method returns without committing anything. If you want
+        to change this behavior, you can inherit from [`CommitScheduler`] and override this method. This can be useful
+        for example to compress data together in a single file before committing. For more details and examples, check
+        out our [integration guide](https://huggingface.co/docs/huggingface_hub/main/en/guides/upload#scheduled-uploads).
+
+        Returns:
+            The value returned by `self.api.create_commit`, or `None` when no file was new or modified since the
+            last successful push.
+        """
+        # Check files to upload (with lock)
+        with self.lock:
+            # The optional `on_before_commit` hook runs while the lock is held, before the folder is scanned.
+            if self.on_before_commit is not None:
+                self.on_before_commit()
+            logger.debug("Listing files to upload for scheduled commit.")
+            # List files from folder (taken from `_prepare_upload_folder_additions`)
+            relpath_to_abspath = {
+                path.relative_to(self.folder_path).as_posix(): path
+                for path in sorted(
+                    self.folder_path.glob("**/*")
+                )  # sorted to be deterministic
+                if path.is_file()
+            }
+            # Repo-side prefix: empty when `path_in_repo` is falsy, otherwise normalized to end with one "/".
+            prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else ""
+            # Filter with pattern + filter out unchanged files + retrieve current file size
+            files_to_upload: List[_FileToUpload] = []
+            for relpath in filter_repo_objects(
+                relpath_to_abspath.keys(),
+                allow_patterns=self.allow_patterns,
+                ignore_patterns=self.ignore_patterns,
+            ):
+                local_path = relpath_to_abspath[relpath]
+                stat = local_path.stat()
+                # `last_uploaded` maps a local path to the mtime recorded at its last successful push;
+                # a missing entry or a different mtime marks the file as needing upload.
+                if (
+                    self.last_uploaded.get(local_path) is None
+                    or self.last_uploaded[local_path] != stat.st_mtime
+                ):
+                    files_to_upload.append(
+                        _FileToUpload(
+                            local_path=local_path,
+                            path_in_repo=prefix + relpath,
+                            size_limit=stat.st_size,
+                            last_modified=stat.st_mtime,
+                        )
+                    )
+        # Return if nothing to upload
+        if len(files_to_upload) == 0:
+            logger.debug("Dropping schedule commit: no changed file to upload.")
+            return None
+        # Convert `_FileToUpload` as `CommitOperationAdd` (=> compute file shas + limit to file size)
+        # NOTE(review): the debug message below looks stale; unchanged files were already filtered out above.
+        logger.debug("Removing unchanged files since previous scheduled commit.")
+        # `size_limit` captured during the scan is not used here: the whole file at `local_path` is
+        # handed to `CommitOperationAdd` (see the TODO below).
+        add_operations = [
+            CommitOperationAdd(
+                # TODO: Cap the file to its current size, even if the user append data to it while a scheduled commit is happening
+                # (requires an upstream fix for XET-535: `hf_xet` should support `BinaryIO` for upload)
+                path_or_fileobj=file_to_upload.local_path,
+                path_in_repo=file_to_upload.path_in_repo,
+            )
+            for file_to_upload in files_to_upload
+        ]
+        # Upload files (append mode expected - no need for lock)
+        logger.debug("Uploading files for scheduled commit.")
+        commit_info = self.api.create_commit(
+            repo_id=self.repo_id,
+            repo_type=self.repo_type,
+            operations=add_operations,
+            commit_message="Scheduled Commit",
+            revision=self.revision,
+        )
+        # Bookkeeping happens only after `create_commit` returned, so an exception raised by the upload
+        # leaves `last_uploaded` untouched and the same files are picked up again on the next call.
+        for file in files_to_upload:
+            self.last_uploaded[file.local_path] = file.last_modified
+        self.last_push_time = time.time()
+        return commit_info
+class PartialFileIO(BytesIO):
+    """A file-like object that reads only the first part of a file.
+    Useful to upload a file to the Hub when the user might still be appending data to it. Only the first part of the
+    file is uploaded (i.e. the part that was available when the filesystem was first scanned).
+    In practice, only used internally by the CommitScheduler to regularly push a folder to the Hub with minimal
+    disturbance for the user. The object is passed to `CommitOperationAdd`.
+    Only supports `read`, `tell` and `seek` methods.
+    Args:
+        file_path (`str` or `Path`):
+            Path to the file to read.
+        size_limit (`int`):
+            The maximum number of bytes to read from the file. If the file is larger than this, only the first part
+            will be read (and uploaded).
+    """
+    def __init__(self, file_path: Union[str, Path], size_limit: int) -> None:
+        self._file_path = Path(file_path)
+        self._file = self._file_path.open("rb")
+        self._size_limit = min(size_limit, os.fstat(self._file.fileno()).st_size)
+    def __del__(self) -> None:
+        self._file.close()
+        return super().__del__()
+    def __repr__(self) -> str:
+        return (
+            f"<PartialFileIO file_path={self._file_path} size_limit={self._size_limit}>"
+        )
+    def __len__(self) -> int:
+        return self._size_limit
+    def __getattribute__(self, name: str):
+        if name.startswith("_") or name in (
+            "read",
+            "tell",
+            "seek",
+        ):  # only 3 public methods supported
+            return super().__getattribute__(name)
+        raise NotImplementedError(f"PartialFileIO does not support '{name}'.")
+    def tell(self) -> int:
+        """Return the current file position."""
+        return self._file.tell()
+    def seek(self, __offset: int, __whence: int = SEEK_SET) -> int:
+        """Change the stream position to the given offset.
+        Behavior is the same as a regular file, except that the position is capped to the size limit.
+        """
+        if __whence == SEEK_END:
+            # SEEK_END => set from the truncated end
+            __offset = len(self) + __offset
+            __whence = SEEK_SET
+        pos = self._file.seek(__offset, __whence)
+        if pos > self._size_limit:
+            return self._file.seek(self._size_limit)
+        return pos
+    def read(self, __size: int | None = -1) -> bytes:
+        """Read at most `__size` bytes from the file.
+        Behavior is the same as a regular file, except that it is capped to the size limit.
+        """
+        current = self._file.tell()
+        if __size is None or __size < 0:
+            # Read until file limit
+            truncated_size = self._size_limit - current
+        else:
+            # Read until file limit or __size
+            truncated_size = min(__size, self._size_limit - current)
+        return self._file.read(truncated_size)

context_vars.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import contextvars
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from trackio.run import Run
+# Context-local state; each variable defaults to None until explicitly set.
+# The run active in the current context ("Run" is imported for type checking only).
+current_run: contextvars.ContextVar["Run | None"] = contextvars.ContextVar(
+    "current_run", default=None
+)
+# Project for the current context, stored as a string.
+current_project: contextvars.ContextVar[str | None] = contextvars.ContextVar(
+    "current_project", default=None
+)
+# Server for the current context, stored as a string (presumably a URL; confirm against callers).
+current_server: contextvars.ContextVar[str | None] = contextvars.ContextVar(
+    "current_server", default=None
+)
+# Share server for the current context, stored as a string (presumably a URL; confirm against callers).
+current_share_server: contextvars.ContextVar[str | None] = contextvars.ContextVar(
+    "current_share_server", default=None
+)

deploy.py ADDED Viewed

	@@ -0,0 +1,258 @@

+import importlib.metadata
+import io
+import os
+import time
+from importlib.resources import files
+from pathlib import Path
+import gradio
+import huggingface_hub
+from gradio_client import Client, handle_file
+from httpx import ReadTimeout
+from huggingface_hub.errors import RepositoryNotFoundError
+from requests import HTTPError
+import trackio
+from trackio.sqlite_storage import SQLiteStorage
+# URL template taking `user_name` and `space_name` placeholders (hf.space host of a Space).
+SPACE_HOST_URL = "https://{user_name}-{space_name}.hf.space/"
+# URL template taking a `space_id` placeholder (the Space page on huggingface.co).
+SPACE_URL = "https://huggingface.co/spaces/{space_id}"
+def _is_trackio_installed_from_source() -> bool:
+    """Check if trackio is installed from source/editable install vs PyPI."""
+    try:
+        trackio_file = trackio.__file__
+        if "site-packages" not in trackio_file:
+            return True
+        dist = importlib.metadata.distribution("trackio")
+        if dist.files:
+            files = list(dist.files)
+            has_pth = any(".pth" in str(f) for f in files)
+            if has_pth:
+                return True
+        return False
+    except (
+        AttributeError,
+        importlib.metadata.PackageNotFoundError,
+        importlib.metadata.MetadataError,
+        ValueError,
+        TypeError,
+    ):
+        return True
+def deploy_as_space(
+    space_id: str,
+    space_storage: huggingface_hub.SpaceStorage | None = None,
+    dataset_id: str | None = None,
+    private: bool | None = None,
+):
+    if (
+        os.getenv("SYSTEM") == "spaces"
+    ):  # in case a repo with this function is uploaded to spaces
+        return
+    trackio_path = files("trackio")
+    hf_api = huggingface_hub.HfApi()
+    try:
+        huggingface_hub.create_repo(
+            space_id,
+            private=private,
+            space_sdk="gradio",
+            space_storage=space_storage,
+            repo_type="space",
+            exist_ok=True,
+        )
+    except HTTPError as e:
+        if e.response.status_code in [401, 403]:  # unauthorized or forbidden
+            print("Need 'write' access token to create a Spaces repo.")
+            huggingface_hub.login(add_to_git_credential=False)
+            huggingface_hub.create_repo(
+                space_id,
+                private=private,
+                space_sdk="gradio",
+                space_storage=space_storage,
+                repo_type="space",
+                exist_ok=True,
+            )
+        else:
+            raise ValueError(f"Failed to create Space: {e}")
+    with open(Path(trackio_path, "README.md"), "r") as f:
+        readme_content = f.read()
+        readme_content = readme_content.replace("{GRADIO_VERSION}", gradio.__version__)
+        readme_buffer = io.BytesIO(readme_content.encode("utf-8"))
+        hf_api.upload_file(
+            path_or_fileobj=readme_buffer,
+            path_in_repo="README.md",
+            repo_id=space_id,
+            repo_type="space",
+        )
+    # We can assume pandas, gradio, and huggingface-hub are already installed in a Gradio Space.
+    # Make sure necessary dependencies are installed by creating a requirements.txt.
+    is_source_install = _is_trackio_installed_from_source()
+    if is_source_install:
+        requirements_content = """pyarrow>=21.0
+plotly>=6.0.0,<7.0.0"""
+    else:
+        requirements_content = f"""pyarrow>=21.0
+trackio=={trackio.__version__}
+plotly>=6.0.0,<7.0.0"""
+    requirements_buffer = io.BytesIO(requirements_content.encode("utf-8"))
+    hf_api.upload_file(
+        path_or_fileobj=requirements_buffer,
+        path_in_repo="requirements.txt",
+        repo_id=space_id,
+        repo_type="space",
+    )
+    huggingface_hub.utils.disable_progress_bars()
+    if is_source_install:
+        hf_api.upload_folder(
+            repo_id=space_id,
+            repo_type="space",
+            folder_path=trackio_path,
+            ignore_patterns=["README.md"],
+        )
+    else:
+        app_file_content = """import trackio
+trackio.show()"""
+        app_file_buffer = io.BytesIO(app_file_content.encode("utf-8"))
+        hf_api.upload_file(
+            path_or_fileobj=app_file_buffer,
+            path_in_repo="ui/main.py",
+            repo_id=space_id,
+            repo_type="space",
+        )
+    if hf_token := huggingface_hub.utils.get_token():
+        huggingface_hub.add_space_secret(space_id, "HF_TOKEN", hf_token)
+    if dataset_id is not None:
+        huggingface_hub.add_space_variable(space_id, "TRACKIO_DATASET_ID", dataset_id)
+    if logo_light_url := os.environ.get("TRACKIO_LOGO_LIGHT_URL"):
+        huggingface_hub.add_space_variable(
+            space_id, "TRACKIO_LOGO_LIGHT_URL", logo_light_url
+        )
+    if logo_dark_url := os.environ.get("TRACKIO_LOGO_DARK_URL"):
+        huggingface_hub.add_space_variable(
+            space_id, "TRACKIO_LOGO_DARK_URL", logo_dark_url
+        )
+    if plot_order := os.environ.get("TRACKIO_PLOT_ORDER"):
+        huggingface_hub.add_space_variable(space_id, "TRACKIO_PLOT_ORDER", plot_order)
+    if theme := os.environ.get("TRACKIO_THEME"):
+        huggingface_hub.add_space_variable(space_id, "TRACKIO_THEME", theme)
+def create_space_if_not_exists(
+    space_id: str,
+    space_storage: huggingface_hub.SpaceStorage | None = None,
+    dataset_id: str | None = None,
+    private: bool | None = None,
+) -> None:
+    """
+    Creates a new Hugging Face Space if it does not exist. If a dataset_id is provided, it will be added as a space variable.
+    Args:
+        space_id: The ID of the Space to create.
+        dataset_id: The ID of the Dataset to add to the Space.
+        private: Whether to make the Space private. If None (default), the repo will be
+          public unless the organization's default is private. This value is ignored if
+          the repo already exists.
+    """
+    if "/" not in space_id:
+        raise ValueError(
+            f"Invalid space ID: {space_id}. Must be in the format: username/reponame or orgname/reponame."
+        )
+    if dataset_id is not None and "/" not in dataset_id:
+        raise ValueError(
+            f"Invalid dataset ID: {dataset_id}. Must be in the format: username/datasetname or orgname/datasetname."
+        )
+    try:
+        huggingface_hub.repo_info(space_id, repo_type="space")
+        print(f"* Found existing space: {SPACE_URL.format(space_id=space_id)}")
+        if dataset_id is not None:
+            huggingface_hub.add_space_variable(
+                space_id, "TRACKIO_DATASET_ID", dataset_id
+            )
+        if logo_light_url := os.environ.get("TRACKIO_LOGO_LIGHT_URL"):
+            huggingface_hub.add_space_variable(
+                space_id, "TRACKIO_LOGO_LIGHT_URL", logo_light_url
+            )
+        if logo_dark_url := os.environ.get("TRACKIO_LOGO_DARK_URL"):
+            huggingface_hub.add_space_variable(
+                space_id, "TRACKIO_LOGO_DARK_URL", logo_dark_url
+            )
+        if plot_order := os.environ.get("TRACKIO_PLOT_ORDER"):
+            huggingface_hub.add_space_variable(
+                space_id, "TRACKIO_PLOT_ORDER", plot_order
+            )
+        if theme := os.environ.get("TRACKIO_THEME"):
+            huggingface_hub.add_space_variable(space_id, "TRACKIO_THEME", theme)
+        return
+    except RepositoryNotFoundError:
+        pass
+    except HTTPError as e:
+        if e.response.status_code in [401, 403]:  # unauthorized or forbidden
+            print("Need 'write' access token to create a Spaces repo.")
+            huggingface_hub.login(add_to_git_credential=False)
+            huggingface_hub.add_space_variable(
+                space_id, "TRACKIO_DATASET_ID", dataset_id
+            )
+        else:
+            raise ValueError(f"Failed to create Space: {e}")
+    print(f"* Creating new space: {SPACE_URL.format(space_id=space_id)}")
+    deploy_as_space(space_id, space_storage, dataset_id, private)
+def wait_until_space_exists(
+    space_id: str,
+) -> None:
+    """
+    Blocks the current thread until the space exists.
+    May raise a TimeoutError if this takes quite a while.
+    Args:
+        space_id: The ID of the Space to wait for.
+    """
+    delay = 1
+    for _ in range(10):
+        try:
+            Client(space_id, verbose=False)
+            return
+        except (ReadTimeout, ValueError):
+            time.sleep(delay)
+            delay = min(delay * 2, 30)
+    raise TimeoutError("Waiting for space to exist took longer than expected")
+def upload_db_to_space(project: str, space_id: str) -> None:
+    """
+    Uploads the database of a local Trackio project to a Hugging Face Space.
+    Args:
+        project: The name of the project to upload.
+        space_id: The ID of the Space to upload to.
+    """
+    db_path = SQLiteStorage.get_project_db_path(project)
+    client = Client(space_id, verbose=False)
+    client.predict(
+        api_name="/upload_db_to_space",
+        project=project,
+        uploaded_db=handle_file(db_path),
+        hf_token=huggingface_hub.utils.get_token(),
+    )

dummy_commit_scheduler.py ADDED Viewed

	@@ -0,0 +1,12 @@

+# A dummy object to fit the interface of huggingface_hub's CommitScheduler
+class DummyCommitSchedulerLock:
+    def __enter__(self):
+        return None
+    def __exit__(self, exception_type, exception_value, exception_traceback):
+        pass
+class DummyCommitScheduler:
+    def __init__(self):
+        self.lock = DummyCommitSchedulerLock()

file_storage.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from pathlib import Path
+try:  # absolute imports when installed
+    from trackio.utils import MEDIA_DIR
+except ImportError:  # relative imports for local execution on Spaces
+    from utils import MEDIA_DIR
+class FileStorage:
+    @staticmethod
+    def get_project_media_path(
+        project: str,
+        run: str | None = None,
+        step: int | None = None,
+        filename: str | None = None,
+    ) -> Path:
+        if filename is not None and step is None:
+            raise ValueError("filename requires step")
+        if step is not None and run is None:
+            raise ValueError("step requires run")
+        path = MEDIA_DIR / project
+        if run:
+            path /= run
+        if step is not None:
+            path /= str(step)
+        if filename:
+            path /= filename
+        return path
+    @staticmethod
+    def init_project_media_path(
+        project: str, run: str | None = None, step: int | None = None
+    ) -> Path:
+        path = FileStorage.get_project_media_path(project, run, step)
+        path.mkdir(parents=True, exist_ok=True)
+        return path

histogram.py ADDED Viewed

	@@ -0,0 +1,68 @@

+from typing import Any
+import numpy as np
+class Histogram:
+    """
+    Histogram data type for Trackio, compatible with wandb.Histogram.
+    Example:
+        ```python
+        import trackio
+        import numpy as np
+        # Create histogram from sequence
+        data = np.random.randn(1000)
+        trackio.log({"distribution": trackio.Histogram(data)})
+        # Create histogram from numpy histogram
+        hist, bins = np.histogram(data, bins=30)
+        trackio.log({"distribution": trackio.Histogram(np_histogram=(hist, bins))})
+        # Specify custom number of bins
+        trackio.log({"distribution": trackio.Histogram(data, num_bins=50)})
+        ```
+    Args:
+        sequence: Optional sequence of values to create histogram from
+        np_histogram: Optional pre-computed numpy histogram (hist, bins) tuple
+        num_bins: Number of bins for the histogram (default 64, max 512)
+    """
+    TYPE = "trackio.histogram"
+    def __init__(
+        self,
+        sequence: Any = None,
+        np_histogram: tuple | None = None,
+        num_bins: int = 64,
+    ):
+        if sequence is None and np_histogram is None:
+            raise ValueError("Must provide either sequence or np_histogram")
+        if sequence is not None and np_histogram is not None:
+            raise ValueError("Cannot provide both sequence and np_histogram")
+        num_bins = min(num_bins, 512)
+        if np_histogram is not None:
+            self.histogram, self.bins = np_histogram
+            self.histogram = np.asarray(self.histogram)
+            self.bins = np.asarray(self.bins)
+        else:
+            data = np.asarray(sequence).flatten()
+            data = data[np.isfinite(data)]
+            if len(data) == 0:
+                self.histogram = np.array([])
+                self.bins = np.array([])
+            else:
+                self.histogram, self.bins = np.histogram(data, bins=num_bins)
+    def _to_dict(self) -> dict:
+        """Convert histogram to dictionary for storage."""
+        return {
+            "_type": self.TYPE,
+            "bins": self.bins.tolist(),
+            "values": self.histogram.tolist(),
+        }

imports.py ADDED Viewed

	@@ -0,0 +1,302 @@

+import os
+from pathlib import Path
+import pandas as pd
+from trackio import deploy, utils
+from trackio.sqlite_storage import SQLiteStorage
+def import_csv(
+    csv_path: str | Path,
+    project: str,
+    name: str | None = None,
+    space_id: str | None = None,
+    dataset_id: str | None = None,
+    private: bool | None = None,
+) -> None:
+    """
+    Imports a CSV file into a Trackio project. The CSV file must contain a `"step"`
+    column, may optionally contain a `"timestamp"` column, and any other columns will be
+    treated as metrics. It should also include a header row with the column names.
+    TODO: call init() and return a Run object so that the user can continue to log metrics to it.
+    Args:
+        csv_path (`str` or `Path`):
+            The str or Path to the CSV file to import.
+        project (`str`):
+            The name of the project to import the CSV file into. Must not be an existing
+            project.
+        name (`str`, *optional*):
+            The name of the Run to import the CSV file into. If not provided, a default
+            name will be generated.
+        name (`str`, *optional*):
+            The name of the run (if not provided, a default name will be generated).
+        space_id (`str`, *optional*):
+            If provided, the project will be logged to a Hugging Face Space instead of a
+            local directory. Should be a complete Space name like `"username/reponame"`
+            or `"orgname/reponame"`, or just `"reponame"` in which case the Space will
+            be created in the currently-logged-in Hugging Face user's namespace. If the
+            Space does not exist, it will be created. If the Space already exists, the
+            project will be logged to it.
+        dataset_id (`str`, *optional*):
+            If provided, a persistent Hugging Face Dataset will be created and the
+            metrics will be synced to it every 5 minutes. Should be a complete Dataset
+            name like `"username/datasetname"` or `"orgname/datasetname"`, or just
+            `"datasetname"` in which case the Dataset will be created in the
+            currently-logged-in Hugging Face user's namespace. If the Dataset does not
+            exist, it will be created. If the Dataset already exists, the project will
+            be appended to it. If not provided, the metrics will be logged to a local
+            SQLite database, unless a `space_id` is provided, in which case a Dataset
+            will be automatically created with the same name as the Space but with the
+            `"_dataset"` suffix.
+        private (`bool`, *optional*):
+            Whether to make the Space private. If None (default), the repo will be
+            public unless the organization's default is private. This value is ignored
+            if the repo already exists.
+    """
+    if SQLiteStorage.get_runs(project):
+        raise ValueError(
+            f"Project '{project}' already exists. Cannot import CSV into existing project."
+        )
+    csv_path = Path(csv_path)
+    if not csv_path.exists():
+        raise FileNotFoundError(f"CSV file not found: {csv_path}")
+    df = pd.read_csv(csv_path)
+    if df.empty:
+        raise ValueError("CSV file is empty")
+    column_mapping = utils.simplify_column_names(df.columns.tolist())
+    df = df.rename(columns=column_mapping)
+    step_column = None
+    for col in df.columns:
+        if col.lower() == "step":
+            step_column = col
+            break
+    if step_column is None:
+        raise ValueError("CSV file must contain a 'step' or 'Step' column")
+    if name is None:
+        name = csv_path.stem
+    metrics_list = []
+    steps = []
+    timestamps = []
+    numeric_columns = []
+    for column in df.columns:
+        if column == step_column:
+            continue
+        if column == "timestamp":
+            continue
+        try:
+            pd.to_numeric(df[column], errors="raise")
+            numeric_columns.append(column)
+        except (ValueError, TypeError):
+            continue
+    for _, row in df.iterrows():
+        metrics = {}
+        for column in numeric_columns:
+            value = row[column]
+            if bool(pd.notna(value)):
+                metrics[column] = float(value)
+        if metrics:
+            metrics_list.append(metrics)
+            steps.append(int(row[step_column]))
+            if "timestamp" in df.columns and bool(pd.notna(row["timestamp"])):
+                timestamps.append(str(row["timestamp"]))
+            else:
+                timestamps.append("")
+    if metrics_list:
+        SQLiteStorage.bulk_log(
+            project=project,
+            run=name,
+            metrics_list=metrics_list,
+            steps=steps,
+            timestamps=timestamps,
+        )
+    print(
+        f"* Imported {len(metrics_list)} rows from {csv_path} into project '{project}' as run '{name}'"
+    )
+    print(f"* Metrics found: {', '.join(metrics_list[0].keys())}")
+    space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)
+    if dataset_id is not None:
+        os.environ["TRACKIO_DATASET_ID"] = dataset_id
+        print(f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}")
+    if space_id is None:
+        utils.print_dashboard_instructions(project)
+    else:
+        deploy.create_space_if_not_exists(
+            space_id=space_id, dataset_id=dataset_id, private=private
+        )
+        deploy.wait_until_space_exists(space_id=space_id)
+        deploy.upload_db_to_space(project=project, space_id=space_id)
+        print(
+            f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
+        )
+def import_tf_events(
+    log_dir: str | Path,
+    project: str,
+    name: str | None = None,
+    space_id: str | None = None,
+    dataset_id: str | None = None,
+    private: bool | None = None,
+) -> None:
+    """
+    Imports TensorFlow Events files from a directory into a Trackio project. Each
+    subdirectory in the log directory will be imported as a separate run.
+    Args:
+        log_dir (`str` or `Path`):
+            The str or Path to the directory containing TensorFlow Events files.
+        project (`str`):
+            The name of the project to import the TensorFlow Events files into. Must not
+            be an existing project.
+        name (`str`, *optional*):
+            The name prefix for runs (if not provided, will use directory names). Each
+            subdirectory will create a separate run.
+        space_id (`str`, *optional*):
+            If provided, the project will be logged to a Hugging Face Space instead of a
+            local directory. Should be a complete Space name like `"username/reponame"`
+            or `"orgname/reponame"`, or just `"reponame"` in which case the Space will
+            be created in the currently-logged-in Hugging Face user's namespace. If the
+            Space does not exist, it will be created. If the Space already exists, the
+            project will be logged to it.
+        dataset_id (`str`, *optional*):
+            If provided, a persistent Hugging Face Dataset will be created and the
+            metrics will be synced to it every 5 minutes. Should be a complete Dataset
+            name like `"username/datasetname"` or `"orgname/datasetname"`, or just
+            `"datasetname"` in which case the Dataset will be created in the
+            currently-logged-in Hugging Face user's namespace. If the Dataset does not
+            exist, it will be created. If the Dataset already exists, the project will
+            be appended to it. If not provided, the metrics will be logged to a local
+            SQLite database, unless a `space_id` is provided, in which case a Dataset
+            will be automatically created with the same name as the Space but with the
+            `"_dataset"` suffix.
+        private (`bool`, *optional*):
+            Whether to make the Space private. If None (default), the repo will be
+            public unless the organization's default is private. This value is ignored
+            if the repo already exists.
+    """
+    try:
+        from tbparse import SummaryReader
+    except ImportError:
+        raise ImportError(
+            "The `tbparse` package is not installed but is required for `import_tf_events`. Please install trackio with the `tensorboard` extra: `pip install trackio[tensorboard]`."
+        )
+    if SQLiteStorage.get_runs(project):
+        raise ValueError(
+            f"Project '{project}' already exists. Cannot import TF events into existing project."
+        )
+    path = Path(log_dir)
+    if not path.exists():
+        raise FileNotFoundError(f"TF events directory not found: {path}")
+    # Use tbparse to read all tfevents files in the directory structure
+    reader = SummaryReader(str(path), extra_columns={"dir_name"})
+    df = reader.scalars
+    if df.empty:
+        raise ValueError(f"No TensorFlow events data found in {path}")
+    total_imported = 0
+    imported_runs = []
+    # Group by dir_name to create separate runs
+    for dir_name, group_df in df.groupby("dir_name"):
+        try:
+            # Determine run name based on directory name
+            if dir_name == "":
+                run_name = "main"  # For files in the root directory
+            else:
+                run_name = dir_name  # Use directory name
+            if name:
+                run_name = f"{name}_{run_name}"
+            if group_df.empty:
+                print(f"* Skipping directory {dir_name}: no scalar data found")
+                continue
+            metrics_list = []
+            steps = []
+            timestamps = []
+            for _, row in group_df.iterrows():
+                # Convert row values to appropriate types
+                tag = str(row["tag"])
+                value = float(row["value"])
+                step = int(row["step"])
+                metrics = {tag: value}
+                metrics_list.append(metrics)
+                steps.append(step)
+                # Use wall_time if present, else fallback
+                if "wall_time" in group_df.columns and not bool(
+                    pd.isna(row["wall_time"])
+                ):
+                    timestamps.append(str(row["wall_time"]))
+                else:
+                    timestamps.append("")
+            if metrics_list:
+                SQLiteStorage.bulk_log(
+                    project=project,
+                    run=str(run_name),
+                    metrics_list=metrics_list,
+                    steps=steps,
+                    timestamps=timestamps,
+                )
+                total_imported += len(metrics_list)
+                imported_runs.append(run_name)
+                print(
+                    f"* Imported {len(metrics_list)} scalar events from directory '{dir_name}' as run '{run_name}'"
+                )
+                print(f"* Metrics in this run: {', '.join(set(group_df['tag']))}")
+        except Exception as e:
+            print(f"* Error processing directory {dir_name}: {e}")
+            continue
+    if not imported_runs:
+        raise ValueError("No valid TensorFlow events data could be imported")
+    print(f"* Total imported events: {total_imported}")
+    print(f"* Created runs: {', '.join(imported_runs)}")
+    space_id, dataset_id = utils.preprocess_space_and_dataset_ids(space_id, dataset_id)
+    if dataset_id is not None:
+        os.environ["TRACKIO_DATASET_ID"] = dataset_id
+        print(f"* Trackio metrics will be synced to Hugging Face Dataset: {dataset_id}")
+    if space_id is None:
+        utils.print_dashboard_instructions(project)
+    else:
+        deploy.create_space_if_not_exists(
+            space_id, dataset_id=dataset_id, private=private
+        )
+        deploy.wait_until_space_exists(space_id)
+        deploy.upload_db_to_space(project, space_id)
+        print(
+            f"* View dashboard by going to: {deploy.SPACE_URL.format(space_id=space_id)}"
+        )