File size: 2,251 Bytes
8b5be8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import json
import argparse
from pathlib import Path
import chromadb
from chromadb.config import Settings


def import_collection(client, json_file: Path, include_embeddings=False, *, batch_size=5000):
    """Import one JSON export file into a ChromaDB collection.

    The collection is named after the file's stem (``docs.json`` -> ``docs``)
    and is created if it does not exist yet. The file must contain a JSON
    array of objects; each object needs an ``"id"`` key and may carry
    ``"document"``, ``"metadata"`` and ``"embedding"`` keys. Missing optional
    keys are forwarded as ``None``.

    Args:
        client: ChromaDB client exposing ``get_or_create_collection``.
        json_file: Path of the UTF-8 encoded JSON export to load.
        include_embeddings: When true, forward the stored ``"embedding"``
            values; otherwise ``embeddings=None`` is passed to ``add``.
        batch_size: Maximum number of records sent per ``collection.add``
            call. Keeps large exports under ChromaDB's per-call batch limit
            (the exact limit depends on the installation; lower this value
            if ``add`` still reports an oversized batch).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or the file's
            top-level JSON value is not an array.
        KeyError: If a record has no ``"id"`` key.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    collection_name = json_file.stem
    print(f"📥 Importing {collection_name} from {json_file}")

    # Load JSON
    data = json.loads(json_file.read_text(encoding="utf-8"))
    if not isinstance(data, list):
        # Fail early with a readable message instead of an opaque TypeError
        # from the field extraction below.
        raise ValueError(
            f"{json_file} must contain a JSON array of records, "
            f"got {type(data).__name__}"
        )

    # Extract fields as parallel lists (one entry per record)
    ids = [item["id"] for item in data]
    documents = [item.get("document") for item in data]
    metadatas = [item.get("metadata") for item in data]
    embeddings = [item.get("embedding") for item in data] if include_embeddings else None

    # Create or get collection (done even for an empty export, so an empty
    # collection is still recreated on the target side)
    collection = client.get_or_create_collection(collection_name)

    # Add in slices. For an empty export the loop body never runs, so add()
    # is never called with empty lists (which ChromaDB is expected to reject).
    for start in range(0, len(ids), batch_size):
        stop = start + batch_size
        collection.add(
            ids=ids[start:stop],
            documents=documents[start:stop],
            metadatas=metadatas[start:stop],
            embeddings=None if embeddings is None else embeddings[start:stop],
        )

    print(f"✔ Imported {len(ids)} items into {collection_name}")


def main(argv=None):
    """Parse command-line options and import every JSON export into ChromaDB.

    Each ``*.json`` file directly inside the input folder is passed to
    ``import_collection`` in sorted (deterministic) order.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read the process arguments, as before.

    Raises:
        SystemExit: If the arguments are invalid (raised by argparse) or the
            input folder does not exist / is not a directory. In the latter
            case the message is written to stderr and the exit status is
            non-zero, so shell pipelines and CI jobs notice the failure.
    """
    parser = argparse.ArgumentParser(description="Import JSON files into ChromaDB collections.")
    parser.add_argument(
        "--db-path",
        type=str,
        required=True,
        help="Path to the target chromadb_store folder",
    )
    parser.add_argument(
        "--input",
        type=str,
        default="chroma_exports",
        help="Folder containing JSON files to import",
    )
    parser.add_argument(
        "--include-embeddings",
        action="store_true",
        help="Load embeddings from JSON (off by default)",
    )

    args = parser.parse_args(argv)

    db_path = Path(args.db_path).expanduser().resolve()
    input_dir = Path(args.input).expanduser().resolve()
    # is_dir() also rejects a regular file passed as --input.
    if not input_dir.is_dir():
        raise SystemExit(f"❌ Input folder does not exist: {input_dir}")

    # Collect the work list before touching the database so that an empty
    # input folder neither creates a store nor reports a false success.
    json_files = sorted(input_dir.glob("*.json"))
    if not json_files:
        print(f"⚠️ No JSON files found in {input_dir}")
        return

    # Connect to ChromaDB
    client = chromadb.PersistentClient(
        path=str(db_path),
        settings=Settings(anonymized_telemetry=False),
    )

    # Import each JSON file into its own collection
    for json_file in json_files:
        import_collection(client, json_file, args.include_embeddings)

    print("\n🎉 All JSON files imported!")


if __name__ == "__main__":
    # Script entry point: run the importer only when this file is executed
    # directly, never as a side effect of importing it.
    raise SystemExit(main())