File size: 3,646 Bytes
e8ea372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import boto3
import os
import re
import json
from pathlib import Path
import sqlite3
from huggingface_hub import Repository, HfFolder
import tqdm
import subprocess

from fastapi import FastAPI
from fastapi_utils.tasks import repeat_every


# AWS credentials and the target bucket come from the environment; each value
# is None when its variable is unset.
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_KEY = os.getenv('AWS_SECRET_KEY')
AWS_S3_BUCKET_NAME = os.getenv('AWS_S3_BUCKET_NAME')

# Module-wide S3 client built once at import time with the credentials above.
s3 = boto3.client(service_name='s3',
                  aws_access_key_id=AWS_ACCESS_KEY_ID,
                  aws_secret_access_key=AWS_SECRET_KEY)


# Shared paginator for list_objects_v2, used to walk each room's key prefix.
paginator = s3.get_paginator('list_objects_v2')


# Local directory of the dataset checkout; the SQLite file lives inside it.
S3_DATA_FOLDER = Path("sd-multiplayer-data")
ROOMS_DATA_DB = S3_DATA_FOLDER / "rooms_data.db"


# Working copy of the "huggingface-projects/sd-multiplayer-data" dataset repo
# in S3_DATA_FOLDER, pulled once at import time.
# NOTE(review): use_auth_token=True presumably picks up the locally stored
# Hugging Face token — confirm against the huggingface_hub documentation.
repo = Repository(
    local_dir=S3_DATA_FOLDER,
    repo_type="dataset",
    clone_from="huggingface-projects/sd-multiplayer-data",
    use_auth_token=True,
)
repo.git_pull()


# Only when the database file is missing: create it and apply schema.sql.
# schema.sql is a relative path, so it is resolved against the current
# working directory.
if not ROOMS_DATA_DB.exists():
    print("Creating database")
    print("ROOMS_DATA_DB", ROOMS_DATA_DB)
    db = sqlite3.connect(ROOMS_DATA_DB)
    with open(Path("schema.sql"), "r") as f:
        db.executescript(f.read())
    db.commit()
    db.close()


def get_db(db_path):
    """Yield a single SQLite connection to *db_path*, closing it afterwards.

    The connection is opened with ``check_same_thread=False`` and uses
    ``sqlite3.Row`` as its row factory, so columns can be read by name.

    Args:
        db_path: Path (or ``":memory:"``) passed to ``sqlite3.connect``.

    Yields:
        The open ``sqlite3.Connection``.

    Raises:
        Exception: Any exception thrown into the generator at the ``yield``
            is re-raised after the pending transaction has been rolled back.
    """
    db = sqlite3.connect(db_path, check_same_thread=False)
    db.row_factory = sqlite3.Row
    try:
        yield db
    except Exception:
        db.rollback()
        # Re-raise so the caller sees the failure instead of it being
        # silently swallowed after the rollback.
        raise
    finally:
        db.close()


def _parse_room_object_key(key):
    """Split an S3 object key into ``(uuid, x, y, prompt)``.

    The key is tokenised on ``-``, ``/`` and ``.``. Token 3 is taken as the
    uuid and token 4 as an ``x_y`` integer pair. The prompt is tokens 4 onward
    joined with single spaces.

    Raises:
        IndexError, ValueError, TypeError: when the key does not have that
            shape (too few tokens, non-integer coordinates, or not a string).
    """
    tokens = re.split(r'[-/.]', key)
    uuid = tokens[3]
    x, y = [int(n) for n in re.split(r'[_]', tokens[4])]
    # NOTE(review): the slice starts at token 4, so the prompt also contains
    # the coordinate token and every trailing token — confirm whether that is
    # intended before changing it, since stored rows already use this form.
    prompt = ' '.join(tokens[4:])
    return uuid, x, y, prompt


def sync_rooms_to_dataset():
    """Mirror each room's S3 object listing into SQLite and a JSON file.

    For every row of the ``rooms`` table this lists the bucket objects under
    ``"<room_id>/"``, inserts one ``rooms_data`` row per parseable object,
    commits, and then dumps all ``rooms_data`` rows of that room to
    ``S3_DATA_FOLDER / "<room_id>.json"``. Finally it launches a shell
    command that amends the last commit of the dataset checkout and
    force-pushes it.

    Objects whose key cannot be parsed, or whose insert fails, are reported
    with ``print`` and skipped.
    """
    for room_data_db in get_db(ROOMS_DATA_DB):
        rooms = room_data_db.execute("SELECT * FROM rooms").fetchall()
        cursor = room_data_db.cursor()
        for row in tqdm.tqdm(rooms):
            room_id = row["room_id"]
            print("syncing room data: ", room_id)

            pages = paginator.paginate(
                Bucket=AWS_S3_BUCKET_NAME, Prefix=f'{room_id}/', Delimiter='/')
            for page in pages:
                # 'Contents' is missing from a page that lists no objects;
                # fall back to an empty list so an empty room does not abort
                # the whole sync with a TypeError.
                for obj in page.get('Contents') or []:
                    try:
                        key = obj.get('Key')
                        modified_at = obj.get('LastModified').isoformat()
                        uuid, x, y, prompt = _parse_room_object_key(key)
                        # NOTE(review): every run inserts each listed object
                        # again; whether that duplicates rows depends on the
                        # constraints in schema.sql, which is not visible here.
                        cursor.execute(
                            'INSERT INTO rooms_data VALUES (NULL, ?, ?, ?, ?, ?, ?, ?)',
                            (room_id, uuid, x, y, prompt, modified_at, key))
                    except Exception as e:
                        # Best effort: report the bad object and carry on.
                        print(e)
                        continue
            room_data_db.commit()

            all_rows = [dict(row) for row in room_data_db.execute(
                "SELECT * FROM rooms_data WHERE room_id = ?", (room_id,)).fetchall()]
            data_path = S3_DATA_FOLDER / f"{room_id}.json"
            with open(data_path, 'w') as f:
                json.dump(all_rows, f, separators=(',', ':'))
    print("Updating repository")
    # Popen returns immediately: the git commands run in the background and
    # their exit status is not checked here.
    subprocess.Popen(
        "git add . && git commit --amend -m 'update' && git push --force", cwd=S3_DATA_FOLDER, shell=True)


# FastAPI application object; the route handlers and the startup task defined
# below register themselves on it.
app = FastAPI()


@app.get("/")
def read_root():
    """Return a fixed description string for the root path."""
    description = "Just a bot to sync data to huggingface datasets and tweet tha latest data"
    return description


@app.get("/sync")
def sync():
    """Run one sync on demand and return a fixed confirmation string."""
    confirmation = "Synced data to huggingface datasets"
    sync_rooms_to_dataset()
    return confirmation


@app.on_event("startup")
@repeat_every(seconds=1800)
def repeat_sync():
    """Periodic sync task.

    Registered as a startup handler and wrapped by ``repeat_every`` with an
    1800-second interval; each run performs one full sync.
    """
    confirmation = "Synced data to huggingface datasets"
    sync_rooms_to_dataset()
    return confirmation