Spaces:
Runtime error
Runtime error
File size: 3,646 Bytes
e8ea372 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
import boto3
import os
import re
import json
from pathlib import Path
import sqlite3
from huggingface_hub import Repository, HfFolder
import tqdm
import subprocess
from fastapi import FastAPI
from fastapi_utils.tasks import repeat_every
AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_KEY = os.getenv('AWS_SECRET_KEY')
AWS_S3_BUCKET_NAME = os.getenv('AWS_S3_BUCKET_NAME')
s3 = boto3.client(service_name='s3',
aws_access_key_id=AWS_ACCESS_KEY_ID,
aws_secret_access_key=AWS_SECRET_KEY)
paginator = s3.get_paginator('list_objects_v2')
S3_DATA_FOLDER = Path("sd-multiplayer-data")
ROOMS_DATA_DB = S3_DATA_FOLDER / "rooms_data.db"
repo = Repository(
local_dir=S3_DATA_FOLDER,
repo_type="dataset",
clone_from="huggingface-projects/sd-multiplayer-data",
use_auth_token=True,
)
repo.git_pull()
if not ROOMS_DATA_DB.exists():
print("Creating database")
print("ROOMS_DATA_DB", ROOMS_DATA_DB)
db = sqlite3.connect(ROOMS_DATA_DB)
with open(Path("schema.sql"), "r") as f:
db.executescript(f.read())
db.commit()
db.close()
def get_db(db_path):
db = sqlite3.connect(db_path, check_same_thread=False)
db.row_factory = sqlite3.Row
try:
yield db
except Exception:
db.rollback()
finally:
db.close()
def sync_rooms_to_dataset():
for room_data_db in get_db(ROOMS_DATA_DB):
rooms = room_data_db.execute("SELECT * FROM rooms").fetchall()
cursor = room_data_db.cursor()
for row in tqdm.tqdm(rooms):
room_id = row["room_id"]
print("syncing room data: ", room_id)
objects = []
for result in paginator.paginate(Bucket=AWS_S3_BUCKET_NAME, Prefix=f'{room_id}/', Delimiter='/'):
results = []
for obj in result.get('Contents'):
try:
key = obj.get('Key')
time = obj.get('LastModified').isoformat()
split_str = re.split(r'[-/.]', key)
uuid = split_str[3]
x, y = [int(n)
for n in re.split(r'[_]', split_str[4])]
prompt = ' '.join(split_str[4:])
results.append(
{'x': x, 'y': y, 'prompt': prompt, 'time': time, 'key': key, 'uuid': uuid})
cursor.execute(
'INSERT INTO rooms_data VALUES (NULL, ?, ?, ?, ?, ?, ?, ?)', (room_id, uuid, x, y, prompt, time, key))
except Exception as e:
print(e)
continue
objects += results
room_data_db.commit()
all_rows = [dict(row) for row in room_data_db.execute(
"SELECT * FROM rooms_data WHERE room_id = ?", (room_id,)).fetchall()]
data_path = S3_DATA_FOLDER / f"{room_id}.json"
with open(data_path, 'w') as f:
json.dump(all_rows, f, separators=(',', ':'))
print("Updating repository")
subprocess.Popen(
"git add . && git commit --amend -m 'update' && git push --force", cwd=S3_DATA_FOLDER, shell=True)
app = FastAPI()
@app.get("/")
def read_root():
return "Just a bot to sync data to huggingface datasets and tweet tha latest data"
@app.get("/sync")
def sync():
sync_rooms_to_dataset()
return "Synced data to huggingface datasets"
@app.on_event("startup")
@repeat_every(seconds=1800)
def repeat_sync():
sync_rooms_to_dataset()
return "Synced data to huggingface datasets" |