# reader / Dockerfile
# yangchn's picture
# Update Dockerfile
# 2a70d44 verified
# Base image for the Reader application.
# NOTE(review): this is a moving "latest" tag; pin a specific version or digest
# once a known-good one is identified so rebuilds are reproducible.
FROM hectorqin/reader:openj9-latest

# Root is needed for the package installation and directory setup below.
USER root

# socat: port forwarding.
# python3/pip + huggingface_hub: upload/download of the backup file in a dataset repo.
# --no-install-recommends and removing the apt lists in the same layer keep
# the resulting layer small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
        socat && \
    rm -rf /var/lib/apt/lists/* && \
    pip3 install --no-cache-dir huggingface_hub

# Create the storage and log directories and make them world-writable
# (presumably so a non-root runtime user can write to them - confirm).
RUN mkdir -p /storage /app/bin/logs && chmod -R 777 /storage /app/bin/logs

# Port on which the socat forwarder started by /app/run.sh listens.
EXPOSE 7860
# Generate the container entrypoint script. The heredoc form of RUN requires
# BuildKit; the quoted delimiter ('SH') writes the script text verbatim, with
# no expansion at build time.
RUN cat > /app/run.sh <<'SH'
#!/bin/sh
# Entrypoint script: prepares the storage directory, then (further below)
# restores a backup, starts the periodic backup loop, the port forwarder and
# Reader itself.
set -u

# Settings; each one can be overridden through the container environment.
: "${BACKUP_REPO:=yourname/reader-backup}"
: "${BACKUP_FILENAME:=reader_backup_min.tgz}"
: "${BACKUP_INTERVAL:=1800}" # once every 30 minutes; do not back up too often
: "${STORAGE_DIR:=/storage}"
# ':=' only assigns shell variables. Export them so that child processes
# (the embedded python3 helpers read them from os.environ) also see the
# values when the defaults above are in effect.
export BACKUP_REPO BACKUP_FILENAME BACKUP_INTERVAL STORAGE_DIR

echo "==== Reader minimal backup via HF Dataset ===="
echo "BACKUP_REPO=$BACKUP_REPO"
echo "BACKUP_FILENAME=$BACKUP_FILENAME"
echo "BACKUP_INTERVAL=$BACKUP_INTERVAL"
echo "STORAGE_DIR=$STORAGE_DIR"

# Make sure the storage directory exists and is writable; a chmod failure
# is tolerated on purpose (best effort).
mkdir -p "$STORAGE_DIR"
chmod -R 777 "$STORAGE_DIR" || true
# 1) On startup: restore from the dataset repo (if a backup exists).
echo "[1/3] Try restore from dataset..."
# The Python helper writes the local path of the downloaded archive to stdout
# and exits non-zero on any failure, in which case RESTORE_PATH stays empty.
# BACKUP_REPO / BACKUP_FILENAME are passed explicitly because defaults
# assigned with ':=' are not exported to child processes. The quoted
# delimiter ('PY') stops the shell from expanding anything inside the Python
# source.
RESTORE_PATH="$(BACKUP_REPO="$BACKUP_REPO" BACKUP_FILENAME="$BACKUP_FILENAME" python3 - <<'PY'
import os, sys
from huggingface_hub import hf_hub_download

repo = os.environ.get("BACKUP_REPO")
token = os.environ.get("HF_TOKEN")
fname = os.environ.get("BACKUP_FILENAME", "reader_backup_min.tgz")
try:
    p = hf_hub_download(repo_id=repo, repo_type="dataset", filename=fname, token=token)
    sys.stdout.write(p)
except Exception as exc:
    # stdout is reserved for the path, so the reason goes to stderr.
    print(f"restore skipped: {type(exc).__name__}: {exc}", file=sys.stderr)
    sys.exit(1)
PY
)" || RESTORE_PATH=""
if [ -n "$RESTORE_PATH" ] && [ -f "$RESTORE_PATH" ]; then
    echo "Restore file found: $RESTORE_PATH"
    # Extraction stays best effort (startup continues), but a failure is logged
    # instead of being silently ignored.
    tar -xzf "$RESTORE_PATH" -C "$STORAGE_DIR" \
        || echo "WARNING: failed to extract $RESTORE_PATH into $STORAGE_DIR" >&2
else
    echo "No backup yet. (First run is normal.)"
fi
# 2) Background: periodic backup (only users / bookSource / bookshelf).
#
# backup_loop runs forever: every BACKUP_INTERVAL seconds it collects the
# minimal set of files under STORAGE_DIR, archives them into
# /tmp/$BACKUP_FILENAME and uploads that archive to the BACKUP_REPO dataset.
# Every failure is logged and the loop carries on with the next cycle.
backup_loop() {
    while true; do
        TS="$(date -u +%Y%m%d_%H%M%S)"
        TMP_DIR="/tmp/reader_min_backup_$TS"
        TMP_TGZ="/tmp/$BACKUP_FILENAME"
        mkdir -p "$TMP_DIR"

        # Build the minimal file list (paths relative to STORAGE_DIR).
        (
            cd "$STORAGE_DIR" || exit 0
            # 1) The users file.
            [ -f "data/users.json" ] && echo "data/users.json"
            # 2) Each user's book sources / bookshelf.
            # Typical paths: data/<userid>/bookSource.json, data/<userid>/bookshelf.json
            find data -maxdepth 2 -type f \( -name "bookSource.json" -o -name "bookshelf.json" \) 2>/dev/null || true
        ) > "$TMP_DIR/filelist.txt"

        if [ -s "$TMP_DIR/filelist.txt" ]; then
            # Drop any archive left over from a previous cycle so a failed tar
            # run can never cause a stale file to be uploaded.
            rm -f "$TMP_TGZ"
            tar_rc=0
            (cd "$STORAGE_DIR" && tar -czf "$TMP_TGZ" -T "$TMP_DIR/filelist.txt") || tar_rc=$?
            # GNU tar exits 1 when a file changed while being read (the archive
            # is still usable) and 2 on fatal errors (the archive may be
            # truncated); only upload in the former case.
            if [ "$tar_rc" -le 1 ] && [ -f "$TMP_TGZ" ]; then
                echo "[backup $TS] Uploading minimal backup..."
                # Everything the helper reads from os.environ is passed
                # explicitly, so it also works when the settings come from
                # shell defaults rather than the container environment.
                # The Python source must start at column 0, hence the
                # unindented heredoc body.
                TS="$TS" TMP_TGZ="$TMP_TGZ" BACKUP_REPO="$BACKUP_REPO" BACKUP_FILENAME="$BACKUP_FILENAME" python3 - <<'PY' || echo "[backup $TS] Upload failed; will retry next cycle." >&2
import os
from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj=os.environ["TMP_TGZ"],
    path_in_repo=os.environ["BACKUP_FILENAME"],
    repo_id=os.environ["BACKUP_REPO"],
    repo_type="dataset",
    token=os.environ.get("HF_TOKEN"),
    commit_message=f"reader minimal backup {os.environ.get('TS')}",
)
print("upload ok")
PY
            else
                echo "[backup $TS] Failed to create archive (tar exit $tar_rc); skipping upload." >&2
            fi
        else
            echo "[backup $TS] Nothing to backup yet."
        fi

        rm -rf "$TMP_DIR" || true
        sleep "$BACKUP_INTERVAL"
    done
}

# Run the backup loop in the background for the lifetime of the container.
backup_loop &
# 3) Port forwarding (HF requires 7860; Reader runs on 8080).
echo "[2/3] Start socat 7860->8080"
socat TCP-LISTEN:7860,fork TCP:127.0.0.1:8080 &
# 4) Start Reader with an explicit storage path. STORAGE_DIR is used here
# (instead of a hard-coded /storage) so that Reader reads and writes the same
# directory that restore and backup operate on when the setting is overridden.
# exec replaces the shell, so the java process receives signals directly.
echo "[3/3] Start Reader..."
exec java -Dreader.app.storagePath="$STORAGE_DIR" -jar /app/bin/reader.jar --server.port=8080
SH
# Make the generated script executable so it can be started directly.
RUN chmod +x /app/run.sh
# Exec-form entrypoint: the container runs /app/run.sh itself, without a
# wrapping "sh -c".
ENTRYPOINT ["/app/run.sh"]