Spaces:
Sleeping
Sleeping
Commit
·
7a967cc
1
Parent(s):
70165d2
- Dockerfile +0 -13
- backend/main.py +3 -3
- run.sh +12 -68
Dockerfile
CHANGED
@@ -23,13 +23,6 @@ USER root
|
|
23 |
USER root
|
24 |
RUN apt-get update && \
|
25 |
apt-get install -y nginx python3-pip curl gnupg lsb-release && \
|
26 |
-
export GCSFUSE_REPO=gcsfuse-$(lsb_release -c -s) && \
|
27 |
-
echo "deb http://packages.cloud.google.com/apt $GCSFUSE_REPO main" \
|
28 |
-
| tee /etc/apt/sources.list.d/gcsfuse.list && \
|
29 |
-
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \
|
30 |
-
| apt-key add - && \
|
31 |
-
apt-get update && \
|
32 |
-
apt-get install -y gcsfuse && \
|
33 |
rm -rf /var/lib/apt/lists/* && \
|
34 |
rm -f /etc/nginx/sites-enabled/default && \
|
35 |
rm -f /etc/nginx/conf.d/default.conf
|
@@ -72,12 +65,6 @@ ENV HF_HOME=/tmp/hf_cache \
|
|
72 |
RUN mkdir -p /tmp/hf_cache \
|
73 |
&& chmod 777 /tmp/hf_cache
|
74 |
|
75 |
-
RUN mkdir -p /mnt/project/data \
|
76 |
-
/mnt/project/vectorstore_index \
|
77 |
-
/mnt/project/whoosh_index \
|
78 |
-
/mnt/project/cache \
|
79 |
-
&& chmod -R a+rwx /mnt/project
|
80 |
-
|
81 |
# Install Python deps from requirements (ensures numpy/pandas compatibility), then ASGI
|
82 |
# copy in your requirements
|
83 |
COPY --from=backend-builder /app/backend/requirements.txt /tmp/requirements.txt
|
|
|
23 |
USER root
|
24 |
RUN apt-get update && \
|
25 |
apt-get install -y nginx python3-pip curl gnupg lsb-release && \
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
rm -rf /var/lib/apt/lists/* && \
|
27 |
rm -f /etc/nginx/sites-enabled/default && \
|
28 |
rm -f /etc/nginx/conf.d/default.conf
|
|
|
65 |
RUN mkdir -p /tmp/hf_cache \
|
66 |
&& chmod 777 /tmp/hf_cache
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
# Install Python deps from requirements (ensures numpy/pandas compatibility), then ASGI
|
69 |
# copy in your requirements
|
70 |
COPY --from=backend-builder /app/backend/requirements.txt /tmp/requirements.txt
|
backend/main.py
CHANGED
@@ -44,9 +44,9 @@ logger = logging.getLogger(__name__)
|
|
44 |
|
45 |
class Settings(BaseSettings):
|
46 |
# Parquet + Whoosh/FAISS
|
47 |
-
parquet_path: str = "/
|
48 |
-
whoosh_dir: str = "/
|
49 |
-
vectorstore_path: str = "/
|
50 |
# Models
|
51 |
embedding_model: str = "sentence-transformers/LaBSE"
|
52 |
llm_model: str = "google/mt5-large"#"bigscience/bloom-3b"#"RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
|
|
|
44 |
|
45 |
class Settings(BaseSettings):
|
46 |
# Parquet + Whoosh/FAISS
|
47 |
+
parquet_path: str = "gs://mda_eu_project/data/consolidated_clean_pred.parquet"
|
48 |
+
whoosh_dir: str = "gs://mda_eu_project/whoosh_index"
|
49 |
+
vectorstore_path: str = "gs://mda_eu_project/vectorstore_index"
|
50 |
# Models
|
51 |
embedding_model: str = "sentence-transformers/LaBSE"
|
52 |
llm_model: str = "google/mt5-large"#"bigscience/bloom-3b"#"RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
|
run.sh
CHANGED
@@ -13,82 +13,26 @@
|
|
13 |
#pkill -F http_server.pid
|
14 |
#rm http_server.pid
|
15 |
# Start nginx in foreground
|
16 |
-
|
17 |
-
#echo "$GCP_SA_JSON" > /tmp/sa.json
|
18 |
-
#chmod 600 /tmp/sa.json
|
19 |
-
|
20 |
-
#export GOOGLE_APPLICATION_CREDENTIALS=/tmp/sa.json
|
21 |
-
#nginx -g "daemon off;" &
|
22 |
-
#NGINX_PID=$!
|
23 |
-
|
24 |
-
# Serve static files via simple HTTP server on port 8000
|
25 |
-
#env HTTP_SERVER_PORT=8000
|
26 |
-
#python3 -m http.server --directory ./static --bind 0.0.0.0 ${HTTP_SERVER_PORT} &
|
27 |
-
#HTTP_SERVER_PID=$!
|
28 |
-
|
29 |
-
# Setup cleanup on exit
|
30 |
-
#cleanup() {
|
31 |
-
# echo "Shutting down servers..."
|
32 |
-
# kill "${HTTP_SERVER_PID}" || true
|
33 |
-
# kill "${NGINX_PID}" || true
|
34 |
-
#}
|
35 |
-
#trap cleanup EXIT
|
36 |
-
|
37 |
-
# Start FastAPI; ensure correct module path
|
38 |
-
#uvicorn "app.main:app" --host 0.0.0.0 --port 7860
|
39 |
-
|
40 |
-
#!/bin/bash
|
41 |
-
set -e
|
42 |
-
|
43 |
-
echo "Starting with HF_SDK=$HF_SPACE_SDK, APP_PORT=$APP_PORT, PORT=$PORT"
|
44 |
-
|
45 |
-
# Write out GCP key and export
|
46 |
echo "$GCP_SA_JSON" > /tmp/sa.json
|
47 |
chmod 600 /tmp/sa.json
|
48 |
-
export GOOGLE_APPLICATION_CREDENTIALS=/tmp/sa.json
|
49 |
-
|
50 |
-
# 1️⃣ Mount your GCS bucket under /mnt/project
|
51 |
-
MOUNT_POINT=/mnt/project
|
52 |
-
BUCKET_NAME=mda_eu_project
|
53 |
-
#mkdir -p ${MOUNT_POINT}
|
54 |
-
# allow_other so nginx, uvicorn, etc. (non-root) can write
|
55 |
-
gcsfuse \
|
56 |
-
--implicit-dirs \
|
57 |
-
--file-mode=777 \
|
58 |
-
--dir-mode=777 \
|
59 |
-
${BUCKET_NAME} \
|
60 |
-
"${MOUNT_POINT}"
|
61 |
-
|
62 |
-
# 2️⃣ Ensure our four dirs exist
|
63 |
-
#for d in data vectorstore_index whoosh_index cache; do
|
64 |
-
# mkdir -p ${MOUNT_POINT}/$d
|
65 |
-
#done
|
66 |
-
|
67 |
-
# 3️⃣ Point HF caches into bucket
|
68 |
-
export HF_HOME=${MOUNT_POINT}/cache
|
69 |
-
export TRANSFORMERS_CACHE=${MOUNT_POINT}/cache
|
70 |
-
export HF_HUB_CACHE=${MOUNT_POINT}/cache
|
71 |
-
export XDG_CACHE_HOME=${MOUNT_POINT}/cache
|
72 |
|
73 |
-
|
74 |
-
export DATA_DIR=${MOUNT_POINT}/data
|
75 |
-
export VSTORE_DIR=${MOUNT_POINT}/vectorstore_index
|
76 |
-
export WHOOSH_DIR=${MOUNT_POINT}/whoosh_index
|
77 |
-
|
78 |
-
# 5️⃣ Start nginx + static server
|
79 |
nginx -g "daemon off;" &
|
80 |
NGINX_PID=$!
|
81 |
|
82 |
-
|
|
|
|
|
83 |
HTTP_SERVER_PID=$!
|
84 |
|
85 |
-
#
|
86 |
-
cleanup(){
|
87 |
-
echo "Shutting down
|
88 |
-
kill $HTTP_SERVER_PID || true
|
89 |
-
kill $NGINX_PID
|
90 |
}
|
91 |
trap cleanup EXIT
|
92 |
|
93 |
-
#
|
94 |
-
uvicorn "app.main:app" --host 0.0.0.0 --port
|
|
|
13 |
#pkill -F http_server.pid
|
14 |
#rm http_server.pid
|
15 |
# Start nginx in foreground
|
16 |
+
echo "HF_SDK = $HF_SPACE_SDK, APP_PORT = $APP_PORT, PORT = $PORT"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
echo "$GCP_SA_JSON" > /tmp/sa.json
|
18 |
chmod 600 /tmp/sa.json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
+
export GOOGLE_APPLICATION_CREDENTIALS=/tmp/sa.json
|
|
|
|
|
|
|
|
|
|
|
21 |
nginx -g "daemon off;" &
|
22 |
NGINX_PID=$!
|
23 |
|
24 |
+
# Serve static files via simple HTTP server on port 8000
|
25 |
+
env HTTP_SERVER_PORT=8000
|
26 |
+
python3 -m http.server --directory ./static --bind 0.0.0.0 ${HTTP_SERVER_PORT} &
|
27 |
HTTP_SERVER_PID=$!
|
28 |
|
29 |
+
# Setup cleanup on exit
|
30 |
+
cleanup() {
|
31 |
+
echo "Shutting down servers..."
|
32 |
+
kill "${HTTP_SERVER_PID}" || true
|
33 |
+
kill "${NGINX_PID}" || true
|
34 |
}
|
35 |
trap cleanup EXIT
|
36 |
|
37 |
+
# Start FastAPI; ensure correct module path
|
38 |
+
uvicorn "app.main:app" --host 0.0.0.0 --port 7860
|