Rom89823974978 committed on
Commit
7a967cc
·
1 Parent(s): 70165d2
Files changed (3) hide show
  1. Dockerfile +0 -13
  2. backend/main.py +3 -3
  3. run.sh +12 -68
Dockerfile CHANGED
@@ -23,13 +23,6 @@ USER root
23
  USER root
24
  RUN apt-get update && \
25
  apt-get install -y nginx python3-pip curl gnupg lsb-release && \
26
- export GCSFUSE_REPO=gcsfuse-$(lsb_release -c -s) && \
27
- echo "deb http://packages.cloud.google.com/apt $GCSFUSE_REPO main" \
28
- | tee /etc/apt/sources.list.d/gcsfuse.list && \
29
- curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \
30
- | apt-key add - && \
31
- apt-get update && \
32
- apt-get install -y gcsfuse && \
33
  rm -rf /var/lib/apt/lists/* && \
34
  rm -f /etc/nginx/sites-enabled/default && \
35
  rm -f /etc/nginx/conf.d/default.conf
@@ -72,12 +65,6 @@ ENV HF_HOME=/tmp/hf_cache \
72
  RUN mkdir -p /tmp/hf_cache \
73
  && chmod 777 /tmp/hf_cache
74
 
75
- RUN mkdir -p /mnt/project/data \
76
- /mnt/project/vectorstore_index \
77
- /mnt/project/whoosh_index \
78
- /mnt/project/cache \
79
- && chmod -R a+rwx /mnt/project
80
-
81
  # Install Python deps from requirements (ensures numpy/pandas compatibility), then ASGI
82
  # copy in your requirements
83
  COPY --from=backend-builder /app/backend/requirements.txt /tmp/requirements.txt
 
23
  USER root
24
  RUN apt-get update && \
25
  apt-get install -y nginx python3-pip curl gnupg lsb-release && \
 
 
 
 
 
 
 
26
  rm -rf /var/lib/apt/lists/* && \
27
  rm -f /etc/nginx/sites-enabled/default && \
28
  rm -f /etc/nginx/conf.d/default.conf
 
65
  RUN mkdir -p /tmp/hf_cache \
66
  && chmod 777 /tmp/hf_cache
67
 
 
 
 
 
 
 
68
  # Install Python deps from requirements (ensures numpy/pandas compatibility), then ASGI
69
  # copy in your requirements
70
  COPY --from=backend-builder /app/backend/requirements.txt /tmp/requirements.txt
backend/main.py CHANGED
@@ -44,9 +44,9 @@ logger = logging.getLogger(__name__)
44
 
45
  class Settings(BaseSettings):
46
  # Parquet + Whoosh/FAISS
47
- parquet_path: str = "/mnt/project/data/consolidated_clean_pred.parquet"
48
- whoosh_dir: str = "/mnt/project/whoosh_index"
49
- vectorstore_path: str = "/mnt/project/vectorstore_index"
50
  # Models
51
  embedding_model: str = "sentence-transformers/LaBSE"
52
  llm_model: str = "google/mt5-large"#"bigscience/bloom-3b"#"RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
 
44
 
45
  class Settings(BaseSettings):
46
  # Parquet + Whoosh/FAISS
47
+ parquet_path: str = "gs://mda_eu_project/data/consolidated_clean_pred.parquet"
48
+ whoosh_dir: str = "gs://mda_eu_project/whoosh_index"
49
+ vectorstore_path: str = "gs://mda_eu_project/vectorstore_index"
50
  # Models
51
  embedding_model: str = "sentence-transformers/LaBSE"
52
  llm_model: str = "google/mt5-large"#"bigscience/bloom-3b"#"RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
run.sh CHANGED
@@ -13,82 +13,26 @@
13
  #pkill -F http_server.pid
14
  #rm http_server.pid
15
  # Start nginx in foreground
16
- #echo "HF_SDK = $HF_SPACE_SDK, APP_PORT = $APP_PORT, PORT = $PORT"
17
- #echo "$GCP_SA_JSON" > /tmp/sa.json
18
- #chmod 600 /tmp/sa.json
19
-
20
- #export GOOGLE_APPLICATION_CREDENTIALS=/tmp/sa.json
21
- #nginx -g "daemon off;" &
22
- #NGINX_PID=$!
23
-
24
- # Serve static files via simple HTTP server on port 8000
25
- #env HTTP_SERVER_PORT=8000
26
- #python3 -m http.server --directory ./static --bind 0.0.0.0 ${HTTP_SERVER_PORT} &
27
- #HTTP_SERVER_PID=$!
28
-
29
- # Setup cleanup on exit
30
- #cleanup() {
31
- # echo "Shutting down servers..."
32
- # kill "${HTTP_SERVER_PID}" || true
33
- # kill "${NGINX_PID}" || true
34
- #}
35
- #trap cleanup EXIT
36
-
37
- # Start FastAPI; ensure correct module path
38
- #uvicorn "app.main:app" --host 0.0.0.0 --port 7860
39
-
40
- #!/bin/bash
41
- set -e
42
-
43
- echo "Starting with HF_SDK=$HF_SPACE_SDK, APP_PORT=$APP_PORT, PORT=$PORT"
44
-
45
- # Write out GCP key and export
46
  echo "$GCP_SA_JSON" > /tmp/sa.json
47
  chmod 600 /tmp/sa.json
48
- export GOOGLE_APPLICATION_CREDENTIALS=/tmp/sa.json
49
-
50
- # 1️⃣ Mount your GCS bucket under /mnt/project
51
- MOUNT_POINT=/mnt/project
52
- BUCKET_NAME=mda_eu_project
53
- #mkdir -p ${MOUNT_POINT}
54
- # allow_other so nginx, uvicorn, etc. (non-root) can write
55
- gcsfuse \
56
- --implicit-dirs \
57
- --file-mode=777 \
58
- --dir-mode=777 \
59
- ${BUCKET_NAME} \
60
- "${MOUNT_POINT}"
61
-
62
- # 2️⃣ Ensure our four dirs exist
63
- #for d in data vectorstore_index whoosh_index cache; do
64
- # mkdir -p ${MOUNT_POINT}/$d
65
- #done
66
-
67
- # 3️⃣ Point HF caches into bucket
68
- export HF_HOME=${MOUNT_POINT}/cache
69
- export TRANSFORMERS_CACHE=${MOUNT_POINT}/cache
70
- export HF_HUB_CACHE=${MOUNT_POINT}/cache
71
- export XDG_CACHE_HOME=${MOUNT_POINT}/cache
72
 
73
- # 4️⃣ (Optional) export paths for your app
74
- export DATA_DIR=${MOUNT_POINT}/data
75
- export VSTORE_DIR=${MOUNT_POINT}/vectorstore_index
76
- export WHOOSH_DIR=${MOUNT_POINT}/whoosh_index
77
-
78
- # 5️⃣ Start nginx + static server
79
  nginx -g "daemon off;" &
80
  NGINX_PID=$!
81
 
82
- python3 -m http.server --directory ./static --bind 0.0.0.0 ${HTTP_SERVER_PORT:-8000} &
 
 
83
  HTTP_SERVER_PID=$!
84
 
85
- # 6️⃣ Cleanup
86
- cleanup(){
87
- echo "Shutting down"
88
- kill $HTTP_SERVER_PID || true
89
- kill $NGINX_PID || true
90
  }
91
  trap cleanup EXIT
92
 
93
- # 7️⃣ Finally, launch FastAPI/uvicorn
94
- uvicorn "app.main:app" --host 0.0.0.0 --port ${PORT:-7860}
 
13
  #pkill -F http_server.pid
14
  #rm http_server.pid
15
  # Start nginx in foreground
16
+ echo "HF_SDK = $HF_SPACE_SDK, APP_PORT = $APP_PORT, PORT = $PORT"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  echo "$GCP_SA_JSON" > /tmp/sa.json
18
  chmod 600 /tmp/sa.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
+ export GOOGLE_APPLICATION_CREDENTIALS=/tmp/sa.json
 
 
 
 
 
21
  nginx -g "daemon off;" &
22
  NGINX_PID=$!
23
 
24
+ # Serve static files via simple HTTP server on port 8000
25
+ env HTTP_SERVER_PORT=8000
26
+ python3 -m http.server --directory ./static --bind 0.0.0.0 ${HTTP_SERVER_PORT} &
27
  HTTP_SERVER_PID=$!
28
 
29
+ # Setup cleanup on exit
30
+ cleanup() {
31
+ echo "Shutting down servers..."
32
+ kill "${HTTP_SERVER_PID}" || true
33
+ kill "${NGINX_PID}" || true
34
  }
35
  trap cleanup EXIT
36
 
37
+ # Start FastAPI; ensure correct module path
38
+ uvicorn "app.main:app" --host 0.0.0.0 --port 7860