Nightwing11 committed on
Commit
f3704c3
·
1 Parent(s): e29b955

Dockerization optimized/complete

Browse files
Files changed (4) hide show
  1. .dockerignore +18 -31
  2. Dockerfile +41 -9
  3. Rag/rag_pipeline.py +3 -3
  4. requirements.txt +11 -9
.dockerignore CHANGED
@@ -1,36 +1,23 @@
1
- # Ignore virtual environments and cache
2
- __pycache__/
3
- *.pyc
4
- *.pyo
5
- *.pyd
6
- *.pyc
7
- .venv/
8
- venv/
9
- __pycache__/
10
- *.sqlite3-journal
11
 
12
- # Ignore IDE files
13
- .idea/
14
- .vscode/
15
 
16
- # Ignore development files
17
- *.swp
18
- *.swo
19
- *.swn
20
- *.swo
21
- *.swn
22
- *.swn
23
- *.swo
24
- *.swn
25
- *.swo
26
- *.swn
27
 
28
- # Ignore data files
29
- chromadb.db
30
- chroma.sqlite3
31
- Rag/chromadb.db
32
- # Ignore setup scripts
33
- setup.sh
34
 
35
  # Ignore environment files
36
- .env
 
 
 
1
+ # Ignore version control
2
+ .git
3
+ .gitignore
 
 
 
 
 
 
 
4
 
5
+ # Ignore notebooks
6
+ Notebook/
 
7
 
8
+ # Ignore databases and logs
9
+ **/*.db
10
+ **/*.sqlite3
11
+ **/chromadb.db
12
+ **/error_log.txt
 
 
 
 
 
 
13
 
14
+ # Ignore cache
15
+ **/__pycache__/
16
+ **/*.pyc
17
+ **/*.pyo
18
+ **/*.pyd
 
19
 
20
  # Ignore environment files
21
+ .env
22
+ venv/
23
+ .venv/
Dockerfile CHANGED
@@ -1,17 +1,49 @@
1
- # Use the official Python 3.11.11 image
2
- FROM python:3.11.11-slim
 
3
 
4
- # Set the working directory in the container
 
5
  WORKDIR /app
6
-
7
- # Copy the requirements file into the container
 
 
 
 
 
8
  COPY requirements.txt .
9
-
10
- # Install Python dependencies
11
  RUN pip install --no-cache-dir -r requirements.txt
12
 
13
- # Copy the rest of the application code
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  COPY . .
15
 
16
- # Set the main entry point
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  CMD ["python", "-m", "Example.rag_example"]
 
1
+ # Declare global build arguments (ARGs before the first FROM are visible only to FROM lines; each stage must re-declare them)
2
+ ARG USER_UID=1000
3
+ ARG USER_GID=1000
4
 
5
+ # Stage 1: Build dependencies
6
+ FROM python:3.11-slim AS builder
7
  WORKDIR /app
8
+ RUN apt-get update && \
9
+ apt-get install -y --no-install-recommends \
10
+ build-essential \
11
+ git && \
12
+ rm -rf /var/lib/apt/lists/*
13
+ RUN python -m venv /opt/venv
14
+ ENV PATH="/opt/venv/bin:$PATH"
15
  COPY requirements.txt .
 
 
16
  RUN pip install --no-cache-dir -r requirements.txt
17
 
18
+ # Stage 2: Final image
19
+ FROM python:3.11-slim
20
+
21
+ # Re-declare build arguments for this stage
22
+ ARG USER_UID=1000
23
+ ARG USER_GID=1000
24
+
25
+ COPY --from=builder /opt/venv /opt/venv
26
+ ENV PATH="/opt/venv/bin:$PATH"
27
+ WORKDIR /app
28
+ RUN apt-get update && \
29
+ apt-get install -y --no-install-recommends \
30
+ libgomp1 && \
31
+ rm -rf /var/lib/apt/lists/*
32
+
33
  COPY . .
34
 
35
+ # Create the group and user first
36
+ RUN groupadd -g ${USER_GID} appuser && \
37
+ useradd -m -u ${USER_UID} -g appuser appuser
38
+
39
+ # Create directories and set permissions
40
+ RUN mkdir -p /app/Rag/chromadb.db && \
41
+ mkdir -p /app/Data && \
42
+ chown -R appuser:appuser /app
43
+
44
+ USER appuser
45
+
46
+ # Make sure your Python code uses this path for ChromaDB
47
+ ENV CHROMA_PERSISTENCE_DIRECTORY=/app/Rag/chromadb.db
48
+
49
  CMD ["python", "-m", "Example.rag_example"]
Rag/rag_pipeline.py CHANGED
@@ -13,13 +13,13 @@ API_KEY = os.getenv("GOOGLE_API_KEY")
13
  if API_KEY:
14
  genai.configure(api_key=API_KEY)
15
 
16
- chromadb_path = "Rag/chromadb.db"
17
  # transcripts_folder_path = '/home/nightwing/Codes/Xyzbot/Data/transcripts'
18
  # processed_files_path = "/home/nightwing/Codes/Xyzbot/Rag/Processed_folder/processed_files.json"
19
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
20
 
21
- client = chromadb.PersistentClient(path=chromadb_path)
22
- collection = client.get_or_create_collection(name="yt_transcript_collection")
23
 
24
  # Logging
25
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
 
13
  if API_KEY:
14
  genai.configure(api_key=API_KEY)
15
 
16
+ chromadb_path = "app/Rag/chromadb.db"
17
  # transcripts_folder_path = '/home/nightwing/Codes/Xyzbot/Data/transcripts'
18
  # processed_files_path = "/home/nightwing/Codes/Xyzbot/Rag/Processed_folder/processed_files.json"
19
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
20
 
21
+ # client = chromadb.PersistentClient(path=chromadb_path)
22
+ # collection = client.get_or_create_collection(name="yt_transcript_collection")
23
 
24
  # Logging
25
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(message)s')
requirements.txt CHANGED
@@ -1,16 +1,18 @@
1
- pyarrow
2
- pandas
3
- pendulum
4
- google.generativeai
5
  langchain>=0.3.16,<0.4.0
6
  langchain_openai
7
  langchain_chroma
8
  langchain-community>=0.3.16,<0.4.0
9
  chromadb>=0.4.14
10
- pypdf==4.2.0
11
  flask==3.0.1
12
  flask_cors==3.0.10
13
- sentence_transformers==3.3.1
14
- tqdm==4.67.1
15
- torch==2.5.1
16
- pydantic>=2.7.4,<3.0.0
 
 
 
 
 
 
 
1
+ # Core dependencies
 
 
 
2
  langchain>=0.3.16,<0.4.0
3
  langchain_openai
4
  langchain_chroma
5
  langchain-community>=0.3.16,<0.4.0
6
  chromadb>=0.4.14
 
7
  flask==3.0.1
8
  flask_cors==3.0.10
9
+ google.generativeai
10
+ pydantic>=2.7.4,<3.0.0
11
+ streamlit
12
+ # PDF Processing
13
+ pypdf==4.2.0
14
+
15
+ # ML/AI Dependencies (with CPU-only versions)
16
+ sentence_transformers==2.3.1
17
+ --extra-index-url https://download.pytorch.org/whl/cpu
18
+ torch==2.1.0+cpu