# Python package requirements (pip requirements-file format)
# Core dependencies (minimum versions)
python-dotenv>=1.1.1
pymupdf4llm>=0.0.27
beautifulsoup4>=4.13.4
requests>=2.32.4
pandas>=2.2.3
openai>=1.99.9
networkx>=3.5
numpy>=2.3.1
scikit-learn>=1.7.1
streamlit>=1.47.0

# FastAPI and realtime API dependencies
fastapi>=0.104.0  # For realtime API server
uvicorn[standard]>=0.24.0  # ASGI server for FastAPI
pydantic>=2.4.0  # Data validation and settings management

# Document processing
pymupdf>=1.24.0  # For PDF processing and image extraction
Pillow>=10.0.0  # For image processing
lxml>=5.0.0  # For HTML parsing
html5lib>=1.1  # Alternative HTML parser

# Vector stores and search
faiss-cpu>=1.8.0  # For vector similarity search (use faiss-gpu if CUDA available)
chromadb>=0.5.0  # Alternative vector database
rank-bm25>=0.2.2  # For BM25 keyword search

# Language models and embeddings
sentence-transformers>=3.0.0  # For DPR and cross-encoder
transformers>=4.40.0  # Required by sentence-transformers
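torch>=2.0.0  # For neural models (default PyPI wheel; whether it is CPU-only depends on the platform)
# To get a build for a specific CUDA version, install torch separately.
# Example for CUDA 11.8 (choose the index URL that matches your CUDA version):
# pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118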
ftfy>=6.1.1  # Text preprocessing for CLIP
regex>=2023.0.0  # Text processing
# For CLIP (optional; uncomment the line below to install it from GitHub):
# git+https://github.com/openai/CLIP.git

# Token counting and management
tiktoken>=0.7.0  # For OpenAI token counting

# Database (optional)
# pymongo>=4.0.0  # Uncomment if using MongoDB for metadata

# Development and debugging
tqdm>=4.65.0  # Progress bars
ipython>=8.0.0  # For interactive debugging
jupyter>=1.0.0  # For notebook development

# Data visualization (optional)
matplotlib>=3.7.0  # For plotting
seaborn>=0.12.0  # Statistical visualization
plotly>=5.15.0  # Interactive plots

# Optional advanced features (uncomment if needed)
# langchain>=0.2.11  # For advanced RAG patterns
# langchain-openai>=0.1.20  # OpenAI integration for LangChain
# llama-index>=0.10.51  # Alternative RAG framework

# Additional utility packages
colorama>=0.4.6  # Colored console output
rich>=13.0.0  # Rich text and beautiful formatting in terminal