Markit_v2 / requirements.txt
AnseMin's picture
Integrate Gemini API for enhanced image processing in MarkItDown
033e4ba
# Core dependencies
pydantic==2.10.6
gradio
markdown==3.7
pillow # Required by GOT-OCR (version intentionally left unpinned here)
numpy==1.26.3 # Match exact dependency from GOT-OCR
# For ZeroGPU support
spaces
# Image processing
opencv-python # Required by GOT-OCR (version intentionally left unpinned here)
# Utility dependencies
python-dotenv>=1.0.0
# Gemini API client
google-genai==1.5.0
# Mistral AI client
mistralai>=1.0.0
# GOT-OCR dependencies - exactly as in original
torch
torchvision
git+https://github.com/huggingface/transformers.git@main
accelerate
verovio # Previously missing from this list; grouped with the GOT-OCR dependencies
huggingface_hub[cli]>=0.19.0
# MarkItDown and its dependencies
markitdown[all]
ffmpeg-python # For audio processing in MarkItDown
# Note: MarkItDown's LLM image descriptions use Gemini 2.5 Flash instead of OpenAI
# Docling dependencies
docling
# RAG and LangChain dependencies
langchain>=0.3.0
langchain-openai>=0.2.0
langchain-google-genai>=2.0.0
langchain-chroma>=0.1.0
langchain-text-splitters>=0.3.0
langchain-community>=0.3.0 # For BM25Retriever and EnsembleRetriever
chromadb>=0.5.0
sentence-transformers>=3.0.0
rank-bm25>=0.2.0 # Required for BM25Retriever