# Document_intelligence / requirements.txt
# Abhinav Gavireddi
# [fix]: fixed pdf parsing
# af30fa0
# This file was autogenerated by uv via the following command:
# uv pip compile requirements.in -o requirements.txt
aiofiles==24.1.0
# via
# gradio
# unstructured-client
albucore==0.0.24
# via albumentations
albumentations==2.0.8
# via doclayout-yolo
altair==5.5.0
# via streamlit
annotated-types==0.7.0
# via pydantic
antlr4-python3-runtime==4.9.3
# via omegaconf
anyio==4.9.0
# via
# gradio
# httpx
# openai
# starlette
# watchfiles
attrs==25.3.0
# via
# jsonschema
# referencing
backoff==1.11.1
# via
# opentelemetry-exporter-otlp-proto-grpc
# posthog
# unstructured
bcrypt==4.3.0
# via chromadb
beautifulsoup4==4.13.4
# via unstructured
bleach==6.2.0
# via -r requirements.in
blinker==1.9.0
# via streamlit
boto3==1.38.40
# via
# -r requirements.in
# magic-pdf
# mineru
botocore==1.38.40
# via
# boto3
# s3transfer
brotli==1.1.0
# via
# -r requirements.in
# magic-pdf
build==1.2.2.post1
# via chromadb
cachetools==6.1.0
# via
# google-auth
# streamlit
certifi==2025.6.15
# via
# httpcore
# httpx
# kubernetes
# requests
cffi==1.17.1
# via cryptography
chardet==5.2.0
# via unstructured
charset-normalizer==3.4.2
# via
# pdfminer-six
# reportlab
# requests
chroma-hnswlib==0.7.6
# via -r requirements.in
chromadb==1.0.13
# via -r requirements.in
click==8.2.1
# via
# -r requirements.in
# magic-pdf
# mineru
# nltk
# pdftext
# python-oxmsg
# streamlit
# typer
# uvicorn
coloredlogs==15.0.1
# via onnxruntime
colorlog==6.9.0
# via
# rapid-table
# robust-downloader
contourpy==1.3.2
# via matplotlib
cryptography==45.0.4
# via
# pdfminer-six
# unstructured-client
cycler==0.12.1
# via matplotlib
dataclasses-json==0.6.7
# via unstructured
dill==0.4.0
# via -r requirements.in
distro==1.9.0
# via
# openai
# posthog
doclayout-yolo==0.0.2b1
# via -r requirements.in
durationpy==0.10
# via kubernetes
emoji==2.14.1
# via unstructured
eval-type-backport==0.2.2
# via unstructured-client
fast-langdetect==0.2.5
# via
# -r requirements.in
# magic-pdf
fastapi==0.115.13
# via gradio
fasttext-predict==0.9.2.4
# via fast-langdetect
ffmpy==0.6.0
# via gradio
filelock==3.18.0
# via
# huggingface-hub
# torch
# transformers
filetype==1.2.0
# via unstructured
flatbuffers==25.2.10
# via onnxruntime
fonttools==4.58.4
# via matplotlib
fsspec==2025.5.1
# via
# gradio-client
# huggingface-hub
# torch
ftfy==6.3.1
# via -r requirements.in
gitdb==4.0.12
# via gitpython
gitpython==3.1.44
# via streamlit
google-auth==1.6.3
# via kubernetes
googleapis-common-protos==1.56.4
# via opentelemetry-exporter-otlp-proto-grpc
gradio==5.34.2
# via gradio-pdf
gradio-client==1.10.3
# via gradio
gradio-pdf==0.0.22
# via -r requirements.in
groovy==0.1.2
# via gradio
grpcio==1.73.0
# via
# chromadb
# opentelemetry-exporter-otlp-proto-grpc
h11==0.16.0
# via
# httpcore
# uvicorn
hf-xet==1.1.4
# via huggingface-hub
html5lib==1.1
# via unstructured
httpcore==1.0.9
# via httpx
httptools==0.6.4
# via uvicorn
httpx==0.28.1
# via
# chromadb
# gradio
# gradio-client
# langsmith
# mineru
# openai
# safehttpx
# unstructured-client
huggingface-hub==0.33.0
# via
# -r requirements.in
# gradio
# gradio-client
# mineru
# sentence-transformers
# tokenizers
# transformers
humanfriendly==10.0
# via coloredlogs
idna==3.10
# via
# anyio
# httpx
# requests
importlib-metadata==8.7.0
# via opentelemetry-api
importlib-resources==6.5.2
# via chromadb
jinja2==3.1.6
# via
# altair
# gradio
# pydeck
# torch
jiter==0.10.0
# via openai
jmespath==1.0.1
# via
# boto3
# botocore
joblib==1.5.1
# via
# nltk
# scikit-learn
json-repair==0.47.1
# via mineru
jsonpatch==1.33
# via langchain-core
jsonpointer==3.0.0
# via jsonpatch
jsonschema==4.24.0
# via
# altair
# chromadb
jsonschema-specifications==2025.4.1
# via jsonschema
kiwisolver==1.4.8
# via matplotlib
kubernetes==33.1.0
# via chromadb
langchain==0.3.25
# via -r requirements.in
langchain-core==0.3.65
# via
# langchain
# langchain-openai
# langchain-text-splitters
langchain-openai==0.3.24
# via -r requirements.in
langchain-text-splitters==0.3.8
# via langchain
langdetect==1.0.9
# via unstructured
langsmith==0.3.45
# via
# langchain
# langchain-core
loguru==0.7.3
# via
# -r requirements.in
# magic-pdf
# mineru
lxml==5.4.0
# via unstructured
magic-pdf==1.3.12
# via -r requirements.in
markdown-it-py==3.0.0
# via rich
markupsafe==3.0.2
# via
# gradio
# jinja2
# werkzeug
marshmallow==3.26.1
# via dataclasses-json
matplotlib==3.10.3
# via
# doclayout-yolo
# seaborn
# ultralytics
mdurl==0.1.2
# via markdown-it-py
mineru @ git+https://github.com/opendatalab/MinerU.git@6162ae2be150b53ea755fbc06c67f815f38e2ea6
# via -r requirements.in
mmh3==5.1.0
# via chromadb
modelscope==1.27.0
# via mineru
mpmath==1.3.0
# via sympy
mypy-extensions==1.1.0
# via typing-inspect
narwhals==1.43.1
# via altair
nest-asyncio==1.6.0
# via unstructured-client
nltk==3.9.1
# via unstructured
numpy==1.26.4
# via
# -r requirements.in
# albucore
# albumentations
# chroma-hnswlib
# chromadb
# contourpy
# gradio
# magic-pdf
# matplotlib
# mineru
# onnxruntime
# opencv-python
# opencv-python-headless
# pandas
# pydeck
# rapid-table
# scikit-learn
# scipy
# seaborn
# shapely
# streamlit
# torchvision
# transformers
# ultralytics
# ultralytics-thop
# unstructured
oauthlib==3.3.1
# via
# kubernetes
# requests-oauthlib
olefile==0.47
# via python-oxmsg
omegaconf==2.3.0
# via -r requirements.in
onnxruntime==1.22.0
# via
# chromadb
# rapid-table
openai==1.88.0
# via
# -r requirements.in
# langchain-openai
opencv-python==4.11.0.86
# via
# doclayout-yolo
# rapid-table
# ultralytics
opencv-python-headless==4.11.0.86
# via
# albucore
# albumentations
opentelemetry-api==1.34.1
# via
# chromadb
# opentelemetry-exporter-otlp-proto-grpc
# opentelemetry-sdk
# opentelemetry-semantic-conventions
opentelemetry-exporter-otlp-proto-grpc==1.11.1
# via chromadb
opentelemetry-proto==1.11.1
# via opentelemetry-exporter-otlp-proto-grpc
opentelemetry-sdk==1.34.1
# via
# chromadb
# opentelemetry-exporter-otlp-proto-grpc
opentelemetry-semantic-conventions==0.55b1
# via opentelemetry-sdk
orjson==3.10.18
# via
# chromadb
# gradio
# langsmith
overrides==7.7.0
# via chromadb
packaging==24.2
# via
# altair
# build
# gradio
# gradio-client
# huggingface-hub
# langchain-core
# langsmith
# marshmallow
# matplotlib
# onnxruntime
# streamlit
# transformers
pandas==2.3.0
# via
# doclayout-yolo
# gradio
# seaborn
# streamlit
# ultralytics
pdfminer-six==20250506
# via
# -r requirements.in
# magic-pdf
# mineru
pdftext==0.6.3
# via mineru
pillow==11.2.1
# via
# doclayout-yolo
# gradio
# matplotlib
# mineru
# rapid-table
# reportlab
# sentence-transformers
# streamlit
# torchvision
# ultralytics
posthog==5.3.0
# via chromadb
protobuf==3.20.0
# via
# -r requirements.in
# googleapis-common-protos
# onnxruntime
# opentelemetry-proto
# streamlit
psutil==7.0.0
# via
# doclayout-yolo
# ultralytics
# unstructured
py-cpuinfo==9.0.0
# via
# doclayout-yolo
# ultralytics
pyarrow==20.0.0
# via streamlit
pyasn1==0.6.1
# via
# pyasn1-modules
# rsa
pyasn1-modules==0.4.2
# via google-auth
pybase64==1.4.1
# via chromadb
pyclipper==1.3.0.post6
# via -r requirements.in
pycparser==2.22
# via cffi
pydantic==2.10.6
# via
# -r requirements.in
# albumentations
# chromadb
# fastapi
# gradio
# langchain
# langchain-core
# langsmith
# magic-pdf
# openai
# pdftext
# pydantic-settings
# unstructured-client
pydantic-core==2.27.2
# via pydantic
pydantic-settings==2.9.1
# via pdftext
pydeck==0.9.1
# via streamlit
pydub==0.25.1
# via gradio
pygments==2.19.1
# via rich
pymupdf==1.24.14
# via
# -r requirements.in
# magic-pdf
pyparsing==3.2.3
# via matplotlib
pypdf==5.6.0
# via
# mineru
# unstructured-client
pypdfium2==4.30.0
# via
# mineru
# pdftext
pypika==0.48.9
# via chromadb
pyproject-hooks==1.2.0
# via build
python-dateutil==2.9.0.post0
# via
# botocore
# kubernetes
# matplotlib
# pandas
# posthog
# unstructured-client
python-dotenv==1.1.0
# via
# -r requirements.in
# pydantic-settings
# uvicorn
python-iso639==2025.2.18
# via unstructured
python-magic==0.4.27
# via unstructured
python-multipart==0.0.20
# via gradio
python-oxmsg==0.0.2
# via unstructured
pytz==2025.2
# via pandas
pyyaml==6.0.2
# via
# -r requirements.in
# albumentations
# chromadb
# doclayout-yolo
# gradio
# huggingface-hub
# kubernetes
# langchain
# langchain-core
# omegaconf
# transformers
# ultralytics
# uvicorn
rapid-table==1.0.5
# via -r requirements.in
rapidfuzz==3.13.0
# via unstructured
referencing==0.36.2
# via
# jsonschema
# jsonschema-specifications
regex==2024.11.6
# via
# nltk
# tiktoken
# transformers
reportlab==4.4.2
# via mineru
requests==2.32.4
# via
# doclayout-yolo
# fast-langdetect
# huggingface-hub
# kubernetes
# langchain
# langsmith
# mineru
# modelscope
# posthog
# rapid-table
# requests-oauthlib
# requests-toolbelt
# robust-downloader
# streamlit
# tiktoken
# transformers
# ultralytics
# unstructured
requests-oauthlib==2.0.0
# via kubernetes
requests-toolbelt==1.0.0
# via
# langsmith
# unstructured-client
rich==14.0.0
# via
# chromadb
# typer
robust-downloader==0.0.2
# via fast-langdetect
rpds-py==0.25.1
# via
# jsonschema
# referencing
rsa==4.9.1
# via google-auth
ruff==0.12.0
# via gradio
s3transfer==0.13.0
# via boto3
safehttpx==0.1.6
# via gradio
safetensors==0.5.3
# via transformers
scikit-learn==1.7.0
# via
# -r requirements.in
# magic-pdf
# sentence-transformers
scipy==1.15.3
# via
# albumentations
# doclayout-yolo
# scikit-learn
# sentence-transformers
# ultralytics
seaborn==0.13.2
# via doclayout-yolo
semantic-version==2.10.0
# via gradio
sentence-transformers==4.1.0
# via -r requirements.in
setuptools==80.9.0
# via
# modelscope
# torch
shapely==2.1.1
# via -r requirements.in
shellingham==1.5.4
# via typer
simsimd==6.4.9
# via albucore
six==1.17.0
# via
# google-auth
# html5lib
# kubernetes
# langdetect
# posthog
# python-dateutil
smmap==5.0.2
# via gitdb
sniffio==1.3.1
# via
# anyio
# openai
soupsieve==2.7
# via beautifulsoup4
sqlalchemy==2.0.41
# via langchain
starlette==0.46.2
# via
# fastapi
# gradio
streamlit==1.46.0
# via -r requirements.in
stringzilla==3.12.5
# via albucore
structlog==25.4.0
# via -r requirements.in
sympy==1.14.0
# via
# onnxruntime
# torch
tenacity==9.1.2
# via
# chromadb
# langchain-core
# streamlit
thop==0.1.1.post2209072238
# via doclayout-yolo
threadpoolctl==3.6.0
# via scikit-learn
tiktoken==0.9.0
# via
# -r requirements.in
# langchain-openai
tokenizers==0.21.1
# via
# chromadb
# transformers
toml==0.10.2
# via streamlit
tomlkit==0.13.3
# via gradio
torch==2.7.1
# via
# -r requirements.in
# doclayout-yolo
# magic-pdf
# sentence-transformers
# thop
# torchvision
# ultralytics
# ultralytics-thop
torchvision==0.22.1
# via
# -r requirements.in
# doclayout-yolo
# magic-pdf
# ultralytics
tornado==6.5.1
# via streamlit
tqdm==4.67.1
# via
# -r requirements.in
# chromadb
# doclayout-yolo
# huggingface-hub
# magic-pdf
# mineru
# modelscope
# nltk
# openai
# robust-downloader
# sentence-transformers
# transformers
# ultralytics
# unstructured
transformers==4.52.4
# via
# magic-pdf
# sentence-transformers
typer==0.16.0
# via
# chromadb
# gradio
typing-extensions==4.14.0
# via
# altair
# anyio
# beautifulsoup4
# chromadb
# fastapi
# gradio
# gradio-client
# huggingface-hub
# langchain-core
# openai
# opentelemetry-api
# opentelemetry-sdk
# opentelemetry-semantic-conventions
# pydantic
# pydantic-core
# python-oxmsg
# referencing
# sentence-transformers
# sqlalchemy
# streamlit
# torch
# typer
# typing-inspect
# typing-inspection
# unstructured
typing-inspect==0.9.0
# via dataclasses-json
typing-inspection==0.4.1
# via
# pydantic-settings
# unstructured-client
tzdata==2025.2
# via pandas
ultralytics==8.3.156
# via -r requirements.in
ultralytics-thop==2.0.14
# via ultralytics
unstructured==0.17.2
# via -r requirements.in
unstructured-client==0.32.3
# via unstructured
urllib3==2.5.0
# via
# botocore
# kubernetes
# modelscope
# requests
uvicorn==0.34.3
# via
# chromadb
# gradio
uvloop==0.21.0
# via uvicorn
watchfiles==1.1.0
# via uvicorn
wcwidth==0.2.13
# via ftfy
webencodings==0.5.1
# via
# bleach
# html5lib
websocket-client==1.8.0
# via kubernetes
websockets==15.0.1
# via
# gradio-client
# uvicorn
werkzeug==3.1.3
# via -r requirements.in
wrapt==1.17.2
# via unstructured
zipp==3.23.0
# via importlib-metadata
zstandard==0.23.0
# via langsmith