# sparrow-ml / config.yml
# katanaml's picture
# Added sparrow key support
# dca3ac6
# Access-protection switch (boolean), written in canonical lowercase form.
# NOTE(review): presumably makes the service require a valid Sparrow key — confirm in
# the code that reads this flag.
PROTECTED_ACCESS: true
# AGENT FOR LLAMAINDEX
# Keys in this section carry no agent suffix; the other agent sections in this file
# suffix their keys with the agent name (e.g. LLM_HAYSTACK).
# Tested with these LLMs
# Keep exactly one LLM line active: duplicate keys are invalid YAML and most parsers
# silently keep the last value.
#LLM: 'starling-lm:7b-alpha-q4_K_M'
#LLM: 'starling-lm:7b-alpha-q5_K_M'
LLM: 'adrienbrault/nous-hermes2theta-llama3-8b:q5_K_M'
#LLM: 'llama3:8b-instruct-q5_K_M'
# Embedding model identifier
EMBEDDINGS: 'sentence-transformers/all-mpnet-base-v2'
# Weaviate endpoint (localhost, port 8080)
WEAVIATE_URL: 'http://localhost:8080'
# Chunk size; the unit is not stated here (presumably tokens or characters — confirm
# in the agent code)
CHUNK_SIZE: 3000
# Base URL of the Ollama server; the commented line below is an alternative host
OLLAMA_BASE_URL: 'http://127.0.0.1:11434'
#OLLAMA_BASE_URL: 'http://192.168.68.107:11434'
# AGENT FOR HAYSTACK
# Document splitting: the unit to split by, the length of each split and the overlap
# between consecutive splits.
# NOTE(review): if length and overlap are counted in SPLIT_BY units, 3000 means 3000
# sentences per split — confirm this is intended.
SPLIT_BY_HAYSTACK: 'sentence'
SPLIT_LENGTH_HAYSTACK: 3000
SPLIT_OVERLAP_HAYSTACK: 100
# Embedding model identifier
EMBEDDINGS_HAYSTACK: 'sentence-transformers/all-MiniLM-L6-v2'
# Tested with these LLMs
# Keep exactly one LLM_HAYSTACK line active (duplicate keys are invalid YAML).
#LLM_HAYSTACK: 'starling-lm:7b-alpha-q4_K_M'
#LLM_HAYSTACK: 'starling-lm:7b-alpha-q5_K_M'
LLM_HAYSTACK: 'adrienbrault/nous-hermes2theta-llama3-8b:q5_K_M'
#LLM_HAYSTACK: 'llama3:8b-instruct-q5_K_M'
# Base URL of the Ollama server; the commented line below is an alternative host
OLLAMA_BASE_URL_HAYSTACK: 'http://127.0.0.1:11434'
#OLLAMA_BASE_URL_HAYSTACK: 'http://192.168.68.107:11434'
# Upper bound on loop iterations.
# NOTE(review): presumably caps retry/validation loops in the pipeline — confirm.
MAX_LOOPS_ALLOWED_HAYSTACK: 3
# AGENT FOR VLLAMAINDEX
# This section defines only the model; no base URL key is set for this agent here.
# Tested with these LLMs
LLM_VLLAMAINDEX: 'llava:13b'
# AGENT FOR VPROCESSOR
# HTTP endpoint of the OCR inference service (127.0.0.1, port 8001)
OCR_ENDPOINT_VPROCESSOR: 'http://127.0.0.1:8001/api/v1/sparrow-ocr/inference'
# Tested with these LLMs
# Keep exactly one LLM_VPROCESSOR line active (duplicate keys are invalid YAML).
#LLM_VPROCESSOR: 'starling-lm:7b-alpha-q5_K_M'
#LLM_VPROCESSOR: 'adrienbrault/nous-hermes2pro:Q5_K_M-json'
LLM_VPROCESSOR: 'llama3:8b-instruct-q5_K_M'
# Base URL of the Ollama server
OLLAMA_BASE_URL_VPROCESSOR: 'http://127.0.0.1:11434'
# AGENT FOR FUNCTION CALL
# Base URL ends in /v1, unlike the plain host:port Ollama URLs used by most other
# sections.
# NOTE(review): presumably targets Ollama's OpenAI-compatible API — confirm.
OLLAMA_BASE_URL_FUNCTION: 'http://127.0.0.1:11434/v1'
# Tested with these LLMs
LLM_FUNCTION: 'adrienbrault/nous-hermes2theta-llama3-8b:q5_K_M'
# AGENT FOR UNSTRUCTURED LIGHT
# Tested with these LLMs
LLM_UNSTRUCTURED_LIGHT: 'adrienbrault/nous-hermes2pro:Q5_K_M-json'
# Strategy for analyzing PDFs and extracting table structure
STRATEGY_UNSTRUCTURED_LIGHT: 'hi_res'
# Best model for table extraction. Other options are detectron2_onnx and chipper,
# depending on file layout
MODEL_UNSTRUCTURED_LIGHT: 'yolox'
# Chunk size and overlap between chunks; units are not stated here (confirm in the
# agent code)
CHUNK_SIZE_UNSTRUCTURED_LIGHT: 1000
OVERLAP_UNSTRUCTURED_LIGHT: 200
# Embedding model name. Fetch it before first use with:
#   ollama pull nomic-embed-text
EMBEDDINGS_UNSTRUCTURED_LIGHT: 'nomic-embed-text'
# Base URL; same host and port as the Ollama URLs in the other sections
BASE_URL_UNSTRUCTURED_LIGHT: 'http://127.0.0.1:11434'
# AGENT FOR UNSTRUCTURED
# Tested with these LLMs
LLM_UNSTRUCTURED: 'adrienbrault/nous-hermes2pro:Q5_K_M-json'
# Relative output and input directories.
# NOTE(review): judging by the names, PDFs are read from INPUT_DIR and JSON is written
# to OUTPUT_DIR; the paths resolve against the consumer's working directory — confirm.
OUTPUT_DIR_UNSTRUCTURED: 'data/json'
INPUT_DIR_UNSTRUCTURED: 'data/pdf'
# Weaviate endpoint; same value as WEAVIATE_URL in the LlamaIndex section
WEAVIATE_URL_UNSTRUCTURED: 'http://localhost:8080'
# Embedding model name, written without the 'sentence-transformers/' prefix that
# EMBEDDINGS_HAYSTACK uses for the same model.
# NOTE(review): confirm the loader accepts the short name.
EMBEDDINGS_UNSTRUCTURED: 'all-MiniLM-L6-v2'
# Compute device (presumably for the embedding model — confirm)
DEVICE_UNSTRUCTURED: 'cpu'
# Chunking thresholds in characters, per the key names.
# NOTE(review): presumably chunks shorter than the first value are combined and a new
# chunk is started after the second — confirm in the agent code.
CHUNK_UNDER_N_CHARS_UNSTRUCTURED: 250
CHUNK_NEW_AFTER_N_CHARS_UNSTRUCTURED: 500
# Base URL; same host and port as the Ollama URLs in the other sections
BASE_URL_UNSTRUCTURED: 'http://127.0.0.1:11434'
# AGENT FOR INSTRUCTOR
# Base URL ends in /v1 (NOTE(review): presumably Ollama's OpenAI-compatible API —
# confirm); the commented line below is an alternative host
OLLAMA_BASE_URL_INSTRUCTOR: 'http://127.0.0.1:11434/v1'
#OLLAMA_BASE_URL_INSTRUCTOR: 'http://192.168.68.107:11434/v1'
# Tested with these LLMs
# Keep exactly one LLM_INSTRUCTOR line active (duplicate keys are invalid YAML).
LLM_INSTRUCTOR: 'adrienbrault/nous-hermes2theta-llama3-8b:q5_K_M'
#LLM_INSTRUCTOR: 'adrienbrault/nous-hermes2pro:Q5_K_M-json'
#LLM_INSTRUCTOR: 'wizardlm2:7b-q5_K_M'
# Strategy for analyzing PDFs and extracting table structure
STRATEGY_INSTRUCTOR: 'hi_res'
# Using yolox model by default. Other option is detectron2_onnx, depending on file layout
MODEL_INSTRUCTOR: 'yolox'
# Similarity thresholds (floats) for junk-column detection and column-id matching,
# per the key names.
# NOTE(review): direction of the comparison is not visible here — confirm in the agent code.
SIMILARITY_THRESHOLD_JUNK_COLUMNS_INSTRUCTOR: 0.5
SIMILARITY_THRESHOLD_COLUMN_ID_INSTRUCTOR: 0.3
# Output directory for split PDFs; currently the empty string.
# NOTE(review): presumably an empty value disables splitting output or selects a
# default location — confirm.
PDF_SPLIT_OUTPUT_DIR_INSTRUCTOR: ''
# Whether PDFs are converted to images (boolean, canonical lowercase form)
PDF_CONVERT_TO_IMAGES_INSTRUCTOR: false