# LiteLLM / config.yaml
# Last update: commit 63371ee (verified), by keungliang
model_list:
  # Azure OpenAI o3-pro exposed through the /v1/responses API.
  - model_name: azure_o3-pro
    litellm_params:
      model: azure/azure_o3-pro
      # os.environ/... values are resolved from environment variables at load time.
      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
      api_base: os.environ/AZURE_RESPONSES_OPENAI_BASE_URL
      api_version: "preview"
      mode: responses
      # Drop request params the upstream endpoint does not accept,
      # plus "temperature" explicitly (o3-pro rejects it).
      drop_params: true
      additional_drop_params: ["temperature"]
    model_info:
      mode: responses
      max_tokens: 100000
      max_input_tokens: 200000
      max_output_tokens: 100000
      # Costs are per single token (LiteLLM convention — not per 1K tokens);
      # the *_batches keys price the /v1/batch endpoint at half rate.
      input_cost_per_token: 0.00002
      output_cost_per_token: 0.00008
      input_cost_per_token_batches: 0.00001
      output_cost_per_token_batches: 0.00004
      supports_function_calling: true
      supports_parallel_function_calling: false
      supports_vision: true
      supports_pdf_input: true
      supports_prompt_caching: true
      supports_response_schema: true
      supports_reasoning: true
      supports_tool_choice: true
      supported_endpoints:
        - /v1/responses
        - /v1/batch
      supported_modalities:
        - text
        - image
      supported_output_modalities:
        - text
# --------------Other Settings--------------------
litellm_settings:
  # Networking settings
  request_timeout: 4000  # (int) LLM request timeout in seconds; raise a Timeout error if a call takes longer than this. Sets litellm.request_timeout
  num_retries: 3
  # fallbacks: [{ "gemini-1.5-pro": ["gemini-1.5-flash"] }]
  allowed_fails: 3  # cooldown a model if it fails more than 3 calls in a minute
  cooldown_time: 30  # how long (seconds) to cooldown a model if fails/min > allowed_fails
  drop_params: true
general_settings:
  # Resolved from the MASTER_KEY environment variable (e.g. sk-1234).
  master_key: os.environ/MASTER_KEY  # [OPTIONAL] Only use this if you require all calls to contain this key (Authorization: Bearer sk-1234)
# router_settings:
# fallbacks:
# [
# { "or/gemini-2.0-flash-exp": ["gg1/gemini-2.0-flash-exp"] },
# { "gpt-3.5-turbo": ["gemini-1.5-flash"] },
# ]
# model_group_alias: { "gpt-4": "gemini-1.5-pro" }
# routing_strategy: simple-shuffle