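# NOTE(review): "Spaces: Running / Running" appears to be hosting-page status
# text captured together with this file, not configuration -- confirm and remove.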
# Model deployments exposed by the proxy. Each list item pairs the public
# `model_name` with the `litellm_params` used to call the provider and the
# `model_info` metadata (limits, pricing, capability flags) for that model.
model_list:
  - model_name: azure_o3-pro
    litellm_params:
      model: azure/azure_o3-pro
      # `os.environ/<NAME>` values are resolved from environment variables,
      # so no credentials are stored in this file.
      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
      api_base: os.environ/AZURE_RESPONSES_OPENAI_BASE_URL
      api_version: "preview"
      # NOTE(review): `mode` is also set under model_info below; confirm it
      # is intentionally repeated here.
      mode: responses
      drop_params: true
      # `temperature` is always dropped for this deployment.
      additional_drop_params: ["temperature"]
    model_info:
      mode: responses
      # Token limits.
      max_tokens: 100000
      max_input_tokens: 200000
      max_output_tokens: 100000
      # Per-token pricing; the *_batches values are half the regular rates.
      input_cost_per_token: 0.00002
      output_cost_per_token: 0.00008
      input_cost_per_token_batches: 0.00001
      output_cost_per_token_batches: 0.00004
      # Capability flags.
      supports_function_calling: true
      supports_parallel_function_calling: false
      supports_vision: true
      supports_pdf_input: true
      supports_prompt_caching: true
      supports_response_schema: true
      supports_reasoning: true
      supports_tool_choice: true
      supported_endpoints:
        - /v1/responses
        - /v1/batch
      supported_modalities:
        - text
        - image
      supported_output_modalities:
        - text
# --------------Other Settings--------------------
litellm_settings:
  # Networking settings
  # (int) LLM request timeout in seconds. A Timeout error is raised if a call
  # takes longer than this value. Sets litellm.request_timeout.
  request_timeout: 4000
  num_retries: 3
  # fallbacks: [{ "gemini-1.5-pro": ["gemini-1.5-flash"] }]
  # Cool down a model if it fails more than this many calls in a minute.
  allowed_fails: 3
  # How long to cool down a model once fails/min > allowed_fails.
  cooldown_time: 30
  drop_params: true
general_settings:
  # [OPTIONAL] Only use this if you require all calls to contain this key
  # (Authorization: Bearer sk-1234). The value is read from the MASTER_KEY
  # environment variable; `sk-1234` is only an example of the key format.
  master_key: os.environ/MASTER_KEY
# router_settings:
#   fallbacks:
#     [
#       { "or/gemini-2.0-flash-exp": ["gg1/gemini-2.0-flash-exp"] },
#       { "gpt-3.5-turbo": ["gemini-1.5-flash"] },
#     ]
#   model_group_alias: { "gpt-4": "gemini-1.5-pro" }
#   routing_strategy: simple-shuffle