model_list:
  - model_name: azure_o3-pro
    litellm_params:
      model: azure/azure_o3-pro
      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
      api_base: os.environ/AZURE_RESPONSES_OPENAI_BASE_URL
      api_version: "preview"
      drop_params: true
      additional_drop_params: ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs", "logit_bias", "max_tokens"]
      reasoning:
        effort: high
        summary: detailed
      text:
        verbosity: high
    model_info:
      mode: responses
      background: true
      max_tokens: 100000
      max_input_tokens: 200000
      max_output_tokens: 100000
      input_cost_per_token: 0.00002
      output_cost_per_token: 0.00008
      input_cost_per_token_batches: 0.00001
      output_cost_per_token_batches: 0.00004
      supports_function_calling: true
      supports_parallel_function_calling: false
      supports_vision: true
      supports_pdf_input: true
      supports_prompt_caching: true
      supports_response_schema: true
      supports_reasoning: true
      supports_tool_choice: true
      supported_endpoints:
        - /v1/responses
        - /v1/batch
      supported_modalities:
        - text
        - image
      supported_output_modalities:
        - text
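  # A hedged usage sketch (comment only, not config): once the proxy is up,
  # this deployment is reachable through the OpenAI-compatible Responses
  # endpoint. The http://localhost:4000 address and the key value are
  # assumptions; substitute your own proxy URL and virtual/master key.
  #
  #   curl http://localhost:4000/v1/responses \
  #     -H "Authorization: Bearer $LITELLM_KEY" \
  #     -H "Content-Type: application/json" \
  #     -d '{"model": "azure_o3-pro", "input": "Summarize this config."}'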
  - model_name: azure_gpt-5
    litellm_params:
      model: azure/azure_gpt-5
      api_base: os.environ/AZURE_RESPONSES_OPENAI_BASE_URL
      api_version: "preview"
      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
      merge_reasoning_content_in_choices: true
      drop_params: true
      additional_drop_params: ["temperature", "top_p", "presence_penalty", "frequency_penalty", "logprobs", "top_logprobs", "logit_bias", "max_tokens"]
      reasoning:
        effort: high
        summary: detailed
    model_info:
      supports_reasoning: true
      max_input_tokens: 128000
      max_output_tokens: 64000
      supports_tool_choice: true
      supports_vision: true
      supports_response_schema: true
      supports_prompt_caching: true
      background: true
      mode: responses
      input_cost_per_token: 0.00000125
      output_cost_per_token: 0.000010
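  # A hedged usage sketch (comment only): with merge_reasoning_content_in_choices
  # enabled, a plain Chat Completions call should come back with the reasoning
  # summary merged into the assistant message content. Address and key are
  # assumptions, as above.
  #
  #   curl http://localhost:4000/v1/chat/completions \
  #     -H "Authorization: Bearer $LITELLM_KEY" \
  #     -H "Content-Type: application/json" \
  #     -d '{"model": "azure_gpt-5", "messages": [{"role": "user", "content": "Outline a 3-step test plan."}]}'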
# --------------Other Settings--------------------
litellm_settings:
  # Networking settings
  request_timeout: 4000 # (int) LLM request timeout in seconds; raises a Timeout error if a call takes longer. Sets litellm.request_timeout
  num_retries: 3
  # fallbacks: [{ "gemini-1.5-pro": ["gemini-1.5-flash"] }]
  allowed_fails: 3 # cooldown a model if it fails more than this many calls in a minute
  cooldown_time: 30 # how long (seconds) to cooldown a model once fails/min > allowed_fails
  drop_params: true
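  # Worked example of the settings above (assumed semantics): a failing request
  # is retried up to num_retries (3) times; if a single deployment racks up more
  # than allowed_fails (3) failures within a minute, it is taken out of routing
  # for cooldown_time (30) seconds before being tried again.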
general_settings:
  master_key: os.environ/MASTER_KEY # [OPTIONAL] Only use this if you require all calls to contain this key (e.g. Authorization: Bearer sk-1234)
# router_settings:
#   fallbacks:
#     [
#       { "or/gemini-2.0-flash-exp": ["gg1/gemini-2.0-flash-exp"] },
#       { "gpt-3.5-turbo": ["gemini-1.5-flash"] },
#     ]
#   model_group_alias: { "gpt-4": "gemini-1.5-pro" }
#   routing_strategy: simple-shuffle