# LiteLLM / config.yaml
# Last update: commit 63371ee (verified), by keungliang
model_list:
  # Azure OpenAI o3-pro exposed through the /v1/responses API.
  - model_name: azure_o3-pro
    litellm_params:
      model: azure/azure_o3-pro
      # os.environ/... values are resolved from environment variables at load time.
      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
      api_base: os.environ/AZURE_RESPONSES_OPENAI_BASE_URL
      api_version: "preview"
      mode: responses
      # Drop request params the upstream endpoint does not accept,
      # plus "temperature" explicitly (o3-pro rejects it).
      drop_params: true
      additional_drop_params: ["temperature"]
    model_info:
      mode: responses
      max_tokens: 100000
      max_input_tokens: 200000
      max_output_tokens: 100000
      # Costs are per single token (LiteLLM convention — not per 1K tokens);
      # the *_batches keys price the /v1/batch endpoint at half rate.
      input_cost_per_token: 0.00002
      output_cost_per_token: 0.00008
      input_cost_per_token_batches: 0.00001
      output_cost_per_token_batches: 0.00004
      supports_function_calling: true
      supports_parallel_function_calling: false
      supports_vision: true
      supports_pdf_input: true
      supports_prompt_caching: true
      supports_response_schema: true
      supports_reasoning: true
      supports_tool_choice: true
      supported_endpoints:
        - /v1/responses
        - /v1/batch
      supported_modalities:
        - text
        - image
      supported_output_modalities:
        - text
# --------------Other Settings--------------------
litellm_settings:
  # Networking settings
  request_timeout: 4000  # (int) LLM request timeout in seconds; raise a Timeout error if a call takes longer than this. Sets litellm.request_timeout
  num_retries: 3
  # fallbacks: [{ "gemini-1.5-pro": ["gemini-1.5-flash"] }]
  allowed_fails: 3  # cooldown a model if it fails more than 3 calls in a minute
  cooldown_time: 30  # how long (seconds) to cooldown a model if fails/min > allowed_fails
  drop_params: true
general_settings:
  # Resolved from the MASTER_KEY environment variable (e.g. sk-1234).
  master_key: os.environ/MASTER_KEY  # [OPTIONAL] Only use this if you require all calls to contain this key (Authorization: Bearer sk-1234)
# router_settings:
# fallbacks:
# [
# { "or/gemini-2.0-flash-exp": ["gg1/gemini-2.0-flash-exp"] },
# { "gpt-3.5-turbo": ["gemini-1.5-flash"] },
# ]
# model_group_alias: { "gpt-4": "gemini-1.5-pro" }
# routing_strategy: simple-shuffle