# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import List, Union, overload
from typing_extensions import Literal

import httpx

from .. import _legacy_response
from ..types import completion_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from .._utils import (
    required_args,
    maybe_transform,
    async_maybe_transform,
)
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from .._streaming import Stream, AsyncStream
from .._base_client import make_request_options
from ..types.completion import Completion

__all__ = ["Completions", "AsyncCompletions"]


class Completions(SyncAPIResource):
    @cached_property
    def with_raw_response(self) -> CompletionsWithRawResponse:
        return CompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
        return CompletionsWithStreamingResponse(self)

    @overload
    def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-2.0", "claude-2.1", "claude-instant-1.2"]],
        prompt: str,
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        stream: Literal[False] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Completion:
        """[Legacy] Create a Text Completion.

        The Text Completions API is a legacy API. We recommend using the
        [Messages API](https://docs.anthropic.com/en/api/messages) going forward.

        Future models and features will not be compatible with Text Completions. See our
        [migration guide](https://docs.anthropic.com/en/api/migrating-from-text-completions-to-messages)
        for guidance in migrating from Text Completions to Messages.

        Args:
          max_tokens_to_sample: The maximum number of tokens to generate before stopping.

              Note that our models may stop _before_ reaching this maximum. This parameter
              only specifies the absolute maximum number of tokens to generate.

          model: The model that will complete your prompt.

              See [models](https://docs.anthropic.com/en/docs/models-overview) for additional
              details and options.

          prompt: The prompt that you want Claude to complete.

              For proper response generation you will need to format your prompt using
              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:

              ```
              "\n\nHuman: {userQuestion}\n\nAssistant:"
              ```

              See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
              our guide to
              [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
              details.

          metadata: An object describing metadata about the request.

          stop_sequences: Sequences that will cause the model to stop generating.

              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
              sequences in the future. By providing the stop_sequences parameter, you may
              include additional strings that will cause the model to stop generating.

          stream: Whether to incrementally stream the response using server-sent events.

              See [streaming](https://docs.anthropic.com/en/api/streaming) for details.

          temperature: Amount of randomness injected into the response.

              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
              for analytical / multiple choice, and closer to `1.0` for creative and
              generative tasks.

              Note that even with `temperature` of `0.0`, the results will not be fully
              deterministic.

          top_k: Only sample from the top K options for each subsequent token.

              Used to remove "long tail" low probability responses.
              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          top_p: Use nucleus sampling.

              In nucleus sampling, we compute the cumulative distribution over all the options
              for each subsequent token in decreasing probability order and cut it off once it
              reaches a particular probability specified by `top_p`. You should either alter
              `temperature` or `top_p`, but not both.

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-2.0", "claude-2.1", "claude-instant-1.2"]],
        prompt: str,
        stream: Literal[True],
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Stream[Completion]:
        """[Legacy] Create a Text Completion.

        The Text Completions API is a legacy API. We recommend using the
        [Messages API](https://docs.anthropic.com/en/api/messages) going forward.

        Future models and features will not be compatible with Text Completions. See our
        [migration guide](https://docs.anthropic.com/en/api/migrating-from-text-completions-to-messages)
        for guidance in migrating from Text Completions to Messages.

        Args:
          max_tokens_to_sample: The maximum number of tokens to generate before stopping.

              Note that our models may stop _before_ reaching this maximum. This parameter
              only specifies the absolute maximum number of tokens to generate.

          model: The model that will complete your prompt.

              See [models](https://docs.anthropic.com/en/docs/models-overview) for additional
              details and options.

          prompt: The prompt that you want Claude to complete.

              For proper response generation you will need to format your prompt using
              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:

              ```
              "\n\nHuman: {userQuestion}\n\nAssistant:"
              ```

              See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
              our guide to
              [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
              details.

          stream: Whether to incrementally stream the response using server-sent events.

              See [streaming](https://docs.anthropic.com/en/api/streaming) for details.

          metadata: An object describing metadata about the request.

          stop_sequences: Sequences that will cause the model to stop generating.

              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
              sequences in the future. By providing the stop_sequences parameter, you may
              include additional strings that will cause the model to stop generating.

          temperature: Amount of randomness injected into the response.

              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
              for analytical / multiple choice, and closer to `1.0` for creative and
              generative tasks.

              Note that even with `temperature` of `0.0`, the results will not be fully
              deterministic.

          top_k: Only sample from the top K options for each subsequent token.

              Used to remove "long tail" low probability responses.
              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          top_p: Use nucleus sampling.

              In nucleus sampling, we compute the cumulative distribution over all the options
              for each subsequent token in decreasing probability order and cut it off once it
              reaches a particular probability specified by `top_p`. You should either alter
              `temperature` or `top_p`, but not both.

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-2.0", "claude-2.1", "claude-instant-1.2"]],
        prompt: str,
        stream: bool,
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Completion | Stream[Completion]:
        """[Legacy] Create a Text Completion.

        The Text Completions API is a legacy API. We recommend using the
        [Messages API](https://docs.anthropic.com/en/api/messages) going forward.

        Future models and features will not be compatible with Text Completions. See our
        [migration guide](https://docs.anthropic.com/en/api/migrating-from-text-completions-to-messages)
        for guidance in migrating from Text Completions to Messages.

        Args:
          max_tokens_to_sample: The maximum number of tokens to generate before stopping.

              Note that our models may stop _before_ reaching this maximum. This parameter
              only specifies the absolute maximum number of tokens to generate.

          model: The model that will complete your prompt.

              See [models](https://docs.anthropic.com/en/docs/models-overview) for additional
              details and options.

          prompt: The prompt that you want Claude to complete.

              For proper response generation you will need to format your prompt using
              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:

              ```
              "\n\nHuman: {userQuestion}\n\nAssistant:"
              ```

              See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
              our guide to
              [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
              details.

          stream: Whether to incrementally stream the response using server-sent events.

              See [streaming](https://docs.anthropic.com/en/api/streaming) for details.

          metadata: An object describing metadata about the request.

          stop_sequences: Sequences that will cause the model to stop generating.

              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
              sequences in the future. By providing the stop_sequences parameter, you may
              include additional strings that will cause the model to stop generating.

          temperature: Amount of randomness injected into the response.

              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
              for analytical / multiple choice, and closer to `1.0` for creative and
              generative tasks.

              Note that even with `temperature` of `0.0`, the results will not be fully
              deterministic.

          top_k: Only sample from the top K options for each subsequent token.

              Used to remove "long tail" low probability responses.
              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          top_p: Use nucleus sampling.

              In nucleus sampling, we compute the cumulative distribution over all the options
              for each subsequent token in decreasing probability order and cut it off once it
              reaches a particular probability specified by `top_p`. You should either alter
              `temperature` or `top_p`, but not both.

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @required_args(["max_tokens_to_sample", "model", "prompt"], ["max_tokens_to_sample", "model", "prompt", "stream"])
    def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-2.0", "claude-2.1", "claude-instant-1.2"]],
        prompt: str,
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Completion | Stream[Completion]:
        return self._post(
            "/v1/complete",
            body=maybe_transform(
                {
                    "max_tokens_to_sample": max_tokens_to_sample,
                    "model": model,
                    "prompt": prompt,
                    "metadata": metadata,
                    "stop_sequences": stop_sequences,
                    "stream": stream,
                    "temperature": temperature,
                    "top_k": top_k,
                    "top_p": top_p,
                },
                completion_create_params.CompletionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Completion,
            stream=stream or False,
            stream_cls=Stream[Completion],
        )
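

# Illustrative usage sketch of the synchronous resource above. It is not part of the
# generated API surface: it assumes the top-level `anthropic` package exports `Anthropic`,
# `HUMAN_PROMPT`, and `AI_PROMPT`, and that an API key is configured in the environment.
# The prompt text and token count are examples only; the function is never invoked here.
def _example_sync_create() -> None:
    from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic

    client = Anthropic()
    prompt = f"{HUMAN_PROMPT} Why is the sky blue?{AI_PROMPT}"

    # Non-streaming call: returns a single `Completion` object.
    completion = client.completions.create(
        model="claude-2.1",
        max_tokens_to_sample=300,
        prompt=prompt,
    )
    print(completion.completion)

    # Streaming call: with `stream=True` the method returns a `Stream[Completion]`
    # that yields incremental `Completion` events as server-sent events arrive.
    stream = client.completions.create(
        model="claude-2.1",
        max_tokens_to_sample=300,
        prompt=prompt,
        stream=True,
    )
    for event in stream:
        print(event.completion, end="", flush=True)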


class AsyncCompletions(AsyncAPIResource):
    @cached_property
    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
        return AsyncCompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
        return AsyncCompletionsWithStreamingResponse(self)

    @overload
    async def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-2.0", "claude-2.1", "claude-instant-1.2"]],
        prompt: str,
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        stream: Literal[False] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Completion:
        """[Legacy] Create a Text Completion.

        The Text Completions API is a legacy API. We recommend using the
        [Messages API](https://docs.anthropic.com/en/api/messages) going forward.

        Future models and features will not be compatible with Text Completions. See our
        [migration guide](https://docs.anthropic.com/en/api/migrating-from-text-completions-to-messages)
        for guidance in migrating from Text Completions to Messages.

        Args:
          max_tokens_to_sample: The maximum number of tokens to generate before stopping.

              Note that our models may stop _before_ reaching this maximum. This parameter
              only specifies the absolute maximum number of tokens to generate.

          model: The model that will complete your prompt.

              See [models](https://docs.anthropic.com/en/docs/models-overview) for additional
              details and options.

          prompt: The prompt that you want Claude to complete.

              For proper response generation you will need to format your prompt using
              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:

              ```
              "\n\nHuman: {userQuestion}\n\nAssistant:"
              ```

              See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
              our guide to
              [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
              details.

          metadata: An object describing metadata about the request.

          stop_sequences: Sequences that will cause the model to stop generating.

              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
              sequences in the future. By providing the stop_sequences parameter, you may
              include additional strings that will cause the model to stop generating.

          stream: Whether to incrementally stream the response using server-sent events.

              See [streaming](https://docs.anthropic.com/en/api/streaming) for details.

          temperature: Amount of randomness injected into the response.

              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
              for analytical / multiple choice, and closer to `1.0` for creative and
              generative tasks.

              Note that even with `temperature` of `0.0`, the results will not be fully
              deterministic.

          top_k: Only sample from the top K options for each subsequent token.

              Used to remove "long tail" low probability responses.
              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          top_p: Use nucleus sampling.

              In nucleus sampling, we compute the cumulative distribution over all the options
              for each subsequent token in decreasing probability order and cut it off once it
              reaches a particular probability specified by `top_p`. You should either alter
              `temperature` or `top_p`, but not both.

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    async def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-2.0", "claude-2.1", "claude-instant-1.2"]],
        prompt: str,
        stream: Literal[True],
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> AsyncStream[Completion]:
        """[Legacy] Create a Text Completion.

        The Text Completions API is a legacy API. We recommend using the
        [Messages API](https://docs.anthropic.com/en/api/messages) going forward.

        Future models and features will not be compatible with Text Completions. See our
        [migration guide](https://docs.anthropic.com/en/api/migrating-from-text-completions-to-messages)
        for guidance in migrating from Text Completions to Messages.

        Args:
          max_tokens_to_sample: The maximum number of tokens to generate before stopping.

              Note that our models may stop _before_ reaching this maximum. This parameter
              only specifies the absolute maximum number of tokens to generate.

          model: The model that will complete your prompt.

              See [models](https://docs.anthropic.com/en/docs/models-overview) for additional
              details and options.

          prompt: The prompt that you want Claude to complete.

              For proper response generation you will need to format your prompt using
              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:

              ```
              "\n\nHuman: {userQuestion}\n\nAssistant:"
              ```

              See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
              our guide to
              [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
              details.

          stream: Whether to incrementally stream the response using server-sent events.

              See [streaming](https://docs.anthropic.com/en/api/streaming) for details.

          metadata: An object describing metadata about the request.

          stop_sequences: Sequences that will cause the model to stop generating.

              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
              sequences in the future. By providing the stop_sequences parameter, you may
              include additional strings that will cause the model to stop generating.

          temperature: Amount of randomness injected into the response.

              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
              for analytical / multiple choice, and closer to `1.0` for creative and
              generative tasks.

              Note that even with `temperature` of `0.0`, the results will not be fully
              deterministic.

          top_k: Only sample from the top K options for each subsequent token.

              Used to remove "long tail" low probability responses.
              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          top_p: Use nucleus sampling.

              In nucleus sampling, we compute the cumulative distribution over all the options
              for each subsequent token in decreasing probability order and cut it off once it
              reaches a particular probability specified by `top_p`. You should either alter
              `temperature` or `top_p`, but not both.

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @overload
    async def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-2.0", "claude-2.1", "claude-instant-1.2"]],
        prompt: str,
        stream: bool,
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Completion | AsyncStream[Completion]:
        """[Legacy] Create a Text Completion.

        The Text Completions API is a legacy API. We recommend using the
        [Messages API](https://docs.anthropic.com/en/api/messages) going forward.

        Future models and features will not be compatible with Text Completions. See our
        [migration guide](https://docs.anthropic.com/en/api/migrating-from-text-completions-to-messages)
        for guidance in migrating from Text Completions to Messages.

        Args:
          max_tokens_to_sample: The maximum number of tokens to generate before stopping.

              Note that our models may stop _before_ reaching this maximum. This parameter
              only specifies the absolute maximum number of tokens to generate.

          model: The model that will complete your prompt.

              See [models](https://docs.anthropic.com/en/docs/models-overview) for additional
              details and options.

          prompt: The prompt that you want Claude to complete.

              For proper response generation you will need to format your prompt using
              alternating `\n\nHuman:` and `\n\nAssistant:` conversational turns. For example:

              ```
              "\n\nHuman: {userQuestion}\n\nAssistant:"
              ```

              See [prompt validation](https://docs.anthropic.com/en/api/prompt-validation) and
              our guide to
              [prompt design](https://docs.anthropic.com/en/docs/intro-to-prompting) for more
              details.

          stream: Whether to incrementally stream the response using server-sent events.

              See [streaming](https://docs.anthropic.com/en/api/streaming) for details.

          metadata: An object describing metadata about the request.

          stop_sequences: Sequences that will cause the model to stop generating.

              Our models stop on `"\n\nHuman:"`, and may include additional built-in stop
              sequences in the future. By providing the stop_sequences parameter, you may
              include additional strings that will cause the model to stop generating.

          temperature: Amount of randomness injected into the response.

              Defaults to `1.0`. Ranges from `0.0` to `1.0`. Use `temperature` closer to `0.0`
              for analytical / multiple choice, and closer to `1.0` for creative and
              generative tasks.

              Note that even with `temperature` of `0.0`, the results will not be fully
              deterministic.

          top_k: Only sample from the top K options for each subsequent token.

              Used to remove "long tail" low probability responses.
              [Learn more technical details here](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277).

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          top_p: Use nucleus sampling.

              In nucleus sampling, we compute the cumulative distribution over all the options
              for each subsequent token in decreasing probability order and cut it off once it
              reaches a particular probability specified by `top_p`. You should either alter
              `temperature` or `top_p`, but not both.

              Recommended for advanced use cases only. You usually only need to use
              `temperature`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @required_args(["max_tokens_to_sample", "model", "prompt"], ["max_tokens_to_sample", "model", "prompt", "stream"])
    async def create(
        self,
        *,
        max_tokens_to_sample: int,
        model: Union[str, Literal["claude-2.0", "claude-2.1", "claude-instant-1.2"]],
        prompt: str,
        metadata: completion_create_params.Metadata | NotGiven = NOT_GIVEN,
        stop_sequences: List[str] | NotGiven = NOT_GIVEN,
        stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        top_k: int | NotGiven = NOT_GIVEN,
        top_p: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = 600,
    ) -> Completion | AsyncStream[Completion]:
        return await self._post(
            "/v1/complete",
            body=await async_maybe_transform(
                {
                    "max_tokens_to_sample": max_tokens_to_sample,
                    "model": model,
                    "prompt": prompt,
                    "metadata": metadata,
                    "stop_sequences": stop_sequences,
                    "stream": stream,
                    "temperature": temperature,
                    "top_k": top_k,
                    "top_p": top_p,
                },
                completion_create_params.CompletionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=Completion,
            stream=stream or False,
            stream_cls=AsyncStream[Completion],
        )
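

# Illustrative async usage sketch of the resource above. It is not part of the generated
# API surface: it assumes the top-level `anthropic` package exports `AsyncAnthropic`,
# `HUMAN_PROMPT`, and `AI_PROMPT`. Prompt text and token counts are examples only; the
# coroutine is defined for illustration and never awaited here.
async def _example_async_create() -> None:
    from anthropic import AI_PROMPT, HUMAN_PROMPT, AsyncAnthropic

    client = AsyncAnthropic()
    prompt = f"{HUMAN_PROMPT} Summarize the theory of relativity in one sentence.{AI_PROMPT}"

    # Non-streaming call: awaiting it returns a single `Completion` object.
    completion = await client.completions.create(
        model="claude-2.1",
        max_tokens_to_sample=300,
        prompt=prompt,
    )
    print(completion.completion)

    # Streaming call: with `stream=True` the awaited call returns an
    # `AsyncStream[Completion]` that yields incremental `Completion` events.
    stream = await client.completions.create(
        model="claude-2.1",
        max_tokens_to_sample=300,
        prompt=prompt,
        stream=True,
    )
    async for event in stream:
        print(event.completion, end="", flush=True)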


class CompletionsWithRawResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.create = _legacy_response.to_raw_response_wrapper(
            completions.create,
        )


class AsyncCompletionsWithRawResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.create = _legacy_response.async_to_raw_response_wrapper(
            completions.create,
        )


class CompletionsWithStreamingResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.create = to_streamed_response_wrapper(
            completions.create,
        )


class AsyncCompletionsWithStreamingResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.create = async_to_streamed_response_wrapper(
            completions.create,
        )
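

# Illustrative sketch of how the wrapper classes above are typically reached through a
# client (not part of the generated file): `with_raw_response` returns a response object
# exposing HTTP metadata plus `.parse()`, while `with_streaming_response` defers reading
# the body until it is consumed. The header name and prompt text are examples only.
def _example_response_wrappers() -> None:
    from anthropic import AI_PROMPT, HUMAN_PROMPT, Anthropic

    client = Anthropic()
    prompt = f"{HUMAN_PROMPT} Name one prime number.{AI_PROMPT}"

    # Raw response: inspect headers, then parse the body into a `Completion`.
    raw = client.completions.with_raw_response.create(
        model="claude-2.1", max_tokens_to_sample=50, prompt=prompt
    )
    print(raw.headers.get("request-id"))
    completion = raw.parse()
    print(completion.completion)

    # Streaming response wrapper: the body is only read when explicitly consumed.
    with client.completions.with_streaming_response.create(
        model="claude-2.1", max_tokens_to_sample=50, prompt=prompt
    ) as response:
        for text in response.iter_text():
            print(text, end="", flush=True)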