# Provenance (Hugging Face page residue): uploaded by Duibonduil,
# commit 7a3c0ee ("Upload 11 files"), verified.
from aworld.metrics.context_manager import MetricContext
from aworld.metrics.template import MetricTemplate
from aworld.metrics.metric import MetricType
# Metric templates for instrumenting LLM chat calls. Each template is handed
# to MetricContext.histogram_record / MetricContext.count by the record_*
# helpers in this module; nothing is recorded unless
# MetricContext.metric_initialized() is true.

# Histogram of token counts; the "llm.prompt_usage_type" label distinguishes
# prompt tokens from completion tokens (see record_chat_response_metric).
tokens_usage_histogram = MetricTemplate(
    type=MetricType.HISTOGRAM,
    name="llm_token_usage",
    unit="token",
    description="Measures number of input and output tokens used"
)
# Counter of choices returned per chat completion; also incremented once per
# choice carrying a finish_reason, labeled with "llm.finish_reason".
chat_choice_counter = MetricTemplate(
    type=MetricType.COUNTER,
    name="llm_generation_choice_counter",
    unit="choice",
    description="Number of choices returned by chat completions call"
)
# Histogram of end-to-end chat call duration in seconds (recorded on both
# success and exception paths).
duration_histogram = MetricTemplate(
    type=MetricType.HISTOGRAM,
    name="llm_chat_duration",
    unit="s",
    description="AI chat duration",
)
# Counter of exceptions raised during chat completions, labeled by
# "error.type" (the exception class name).
chat_exception_counter = MetricTemplate(
    type=MetricType.COUNTER,
    name="llm_chat_exception_counter",
    unit="time",
    description="Number of exceptions occurred during chat completions",
)
# Histogram of seconds from request start until the first streamed token.
streaming_time_to_first_token_histogram = MetricTemplate(
    type=MetricType.HISTOGRAM,
    name="llm_streaming_time_to_first_token",
    unit="s",
    description="Time to first token in streaming chat completions",
)
# Histogram of seconds from the first streamed token until generation ends.
streaming_time_to_generate_histogram = MetricTemplate(
    type=MetricType.HISTOGRAM,
    name="streaming_time_to_generate",
    unit="s",
    description="Time between first token and completion in streaming chat completions",
)
def record_exception_metric(exception, duration):
    """Record a failed chat completion: its duration and an exception count.

    Args:
        exception: The raised exception; its class name is attached to both
            metrics as the ``error.type`` label.
        duration: Elapsed chat time, recorded on the duration histogram.

    No-op when metrics have not been initialized.
    """
    if not MetricContext.metric_initialized():
        return
    error_labels = {"error.type": type(exception).__name__}
    if duration_histogram:
        MetricContext.histogram_record(
            duration_histogram, duration, labels=error_labels)
    if chat_exception_counter:
        MetricContext.count(
            chat_exception_counter, 1, labels=error_labels)
def record_streaming_time_to_first_token(duration, labels):
    """Record how long a streaming chat call took to emit its first token.

    Args:
        duration: Seconds from request start to the first streamed token.
        labels: Label dict attached to the histogram sample.

    No-op when metrics have not been initialized.
    """
    if not MetricContext.metric_initialized():
        return
    MetricContext.histogram_record(
        streaming_time_to_first_token_histogram, duration, labels=labels)
def record_streaming_time_to_generate(first_token_to_generate_duration, labels):
    """Record the time spent generating after the first streamed token.

    Args:
        first_token_to_generate_duration: Seconds between the first token and
            the end of generation.
        labels: Label dict attached to the histogram sample.

    No-op when metrics have not been initialized.
    """
    if not MetricContext.metric_initialized():
        return
    MetricContext.histogram_record(
        streaming_time_to_generate_histogram,
        first_token_to_generate_duration,
        labels=labels)
def record_chat_response_metric(attributes,
                                prompt_tokens,
                                completion_tokens,
                                duration,
                                choices=None
                                ):
    """Record metrics for a completed chat response.

    Emits, when the corresponding template exists and the value is truthy:
    token-usage histogram samples for prompt and completion tokens (labeled
    via ``llm.prompt_usage_type``), a duration histogram sample, a choice
    count for the whole response, and one extra choice count per choice that
    carries a ``finish_reason`` (labeled via ``llm.finish_reason``).

    Args:
        attributes: Base label dict merged into every metric's labels.
        prompt_tokens: Number of input tokens consumed.
        completion_tokens: Number of output tokens produced.
        duration: Total chat call duration.
        choices: Optional list of choice dicts from the completion response.

    No-op when metrics have not been initialized.
    """
    if not MetricContext.metric_initialized():
        return
    # Prompt and completion usage share one histogram, distinguished by label.
    for usage_type, token_count in (("prompt_tokens", prompt_tokens),
                                    ("completion_tokens", completion_tokens)):
        if token_count and tokens_usage_histogram:
            MetricContext.histogram_record(
                tokens_usage_histogram,
                token_count,
                labels={**attributes, "llm.prompt_usage_type": usage_type})
    if duration and duration_histogram:
        MetricContext.histogram_record(
            duration_histogram, duration, labels=attributes)
    if choices and chat_choice_counter:
        # Count all choices at once, then once more per finish_reason label.
        MetricContext.count(chat_choice_counter, len(choices),
                            labels=attributes)
        for choice in choices:
            finish_reason = choice.get("finish_reason")
            if finish_reason:
                MetricContext.count(
                    chat_choice_counter, 1,
                    labels={**attributes, "llm.finish_reason": finish_reason})