from aworld.metrics.context_manager import MetricContext
from aworld.metrics.template import MetricTemplate
from aworld.metrics.metric import MetricType

tokens_usage_histogram = MetricTemplate(
    type=MetricType.HISTOGRAM,
    name="llm_token_usage",
    unit="token",
    description="Measures the number of input and output tokens used",
)

chat_choice_counter = MetricTemplate(
    type=MetricType.COUNTER,
    name="llm_generation_choice_counter",
    unit="choice",
    description="Number of choices returned by a chat completions call",
)

duration_histogram = MetricTemplate(
    type=MetricType.HISTOGRAM,
    name="llm_chat_duration",
    unit="s",
    description="Duration of a chat completions call",
)

chat_exception_counter = MetricTemplate(
    type=MetricType.COUNTER,
    name="llm_chat_exception_counter",
    unit="time",
    description="Number of exceptions raised during chat completions",
)

streaming_time_to_first_token_histogram = MetricTemplate(
    type=MetricType.HISTOGRAM,
    name="llm_streaming_time_to_first_token",
    unit="s",
    description="Time to first token in streaming chat completions",
)

streaming_time_to_generate_histogram = MetricTemplate(
    type=MetricType.HISTOGRAM,
    name="streaming_time_to_generate",
    unit="s",
    description="Time between first token and completion in streaming chat completions",
)
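
# Usage sketch (hypothetical helper, not part of this module's API): templates
# are passive metric descriptions; values are recorded against them through
# MetricContext, and only after a metrics backend has been initialized.
def _example_record_duration(duration_s):
    # No-op when no metrics backend is configured.
    if MetricContext.metric_initialized():
        MetricContext.histogram_record(duration_histogram, duration_s, labels={})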

def record_exception_metric(exception, duration):
    '''
    Record a chat exception to metrics.
    '''
    if MetricContext.metric_initialized():
        labels = {
            "error.type": exception.__class__.__name__,
        }
        if duration_histogram:
            MetricContext.histogram_record(
                duration_histogram, duration, labels=labels)
        if chat_exception_counter:
            MetricContext.count(
                chat_exception_counter, 1, labels=labels)
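
# Usage sketch (hypothetical): time a chat call and report failures through
# record_exception_metric. `call_llm` stands in for any chat-completions call.
def _example_guarded_call(call_llm):
    import time
    start = time.monotonic()
    try:
        return call_llm()
    except Exception as e:
        # Report the error type together with how long the call ran.
        record_exception_metric(e, time.monotonic() - start)
        raise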

def record_streaming_time_to_first_token(duration, labels):
    '''
    Record the duration from request start to the first token of a stream.
    '''
    if MetricContext.metric_initialized():
        MetricContext.histogram_record(
            streaming_time_to_first_token_histogram, duration, labels=labels)

def record_streaming_time_to_generate(first_token_to_generate_duration, labels):
    '''
    Record the duration from the first token to the end of generation.
    '''
    if MetricContext.metric_initialized():
        MetricContext.histogram_record(
            streaming_time_to_generate_histogram, first_token_to_generate_duration, labels=labels)
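
# Usage sketch (hypothetical): while consuming a streaming response, note when
# the first chunk arrives, then report time-to-first-token and the remaining
# generation time once the stream is exhausted.
def _example_time_stream(chunks, labels):
    import time
    start = time.monotonic()
    first_token_at = None
    for _ in chunks:
        if first_token_at is None:
            first_token_at = time.monotonic()
            record_streaming_time_to_first_token(first_token_at - start, labels)
    if first_token_at is not None:
        record_streaming_time_to_generate(
            time.monotonic() - first_token_at, labels)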

def record_chat_response_metric(attributes,
                                prompt_tokens,
                                completion_tokens,
                                duration,
                                choices=None):
    '''
    Record token usage, duration, and choice metrics for a chat response.
    '''
    if MetricContext.metric_initialized():
        if prompt_tokens and tokens_usage_histogram:
            labels = {
                **attributes,
                "llm.prompt_usage_type": "prompt_tokens"
            }
            MetricContext.histogram_record(
                tokens_usage_histogram, prompt_tokens, labels=labels)
        if completion_tokens and tokens_usage_histogram:
            labels = {
                **attributes,
                "llm.prompt_usage_type": "completion_tokens"
            }
            MetricContext.histogram_record(
                tokens_usage_histogram, completion_tokens, labels=labels)
        if duration and duration_histogram:
            MetricContext.histogram_record(
                duration_histogram, duration, labels=attributes)
        if choices and chat_choice_counter:
            # Count every returned choice, then count each finish reason
            # separately so the distribution of finish reasons is visible.
            MetricContext.count(chat_choice_counter,
                                len(choices), labels=attributes)
            for choice in choices:
                if choice.get("finish_reason"):
                    finish_reason_attr = {
                        **attributes,
                        "llm.finish_reason": choice.get("finish_reason")
                    }
                    MetricContext.count(
                        chat_choice_counter, 1, labels=finish_reason_attr)
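
# Usage sketch (hypothetical values throughout): record a completed,
# non-streaming chat response. Choices are plain dicts so that
# choice.get("finish_reason") works as record_chat_response_metric expects.
def _example_record_response():
    record_chat_response_metric(
        attributes={"llm.model": "example-model"},  # assumed label key
        prompt_tokens=128,
        completion_tokens=256,
        duration=1.42,
        choices=[{"finish_reason": "stop"}],
    )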