from aworld.metrics.context_manager import MetricContext
from aworld.metrics.template import MetricTemplate
from aworld.metrics.metric import MetricType

tokens_usage_histogram = MetricTemplate(
    type=MetricType.HISTOGRAM,
    name="llm_token_usage",
    unit="token",
    description="Measures number of input and output tokens used"
)

chat_choice_counter = MetricTemplate(
    type=MetricType.COUNTER,
    name="llm_generation_choice_counter",
    unit="choice",
    description="Number of choices returned by chat completions call"
)

duration_histogram = MetricTemplate(
    type=MetricType.HISTOGRAM,
    name="llm_chat_duration",
    unit="s",
    description="AI chat duration",
)

chat_exception_counter = MetricTemplate(
    type=MetricType.COUNTER,
    name="llm_chat_exception_counter",
    unit="time",
    description="Number of exceptions that occurred during chat completions",
)

streaming_time_to_first_token_histogram = MetricTemplate(
    type=MetricType.HISTOGRAM,
    name="llm_streaming_time_to_first_token",
    unit="s",
    description="Time to first token in streaming chat completions",
)

streaming_time_to_generate_histogram = MetricTemplate(
    type=MetricType.HISTOGRAM,
    name="streaming_time_to_generate",
    unit="s",
    description="Time between first token and completion in streaming chat completions",
)


def record_exception_metric(exception, duration):
    '''
    Record a chat exception and its duration to metrics.
    '''
    if MetricContext.metric_initialized():
        labels = {
            "error.type": exception.__class__.__name__,
        }
        if duration_histogram:
            MetricContext.histogram_record(
                duration_histogram, duration, labels=labels)
        if chat_exception_counter:
            MetricContext.count(
                chat_exception_counter, 1, labels=labels)


def record_streaming_time_to_first_token(duration, labels):
    '''
    Record the time from request start to the first streamed token.
    '''
    if MetricContext.metric_initialized():
        MetricContext.histogram_record(
            streaming_time_to_first_token_histogram, duration, labels=labels)


def record_streaming_time_to_generate(first_token_to_generate_duration, labels):
    '''
    Record the time from the first streamed token to the end of generation.
    '''
    if MetricContext.metric_initialized():
        MetricContext.histogram_record(
            streaming_time_to_generate_histogram, first_token_to_generate_duration, labels=labels)


def record_chat_response_metric(attributes,
                                prompt_tokens,
                                completion_tokens,
                                duration,
                                choices=None):
    '''
    Record token usage, duration, and choice metrics for a chat response.
    '''
    if MetricContext.metric_initialized():
        # Record prompt-side token usage.
        if prompt_tokens and tokens_usage_histogram:
            labels = {
                **attributes,
                "llm.prompt_usage_type": "prompt_tokens"
            }
            MetricContext.histogram_record(
                tokens_usage_histogram, prompt_tokens, labels=labels)
        # Record completion-side token usage.
        if completion_tokens and tokens_usage_histogram:
            labels = {
                **attributes,
                "llm.prompt_usage_type": "completion_tokens"
            }
            MetricContext.histogram_record(
                tokens_usage_histogram, completion_tokens, labels=labels)
        # Record overall chat duration.
        if duration and duration_histogram:
            MetricContext.histogram_record(
                duration_histogram, duration, labels=attributes)
        # Count returned choices, and additionally count each choice by finish reason.
        if choices and chat_choice_counter:
            MetricContext.count(chat_choice_counter,
                                len(choices), labels=attributes)
            for choice in choices:
                if choice.get("finish_reason"):
                    finish_reason_attr = {
                        **attributes,
                        "llm.finish_reason": choice.get("finish_reason")
                    }
                    MetricContext.count(
                        chat_choice_counter, 1, labels=finish_reason_attr)
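

# Minimal usage sketch (illustrative only, not part of the original module):
# it assumes a metrics backend has already been initialized via MetricContext
# elsewhere in the application; the attribute keys, values, and numbers below
# are hypothetical examples.
if __name__ == "__main__":
    sample_attributes = {
        "llm.model_name": "example-model",
        "llm.request_type": "chat",
    }
    # Record a completed (non-streaming) chat response.
    record_chat_response_metric(
        attributes=sample_attributes,
        prompt_tokens=128,
        completion_tokens=256,
        duration=1.42,
        choices=[{"finish_reason": "stop"}],
    )
    # Record streaming latencies for a hypothetical streamed response.
    record_streaming_time_to_first_token(0.35, sample_attributes)
    record_streaming_time_to_generate(1.07, sample_attributes)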