Spaces:
Sleeping
Sleeping

Add cost tracking functionality across various components, including Executor, Planner, and FieldMapperAgent. Integrate CostTracker to monitor LLM and document intelligence costs, enhancing logging for cost-related metrics and providing detailed cost breakdowns in the user interface.
966ffcd
"""Cost tracking service for Azure OpenAI and Document Intelligence.""" | |
import logging
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, List, Optional

import pandas as pd
# Module-level logger, named after this module so handlers/levels can be
# configured per module by the application.
logger = logging.getLogger(__name__)
@dataclass
class TokenCosts:
    """Costs per 1M tokens for different models.

    The defaults are readable both on the class (``TokenCosts.GPT41``) and on
    instances; as a dataclass, an instance may also override a rate, e.g.
    ``TokenCosts(GPT41=3.0)``.
    """

    GPT41: float = 2.0  # $2 per 1M input tokens for GPT-4.1
    GPT41_OUTPUT: float = 8.0  # $8 per 1M output tokens for GPT-4.1
@dataclass
class DocumentIntelligenceCosts:
    """Costs for Document Intelligence, expressed per 1,000 pages.

    Attribute names are the upper-cased operation names; the defaults are
    readable on the class and on instances, and an instance may override a
    rate, e.g. ``DocumentIntelligenceCosts(LAYOUT=12.0)``.
    """

    READ: float = 1.5  # $1.5 per 1,000 pages
    LAYOUT: float = 10.0  # $10 per 1,000 pages
    CUSTOM: float = 30.0  # $30 per 1,000 pages
@dataclass
class LLMCall:
    """Represents a single LLM call with its details.

    Attributes:
        description: Free-form label for the call.
        input_tokens: Number of input tokens recorded for the call.
        output_tokens: Number of output tokens recorded for the call.
        timestamp: When this record was created.
    """

    description: str
    input_tokens: int
    output_tokens: int
    # default_factory stamps each record at creation time; a plain
    # ``datetime.now()`` default would be evaluated once, when the class is
    # defined, and shared by every instance.
    timestamp: datetime = field(default_factory=datetime.now)

    @property
    def input_cost(self) -> float:
        """Cost of the input tokens at the ``TokenCosts.GPT41`` per-1M rate."""
        return (self.input_tokens / 1_000_000) * TokenCosts.GPT41

    @property
    def output_cost(self) -> float:
        """Cost of the output tokens at the ``TokenCosts.GPT41_OUTPUT`` per-1M rate."""
        return (self.output_tokens / 1_000_000) * TokenCosts.GPT41_OUTPUT

    @property
    def total_cost(self) -> float:
        """Sum of ``input_cost`` and ``output_cost``."""
        return self.input_cost + self.output_cost
class CostTracker:
    """Tracks costs for Azure OpenAI and Document Intelligence usage.

    Usage is recorded in two tallies with the same shape:
    ``current_file_costs``, which ``reset_current_file`` clears, and
    ``total_costs``, which only ever grows. Token prices are read from
    ``TokenCosts`` (per 1M tokens) and page prices from
    ``DocumentIntelligenceCosts`` (per 1,000 pages).
    """

    def __init__(self):
        """Create a tracker with empty tallies and the default price tables."""
        self.token_costs = TokenCosts()
        self.di_costs = DocumentIntelligenceCosts()
        self.current_file_costs = self._empty_usage()
        self.total_costs = self._empty_usage()
        # Running counters fed only by add_llm_tokens / add_di_pages; they are
        # not cleared by reset_current_file.
        self.llm_input_tokens = 0
        self.llm_output_tokens = 0
        self.di_pages = 0
        self.llm_calls: List[LLMCall] = []  # Individual LLM calls for the current file

    @staticmethod
    def _empty_usage() -> Dict:
        """Return a fresh, zeroed usage tally."""
        return {
            "input_tokens": 0,
            "output_tokens": 0,
            "di_pages": 0,
            "di_operations": {},  # operation name -> pages processed
        }

    def _record_tokens(self, input_tokens: int, output_tokens: int) -> None:
        """Add token counts to both the current-file and the total tally."""
        for usage in (self.current_file_costs, self.total_costs):
            usage["input_tokens"] += input_tokens
            usage["output_tokens"] += output_tokens

    def _record_di_pages(self, operation: str, pages: int) -> None:
        """Add pages for one operation to both tallies.

        Each tally is updated independently: an operation that is new to the
        current file must not reset the amount already accumulated in the
        total tally.
        """
        for usage in (self.current_file_costs, self.total_costs):
            per_operation = usage["di_operations"]
            per_operation[operation] = per_operation.get(operation, 0) + pages
            usage["di_pages"] += pages

    def _build_cost_report(self, usage: Dict) -> Dict:
        """Price a usage tally.

        Args:
            usage: A tally shaped like the dict from ``_empty_usage``.

        Returns:
            A dict with an ``"openai"`` section (token counts and costs) and a
            ``"document_intelligence"`` section (pages and cost per operation
            plus their sum).
        """
        input_cost = (usage["input_tokens"] / 1_000_000) * self.token_costs.GPT41
        output_cost = (usage["output_tokens"] / 1_000_000) * self.token_costs.GPT41_OUTPUT

        operations = {}
        di_total = 0
        for operation, pages in usage["di_operations"].items():
            # Operations without a matching price attribute are billed at the
            # READ rate.
            rate = getattr(self.di_costs, operation.upper(), self.di_costs.READ)
            cost = (pages / 1000) * rate
            operations[operation] = {"pages": pages, "cost": cost}
            di_total += cost

        return {
            "openai": {
                "input_tokens": usage["input_tokens"],
                "output_tokens": usage["output_tokens"],
                "input_cost": input_cost,
                "output_cost": output_cost,
                "total_cost": input_cost + output_cost,
            },
            "document_intelligence": {
                "total_pages": usage["di_pages"],
                "operations": operations,
                "total_cost": di_total,
            },
        }

    def reset_current_file(self):
        """Reset the current-file tally and the per-call list; totals are kept."""
        self.current_file_costs = self._empty_usage()
        self.llm_calls = []  # Reset LLM calls for new file

    def add_tokens(self, input_tokens: int, output_tokens: int, model: str = "GPT41"):
        """Add tokens for current file and total.

        Args:
            input_tokens: Input tokens to add.
            output_tokens: Output tokens to add.
            model: Model label; only used in the log message.
        """
        self._record_tokens(input_tokens, output_tokens)
        logger.info(
            "Added tokens - Input: %s, Output: %s for model %s",
            input_tokens,
            output_tokens,
            model,
        )

    def add_di_operation(self, operation: str, pages: int):
        """Add Document Intelligence pages for ``operation`` to both tallies.

        Args:
            operation: Operation name; pricing looks up its upper-cased form
                on ``DocumentIntelligenceCosts``.
            pages: Number of pages processed.
        """
        self._record_di_pages(operation, pages)
        logger.info("Added DI operation - %s: %s pages", operation, pages)

    def calculate_current_file_costs(self) -> Dict:
        """Calculate costs for current file.

        Returns:
            The priced report for ``current_file_costs``; its ``"openai"``
            section additionally carries ``"calls"``, the attribute dict of
            every recorded ``LLMCall``.
        """
        costs = self._build_cost_report(self.current_file_costs)
        # Include detailed call information
        costs["openai"]["calls"] = [call.__dict__ for call in self.llm_calls]
        return costs

    def get_total_costs(self) -> Dict:
        """Get total costs across all files, priced like the current-file report."""
        return self._build_cost_report(self.total_costs)

    def add_llm_tokens(self, input_tokens, output_tokens, description: str = "LLM Call"):
        """Add tokens for an LLM call with a description.

        Updates the running LLM counters and both tallies, and appends an
        ``LLMCall`` record to ``llm_calls``.
        """
        self.llm_input_tokens += input_tokens
        self.llm_output_tokens += output_tokens
        self._record_tokens(input_tokens, output_tokens)
        self.llm_calls.append(
            LLMCall(
                description=description,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
            )
        )

    def add_di_pages(self, num_pages):
        """Add Document Intelligence pages, recorded under the ``"LAYOUT"`` operation."""
        self.di_pages += num_pages
        # Add to LAYOUT operation by default since we're using layout analysis
        self._record_di_pages("LAYOUT", num_pages)

    def total_cost(self):
        """Return the current file's OpenAI plus Document Intelligence cost."""
        costs = self.calculate_current_file_costs()
        return costs["openai"]["total_cost"] + costs["document_intelligence"]["total_cost"]

    def get_detailed_costs_table(self) -> pd.DataFrame:
        """Return a DataFrame of detailed costs.

        Returns:
            One row per recorded LLM call followed by a ``TOTAL`` row, with
            costs formatted as ``$`` strings to four decimals; an empty
            DataFrame when no calls were recorded.
        """
        if not self.llm_calls:
            return pd.DataFrame()

        columns = [
            'Description',
            'Input Tokens',
            'Output Tokens',
            'Input Cost',
            'Output Cost',
            'Total Cost',
        ]
        rows = [
            {
                'Description': call.description,
                'Input Tokens': call.input_tokens,
                'Output Tokens': call.output_tokens,
                'Input Cost': f"${call.input_cost:.4f}",
                'Output Cost': f"${call.output_cost:.4f}",
                'Total Cost': f"${call.total_cost:.4f}",
            }
            for call in self.llm_calls
        ]

        total_input_cost = sum(call.input_cost for call in self.llm_calls)
        total_output_cost = sum(call.output_cost for call in self.llm_calls)
        rows.append({
            'Description': 'TOTAL',
            'Input Tokens': sum(call.input_tokens for call in self.llm_calls),
            'Output Tokens': sum(call.output_tokens for call in self.llm_calls),
            'Input Cost': f"${total_input_cost:.4f}",
            'Output Cost': f"${total_output_cost:.4f}",
            'Total Cost': f"${total_input_cost + total_output_cost:.4f}",
        })

        # Index by the column list to pin the column order.
        return pd.DataFrame(rows)[columns]