Spaces:

levalencia
/

doctorecord

Sleeping

doctorecord / src /services /cost_tracker.py

Add cost tracking functionality across various components, including Executor, Planner, and FieldMapperAgent. Integrate CostTracker to monitor LLM and document intelligence costs, enhancing logging for cost-related metrics and providing detailed cost breakdowns in the user interface.

966ffcd 2 months ago

raw

history blame contribute delete

8.91 kB

	"""Cost tracking service for Azure OpenAI and Document Intelligence."""

	import logging
	from dataclasses import dataclass, field
	from datetime import datetime
	from typing import Dict, List, Optional

	import pandas as pd

	# Module-level logger named after this module; CostTracker uses it to report added tokens and DI pages.
	logger = logging.getLogger(__name__)

	@dataclass
	class TokenCosts:
	    """Costs per 1M tokens for different models.

	    The values are multiplied by ``tokens / 1_000_000`` wherever a cost is
	    computed, so each field is a price for one million tokens.
	    """

	    # Price per 1M input tokens for GPT-4.1 (the previous comment said $10,
	    # which contradicted the value actually used in every calculation).
	    GPT41: float = 2.0
	    # Price per 1M output tokens for GPT-4.1 (previous comment said $30).
	    GPT41_OUTPUT: float = 8.0

	@dataclass
	class DocumentIntelligenceCosts:
	    """Document Intelligence prices, each expressed per 1,000 pages.

	    Attributes:
	        READ: price of 1,000 pages for the "read" operation.
	        LAYOUT: price of 1,000 pages for the "layout" operation.
	        CUSTOM: price of 1,000 pages for the "custom" operation.
	    """

	    READ: float = 1.5      # $1.5 / 1,000 pages
	    LAYOUT: float = 10.0   # $10 / 1,000 pages
	    CUSTOM: float = 30.0   # $30 / 1,000 pages

	@dataclass
	class LLMCall:
	    """Represents a single LLM call with its details.

	    Attributes:
	        description: Caller-supplied label for the call.
	        input_tokens: Input token count, priced at ``TokenCosts.GPT41``.
	        output_tokens: Output token count, priced at
	            ``TokenCosts.GPT41_OUTPUT``.
	        timestamp: When this record was created. Defaults to
	            ``datetime.now()`` evaluated at instantiation time.
	    """

	    description: str
	    input_tokens: int
	    output_tokens: int
	    # A plain ``= datetime.now()`` default is evaluated once, when the class
	    # is defined, so every record would share the import-time timestamp.
	    # default_factory calls it for each new instance instead.
	    timestamp: datetime = field(default_factory=datetime.now)

	    @property
	    def input_cost(self) -> float:
	        """Cost of the input tokens (price is per 1M tokens)."""
	        return (self.input_tokens / 1_000_000) * TokenCosts.GPT41

	    @property
	    def output_cost(self) -> float:
	        """Cost of the output tokens (price is per 1M tokens)."""
	        return (self.output_tokens / 1_000_000) * TokenCosts.GPT41_OUTPUT

	    @property
	    def total_cost(self) -> float:
	        """Sum of ``input_cost`` and ``output_cost``."""
	        return self.input_cost + self.output_cost

	class CostTracker:
	    """Tracks costs for Azure OpenAI and Document Intelligence usage.

	    Two parallel sets of counters are kept: ``current_file_costs`` (cleared
	    by ``reset_current_file``) and ``total_costs`` (never cleared by this
	    class). Token prices are read from ``self.token_costs`` (per 1M tokens)
	    and page prices from ``self.di_costs`` (per 1,000 pages).
	    """

	    def __init__(self):
	        self.token_costs = TokenCosts()
	        self.di_costs = DocumentIntelligenceCosts()
	        self.current_file_costs = self._empty_counters()
	        self.total_costs = self._empty_counters()
	        self.llm_input_tokens = 0
	        self.llm_output_tokens = 0
	        self.di_pages = 0
	        self.llm_calls: List[LLMCall] = []  # Track individual LLM calls

	    @staticmethod
	    def _empty_counters() -> Dict:
	        """Return a fresh, zeroed counter dict (one per scope: file / total)."""
	        return {
	            "input_tokens": 0,
	            "output_tokens": 0,
	            "di_pages": 0,
	            "di_operations": {},  # operation name -> pages
	        }

	    def _record_tokens(self, input_tokens, output_tokens):
	        """Add token counts to both the current-file and total counters."""
	        for counters in (self.current_file_costs, self.total_costs):
	            counters["input_tokens"] += input_tokens
	            counters["output_tokens"] += output_tokens

	    def _record_di_pages(self, operation, pages):
	        """Add pages for one DI operation to both counter sets.

	        Each scope is updated independently with ``.get(..., 0)``. The
	        previous code zeroed the *total* entry whenever the operation was
	        new to the *current file*, which wiped cumulative totals for that
	        operation after every ``reset_current_file``.
	        """
	        for counters in (self.current_file_costs, self.total_costs):
	            per_operation = counters["di_operations"]
	            per_operation[operation] = per_operation.get(operation, 0) + pages
	            counters["di_pages"] += pages

	    def _build_cost_report(self, counters) -> Dict:
	        """Price one counter dict and return the nested cost report."""
	        input_cost = (counters["input_tokens"] / 1_000_000) * self.token_costs.GPT41
	        output_cost = (counters["output_tokens"] / 1_000_000) * self.token_costs.GPT41_OUTPUT

	        operations = {}
	        di_total = 0
	        for operation, pages in counters["di_operations"].items():
	            # Unknown operation names fall back to the READ price.
	            rate = getattr(self.di_costs, operation.upper(), self.di_costs.READ)
	            cost = (pages / 1000) * rate
	            operations[operation] = {"pages": pages, "cost": cost}
	            di_total += cost

	        return {
	            "openai": {
	                "input_tokens": counters["input_tokens"],
	                "output_tokens": counters["output_tokens"],
	                "input_cost": input_cost,
	                "output_cost": output_cost,
	                "total_cost": input_cost + output_cost,
	            },
	            "document_intelligence": {
	                "total_pages": counters["di_pages"],
	                "operations": operations,
	                "total_cost": di_total,
	            },
	        }

	    def reset_current_file(self):
	        """Reset costs for current file.

	        Clears ``current_file_costs`` and ``llm_calls``. ``total_costs`` and
	        the ``llm_input_tokens`` / ``llm_output_tokens`` / ``di_pages``
	        running counters are left untouched.
	        """
	        self.current_file_costs = self._empty_counters()
	        self.llm_calls = []  # Reset LLM calls for new file

	    def add_tokens(self, input_tokens: int, output_tokens: int, model: str = "GPT41"):
	        """Add tokens for current file and total.

	        ``model`` is only included in the log message; it does not affect
	        which price is applied.
	        """
	        self._record_tokens(input_tokens, output_tokens)

	        logger.info(f"Added tokens - Input: {input_tokens}, Output: {output_tokens} for model {model}")

	    def add_di_operation(self, operation: str, pages: int):
	        """Add Document Intelligence pages for ``operation`` (file and total)."""
	        self._record_di_pages(operation, pages)

	        logger.info(f"Added DI operation - {operation}: {pages} pages")

	    def calculate_current_file_costs(self) -> Dict:
	        """Calculate costs for current file.

	        Returns:
	            A dict with ``"openai"`` and ``"document_intelligence"``
	            sections. The OpenAI section also carries ``"calls"``: the
	            attribute dict of every recorded ``LLMCall``.
	        """
	        costs = self._build_cost_report(self.current_file_costs)
	        # Include detailed call information
	        costs["openai"]["calls"] = [call.__dict__ for call in self.llm_calls]
	        return costs

	    def get_total_costs(self) -> Dict:
	        """Get total costs across all files.

	        Same shape as ``calculate_current_file_costs`` minus ``"calls"``.
	        ``total_cost`` and the per-operation breakdown are now actually
	        computed; previously they were always returned as ``0`` / ``{}``.
	        """
	        return self._build_cost_report(self.total_costs)

	    def add_llm_tokens(self, input_tokens, output_tokens, description: str = "LLM Call"):
	        """Add tokens for an LLM call with a description."""
	        self.llm_input_tokens += input_tokens
	        self.llm_output_tokens += output_tokens
	        # Also update the current file and total counters
	        self._record_tokens(input_tokens, output_tokens)
	        # Add to LLM calls list
	        self.llm_calls.append(LLMCall(
	            description=description,
	            input_tokens=input_tokens,
	            output_tokens=output_tokens
	        ))

	    def add_di_pages(self, num_pages):
	        """Add Document Intelligence pages, attributed to the LAYOUT operation."""
	        self.di_pages += num_pages
	        # Add to LAYOUT operation by default since we're using layout analysis
	        self._record_di_pages("LAYOUT", num_pages)

	    def total_cost(self):
	        """Calculate total cost using the same pricing as calculate_current_file_costs.

	        Covers the current file only (OpenAI plus Document Intelligence).
	        """
	        costs = self.calculate_current_file_costs()
	        return costs["openai"]["total_cost"] + costs["document_intelligence"]["total_cost"]

	    def get_detailed_costs_table(self) -> pd.DataFrame:
	        """Return a DataFrame of detailed costs.

	        One row per recorded LLM call plus a final ``TOTAL`` row; cost
	        columns are pre-formatted strings. Returns an empty DataFrame when
	        no LLM calls have been recorded.
	        """
	        if not self.llm_calls:
	            return pd.DataFrame()

	        rows = [
	            {
	                'Description': call.description,
	                'Input Tokens': call.input_tokens,
	                'Output Tokens': call.output_tokens,
	                'Input Cost': f"${call.input_cost:.4f}",
	                'Output Cost': f"${call.output_cost:.4f}",
	                'Total Cost': f"${call.total_cost:.4f}"
	            }
	            for call in self.llm_calls
	        ]

	        # Calculate totals
	        total_input = sum(call.input_tokens for call in self.llm_calls)
	        total_output = sum(call.output_tokens for call in self.llm_calls)
	        total_input_cost = sum(call.input_cost for call in self.llm_calls)
	        total_output_cost = sum(call.output_cost for call in self.llm_calls)
	        total_cost = total_input_cost + total_output_cost

	        # Add total row
	        rows.append({
	            'Description': 'TOTAL',
	            'Input Tokens': total_input,
	            'Output Tokens': total_output,
	            'Input Cost': f"${total_input_cost:.4f}",
	            'Output Cost': f"${total_output_cost:.4f}",
	            'Total Cost': f"${total_cost:.4f}"
	        })

	        # Create DataFrame with an explicit column order
	        df = pd.DataFrame(rows)
	        df = df[['Description', 'Input Tokens', 'Output Tokens', 'Input Cost', 'Output Cost', 'Total Cost']]

	        return df