File size: 8,910 Bytes
966ffcd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
"""Cost tracking service for Azure OpenAI and Document Intelligence."""

import logging
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, Optional, List

import pandas as pd

logger = logging.getLogger(__name__)

@dataclass
class TokenCosts:
    """Azure OpenAI prices, in USD per 1M tokens, for the GPT-4.1 model.

    The values are divided by 1,000,000 and multiplied by a token count
    wherever a cost is derived, so they must stay expressed per million.
    """
    # USD per 1M *input* (prompt) tokens for GPT-4.1.
    GPT41: float = 2.0
    # USD per 1M *output* (completion) tokens for GPT-4.1.
    GPT41_OUTPUT: float = 8.0

@dataclass
class DocumentIntelligenceCosts:
    """Document Intelligence prices, in USD per 1,000 pages.

    Each attribute name is the upper-cased name of an operation; the
    value is what 1,000 pages of that operation cost.
    """
    # READ operation: 1.50 USD per 1,000 pages.
    READ: float = 1.5
    # LAYOUT operation: 10 USD per 1,000 pages.
    LAYOUT: float = 10.0
    # CUSTOM operation: 30 USD per 1,000 pages.
    CUSTOM: float = 30.0

@dataclass
class LLMCall:
    """Represents a single LLM call with its details.

    Attributes:
        description: Free-text label identifying the call.
        input_tokens: Number of input tokens consumed by the call.
        output_tokens: Number of output tokens produced by the call.
        timestamp: When the record was created; defaults to the moment
            the instance is constructed.
    """
    description: str
    input_tokens: int
    output_tokens: int
    # A default_factory is required here: a plain ``datetime.now()`` default
    # would be evaluated once, when the class is defined, and every record
    # would then share that single import-time timestamp.
    timestamp: datetime = field(default_factory=datetime.now)

    @property
    def input_cost(self) -> float:
        """Cost of the input tokens, priced at ``TokenCosts.GPT41`` per 1M."""
        return (self.input_tokens / 1_000_000) * TokenCosts.GPT41

    @property
    def output_cost(self) -> float:
        """Cost of the output tokens, priced at ``TokenCosts.GPT41_OUTPUT`` per 1M."""
        return (self.output_tokens / 1_000_000) * TokenCosts.GPT41_OUTPUT

    @property
    def total_cost(self) -> float:
        """Sum of ``input_cost`` and ``output_cost``."""
        return self.input_cost + self.output_cost

class CostTracker:
    """Tracks costs for Azure OpenAI and Document Intelligence usage.

    Usage is recorded twice: in ``current_file_costs``, which
    ``reset_current_file`` clears, and in ``total_costs``, which keeps
    accumulating for the lifetime of the tracker. Both are dicts with the
    keys ``input_tokens``, ``output_tokens``, ``di_pages`` and
    ``di_operations`` (operation name -> page count).
    """

    def __init__(self):
        """Create a tracker with default prices and zeroed counters."""
        self.token_costs = TokenCosts()
        self.di_costs = DocumentIntelligenceCosts()
        self.current_file_costs = self._empty_usage()
        self.total_costs = self._empty_usage()
        # Running counters that are only fed by add_llm_tokens / add_di_pages
        # and are never reset.
        self.llm_input_tokens = 0
        self.llm_output_tokens = 0
        self.di_pages = 0
        self.llm_calls: List[LLMCall] = []  # Track individual LLM calls

    @staticmethod
    def _empty_usage() -> Dict:
        """Return a fresh, zeroed usage record (new dicts on every call)."""
        return {
            "input_tokens": 0,
            "output_tokens": 0,
            "di_pages": 0,
            "di_operations": {},  # Track different DI operations
        }

    def _record_tokens(self, input_tokens: int, output_tokens: int) -> None:
        """Add token counts to both the current-file and the total record."""
        for usage in (self.current_file_costs, self.total_costs):
            usage["input_tokens"] += input_tokens
            usage["output_tokens"] += output_tokens

    def _record_di_pages(self, operation: str, pages: int) -> None:
        """Add DI pages for ``operation`` to both usage records.

        Each record is updated independently, so an operation that is new
        to the current file never resets its already-accumulated total.
        """
        for usage in (self.current_file_costs, self.total_costs):
            per_operation = usage["di_operations"]
            per_operation[operation] = per_operation.get(operation, 0) + pages
            usage["di_pages"] += pages

    def _build_cost_report(self, usage: Dict) -> Dict:
        """Turn a usage record into the nested cost report returned to callers."""
        input_cost = (usage["input_tokens"] / 1_000_000) * self.token_costs.GPT41
        output_cost = (usage["output_tokens"] / 1_000_000) * self.token_costs.GPT41_OUTPUT

        operations = {}
        di_total = 0
        for operation, pages in usage["di_operations"].items():
            # Operation names map to the upper-cased price attribute; an
            # unknown operation is priced at the READ rate.
            rate = getattr(self.di_costs, operation.upper(), self.di_costs.READ)
            cost = (pages / 1000) * rate
            operations[operation] = {"pages": pages, "cost": cost}
            di_total += cost

        return {
            "openai": {
                "input_tokens": usage["input_tokens"],
                "output_tokens": usage["output_tokens"],
                "input_cost": input_cost,
                "output_cost": output_cost,
                "total_cost": input_cost + output_cost,
            },
            "document_intelligence": {
                "total_pages": usage["di_pages"],
                "operations": operations,
                "total_cost": di_total,
            },
        }

    def reset_current_file(self):
        """Reset costs for current file.

        Clears the current-file usage record and the list of LLM calls;
        ``total_costs`` and the running counters are left untouched.
        """
        self.current_file_costs = self._empty_usage()
        self.llm_calls = []  # Reset LLM calls for new file

    def add_tokens(self, input_tokens: int, output_tokens: int, model: str = "GPT41"):
        """Add tokens for current file and total.

        Args:
            input_tokens: Number of input tokens to add.
            output_tokens: Number of output tokens to add.
            model: Only used in the log message; costs are always computed
                with the GPT41 rates.
        """
        self._record_tokens(input_tokens, output_tokens)
        logger.info(
            "Added tokens - Input: %s, Output: %s for model %s",
            input_tokens, output_tokens, model,
        )

    def add_di_operation(self, operation: str, pages: int):
        """Add Document Intelligence pages for a named operation.

        Args:
            operation: Operation name; its upper-cased form selects the
                price in ``calculate_current_file_costs``/``get_total_costs``.
            pages: Number of pages processed by the operation.
        """
        self._record_di_pages(operation, pages)
        logger.info("Added DI operation - %s: %s pages", operation, pages)

    def calculate_current_file_costs(self) -> Dict:
        """Calculate costs for current file.

        Returns:
            A dict with an ``openai`` section (token counts, input/output/
            total cost and a ``calls`` list holding each recorded LLM
            call's attribute dict) and a ``document_intelligence`` section
            (total pages, per-operation pages and cost, total cost).
        """
        costs = self._build_cost_report(self.current_file_costs)
        # Include detailed call information
        costs["openai"]["calls"] = [call.__dict__ for call in self.llm_calls]
        return costs

    def get_total_costs(self) -> Dict:
        """Get total costs across all files.

        Returns:
            The same structure as ``calculate_current_file_costs`` but
            computed from ``total_costs`` and without the ``calls`` list.
        """
        return self._build_cost_report(self.total_costs)

    def add_llm_tokens(self, input_tokens, output_tokens, description: str = "LLM Call"):
        """Add tokens for an LLM call with a description.

        Updates the running counters, the current-file and total usage
        records, and appends an ``LLMCall`` entry to ``llm_calls``.
        """
        self.llm_input_tokens += input_tokens
        self.llm_output_tokens += output_tokens
        self._record_tokens(input_tokens, output_tokens)
        self.llm_calls.append(LLMCall(
            description=description,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
        ))

    def add_di_pages(self, num_pages):
        """Add Document Intelligence pages, booked under the LAYOUT operation."""
        self.di_pages += num_pages
        # Add to LAYOUT operation by default since we're using layout analysis
        self._record_di_pages("LAYOUT", num_pages)

    def total_cost(self):
        """Return the combined OpenAI and Document Intelligence cost of the current file."""
        costs = self.calculate_current_file_costs()
        return costs["openai"]["total_cost"] + costs["document_intelligence"]["total_cost"]

    def get_detailed_costs_table(self) -> pd.DataFrame:
        """Return a DataFrame of detailed costs.

        One row per recorded LLM call plus a final ``TOTAL`` row; cost
        columns are strings formatted as ``$0.0000``. An empty DataFrame
        is returned when no LLM calls have been recorded.
        """
        calls = self.llm_calls
        if not calls:
            return pd.DataFrame()

        columns = ['Description', 'Input Tokens', 'Output Tokens',
                   'Input Cost', 'Output Cost', 'Total Cost']

        rows = [
            {
                'Description': call.description,
                'Input Tokens': call.input_tokens,
                'Output Tokens': call.output_tokens,
                'Input Cost': f"${call.input_cost:.4f}",
                'Output Cost': f"${call.output_cost:.4f}",
                'Total Cost': f"${call.total_cost:.4f}",
            }
            for call in calls
        ]

        # Summary row aggregated over every recorded call
        total_input_cost = sum(call.input_cost for call in calls)
        total_output_cost = sum(call.output_cost for call in calls)
        rows.append({
            'Description': 'TOTAL',
            'Input Tokens': sum(call.input_tokens for call in calls),
            'Output Tokens': sum(call.output_tokens for call in calls),
            'Input Cost': f"${total_input_cost:.4f}",
            'Output Cost': f"${total_output_cost:.4f}",
            'Total Cost': f"${total_input_cost + total_output_cost:.4f}",
        })

        # ``columns`` fixes the column order of the resulting table
        return pd.DataFrame(rows, columns=columns)