File size: 22,162 Bytes
1ec5d3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce91b95
 
 
 
 
1ec5d3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
 
ce91b95
 
 
 
 
 
 
 
1ec5d3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce91b95
 
 
 
1ec5d3f
 
ce91b95
1ec5d3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce91b95
 
 
 
 
 
1ec5d3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
 
 
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
ce91b95
1ec5d3f
 
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce91b95
1ec5d3f
 
ce91b95
1ec5d3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
 
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce91b95
1ec5d3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
import gradio as gr
import json
import time
from typing import Dict, Tuple, List
from bertmodel import predict_label
# from ecologits import EcoLogits  # Removed - using OpenRouter instead
# from openai import OpenAI  # Removed - using OpenRouter instead
from dotenv import load_dotenv
import os
import requests
import json

# Set environment variable to suppress tokenizers warning
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Load variables from a local .env file (if any) before reading the API key.
load_dotenv()
# Empty string when the variable is unset; the rest of the file treats a
# falsy key as "demo mode" and skips the network call.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
# Model configurations with energy consumption and cost estimates.
# Keys ("large" / "small") are the routing decisions returned by
# ModelRouter.select_model; each entry carries:
#   name                  - display name shown in the UI and logs
#   energy_per_token      - estimated Wh per token
#   cost_per_input_token  - USD per prompt token
#   cost_per_output_token - USD per completion token
#   icon                  - short string rendered next to the model name
MODEL_CONFIGS = {
    "large": {
        "name": "Llama 3.1 405B",
        "energy_per_token": 0.238,  # Wh per token (11.9 Wh / 50 tokens)
        "cost_per_input_token": 0.000003,   # $3/M tokens (OpenRouter pricing)
        "cost_per_output_token": 0.000003,   # $3/M tokens (OpenRouter pricing)
        "icon": "πŸ¦™"
    },
    "small": {
        "name": "Mistral Small 24B",
        "energy_per_token": 0.00596,  # Wh per token (0.298 Wh / 50 tokens)
        "cost_per_input_token": 0.00000005,   # $0.05/M tokens
        "cost_per_output_token": 0.00000012,  # $0.12/M tokens
        "icon": "⚑"
    }
}

class ModelRouter:
    def __init__(self):
        self.routing_history = []
        print("[INIT] ModelRouter initialized")
    
    def classify_prompt(self, prompt: str) -> str:
        print(f"\n[CLASSIFY] Classifying prompt: '{prompt[:50]}...'")
        label = predict_label(prompt)
        print(f"[CLASSIFY] ModernBERT returned label: '{label}'")
        return label
    
    def select_model(self, prompt: str) -> str:
        """Select the most efficient model based on prompt classification."""
        prompt_type = self.classify_prompt(prompt)
        # Normalize
        key = prompt_type.strip().lower()
        print(f"[SELECT] Normalized label: '{key}'")

        # Map normalized labels to actual MODEL_CONFIGS keys
        if "small" in key:
            print(f"[SELECT] Selected: SMALL model (Mistral Small 24B)")
            return "small"
        else:
            print(f"[SELECT] Selected: LARGE model (Claude Opus 4)")
            return "large"
    

    def estimate_tokens(self, 
                        prompt: str, 
                        response: str | None = None,
                        max_response_tokens: int | None = None) -> int:
        """
        Estimate total token count: exact prompt tokens + 
        a target number of response tokens.
        """
        # Simple estimation: 4 characters = 1 token
        prompt_tokens = len(prompt) // 4
        print(f"[TOKENS] Prompt tokens: {prompt_tokens} (from {len(prompt)} chars)")

        if response is not None:
            response_tokens = len(response) // 4
        elif max_response_tokens is not None:
            # you’re reserving this many tokens for the model’s reply
            response_tokens = max_response_tokens
        else:
            # Estimate response will be similar length to prompt
            response_tokens = prompt_tokens

        total_tokens = prompt_tokens + response_tokens
        print(f"[TOKENS] Response tokens: {response_tokens}, Total: {total_tokens}")
        return total_tokens
    
    def estimate_large_model_energy(self, tokens: int) -> float:
        """
        Estimate large model energy consumption based on tokens.
        Using empirical estimates for energy consumption.
        """
        large_config = MODEL_CONFIGS["large"]
        return tokens * large_config["energy_per_token"]
    
    def calculate_savings(self, selected_model: str, prompt: str, response: str = None) -> Dict:
        """Calculate energy and cost savings compared to using the large model"""
        print(f"[SAVINGS] Calculating for model: {selected_model}")
        
        # Calculate input and output tokens separately
        input_tokens = max(1, len(prompt) // 4)  # Minimum 1 token
        
        if response:
            # Use actual response length if available
            output_tokens = max(1, len(response) // 4)
        else:
            # Estimate if no response yet (for preview)
            output_tokens = max(10, input_tokens)  # Assume at least 10 tokens response
        
        total_tokens = input_tokens + output_tokens
        
        print(f"[SAVINGS] Input tokens: {input_tokens}, Output tokens: {output_tokens}")
        
        selected_config = MODEL_CONFIGS[selected_model]
        large_config = MODEL_CONFIGS["large"]
        
        # Calculate actual usage
        actual_energy = total_tokens * selected_config["energy_per_token"]
        actual_cost = (input_tokens * selected_config["cost_per_input_token"] + 
                      output_tokens * selected_config["cost_per_output_token"])
        
        # Calculate large model usage
        large_energy = self.estimate_large_model_energy(total_tokens)
        large_cost = (input_tokens * large_config["cost_per_input_token"] + 
                     output_tokens * large_config["cost_per_output_token"])
        
        # Calculate savings (only positive if small model is selected)
        if selected_model == "small":
            energy_saved = large_energy - actual_energy
            cost_saved = large_cost - actual_cost
            energy_saved_percent = (energy_saved / large_energy) * 100 if large_energy > 0 else 0
            cost_saved_percent = (cost_saved / large_cost) * 100 if large_cost > 0 else 0
        else:
            # No savings if using the large model
            energy_saved = 0
            cost_saved = 0
            energy_saved_percent = 0
            cost_saved_percent = 0
        
        print(f"[SAVINGS] Selected: {selected_model}")
        print(f"[SAVINGS] Actual energy: {actual_energy:.4f} Wh, Large energy: {large_energy:.4f} Wh")
        print(f"[SAVINGS] Actual cost: ${actual_cost:.8f}, Large cost: ${large_cost:.8f}")
        print(f"[SAVINGS] Energy saved: {energy_saved:.4f} Wh ({energy_saved_percent:.1f}%)")
        print(f"[SAVINGS] Cost saved: ${cost_saved:.8f} ({cost_saved_percent:.1f}%)")
        
        return {
            "selected_model": selected_config["name"],
            "tokens": total_tokens,
            "actual_energy": actual_energy,
            "actual_cost": actual_cost,
            "large_energy": large_energy,
            "large_cost": large_cost,
            "energy_saved": energy_saved,
            "cost_saved": cost_saved,
            "energy_saved_percent": energy_saved_percent,
            "cost_saved_percent": cost_saved_percent,
            "is_large_model": selected_model == "large"  # Add flag for template
        }

# Build the single module-level router instance that the request handlers
# below (process_message, get_statistics) read and update.
print("[STARTUP] Initializing ModelRouter...")
router = ModelRouter()
print("[STARTUP] ModelRouter ready")
print(f"[STARTUP] Available models: {list(MODEL_CONFIGS.keys())}")
# Report only whether a key is configured, never the key itself.
print(f"[STARTUP] OpenRouter API Key: {'SET' if OPENROUTER_API_KEY else 'NOT SET'}")

# Upper bound (seconds) on a single OpenRouter call, so that a stalled
# connection cannot block the UI handler forever.
_OPENROUTER_TIMEOUT_SECONDS = 120


def process_message(message: str, history: List[List[str]]) -> Tuple[str, str, str]:
    """Process the user message and return response with savings info.

    The message is routed to a model size, sent to OpenRouter (or answered
    with a placeholder when no API key is configured), and the outcome is
    recorded in ``router.routing_history``.

    Args:
        message: The user's prompt.
        history: Chat history supplied by the UI. It is accepted for
            interface compatibility but not used; only ``message`` is sent
            to the model.

    Returns:
        A ``(response, model_info, savings_info)`` tuple: the answer with a
        small "answered by" footer, an HTML card naming the chosen model,
        and an HTML card with either the savings (small model) or the
        consumption (large model).

    API failures never propagate: non-200 responses, unexpected payloads and
    exceptions (including timeouts) are turned into an error text that is
    returned as the answer.
    """
    print(f"\n{'='*60}")
    print(f"[PROCESS] New message received: '{message[:100]}...'")

    # Route to appropriate model
    selected_model = router.select_model(message)
    model_config = MODEL_CONFIGS[selected_model]
    print(f"[PROCESS] Using model config: {model_config['name']}")

    # Initial savings estimate (will be recalculated after getting response)
    print(f"[PROCESS] Calculating initial savings estimate...")
    initial_savings = router.calculate_savings(selected_model, message)
    print(f"[PROCESS] Initial estimate: {initial_savings['energy_saved_percent']:.1f}% energy, {initial_savings['cost_saved_percent']:.1f}% cost")

    # Routing decision -> OpenRouter model identifier.
    open_router_model_dict = {
        "large": "meta-llama/llama-3.1-405b-instruct",
        "small": "mistralai/mistral-small-24b-instruct-2501"
    }
    # Check if API key is available
    if not OPENROUTER_API_KEY:
        print(f"[API] No OpenRouter API key found - running in DEMO MODE")
        answer = f"[Demo Mode] This would be a response from {model_config['name']} to: {message[:50]}..."
    else:
        # Never echo any part of the secret into the logs.
        print("[API] OpenRouter API key found")
        try:
            model_id = open_router_model_dict[selected_model]
            print(f"[API] Calling OpenRouter with model: {model_id}")

            request_data = {
                "model": model_id,
                "messages": [
                    {
                        "role": "user",
                        "content": message
                    }
                ]
            }
            print(f"[API] Request data: {json.dumps(request_data, indent=2)[:200]}...")

            response = requests.post(
                url="https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                    "Content-Type": "application/json"
                },
                data=json.dumps(request_data),
                # Without a timeout requests waits indefinitely; a timeout
                # raises and is reported by the except branch below.
                timeout=_OPENROUTER_TIMEOUT_SECONDS
            )

            # Debug: print response status and content
            print(f"[API] Response Status Code: {response.status_code}")
            print(f"[API] Response Headers: {dict(response.headers)}")

            if response.status_code != 200:
                print(f"[API ERROR] Full response: {response.text}")
                answer = f"[API Error {response.status_code}] {response.text[:200]}..."
            else:
                data = response.json()
                print(f"[API] Response keys: {list(data.keys())}")

                if "choices" in data and len(data["choices"]) > 0:
                    answer = data["choices"][0]["message"]["content"]
                    print(f"[API] Successfully got response: {answer[:100]}...")
                else:
                    print(f"[API ERROR] Unexpected response format: {json.dumps(data, indent=2)}")
                    answer = f"[Error] Unexpected response format from OpenRouter API"
        except Exception as e:
            # Boundary handler: the UI must always get some answer back, so
            # log the full traceback and surface the error as text.
            print(f"[API EXCEPTION] Error type: {type(e).__name__}")
            print(f"[API EXCEPTION] Error message: {str(e)}")
            import traceback
            print(f"[API EXCEPTION] Traceback:\n{traceback.format_exc()}")
            answer = f"[Error] Failed to get response from {model_config['name']}. Error: {str(e)}"

    # Recalculate savings with actual response
    print(f"[PROCESS] Recalculating savings with actual response...")
    savings = router.calculate_savings(selected_model, message, answer)
    print(f"[PROCESS] Final savings: {savings['energy_saved_percent']:.1f}% energy, {savings['cost_saved_percent']:.1f}% cost")

    # Format the response with model info
    response = f"{answer}\n\n<div style='background: #f0f9ff; border-left: 3px solid #0ea5e9; padding: 8px 12px; margin-top: 10px; border-radius: 4px;'><small style='color: #0369a1; font-weight: 500;'>{model_config['icon']} Answered by {model_config['name']}</small></div>"

    # Format model info
    model_info = f"""
<div style="background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); padding: 20px; border-radius: 12px; margin-bottom: 20px;">
    <div style="display: flex; align-items: center; margin-bottom: 10px;">
        <span style="font-size: 2em; margin-right: 10px;">{model_config['icon']}</span>
        <h3 style="margin: 0; color: #2c3e50;">{model_config['name']}</h3>
    </div>
    <p style="color: #5a6c7d; margin: 5px 0;">Optimal model selected for your query</p>
</div>
"""

    # Format savings information with conditional display based on model
    if savings['is_large_model']:
        # Show actual consumption for large model with warning colors
        savings_info = f"""
<div style="background: #ffffff; border: 1px solid #fed7aa; border-radius: 12px; padding: 20px;">
    <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
        <div>
            <p style="color: #8795a1; margin: 0; font-size: 0.9em;">πŸ”₯ Energy Consumption</p>
            <p style="color: #ea580c; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
                {savings['actual_energy']:.1f} Wh
            </p>
            <p style="color: #7c2d12; font-size: 0.85em; margin: 0;">
                High energy usage
            </p>
        </div>
        <div>
            <p style="color: #8795a1; margin: 0; font-size: 0.9em;">πŸ’Έ Cost Impact</p>
            <p style="color: #dc2626; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
                ${savings['actual_cost']:.6f}
            </p>
            <p style="color: #991b1b; font-size: 0.85em; margin: 0;">
                Premium pricing
            </p>
        </div>
    </div>
</div>
"""
    else:
        # Show savings for small model with positive colors
        savings_info = f"""
<div style="background: #ffffff; border: 1px solid #e1e8ed; border-radius: 12px; padding: 20px;">
    <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
        <div>
            <p style="color: #8795a1; margin: 0; font-size: 0.9em;">⚑ Energy Efficiency</p>
            <p style="color: #22c55e; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
                {savings['energy_saved_percent']:.1f}% saved
            </p>
            <p style="color: #5a6c7d; font-size: 0.85em; margin: 0;">
                {savings['energy_saved']:.1f} Wh reduction
            </p>
            <p style="color: #8795a1; font-size: 0.75em; margin: 3px 0 0 0; font-style: italic;">
                vs. using large model
            </p>
        </div>
        <div>
            <p style="color: #8795a1; margin: 0; font-size: 0.9em;">πŸ’° Cost Optimization</p>
            <p style="color: #3b82f6; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
                {savings['cost_saved_percent']:.1f}% saved
            </p>
            <p style="color: #5a6c7d; font-size: 0.85em; margin: 0;">
                ${savings['cost_saved']:.8f} reduction
            </p>
            <p style="color: #8795a1; font-size: 0.75em; margin: 3px 0 0 0; font-style: italic;">
                vs. using large model
            </p>
        </div>
    </div>
</div>
"""

    # Add to routing history (consumed by get_statistics)
    router.routing_history.append({
        "timestamp": time.time(),
        "prompt": message,
        "model": selected_model,
        "savings": savings
    })

    print(f"[PROCESS] Response formatted, returning to UI")
    print(f"{'='*60}\n")

    return response, model_info, savings_info

def get_statistics() -> str:
    """Render the cumulative impact dashboard as an HTML fragment.

    Reads ``router.routing_history``. With no recorded queries a placeholder
    card is returned; otherwise the card shows the summed energy and cost
    savings and how often each model size was chosen.
    """
    history = router.routing_history

    # Nothing recorded yet: show the placeholder card.
    if not history:
        return """
<div style="background: #f8fafc; border-radius: 12px; padding: 30px; text-align: center; color: #64748b;">
    <p style="margin: 0;">No queries processed yet</p>
    <p style="margin: 10px 0 0 0; font-size: 0.9em;">πŸ’¬ Start a conversation to see your impact metrics</p>
</div>
"""

    # Totals across every recorded query.
    per_query_savings = [record["savings"] for record in history]
    energy_total = sum([item["energy_saved"] for item in per_query_savings])
    cost_total = sum([item["cost_saved"] for item in per_query_savings])

    # How often each route was taken.
    routes = [record["model"] for record in history]
    small_uses = routes.count("small")
    large_uses = routes.count("large")

    return f"""
<div style="background: #ffffff; border: 1px solid #e2e8f0; border-radius: 12px; padding: 25px;">
    <div style="text-align: center; margin-bottom: 20px;">
        <h4 style="color: #1e293b; font-size: 1.1em; margin: 0; font-weight: 600;">🌍 Your Total Impact</h4>
    </div>
    
    <div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 15px; margin-bottom: 15px;">
        <div style="background: #f0fdf4; border-radius: 8px; padding: 15px; text-align: center;">
            <p style="color: #166534; font-size: 0.9em; margin: 0;">🌱 Energy Saved</p>
            <p style="color: #15803d; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
                {energy_total:.1f}
            </p>
            <p style="color: #166534; font-size: 0.8em; margin: 0;">Wh</p>
        </div>
        
        <div style="background: #eff6ff; border-radius: 8px; padding: 15px; text-align: center;">
            <p style="color: #1e40af; font-size: 0.9em; margin: 0;">πŸ’΅ Money Saved</p>
            <p style="color: #2563eb; font-size: 1.5em; font-weight: bold; margin: 5px 0;">
                ${cost_total:.6f}
            </p>
            <p style="color: #1e40af; font-size: 0.8em; margin: 0;">USD</p>
        </div>
    </div>
    
    <div style="background: #fefce8; border-radius: 8px; padding: 12px; text-align: center;">
        <p style="color: #713f12; font-size: 0.9em; margin: 0;">
            <span style="font-weight: 600;">Model Usage:</span> Small model {small_uses}x, Large model {large_uses}x
        </p>
    </div>
</div>
"""

# Custom CSS for a more professional look.
# Passed to gr.Blocks(css=...) below: sets the font stack on the app
# container and adds padding / rounded corners to elements with the
# "message" class.
custom_css = """
.gradio-container {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Helvetica', 'Arial', sans-serif;
}
.message {
    padding: 12px 16px !important;
    border-radius: 8px !important;
}
"""

# Create Gradio interface.
# Layout: a title row, then a two-column row (chat on the left; model,
# savings and cumulative-stats panels on the right), then a footer row.
with gr.Blocks(
    title="Do I really need a huge LLM?", 
    theme=gr.themes.Base(
        primary_hue="blue",
        secondary_hue="gray",
        neutral_hue="gray",
        font=["Inter", "system-ui", "sans-serif"]
    ),
    css=custom_css
) as demo:
    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("""
            <div style="margin-bottom: 30px;">
                <h1 style="margin: 0; font-size: 2em; font-weight: 600; color: #0f172a;">
                    πŸ€” Do I *really* need a huge LLM?
                </h1>
                <p style="margin: 10px 0 0 0; color: #64748b; font-size: 1.1em;">
                    Let's find out! This tool automatically routes your queries to the right-sized model. 🎯
                </p>
            </div>
            """)
    
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                container=True,
                elem_classes=["chat-container"]
            )
            
            with gr.Row():
                msg = gr.Textbox(
                    placeholder="πŸ’­ Type your message here...",
                    show_label=False,
                    scale=9,
                    container=False,
                    elem_classes=["message-input"]
                )
                submit = gr.Button(
                    "Send πŸš€",
                    variant="primary",
                    scale=1,
                    min_width=100
                )
        
        with gr.Column(scale=2):
            # Model selection display
            model_display = gr.HTML(
                value="""
                <div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
                    <p style="margin: 0;">πŸ€– Model selection will appear here</p>
                </div>
                """,
                label="Selected Model"
            )
            
            # Savings metrics
            savings_display = gr.HTML(
                value="""
                <div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
                    <p style="margin: 0;">πŸ“Š Efficiency metrics will appear here</p>
                </div>
                """,
                label="Efficiency Metrics"
            )
            
            # Cumulative stats
            stats_display = gr.HTML(
                value=get_statistics(),
                label="Your Impact Dashboard"
            )
    
    # Footer with minimal info
    with gr.Row():
        gr.Markdown("""
        <div style="margin-top: 40px; padding-top: 20px; border-top: 1px solid #e2e8f0; text-align: center; color: #94a3b8; font-size: 0.85em;">
            <p style="margin: 5px 0;">πŸ” Comparing small vs large model efficiency β€’ πŸ“ˆ Real-time tracking β€’ 🌎 Environmental impact monitoring</p>
        </div>
        """)
    
    def respond(message, chat_history):
        """Handle one submitted message and refresh every output component.

        Returns, in order: the new textbox value (always cleared), the
        updated chat history, the model card HTML, the savings card HTML and
        the cumulative statistics HTML.
        """
        if chat_history is None:
            chat_history = []
        # An empty or whitespace-only submission must not trigger the
        # classifier or a billable API call; leave the panels untouched.
        if not message or not message.strip():
            return "", chat_history, gr.update(), gr.update(), gr.update()
        response, model_info, savings = process_message(message, chat_history)
        chat_history.append((message, response))
        return "", chat_history, model_info, savings, get_statistics()
    
    # Enter in the textbox and the Send button trigger the same handler.
    msg.submit(respond, [msg, chatbot], [msg, chatbot, model_display, savings_display, stats_display])
    submit.click(respond, [msg, chatbot], [msg, chatbot, model_display, savings_display, stats_display])
    
    # Clear button functionality.
    # NOTE(review): no component in this block is wired to clear_chat yet.
    def clear_chat():
        """Return reset values: empty chat, placeholder cards, current stats."""
        return None, """
        <div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
            <p style="margin: 0;">Model selection will appear here</p>
        </div>
        """, """
        <div style="background: #f8fafc; border-radius: 12px; padding: 20px; text-align: center; color: #64748b;">
            <p style="margin: 0;">Efficiency metrics will appear here</p>
        </div>
        """, get_statistics()
    
    # Add clear functionality to the Enter key.
    # (respond already returns "" for the textbox, so this second listener
    # is redundant but harmless.)
    msg.submit(lambda: "", outputs=[msg])

if __name__ == "__main__":
    # Print a start-up summary (environment, configured models, API key
    # presence) and then start the Gradio server without a public share link.
    rule = "=" * 60
    print("\n" + rule)
    print("      DO I REALLY NEED A HUGE LLM? - STARTUP")
    print(rule)
    print("[LAUNCH] Starting Gradio app...")
    parallelism = os.environ.get('TOKENIZERS_PARALLELISM')
    print("[LAUNCH] Environment: TOKENIZERS_PARALLELISM=" + str(parallelism))
    print("[LAUNCH] Models configured:")
    for size_key in MODEL_CONFIGS:
        entry = MODEL_CONFIGS[size_key]
        print(f"         - {size_key}: {entry['name']} ({entry['icon']})")
    if OPENROUTER_API_KEY:
        key_status = 'βœ“ SET'
    else:
        key_status = 'βœ— NOT SET (Demo Mode)'
    print(f"[LAUNCH] OpenRouter API Key: {key_status}")
    print(rule + "\n")
    demo.launch(share=False)