File size: 23,016 Bytes
abd9b6a
 
 
ccd0c66
abd9b6a
 
 
ccd0c66
abd9b6a
ccd0c66
 
abd9b6a
 
 
 
58ec961
abd9b6a
 
 
ccd0c66
abd9b6a
 
 
 
 
 
 
ccd0c66
 
 
 
 
b4386ea
abd9b6a
 
ccd0c66
abd9b6a
 
ccd0c66
 
 
abd9b6a
ccd0c66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
abd9b6a
ccd0c66
abd9b6a
ccd0c66
abd9b6a
ccd0c66
 
b4386ea
ccd0c66
 
 
 
 
b4386ea
ccd0c66
 
 
 
b4386ea
ccd0c66
 
 
 
 
 
abd9b6a
 
 
 
 
ccd0c66
58ec961
abd9b6a
58ec961
ccd0c66
58ec961
abd9b6a
ccd0c66
58ec961
abd9b6a
ccd0c66
58ec961
abd9b6a
ccd0c66
58ec961
 
 
 
 
 
 
 
 
 
abd9b6a
 
 
 
ccd0c66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
abd9b6a
 
ccd0c66
abd9b6a
58ec961
ccd0c66
b4386ea
ccd0c66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58ec961
 
abd9b6a
 
ccd0c66
abd9b6a
ccd0c66
abd9b6a
ccd0c66
 
58ec961
ccd0c66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58ec961
ccd0c66
58ec961
ccd0c66
 
 
abd9b6a
 
ccd0c66
 
abd9b6a
 
 
 
ccd0c66
 
abd9b6a
ccd0c66
 
 
 
 
b4386ea
58ec961
ccd0c66
 
 
 
 
 
 
 
 
 
 
 
 
 
abd9b6a
ccd0c66
 
 
 
 
 
 
 
 
 
b4386ea
58ec961
ccd0c66
 
 
 
 
 
 
 
 
 
 
 
 
abd9b6a
ccd0c66
abd9b6a
ccd0c66
abd9b6a
ccd0c66
 
abd9b6a
ccd0c66
abd9b6a
 
 
ccd0c66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
abd9b6a
 
 
 
 
 
 
 
ccd0c66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58ec961
 
abd9b6a
 
ccd0c66
 
 
 
 
 
 
 
 
 
 
 
 
 
abd9b6a
 
58ec961
ccd0c66
abd9b6a
ccd0c66
 
abd9b6a
ccd0c66
 
 
 
58ec961
ccd0c66
 
 
 
 
 
 
 
 
 
 
 
58ec961
 
 
 
 
 
ccd0c66
 
 
 
 
abd9b6a
58ec961
ccd0c66
 
 
 
b4386ea
ff74120
b4386ea
ccd0c66
 
ff74120
 
ccd0c66
 
b4386ea
 
ccd0c66
 
b4386ea
 
ccd0c66
 
b4386ea
 
ccd0c66
 
abd9b6a
 
 
58ec961
ccd0c66
 
 
 
b4386ea
 
 
ccd0c66
 
 
 
 
 
b4386ea
 
ccd0c66
 
b4386ea
 
ccd0c66
 
b4386ea
 
ccd0c66
 
abd9b6a
 
 
ccd0c66
 
 
 
b4386ea
ff74120
b4386ea
ccd0c66
 
 
 
 
 
b4386ea
 
ccd0c66
 
b4386ea
 
ccd0c66
 
b4386ea
 
ccd0c66
 
abd9b6a
 
 
ccd0c66
abd9b6a
ccd0c66
 
b4386ea
ff74120
b4386ea
ccd0c66
ff74120
ccd0c66
 
 
 
b4386ea
 
ccd0c66
 
b4386ea
 
ccd0c66
 
b4386ea
 
ccd0c66
 
abd9b6a
 
 
58ec961
ccd0c66
 
 
 
 
 
 
abd9b6a
58ec961
ccd0c66
58ec961
 
 
b4386ea
ccd0c66
abd9b6a
 
ccd0c66
abd9b6a
ccd0c66
abd9b6a
 
ccd0c66
 
 
 
 
 
 
 
abd9b6a
ccd0c66
 
 
 
 
 
 
 
 
 
 
 
abd9b6a
ccd0c66
 
 
 
 
 
 
 
 
 
 
 
abd9b6a
ccd0c66
 
 
 
 
 
 
 
abd9b6a
58ec961
abd9b6a
 
58ec961
abd9b6a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np

# Page configuration.
# The page icon is the trophy emoji written as a real Unicode character, so
# the browser tab shows an icon instead of a mis-decoded byte sequence.
st.set_page_config(
    page_title="AI Model Leaderboard",
    page_icon="🏆",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS with improved contrast.
# The stylesheet is kept in a named constant so the injection call below stays
# short; it styles the tab strip, buttons, cards, metric labels, badges,
# footer and the per-model card gradients used by the HTML snippets rendered
# elsewhere in this script.
_CUSTOM_CSS = """
<style>
    .main {
        background-color: #f5f7ff;
    }
    .stTabs [data-baseweb="tab-list"] {
        gap: 24px;
    }
    .stTabs [data-baseweb="tab"] {
        height: 50px;
        white-space: pre-wrap;
        background-color: #ffffff;
        border-radius: 8px 8px 0px 0px;
        gap: 1px;
        padding-top: 10px;
        padding-bottom: 10px;
        color: #333333;
    }
    .stTabs [aria-selected="true"] {
        background-color: #4e8df5;
        color: white;
    }
    div[data-testid="stVerticalBlock"] > div:nth-child(1) {
        border-bottom: 3px solid #4e8df5;
        padding-bottom: 10px;
    }
    div[data-testid="stSidebarContent"] > div:nth-child(1) {
        border-bottom: none;
    }
    div.stButton > button:first-child {
        background-color: #4e8df5;
        color: white;
        font-size: 16px;
    }
    .highlight {
        background-color: #ffff99;
        padding: 0px 4px;
        border-radius: 3px;
    }
    .card {
        background-color: #ffffff;
        border-radius: 10px;
        padding: 20px;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        margin-bottom: 20px;
    }
    .metric-title {
        font-size: 16px;
        color: #333333 !important;
        margin-bottom: 5px;
    }
    .metric-value {
        font-size: 30px;
        font-weight: bold;
        color: #333333 !important;
        margin-bottom: 10px;
    }
    .model-badge {
        background-color: #4e8df5;
        color: white !important;
        padding: 4px 12px;
        border-radius: 15px;
        font-weight: bold;
        display: inline-block;
        margin-right: 8px;
        margin-bottom: 8px;
    }
    .footer {
        text-align: center;
        margin-top: 30px;
        padding: 20px;
        border-top: 1px solid #ddd;
        color: #666;
    }
    /* Improved gradients for model cards with better contrast */
    .openella-card {
        background: linear-gradient(135deg, #ffffff 0%, #c9e6ff 100%);
    }
    .minimaid-l1-card {
        background: linear-gradient(135deg, #ffffff 0%, #ffd9b3 100%);
    }
    .minimaid-l2-card {
        background: linear-gradient(135deg, #ffffff 0%, #c9ffc9 100%);
    }
    .minimaid-l3-card {
        background: linear-gradient(135deg, #ffffff 0%, #d9c9ff 100%);
    }
    /* Improved table styles for better contrast */
    .table-text {
        color: #333333 !important;
        font-weight: 500;
    }
    .table-header {
        color: white !important;
        font-weight: bold;
    }
</style>
"""

# Raw HTML must be explicitly allowed for the <style> tag to be emitted.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Title and introduction.
# The leading trophy is a real emoji character (it was previously a
# mis-decoded byte sequence that rendered as stray symbols in the heading).
st.title("🏆 OpenElla & MiniMaid Models Leaderboard")

# Introductory blurb rendered inside a styled "card" container.
st.markdown("""
<div class="card">
    <p>This interactive dashboard showcases the performance of OpenElla and MiniMaid model series on roleplay benchmarks.
    Explore different metrics, compare models, and discover performance insights.</p>
</div>
""", unsafe_allow_html=True)

# Sample leaderboard data (based on the images provided), written one record
# per model so every row can be read and edited in a single place.
_COLUMNS = (
    "Model",
    "Length Score",
    "Character Consistency",
    "Immersion",
    "Overall Score",
    "Parameters (B)",
    "Speed (tokens/s)",
    "Family",
    "Release Date",
    "Description",
)
_RECORDS = (
    ("DeepSeek-RL-3B", 1.0, 1.0, 0.63, 0.88, 3.0, 180, "DeepSeek", "2023-10",
     "General-purpose model with strong instruction following capabilities"),
    ("Dolphin-RL-GGUF", 1.0, 0.83, 0.46, 0.76, 7.0, 75, "Dolphin", "2023-11",
     "Dolphin-based model optimized for roleplay"),
    ("Hermes-3-GGUF", 1.0, 0.83, 0.43, 0.75, 7.0, 70, "Hermes", "2023-12",
     "Fine-tuned Hermes model for creative tasks"),
    ("MiniMaid-L1", 0.9, 0.5, 0.13, 0.51, 1.0, 320, "MiniMaid", "2024-01",
     "Lightweight model optimized for speed and efficiency"),
    ("OpenElla-Llama-3-2B", 1.0, 0.83, 0.67, 0.83, 2.0, 250, "OpenElla", "2024-02",
     "Optimized for roleplay with high character consistency"),
    ("MiniMaid-L2", 1.0, 0.54, 0.6, 0.71, 1.5, 280, "MiniMaid", "2024-03",
     "Improved version with better immersion capabilities"),
    ("MiniMaid-L3", 1.0, 0.54, 0.73, 0.76, 2.5, 220, "MiniMaid", "2024-04",
     "Latest generation with the best immersion scores"),
)

# Pivot the per-model records into a column-name -> list-of-values mapping,
# then build the DataFrame from that mapping.
data = {
    column: list(values)
    for column, values in zip(_COLUMNS, zip(*_RECORDS))
}

df = pd.DataFrame(data)

# Models that belong to "your" series and may be highlighted in the UI.
# No separate flag column is added to the frame; charts colour by the
# existing 'Family' column instead.
your_models = [
    "OpenElla-Llama-3-2B",
    "MiniMaid-L1",
    "MiniMaid-L2",
    "MiniMaid-L3",
]

# Sidebar: every leaderboard control lives here. Widgets are created in the
# same order they should appear on screen.
_sidebar = st.sidebar
_sidebar.markdown("<h2>Leaderboard Controls</h2>", unsafe_allow_html=True)

# Model selection: either every model, or an explicit pick that starts out
# with "your" models pre-selected.
_sidebar.markdown("### Models to Display")
_model_names = list(df["Model"])
all_models = _sidebar.checkbox("All Models", value=True)
selected_models = (
    list(_model_names)
    if all_models
    # The multiselect is only created when "All Models" is unticked.
    else _sidebar.multiselect(
        "Select Models",
        options=_model_names,
        default=your_models,
    )
)

# Metric selection: which score charts are drawn on the leaderboard tab.
_sidebar.markdown("### Metrics to Display")
_score_metrics = ["Length Score", "Character Consistency", "Immersion", "Overall Score"]
selected_metrics = _sidebar.multiselect(
    "Select Metrics",
    options=_score_metrics,
    default=["Overall Score"],
)

# Toggle for visually emphasising "your" models.
highlight_yours = _sidebar.checkbox("Highlight Your Models", value=True)

# Sort options: the column used to order the leaderboard, and its direction.
_sort_columns = [
    "Overall Score",
    "Character Consistency",
    "Immersion",
    "Length Score",
    "Parameters (B)",
    "Speed (tokens/s)",
]
sort_by = _sidebar.selectbox("Sort By", options=_sort_columns, index=0)

ascending = _sidebar.checkbox("Ascending Order", value=False)

# Keep only the selected models, order them by the chosen column, and
# renumber the rows from zero so positional rank matches the sort order.
_is_selected = df["Model"].isin(selected_models)
_sorted_selection = df[_is_selected].sort_values(by=sort_by, ascending=ascending)
filtered_df = _sorted_selection.reset_index(drop=True)

# Create the four top-level tabs. The labels start with real emoji
# characters (they were previously mis-decoded byte sequences).
tab1, tab2, tab3, tab4 = st.tabs([
    "📊 Leaderboard",
    "📈 Performance Charts",
    "🔍 Model Details",
    "📘 About",
])

# Tab 1: Leaderboard
# Shows the ranking table, the overall-score bar chart (when that metric is
# selected) and one bar chart per additional selected metric.
with tab1:
    st.markdown("## 📊 Model Rankings")

    # Score columns shown in the ranking table, in display order.
    table_metrics = ["Overall Score", "Character Consistency", "Immersion", "Length Score"]
    # Bar colours per model family; competitor families share a neutral grey.
    family_colors = {
        "OpenElla": "#4e8df5",
        "MiniMaid": "#f5854e",
        "DeepSeek": "#666666",
        "Dolphin": "#666666",
        "Hermes": "#666666",
    }
    # One fill colour per table row: "your" models get a light-blue
    # background when highlighting is enabled, everything else light grey.
    row_colors = [
        '#e6f7ff' if highlight_yours and model in your_models else '#f0f0f0'
        for model in filtered_df["Model"]
    ]

    # Ranking table rendered with Plotly for better styling control.
    fig = go.Figure(data=[go.Table(
        header=dict(
            values=["Rank", "Model"] + table_metrics,
            fill_color='#4e8df5',
            align='center',
            font=dict(color='white', size=16),
            height=40
        ),
        cells=dict(
            values=[
                # Rank is simply the 1-based position in the sorted frame.
                list(range(1, len(filtered_df) + 1)),
                filtered_df["Model"],
                # Scores are pre-formatted to two decimals for display.
                *(filtered_df[metric].apply(lambda x: f"{x:.2f}") for metric in table_metrics),
            ],
            fill_color=[row_colors],
            align='center',
            font=dict(color='#333333', size=14),
            height=35
        )
    )])

    fig.update_layout(
        margin=dict(l=0, r=0, t=0, b=0),
        # Grow with the number of rows, capped at 500px.
        height=min(100 + len(filtered_df) * 35, 500)
    )

    st.plotly_chart(fig, use_container_width=True)

    # Performance overview
    st.markdown("## 💯 Performance Overview")

    if "Overall Score" in selected_metrics:
        fig = px.bar(
            filtered_df,
            x="Model",
            y="Overall Score",
            color="Family" if highlight_yours else None,
            color_discrete_map=family_colors,
            text_auto='.2f',
            title="Overall Roleplay Performance",
            height=400
        )
        fig.update_traces(textposition='outside')
        fig.update_layout(
            xaxis_title="",
            yaxis_title="Score",
            # Head-room above 1.0 so the outside labels are not clipped.
            yaxis=dict(range=[0, 1.1]),
            plot_bgcolor="white",
            legend_title_text="",
            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5)
        )
        st.plotly_chart(fig, use_container_width=True)

    # Metrics comparison: one column per selected metric other than
    # "Overall Score" (already charted above). Columns are allocated only for
    # the metrics that are actually drawn, so no empty column is left behind
    # and the charts still appear when all four metrics are selected.
    extra_metrics = [metric for metric in selected_metrics if metric != "Overall Score"]
    if extra_metrics:
        cols = st.columns(len(extra_metrics))
        for col, metric in zip(cols, extra_metrics):
            with col:
                fig = px.bar(
                    filtered_df,
                    x="Model",
                    y=metric,
                    color="Family" if highlight_yours else None,
                    color_discrete_map=family_colors,
                    text_auto='.2f',
                    title=f"{metric}",
                    height=350
                )
                fig.update_traces(textposition='outside')
                fig.update_layout(
                    xaxis_title="",
                    yaxis_title="Score",
                    yaxis=dict(range=[0, 1.1]),
                    plot_bgcolor="white",
                    showlegend=False
                )
                st.plotly_chart(fig, use_container_width=True)

# Tab 2: Performance Charts
# Draws a radar chart comparing the selected models, a size-vs-score scatter
# plot, and a heatmap of every score metric.
with tab2:
    st.markdown("## 📈 Performance Charts")

    # Radar chart for model comparison
    st.markdown("### Model Comparison (Radar Chart)")

    fig = go.Figure()

    categories = ["Length Score", "Character Consistency", "Immersion", "Overall Score"]
    # The polygon is closed by repeating the first axis at the end.
    closed_categories = categories + [categories[0]]

    # Add one trace per model; each row is unpacked as (name, *scores) so the
    # frame is walked once instead of being re-filtered for every model.
    for model, *scores in filtered_df[["Model", *categories]].itertuples(index=False, name=None):
        values = scores + scores[:1]

        # Emphasise "your" models only when the sidebar toggle is on, matching
        # how the leaderboard table treats the same toggle.
        is_your_model = highlight_yours and model in your_models
        line_width = 3 if is_your_model else 1.5
        opacity = 0.9 if is_your_model else 0.6

        fig.add_trace(go.Scatterpolar(
            r=values,
            theta=closed_categories,
            fill='toself',
            name=model,
            line=dict(width=line_width),
            opacity=opacity
        ))

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 1]
            )
        ),
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=-0.2, xanchor="center", x=0.5),
        height=600
    )

    st.plotly_chart(fig, use_container_width=True)

    # Scatter plot: Parameters vs Performance
    st.markdown("### Efficiency Analysis")

    fig = px.scatter(
        filtered_df,
        x="Parameters (B)",
        y="Overall Score",
        # Marker area encodes generation speed.
        size="Speed (tokens/s)",
        color="Family",
        hover_name="Model",
        text="Model",
        size_max=40,
        height=500,
        color_discrete_map={"OpenElla": "#4e8df5", "MiniMaid": "#f5854e", "DeepSeek": "#666666", "Dolphin": "#666666", "Hermes": "#666666"}
    )

    fig.update_traces(
        textposition='top center',
        marker=dict(line=dict(width=2, color='DarkSlateGrey')),
    )

    fig.update_layout(
        title="Model Size vs Performance",
        xaxis_title="Parameters (Billions)",
        yaxis_title="Overall Score",
        yaxis=dict(range=[0.4, 1.0]),
        legend_title="Model Family",
        plot_bgcolor="white"
    )

    st.plotly_chart(fig, use_container_width=True)

    # Heatmap of all metrics - improved color scale for better readability
    st.markdown("### Metrics Heatmap")

    metrics = ["Length Score", "Character Consistency", "Immersion", "Overall Score"]
    heatmap_df = filtered_df.set_index("Model")[metrics]

    fig = px.imshow(
        heatmap_df.values,
        x=metrics,
        y=heatmap_df.index,
        color_continuous_scale="Blues",  # Deeper blues for better contrast
        labels=dict(x="Metric", y="Model", color="Score"),
        # Annotates every cell with its value, formatted to two decimals.
        text_auto=".2f",
        height=500
    )

    fig.update_layout(
        xaxis_title="",
        yaxis_title="",
        coloraxis_colorbar=dict(title="Score"),
        plot_bgcolor="white"
    )

    # Ensure text is visible on all cells. Only the font is overridden: the
    # text template produced by text_auto is left in place, because replacing
    # it with "%{text}" (no `text` array is ever set on this trace) would drop
    # the formatted cell values.
    fig.update_traces(
        textfont={"color": "black"}
    )

    st.plotly_chart(fig, use_container_width=True)

# Tab 3: Model Details
# Renders one hand-written HTML card per selected "your" model, followed by a
# compact, data-driven card for every other selected model.
with tab3:
    st.markdown("## 🔍 Model Details")

    # OpenElla card with improved contrast
    # NOTE(review): the badge and description say 3B parameters, while the
    # leaderboard data lists 2.0 for this model and the About tab says 2B -
    # confirm which figure is correct.
    if "OpenElla-Llama-3-2B" in selected_models:
        st.markdown("""
        <div class="card openella-card">
            <h3>OpenElla-Llama-3-2B</h3>
            <div class="model-badge" style="color: white;">OpenElla</div>
            <div class="model-badge" style="color: white;">3B Parameters</div>
            <div class="model-badge" style="color: white;">Released: February 2024</div>
            <hr>
            <p>OpenElla-Llama-3-2B is optimized for roleplay with excellent character consistency
            and good immersion capabilities. Built on the Llama 3.2 architecture, this model
            delivers impressively balanced performance despite its compact 3B parameter size.</p>
            <div style="display: flex; margin-top: 15px;">
                <div style="flex: 1; text-align: center;">
                    <div class="metric-title" style="color: #333333;">Overall Score</div>
                    <div class="metric-value" style="color: #333333;">0.83</div>
                </div>
                <div style="flex: 1; text-align: center;">
                    <div class="metric-title" style="color: #333333;">Character Consistency</div>
                    <div class="metric-value" style="color: #333333;">0.83</div>
                </div>
                <div style="flex: 1; text-align: center;">
                    <div class="metric-title" style="color: #333333;">Immersion</div>
                    <div class="metric-value" style="color: #333333;">0.67</div>
                </div>
            </div>
        </div>
        """, unsafe_allow_html=True)

    # MiniMaid model cards with improved contrast
    if "MiniMaid-L1" in selected_models:
        st.markdown("""
        <div class="card minimaid-l1-card">
            <h3>MiniMaid-L1</h3>
            <div class="model-badge" style="color: white;">MiniMaid</div>
            <div class="model-badge" style="color: white;">1B Parameters</div>
            <div class="model-badge" style="color: white;">Released: January 2024</div>
            <hr>
            <p>MiniMaid-L1 is the first generation of the MiniMaid series, designed for maximum speed and efficiency.
            With only 1B parameters, it's optimized for low-resource environments while still maintaining
            good length handling capabilities.</p>
            <div style="display: flex; margin-top: 15px;">
                <div style="flex: 1; text-align: center;">
                    <div class="metric-title" style="color: #333333;">Overall Score</div>
                    <div class="metric-value" style="color: #333333;">0.51</div>
                </div>
                <div style="flex: 1; text-align: center;">
                    <div class="metric-title" style="color: #333333;">Character Consistency</div>
                    <div class="metric-value" style="color: #333333;">0.50</div>
                </div>
                <div style="flex: 1; text-align: center;">
                    <div class="metric-title" style="color: #333333;">Speed</div>
                    <div class="metric-value" style="color: #333333;">320 t/s</div>
                </div>
            </div>
        </div>
        """, unsafe_allow_html=True)

    # NOTE(review): the badge says 1B parameters, while the leaderboard data
    # lists 1.5 for MiniMaid-L2 - confirm which figure is correct.
    if "MiniMaid-L2" in selected_models:
        st.markdown("""
        <div class="card minimaid-l2-card">
            <h3>MiniMaid-L2</h3>
            <div class="model-badge" style="color: white;">MiniMaid</div>
            <div class="model-badge" style="color: white;">1B Parameters</div>
            <div class="model-badge" style="color: white;">Released: March 2024</div>
            <hr>
            <p>MiniMaid-L2 represents a significant improvement over L1, with enhanced immersion capabilities
            and better overall roleplay performance. The model retains excellent efficiency while delivering
            more engaging and consistent character portrayals.</p>
            <div style="display: flex; margin-top: 15px;">
                <div style="flex: 1; text-align: center;">
                    <div class="metric-title" style="color: #333333;">Overall Score</div>
                    <div class="metric-value" style="color: #333333;">0.71</div>
                </div>
                <div style="flex: 1; text-align: center;">
                    <div class="metric-title" style="color: #333333;">Immersion</div>
                    <div class="metric-value" style="color: #333333;">0.60</div>
                </div>
                <div style="flex: 1; text-align: center;">
                    <div class="metric-title" style="color: #333333;">Speed</div>
                    <div class="metric-value" style="color: #333333;">280 t/s</div>
                </div>
            </div>
        </div>
        """, unsafe_allow_html=True)

    # NOTE(review): the badge and description say 1B parameters, while the
    # leaderboard data lists 2.5 for MiniMaid-L3 - confirm which is correct.
    if "MiniMaid-L3" in selected_models:
        st.markdown("""
        <div class="card minimaid-l3-card">
            <h3>MiniMaid-L3</h3>
            <div class="model-badge" style="color: white;">MiniMaid</div>
            <div class="model-badge" style="color: white;">1B Parameters</div>
            <div class="model-badge" style="color: white;">Released: April 2024</div>
            <hr>
            <p>MiniMaid-L3 is the latest and most advanced model in the MiniMaid series. With 1B parameters,
            it achieves the highest immersion score of all models while maintaining excellent length handling.
            This model represents the pinnacle of the MiniMaid series' development.</p>
            <div style="display: flex; margin-top: 15px;">
                <div style="flex: 1; text-align: center;">
                    <div class="metric-title" style="color: #333333;">Overall Score</div>
                    <div class="metric-value" style="color: #333333;">0.76</div>
                </div>
                <div style="flex: 1; text-align: center;">
                    <div class="metric-title" style="color: #333333;">Immersion</div>
                    <div class="metric-value" style="color: #333333;">0.73</div>
                </div>
                <div style="flex: 1; text-align: center;">
                    <div class="metric-title" style="color: #333333;">Length Score</div>
                    <div class="metric-value" style="color: #333333;">1.00</div>
                </div>
            </div>
        </div>
        """, unsafe_allow_html=True)

    # Other models with improved contrast: every selected model that is not
    # one of "your" models gets a compact card built from the data table.
    other_models = [m for m in selected_models if m not in your_models]
    if other_models:
        st.markdown("### Other Models")
        # At most three cards per row; cards wrap around the columns.
        n_cols = min(3, len(other_models))
        cols = st.columns(n_cols)
        for i, model in enumerate(other_models):
            # First (and expected only) data row for this model name.
            model_data = df[df["Model"] == model].iloc[0]
            with cols[i % n_cols]:
                st.markdown(f"""
                <div class="card" style="background-color: #f0f0f0;">
                    <h4>{model}</h4>
                    <div class="model-badge" style="color: white !important; background-color: #666666;">{model_data['Family']}</div>
                    <div class="model-badge" style="color: white !important; background-color: #666666;">{model_data['Parameters (B)']}B</div>
                    <p style="color: #333333;">{model_data['Description']}</p>
                    <p style="color: #333333;"><b>Overall Score:</b> {model_data['Overall Score']:.2f}</p>
                </div>
                """, unsafe_allow_html=True)

# Tab 4: About
# Static explanatory content: metric definitions, evaluation methodology and
# short development notes for the two model series.
with tab4:
    st.markdown("## 📘 About This Leaderboard")

    # What each score column means.
    st.markdown("""
    <div class="card">
        <h3>Understanding the Metrics</h3>
        <p><b>Length Score</b>: Measures the model's ability to generate appropriately lengthy responses without being too verbose or too brief.</p>
        <p><b>Character Consistency</b>: Evaluates how well the model maintains character personality, backstory, and traits throughout the conversation.</p>
        <p><b>Immersion</b>: Assesses the model's ability to create an engaging, believable experience that draws users into the roleplay scenario.</p>
        <p><b>Overall Score</b>: A weighted combination of the above metrics, representing the model's general roleplay capability.</p>
    </div>
    """, unsafe_allow_html=True)

    # How the benchmark was run.
    st.markdown("""
    <div class="card">
        <h3>Evaluation Methodology</h3>
        <p>Models were evaluated using a comprehensive roleplay benchmark suite consisting of:</p>
        <ul>
            <li>20 diverse character archetypes</li>
            <li>15 different scenarios per character</li>
            <li>5 conversation turns per scenario</li>
        </ul>
        <p>Responses were scored by a panel of expert evaluators using standardized rubrics for each metric.</p>
    </div>
    """, unsafe_allow_html=True)

    # MiniMaid series history.
    st.markdown("""
    <div class="card">
        <h3>MiniMaid Series Development</h3>
        <p>The MiniMaid series represents an evolution in efficient roleplay models:</p>
        <ul>
            <li><b>MiniMaid-L1</b>: Initial release focusing on speed and efficiency</li>
            <li><b>MiniMaid-L2</b>: Improved version with better immersion and consistency</li>
            <li><b>MiniMaid-L3</b>: Latest generation with enhanced immersion capabilities</li>
        </ul>
        <p>Each iteration builds upon the strengths of the previous version while addressing identified weaknesses.</p>
    </div>
    """, unsafe_allow_html=True)

    # OpenElla history.
    st.markdown("""
    <div class="card">
        <h3>OpenElla Development</h3>
        <p>OpenElla represents a parallel development track focused on maximizing roleplay quality in a compact model size.</p>
        <p>Built on the Llama 3 architecture, OpenElla achieves exceptional character consistency and overall performance
        despite its relatively small 2B parameter size.</p>
    </div>
    """, unsafe_allow_html=True)

# Footer with better visibility.
# The heart is the standard red-heart emoji (U+2764 U+FE0F); the previous
# character was a corrupted form of it that rendered as a florette.
st.markdown("""
<div class="footer">
    <p style="color: #444444;">Created with ❤️ for Hugging Face Spaces | Last updated: April 2025</p>
</div>
""", unsafe_allow_html=True)