<!--
  polish-llm-benchmarks / index.html
  djstrong's picture
  Update index.html
  b8c32c9 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Polish LLM Benchmarks</title>
<!-- Page summary for search results and link previews; wording follows the header subtitle. -->
<meta name="description" content="Comprehensive evaluation platforms for Polish language models: Polish-language benchmarks and international leaderboards.">
<style>
/* Global reset: drop default spacing and size every box by its border edge. */
* {
  box-sizing: border-box;
  padding: 0;
  margin: 0;
}

/* Page canvas: diagonal purple gradient filling at least the full viewport height. */
body {
  min-height: 100vh;
  padding: 20px;
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  color: #333;
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
  line-height: 1.6;
}

/* Centred white card that wraps the header, main content and footer. */
.container {
  max-width: 1200px;
  margin: 0 auto;
  overflow: hidden; /* clips child backgrounds to the rounded corners */
  background: white;
  border-radius: 20px;
  box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1);
}
/* Banner: dark slate gradient with centred white text. */
header {
  padding: 60px 20px;
  background: linear-gradient(135deg, #2c3e50 0%, #34495e 100%);
  color: white;
  text-align: center;
}

/* Page title. */
h1 {
  margin-bottom: 10px;
  font-size: 3rem;
  font-weight: 700;
  text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
}

/* Tagline under the title: lighter weight, slightly translucent. */
.subtitle {
  font-size: 1.2rem;
  font-weight: 300;
  opacity: 0.9;
}
/* Content area padding inside the card. */
main {
  padding: 60px 40px;
}

/* Vertical rhythm between sections; the final section adds no trailing gap. */
.section {
  margin-bottom: 50px;
}

.section:last-child {
  margin-bottom: 0;
}

/* Section heading; positioned so the ::after underline can anchor to it. */
h2 {
  position: relative;
  margin-bottom: 30px;
  padding-bottom: 15px;
  color: #2c3e50;
  font-size: 2rem;
  text-align: center;
}

/* Short gradient underline, horizontally centred beneath the heading. */
h2::after {
  content: '';
  position: absolute;
  bottom: 0;
  left: 50%;
  width: 60px;
  height: 3px;
  background: linear-gradient(135deg, #667eea, #764ba2);
  border-radius: 3px;
  transform: translateX(-50%); /* pulls the bar back by half its own width */
}
/* Card grid: as many columns of at least 300px as fit, sharing leftover width equally. */
.benchmark-grid {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
  gap: 20px;
  margin-top: 30px;
}

/* One benchmark entry. */
.benchmark-card {
  position: relative; /* anchors the ::before accent stripe */
  overflow: hidden;
  padding: 25px;
  background: #f8f9fa;
  border: 2px solid transparent; /* reserves the border width so highlighting causes no layout shift */
  border-radius: 15px;
  transition: all 0.3s ease;
}

/* Gradient accent stripe along the top edge of each card. */
.benchmark-card::before {
  content: '';
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 4px;
  background: linear-gradient(135deg, #667eea, #764ba2);
}

/* Pointer feedback: lift the card and highlight its border. */
.benchmark-card:hover {
  transform: translateY(-5px);
  box-shadow: 0 15px 30px rgba(0, 0, 0, 0.1);
  border-color: #667eea;
}

/*
 * Keyboard parity: apply the same highlight while the card's link has focus.
 * Deliberately a separate rule rather than a selector list with :hover, so a
 * browser that does not recognise :focus-within still keeps the hover rule.
 */
.benchmark-card:focus-within {
  transform: translateY(-5px);
  box-shadow: 0 15px 30px rgba(0, 0, 0, 0.1);
  border-color: #667eea;
}

/* Card title link, rendered as a block so it occupies its own line. */
.benchmark-link {
  display: block;
  color: #2c3e50;
  font-size: 1.1rem;
  font-weight: 600;
  text-decoration: none;
  transition: color 0.3s ease;
}

.benchmark-link:hover {
  color: #667eea;
}

/* Same colour cue for keyboard focus; the browser's default outline is left in place. */
.benchmark-link:focus-visible {
  color: #667eea;
}

/* Respect the user's reduced-motion preference: keep the highlight, drop the movement. */
@media (prefers-reduced-motion: reduce) {
  .benchmark-card,
  .benchmark-link {
    transition: none;
  }

  .benchmark-card:hover,
  .benchmark-card:focus-within {
    transform: none;
  }
}
/* Pill-shaped badges shown under a card title; shared geometry and typography. */
.polish-tag,
.speakleash-tag {
  display: inline-block;
  margin-top: 10px;
  padding: 4px 12px;
  border-radius: 20px;
  color: white;
  font-size: 0.8rem;
  font-weight: 600;
  letter-spacing: 0.5px;
  text-transform: uppercase;
}

/* Red badge; the right margin separates it from a badge that follows. */
.polish-tag {
  margin-right: 8px;
  background: linear-gradient(135deg, #e74c3c, #c0392b);
}

/* Blue badge. */
.speakleash-tag {
  background: linear-gradient(135deg, #3498db, #2980b9);
}
/* One-line blurb under each card's badges. */
.description {
  margin-top: 8px;
  color: #666;
  font-size: 0.9rem;
  font-style: italic;
}

/* Page footer strip. */
footer {
  padding: 30px 20px;
  background: #2c3e50;
  color: white;
  font-size: 0.9rem;
  text-align: center;
}
/* Viewports up to 768px wide: tighter spacing, smaller title, single-column grid. */
@media (max-width: 768px) {
  body { padding: 10px; }

  h1 { font-size: 2rem; }

  main { padding: 30px 20px; }

  .benchmark-grid {
    gap: 15px;
    grid-template-columns: 1fr;
  }

  .benchmark-card { padding: 20px; }
}
</style>
</head>
<body>
<div class="container">
<!-- Page banner: title and one-line tagline. -->
<header>
  <h1>Polish LLM Benchmarks</h1>
  <p class="subtitle">Comprehensive evaluation platforms for Polish language models</p>
</header>
<main>
<!--
  Polish-language benchmarks. Each card holds a title link that opens in a new
  tab, one or two badges, and a short description.
-->
<section class="section">
  <h2>Polish Language Benchmarks</h2>
  <div class="benchmark-grid">

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/speakleash/open_pl_llm_leaderboard" target="_blank" rel="noopener noreferrer">
        Open PL LLM Leaderboard
      </a>
      <span class="polish-tag">Polish</span>
      <span class="speakleash-tag">SpeakLeash</span>
      <p class="description">Comprehensive leaderboard for Polish language models</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/speakleash/mt-bench-pl" target="_blank" rel="noopener noreferrer">
        Polish MT-Bench
      </a>
      <span class="polish-tag">Polish</span>
      <span class="speakleash-tag">SpeakLeash</span>
      <p class="description">Multi-turn conversation benchmark for Polish</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/speakleash/polish_eq-bench" target="_blank" rel="noopener noreferrer">
        Polish EQ-Bench
      </a>
      <span class="polish-tag">Polish</span>
      <span class="speakleash-tag">SpeakLeash</span>
      <p class="description">Emotional intelligence benchmark for Polish models</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/speakleash/cptu_bench" target="_blank" rel="noopener noreferrer">
        CPTUB Leaderboard
      </a>
      <span class="polish-tag">Polish</span>
      <span class="speakleash-tag">SpeakLeash</span>
      <p class="description">Comprehensive Polish Text Understanding Benchmark</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/speakleash/polish_medical_leaderboard" target="_blank" rel="noopener noreferrer">
        Polish Medical Leaderboard
      </a>
      <span class="polish-tag">Polish</span>
      <span class="speakleash-tag">SpeakLeash</span>
      <p class="description">Medical domain benchmark for Polish language models</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/sdadas/plcc" target="_blank" rel="noopener noreferrer">
        Polish Linguistic and Cultural Competency Benchmark (PLCC)
      </a>
      <span class="polish-tag">Polish</span>
      <p class="description">Evaluates linguistic and cultural understanding in Polish</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/amu-cai/LLMZSZL_Leaderboard" target="_blank" rel="noopener noreferrer">
        LLMzSzŁ (LLMs Behind the School Desk)
      </a>
      <span class="polish-tag">Polish</span>
      <p class="description">Educational benchmark for Polish language models</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/speakleash/Polish_Cultural_Vision_Benchmark" target="_blank" rel="noopener noreferrer">
        Polish Cultural Vision Benchmark
      </a>
      <span class="polish-tag">Polish</span>
      <span class="speakleash-tag">SpeakLeash</span>
      <p class="description">Vision Language Model benchmark for Polish cultural understanding</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/gaius-lex/pl-legal-rag" target="_blank" rel="noopener noreferrer">
        Polish Legal RAG Leaderboard
      </a>
      <span class="polish-tag">Polish</span>
      <p class="description">Explore and compare model performance on Polish legal QA tasks</p>
    </div>

  </div>
</section>
<!--
  International benchmarks. Cards here carry no badges: just a title link that
  opens in a new tab and a short description.
-->
<section class="section">
  <h2>International Benchmarks (Bielik Evaluated)</h2>
  <div class="benchmark-grid">

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/speakleash/european_leaderboard_bielik" target="_blank" rel="noopener noreferrer">
        European LLM Leaderboard
      </a>
      <p class="description">Multi-language European language model evaluation</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://euroeval.com/leaderboards/Multilingual/european/" target="_blank" rel="noopener noreferrer">
        EuroEval
      </a>
      <p class="description">European multilingual model evaluation platform</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/open-llm-leaderboard-old/open_llm_leaderboard" target="_blank" rel="noopener noreferrer">
        Open LLM Leaderboard
      </a>
      <p class="description">Original comprehensive LLM evaluation leaderboard</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard/" target="_blank" rel="noopener noreferrer">
        Open LLM Leaderboard v2
      </a>
      <p class="description">Updated version of the Open LLM Leaderboard</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://mixeval.github.io/" target="_blank" rel="noopener noreferrer">
        MixEval
      </a>
      <p class="description">Mixed evaluation benchmark for language models</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://gorilla.cs.berkeley.edu/leaderboard.html" target="_blank" rel="noopener noreferrer">
        Berkeley Function-Calling Leaderboard
      </a>
      <p class="description">Evaluates function calling capabilities of LLMs</p>
    </div>

    <!-- NOTE(review): this href is identical to the "European LLM Leaderboard" card above.
         Confirm whether that is intentional or a copy-paste leftover. -->
    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/speakleash/european_leaderboard_bielik" target="_blank" rel="noopener noreferrer">
        FLORES200 Translation Benchmark
      </a>
      <p class="description">Large-scale multilingual translation evaluation</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/CZLC/BenCzechMark" target="_blank" rel="noopener noreferrer">
        BenCzechMark
      </a>
      <p class="description">Czech language model benchmark suite</p>
    </div>

    <div class="benchmark-card">
      <a class="benchmark-link" href="https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard" target="_blank" rel="noopener noreferrer">
        Portuguese Benchmark (Open PT LLM Leaderboard)
      </a>
      <p class="description">Portuguese language model evaluation platform</p>
    </div>

  </div>
</section>
</main>
<!-- Page footer: copyright line and attribution note. -->
<footer>
  <p>
    &copy; 2024 Polish LLM Benchmarks. All benchmarks are maintained by their respective organizations.
  </p>
</footer>
</div>
</body>
</html>