<!DOCTYPE html>
<html lang="en">
<head>
<!-- Document metadata: character encoding, responsive viewport and page title. -->
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Polish LLM Benchmarks</title>
<style>
  /* Reset: remove default spacing and size every element by its border box. */
  * {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
  }

  /* Page canvas: gradient backdrop that fills at least the full viewport height. */
  body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    line-height: 1.6;
    color: #333;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    min-height: 100vh;
    padding: 20px;
  }

  /* Centered white card holding the page content. */
  .container {
    max-width: 1200px;
    margin: 0 auto;
    background: white;
    border-radius: 20px;
    box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1);
    overflow: hidden; /* clip children to the rounded corners */
  }

  /* Dark banner with the page title and subtitle. */
  header {
    background: linear-gradient(135deg, #2c3e50 0%, #34495e 100%);
    color: white;
    text-align: center;
    padding: 60px 20px;
  }

  h1 {
    font-size: 3rem;
    font-weight: 700;
    margin-bottom: 10px;
    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
  }

  .subtitle {
    font-size: 1.2rem;
    opacity: 0.9;
    font-weight: 300;
  }

  main {
    padding: 60px 40px;
  }

  /* Vertical rhythm between sections; the last one needs no trailing gap. */
  .section {
    margin-bottom: 50px;
  }

  .section:last-child {
    margin-bottom: 0;
  }

  /* Section heading; position: relative anchors the ::after underline below. */
  h2 {
    font-size: 2rem;
    color: #2c3e50;
    margin-bottom: 30px;
    text-align: center;
    position: relative;
    padding-bottom: 15px;
  }

  /* Short gradient underline, horizontally centered under each h2. */
  h2::after {
    content: '';
    position: absolute;
    bottom: 0;
    left: 50%;
    transform: translateX(-50%);
    width: 60px;
    height: 3px;
    background: linear-gradient(135deg, #667eea, #764ba2);
    border-radius: 3px;
  }

  /* Responsive card grid: as many columns of at least 300px as fit. */
  .benchmark-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
    gap: 20px;
    margin-top: 30px;
  }

  /* Single benchmark card. The transparent border reserves space so the
     hover border colour does not shift the layout. */
  .benchmark-card {
    background: #f8f9fa;
    border-radius: 15px;
    padding: 25px;
    transition: all 0.3s ease;
    border: 2px solid transparent;
    position: relative;
    overflow: hidden; /* clip the ::before accent bar to the rounded corners */
  }

  /* Gradient accent bar along the top edge of each card. */
  .benchmark-card::before {
    content: '';
    position: absolute;
    top: 0;
    left: 0;
    width: 100%;
    height: 4px;
    background: linear-gradient(135deg, #667eea, #764ba2);
  }

  /* Hover: lift the card and highlight its border. */
  .benchmark-card:hover {
    transform: translateY(-5px);
    box-shadow: 0 15px 30px rgba(0, 0, 0, 0.1);
    border-color: #667eea;
  }

  /* Card title link. */
  .benchmark-link {
    text-decoration: none;
    color: #2c3e50;
    font-weight: 600;
    font-size: 1.1rem;
    display: block;
    transition: color 0.3s ease;
  }

  .benchmark-link:hover {
    color: #667eea;
  }

  /* Tag pills: declarations shared by both tag variants. */
  .polish-tag,
  .speakleash-tag {
    display: inline-block;
    color: white;
    padding: 4px 12px;
    border-radius: 20px;
    font-size: 0.8rem;
    font-weight: 600;
    margin-top: 10px;
    text-transform: uppercase;
    letter-spacing: 0.5px;
  }

  /* Red pill; the right margin separates it from a following pill. */
  .polish-tag {
    background: linear-gradient(135deg, #e74c3c, #c0392b);
    margin-right: 8px;
  }

  /* Blue pill. */
  .speakleash-tag {
    background: linear-gradient(135deg, #3498db, #2980b9);
  }

  /* One-line card description. */
  .description {
    color: #666;
    font-size: 0.9rem;
    margin-top: 8px;
    font-style: italic;
  }

  footer {
    background: #2c3e50;
    color: white;
    text-align: center;
    padding: 30px 20px;
    font-size: 0.9rem;
  }

  /* Narrow viewports: tighter spacing, smaller title, single-column grid. */
  @media (max-width: 768px) {
    body {
      padding: 10px;
    }

    h1 {
      font-size: 2rem;
    }

    main {
      padding: 30px 20px;
    }

    .benchmark-grid {
      grid-template-columns: 1fr;
      gap: 15px;
    }

    .benchmark-card {
      padding: 20px;
    }
  }
</style>
</head>
<body>
<!-- Card wrapper around the whole page: header, main content and footer. -->
<div class="container">
<!-- Page banner: site title and tagline. -->
<header>
  <h1>Polish LLM Benchmarks</h1>
  <p class="subtitle">Comprehensive evaluation platforms for Polish language models</p>
</header>

<!-- Primary content: one section per benchmark group. -->
<main>
<!-- Benchmarks targeting Polish. Each card holds a link that opens in a new tab,
     one or two tag pills, and a short description. -->
<section class="section">
  <h2>Polish Language Benchmarks</h2>
  <div class="benchmark-grid">
    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/speakleash/open_pl_llm_leaderboard" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        Open PL LLM Leaderboard
      </a>
      <span class="polish-tag">Polish</span>
      <span class="speakleash-tag">SpeakLeash</span>
      <p class="description">Comprehensive leaderboard for Polish language models</p>
    </div>

    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/speakleash/mt-bench-pl" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        Polish MT-Bench
      </a>
      <span class="polish-tag">Polish</span>
      <span class="speakleash-tag">SpeakLeash</span>
      <p class="description">Multi-turn conversation benchmark for Polish</p>
    </div>

    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/speakleash/polish_eq-bench" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        Polish EQ-Bench
      </a>
      <span class="polish-tag">Polish</span>
      <span class="speakleash-tag">SpeakLeash</span>
      <p class="description">Emotional intelligence benchmark for Polish models</p>
    </div>

    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/speakleash/cptu_bench" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        CPTUB Leaderboard
      </a>
      <span class="polish-tag">Polish</span>
      <span class="speakleash-tag">SpeakLeash</span>
      <p class="description">Comprehensive Polish Text Understanding Benchmark</p>
    </div>

    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/speakleash/polish_medical_leaderboard" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        Polish Medical Leaderboard
      </a>
      <span class="polish-tag">Polish</span>
      <span class="speakleash-tag">SpeakLeash</span>
      <p class="description">Medical domain benchmark for Polish language models</p>
    </div>

    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/sdadas/plcc" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        Polish Linguistic and Cultural Competency Benchmark (PLCC)
      </a>
      <span class="polish-tag">Polish</span>
      <p class="description">Evaluates linguistic and cultural understanding in Polish</p>
    </div>

    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/amu-cai/LLMZSZL_Leaderboard" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        LLMzSzŁ (LLMs Behind the School Desk)
      </a>
      <span class="polish-tag">Polish</span>
      <p class="description">Educational benchmark for Polish language models</p>
    </div>

    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/speakleash/Polish_Cultural_Vision_Benchmark" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        Polish Cultural Vision Benchmark
      </a>
      <span class="polish-tag">Polish</span>
      <span class="speakleash-tag">SpeakLeash</span>
      <p class="description">Vision Language Model benchmark for Polish cultural understanding</p>
    </div>

    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/gaius-lex/pl-legal-rag" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        Polish Legal RAG Leaderboard
      </a>
      <span class="polish-tag">Polish</span>
      <p class="description">Explore and compare model performance on Polish legal QA tasks</p>
    </div>
  </div>
</section>
<!-- International benchmarks. Cards here carry no tag pills: just a link that
     opens in a new tab and a short description. -->
<section class="section">
  <h2>International Benchmarks (Bielik Evaluated)</h2>
  <div class="benchmark-grid">
    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/speakleash/european_leaderboard_bielik" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        European LLM Leaderboard
      </a>
      <p class="description">Multi-language European language model evaluation</p>
    </div>

    <div class="benchmark-card">
      <a href="https://euroeval.com/leaderboards/Multilingual/european/" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        EuroEval
      </a>
      <p class="description">European multilingual model evaluation platform</p>
    </div>

    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/open-llm-leaderboard-old/open_llm_leaderboard" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        Open LLM Leaderboard
      </a>
      <p class="description">Original comprehensive LLM evaluation leaderboard</p>
    </div>

    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard/" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        Open LLM Leaderboard v2
      </a>
      <p class="description">Updated version of the Open LLM Leaderboard</p>
    </div>

    <div class="benchmark-card">
      <a href="https://mixeval.github.io/" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        MixEval
      </a>
      <p class="description">Mixed evaluation benchmark for language models</p>
    </div>

    <div class="benchmark-card">
      <a href="https://gorilla.cs.berkeley.edu/leaderboard.html" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        Berkeley Function-Calling Leaderboard
      </a>
      <p class="description">Evaluates function calling capabilities of LLMs</p>
    </div>

    <div class="benchmark-card">
      <!-- NOTE(review): this href is identical to the "European LLM Leaderboard"
           card at the top of this grid; confirm it is the intended FLORES200 target. -->
      <a href="https://huggingface.co/spaces/speakleash/european_leaderboard_bielik" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        FLORES200 Translation Benchmark
      </a>
      <p class="description">Large-scale multilingual translation evaluation</p>
    </div>

    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/CZLC/BenCzechMark" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        BenCzechMark
      </a>
      <p class="description">Czech language model benchmark suite</p>
    </div>

    <div class="benchmark-card">
      <a href="https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard" class="benchmark-link" target="_blank" rel="noopener noreferrer">
        Portuguese Benchmark (Open PT LLM Leaderboard)
      </a>
      <p class="description">Portuguese language model evaluation platform</p>
    </div>
  </div>
</section>
</main>

<!-- Page footer: copyright line and attribution note. -->
<footer>
  <p>© 2024 Polish LLM Benchmarks. All benchmarks are maintained by their respective organizations.</p>
</footer>
</div>
</body>
</html>