Spaces:
Running
Running
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +9 -0
src/streamlit_app.py
CHANGED
@@ -66,6 +66,7 @@ score_cols = [f"T{i}" for i in range(1, 12)] + ["Avg"]
|
|
66 |
max_ranks = {col: df[f"{col}_rank"].max() for col in score_cols}
|
67 |
|
68 |
# one page description
|
|
|
69 |
# st.markdown("**Leaderboard:** higher scores shaded green; best models bolded.")
|
70 |
# Build raw HTML table
|
71 |
cols = ["Model"] + [f"T{i}" for i in range(1,12)] + ["Avg"]
|
@@ -154,6 +155,14 @@ Let’s build better evaluations for expert-level AI — together ππ€
|
|
154 |
"""
|
155 |
)
|
156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
# # ─── Tabs ──────────────────────────────────────────────────────────────────────
|
159 |
# tab1, tab2, tab3, tab4 = st.tabs(["π Leaderboard", "π Benchmark Details", "π€ Submit Your Model", "🧩 Community Contributions Welcome"])
|
|
|
66 |
max_ranks = {col: df[f"{col}_rank"].max() for col in score_cols}
|
67 |
|
68 |
# one page description
|
69 |
+
st.markdown("## Leaderboard")
|
70 |
# st.markdown("**Leaderboard:** higher scores shaded green; best models bolded.")
|
71 |
# Build raw HTML table
|
72 |
cols = ["Model"] + [f"T{i}" for i in range(1,12)] + ["Avg"]
|
|
|
155 |
"""
|
156 |
)
|
157 |
|
158 |
+
## π Citation
|
159 |
+
```bibtex
|
160 |
+
@article{ruan2025expertlongbench,
|
161 |
+
title={ExpertLongBench: Benchmarking Language Models on Expert-Level Long-Form Generation Tasks with Structured Checklists},
|
162 |
+
author={Ruan, Jie and Nair, Inderjeet and Cao, Shuyang and Liu, Amy and Munir, Sheza and Pollens-Dempsey, Micah and Chiang, Tiffany and Kates, Lucy and David, Nicholas and Chen, Sihan and others},
|
163 |
+
journal={arXiv preprint arXiv:2506.01241},
|
164 |
+
year={2025}
|
165 |
+
}
|
166 |
|
167 |
# # ─── Tabs ──────────────────────────────────────────────────────────────────────
|
168 |
# tab1, tab2, tab3, tab4 = st.tabs(["π Leaderboard", "π Benchmark Details", "π€ Submit Your Model", "🧩 Community Contributions Welcome"])
|