Spaces:
Running
Running
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +12 -14
src/streamlit_app.py
CHANGED
@@ -54,8 +54,8 @@ st.markdown(
|
|
54 |
def load_data(path):
|
55 |
df = pd.read_json(path, lines=True)
|
56 |
score_cols = [f"T{i}" for i in range(1, 12)]
|
57 |
-
|
58 |
-
df["Avg"] = np.ceil(df[score_cols].mean(axis=1) * 10) / 10
|
59 |
# Compute rank per column (1 = best)
|
60 |
for col in score_cols + ["Avg"]:
|
61 |
df[f"{col}_rank"] = df[col].rank(ascending=False, method="min").astype(int)
|
@@ -199,18 +199,16 @@ Let’s build better evaluations for expert-level AI — together ππ€
|
|
199 |
"""
|
200 |
)
|
201 |
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
# """
|
213 |
-
# )
|
214 |
|
215 |
# # ─── Tabs ──────────────────────────────────────────────────────────────────────
|
216 |
# tab1, tab2, tab3, tab4 = st.tabs(["π Leaderboard", "π Benchmark Details", "π€ Submit Your Model", "🧩 Community Contributions Welcome"])
|
|
|
54 |
def load_data(path):
|
55 |
df = pd.read_json(path, lines=True)
|
56 |
score_cols = [f"T{i}" for i in range(1, 12)]
|
57 |
+
df["Avg"] = df[score_cols].mean(axis=1).round(1)
|
58 |
+
# df["Avg"] = np.ceil(df[score_cols].mean(axis=1) * 10) / 10
|
59 |
# Compute rank per column (1 = best)
|
60 |
for col in score_cols + ["Avg"]:
|
61 |
df[f"{col}_rank"] = df[col].rank(ascending=False, method="min").astype(int)
|
|
|
199 |
"""
|
200 |
)
|
201 |
|
202 |
+
st.markdown("## π Citation")
|
203 |
+
st.write("""
|
204 |
+
```bibtex
|
205 |
+
@article{ruan2025expertlongbench,
|
206 |
+
title={ExpertLongBench: Benchmarking Language Models on Expert-Level Long-Form Generation Tasks with Structured Checklists},
|
207 |
+
author={Ruan, Jie and Nair, Inderjeet and Cao, Shuyang and Liu, Amy and Munir, Sheza and Pollens-Dempsey, Micah and Chiang, Tiffany and Kates, Lucy and David, Nicholas and Chen, Sihan and others},
|
208 |
+
journal={arXiv preprint arXiv:2506.01241},
|
209 |
+
year={2025}
|
210 |
+
}
|
211 |
+
""")
|
|
|
|
|
212 |
|
213 |
# # ─── Tabs ──────────────────────────────────────────────────────────────────────
|
214 |
# tab1, tab2, tab3, tab4 = st.tabs(["π Leaderboard", "π Benchmark Details", "π€ Submit Your Model", "🧩 Community Contributions Welcome"])
|