Spaces:

launch
/

ExpertLongBench

Running

JieRuan committed 4 days ago

Commit

fa0bf47

verified ·

1 Parent(s): 8540c68

Update src/streamlit_app.py

Files changed (1) hide show

src/streamlit_app.py CHANGED Viewed

@@ -66,6 +66,7 @@ score_cols = [f"T{i}" for i in range(1, 12)] + ["Avg"]
 max_ranks = {col: df[f"{col}_rank"].max() for col in score_cols}
 # one page description
 # st.markdown("**Leaderboard:** higher scores shaded green; best models bolded.")
 # Build raw HTML table
 cols = ["Model"] + [f"T{i}" for i in range(1,12)] + ["Avg"]
@@ -154,6 +155,14 @@ Let’s build better evaluations for expert-level AI — together 🚀🤝
 """
 )
 # # ─── Tabs ──────────────────────────────────────────────────────────────────────
 # tab1, tab2, tab3, tab4 = st.tabs(["🏆 Leaderboard", "🔍 Benchmark Details", "🤖 Submit Your Model", "🧩 Community Contributions Welcome"])

 max_ranks = {col: df[f"{col}_rank"].max() for col in score_cols}
 # one page description
+st.markdown("## Leaderboard")
 # st.markdown("**Leaderboard:** higher scores shaded green; best models bolded.")
 # Build raw HTML table
 cols = ["Model"] + [f"T{i}" for i in range(1,12)] + ["Avg"]
 """
 )
+## 📚 Citation
+```bibtex
+@article{ruan2025expertlongbench,
+  title={ExpertLongBench: Benchmarking Language Models on Expert-Level Long-Form Generation Tasks with Structured Checklists},
+  author={Ruan, Jie and Nair, Inderjeet and Cao, Shuyang and Liu, Amy and Munir, Sheza and Pollens-Dempsey, Micah and Chiang, Tiffany and Kates, Lucy and David, Nicholas and Chen, Sihan and others},
+  journal={arXiv preprint arXiv:2506.01241},
+  year={2025}
+}
 # # ─── Tabs ──────────────────────────────────────────────────────────────────────
 # tab1, tab2, tab3, tab4 = st.tabs(["🏆 Leaderboard", "🔍 Benchmark Details", "🤖 Submit Your Model", "🧩 Community Contributions Welcome"])