JieRuan committed on
Commit
fa0bf47
·
verified ·
1 Parent(s): 8540c68

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +9 -0
src/streamlit_app.py CHANGED
@@ -66,6 +66,7 @@ score_cols = [f"T{i}" for i in range(1, 12)] + ["Avg"]
66
  max_ranks = {col: df[f"{col}_rank"].max() for col in score_cols}
67
 
68
  # one page description
 
69
  # st.markdown("**Leaderboard:** higher scores shaded green; best models bolded.")
70
  # Build raw HTML table
71
  cols = ["Model"] + [f"T{i}" for i in range(1,12)] + ["Avg"]
@@ -154,6 +155,14 @@ Let’s build better evaluations for expert-level AI — together 🚀🤝
154
  """
155
  )
156
 
 
 
 
 
 
 
 
 
157
 
158
  # # ─── Tabs ──────────────────────────────────────────────────────────────────────
159
  # tab1, tab2, tab3, tab4 = st.tabs(["🏆 Leaderboard", "🔍 Benchmark Details", "🤖 Submit Your Model", "🧩 Community Contributions Welcome"])
 
66
  max_ranks = {col: df[f"{col}_rank"].max() for col in score_cols}
67
 
68
  # one page description
69
+ st.markdown("## Leaderboard")
70
  # st.markdown("**Leaderboard:** higher scores shaded green; best models bolded.")
71
  # Build raw HTML table
72
  cols = ["Model"] + [f"T{i}" for i in range(1,12)] + ["Avg"]
 
155
  """
156
  )
157
 
158
+ ## 📚 Citation
159
+ ```bibtex
160
+ @article{ruan2025expertlongbench,
161
+ title={ExpertLongBench: Benchmarking Language Models on Expert-Level Long-Form Generation Tasks with Structured Checklists},
162
+ author={Ruan, Jie and Nair, Inderjeet and Cao, Shuyang and Liu, Amy and Munir, Sheza and Pollens-Dempsey, Micah and Chiang, Tiffany and Kates, Lucy and David, Nicholas and Chen, Sihan and others},
163
+ journal={arXiv preprint arXiv:2506.01241},
164
+ year={2025}
165
+ }
166
 
167
  # # ─── Tabs ──────────────────────────────────────────────────────────────────────
168
  # tab1, tab2, tab3, tab4 = st.tabs(["🏆 Leaderboard", "🔍 Benchmark Details", "🤖 Submit Your Model", "🧩 Community Contributions Welcome"])