JieRuan committed on
Commit
e78a002
·
verified ·
1 Parent(s): 75b257e

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +12 -14
src/streamlit_app.py CHANGED
@@ -54,8 +54,8 @@ st.markdown(
54
  def load_data(path):
55
  df = pd.read_json(path, lines=True)
56
  score_cols = [f"T{i}" for i in range(1, 12)]
57
- # df["Avg"] = df[score_cols].mean(axis=1).round(1)
58
- df["Avg"] = np.ceil(df[score_cols].mean(axis=1) * 10) / 10
59
  # Compute rank per column (1 = best)
60
  for col in score_cols + ["Avg"]:
61
  df[f"{col}_rank"] = df[col].rank(ascending=False, method="min").astype(int)
@@ -199,18 +199,16 @@ Let’s build better evaluations for expert-level AI — together 🚀🤝
199
  """
200
  )
201
 
202
- # st.markdown(## 📚 Citation)
203
- # st.write(
204
- # """
205
- # ```bibtex
206
- # @article{ruan2025expertlongbench,
207
- # title={ExpertLongBench: Benchmarking Language Models on Expert-Level Long-Form Generation Tasks with Structured Checklists},
208
- # author={Ruan, Jie and Nair, Inderjeet and Cao, Shuyang and Liu, Amy and Munir, Sheza and Pollens-Dempsey, Micah and Chiang, Tiffany and Kates, Lucy and David, Nicholas and Chen, Sihan and others},
209
- # journal={arXiv preprint arXiv:2506.01241},
210
- # year={2025}
211
- # }
212
- # """
213
- # )
214
 
215
  # # ─── Tabs ──────────────────────────────────────────────────────────────────────
216
  # tab1, tab2, tab3, tab4 = st.tabs(["🏆 Leaderboard", "🔍 Benchmark Details", "🤖 Submit Your Model", "🧩 Community Contributions Welcome"])
 
54
  def load_data(path):
55
  df = pd.read_json(path, lines=True)
56
  score_cols = [f"T{i}" for i in range(1, 12)]
57
+ df["Avg"] = df[score_cols].mean(axis=1).round(1)
58
+ # df["Avg"] = np.ceil(df[score_cols].mean(axis=1) * 10) / 10
59
  # Compute rank per column (1 = best)
60
  for col in score_cols + ["Avg"]:
61
  df[f"{col}_rank"] = df[col].rank(ascending=False, method="min").astype(int)
 
199
  """
200
  )
201
 
202
+ st.markdown("## 📚 Citation")
203
+ st.write("""
204
+ ```bibtex
205
+ @article{ruan2025expertlongbench,
206
+ title={ExpertLongBench: Benchmarking Language Models on Expert-Level Long-Form Generation Tasks with Structured Checklists},
207
+ author={Ruan, Jie and Nair, Inderjeet and Cao, Shuyang and Liu, Amy and Munir, Sheza and Pollens-Dempsey, Micah and Chiang, Tiffany and Kates, Lucy and David, Nicholas and Chen, Sihan and others},
208
+ journal={arXiv preprint arXiv:2506.01241},
209
+ year={2025}
210
+ }
211
+ """)
 
 
212
 
213
  # # ─── Tabs ──────────────────────────────────────────────────────────────────────
214
  # tab1, tab2, tab3, tab4 = st.tabs(["🏆 Leaderboard", "🔍 Benchmark Details", "🤖 Submit Your Model", "🧩 Community Contributions Welcome"])