Spaces:

launch
/

ExpertLongBench

Running

JieRuan committed 2 days ago

Commit

e78a002

verified ·

1 Parent(s): 75b257e

Update src/streamlit_app.py

Files changed (1) hide show

src/streamlit_app.py CHANGED Viewed

@@ -54,8 +54,8 @@ st.markdown(
 def load_data(path):
     df = pd.read_json(path, lines=True)
     score_cols = [f"T{i}" for i in range(1, 12)]
-    # df["Avg"] = df[score_cols].mean(axis=1).round(1)
-    df["Avg"] = np.ceil(df[score_cols].mean(axis=1) * 10) / 10
     # Compute rank per column (1 = best)
     for col in score_cols + ["Avg"]:
         df[f"{col}_rank"] = df[col].rank(ascending=False, method="min").astype(int)
@@ -199,18 +199,16 @@ Let’s build better evaluations for expert-level AI — together 🚀🤝
 """
 )
-# st.markdown(## 📚 Citation)
-# st.write(
-# """
-# ```bibtex
-# @article{ruan2025expertlongbench,
-#   title={ExpertLongBench: Benchmarking Language Models on Expert-Level Long-Form Generation Tasks with Structured Checklists},
-#   author={Ruan, Jie and Nair, Inderjeet and Cao, Shuyang and Liu, Amy and Munir, Sheza and Pollens-Dempsey, Micah and Chiang, Tiffany and Kates, Lucy and David, Nicholas and Chen, Sihan and others},
-#   journal={arXiv preprint arXiv:2506.01241},
-#   year={2025}
-# }
-# """
-# )
 # # ─── Tabs ──────────────────────────────────────────────────────────────────────
 # tab1, tab2, tab3, tab4 = st.tabs(["🏆 Leaderboard", "🔍 Benchmark Details", "🤖 Submit Your Model", "🧩 Community Contributions Welcome"])

 def load_data(path):
     df = pd.read_json(path, lines=True)
     score_cols = [f"T{i}" for i in range(1, 12)]
+    df["Avg"] = df[score_cols].mean(axis=1).round(1)
+    # df["Avg"] = np.ceil(df[score_cols].mean(axis=1) * 10) / 10
     # Compute rank per column (1 = best)
     for col in score_cols + ["Avg"]:
         df[f"{col}_rank"] = df[col].rank(ascending=False, method="min").astype(int)
 """
 )
+st.markdown("## 📚 Citation")
+st.write("""
+```bibtex
+@article{ruan2025expertlongbench,
+  title={ExpertLongBench: Benchmarking Language Models on Expert-Level Long-Form Generation Tasks with Structured Checklists},
+  author={Ruan, Jie and Nair, Inderjeet and Cao, Shuyang and Liu, Amy and Munir, Sheza and Pollens-Dempsey, Micah and Chiang, Tiffany and Kates, Lucy and David, Nicholas and Chen, Sihan and others},
+  journal={arXiv preprint arXiv:2506.01241},
+  year={2025}
+}
+""")
 # # ─── Tabs ──────────────────────────────────────────────────────────────────────
 # tab1, tab2, tab3, tab4 = st.tabs(["🏆 Leaderboard", "🔍 Benchmark Details", "🤖 Submit Your Model", "🧩 Community Contributions Welcome"])