Spaces:

nvidia
/

lotus-vlm-bias-leaderboard

Running

App Files Files Community

lotus-vlm-bias-leaderboard / src /about.py

huckiyang

[rank] adding rank

88d5920 5 days ago

raw

history blame contribute delete

2.76 kB

	from dataclasses import dataclass
	from enum import Enum

	@dataclass
	class Task:
	    """One evaluation task shown as a column on the leaderboard.

	    Attributes:
	        benchmark: Key of the task in the results JSON file.
	        metric: Key of the metric to read for that task in the JSON file.
	        col_name: Name displayed for this task in the leaderboard.
	    """

	    benchmark: str
	    metric: str
	    col_name: str


	# Select your tasks here
	# ---------------------------------------------------
	class Tasks(Enum):
	    """Tasks displayed on the leaderboard, one enum member per task.

	    Each value is ``Task(benchmark, metric, col_name)``: the task key in the
	    JSON file, the metric key in the JSON file, and the name to display in
	    the leaderboard.
	    """

	    # NOTE(review): ANLI and LogiQA look like leftover example entries rather
	    # than LOTUS captioning metrics -- confirm they match the keys that are
	    # actually present in the result files.
	    task0 = Task("anli_r1", "acc", "ANLI")
	    task1 = Task("logiqa", "acc_norm", "LogiQA")

	NUM_FEWSHOT: int = 0  # Number of few-shot examples; change to match your setup
	# ---------------------------------------------------



	# Leaderboard title, written as an HTML snippet: a centered <div> wrapping a
	# single <h1> heading.
	TITLE = """
	<div align="center">
	<h1>🪷 LOTUS: Detailed LVLM Evaluation from Quality to Societal Bias</h1>
	</div>
	"""

	# Introductory blurb: what the leaderboard evaluates (caption quality, risks
	# such as hallucination, societal biases) and the gaps in existing
	# evaluations it is meant to address.
	INTRODUCTION_TEXT = """

	We introduce LOTUS, a leaderboard for evaluating detailed captions, addressing three main gaps in existing evaluations: lack of standardized criteria, bias-aware assessments, and user preference considerations.
	LOTUS comprehensively evaluates various aspects, including caption quality (e.g., alignment, descriptiveness), risks (e.g., hallucination), and societal biases (e.g., gender bias) while enabling preference-oriented evaluations by tailoring criteria to diverse user preferences.

	"""

	# Which evaluations are you running? How can people reproduce your results?
	# TODO: the text below is still placeholder copy ("will go here") -- replace
	# it with a description of the evaluations and how to reproduce them.
	LLM_BENCHMARKS_TEXT = """
	Details about the LLM benchmarks will go here.
	"""

	# Markdown instructions for submitting a model: prediction format, the
	# submission form, and where to report problems.
	# TODO: the submission-form URL below (forms.gle/your_form_link_here) looks
	# like a placeholder -- replace it with the real form link.
	# NOTE(review): confirm the GitHub repository URL is the intended one.
	EVALUATION_QUEUE_TEXT = """
	## Evaluation Queue and Submission

	Models are evaluated on the private test set of COCO Captions, and results are protected from misuse.

	To submit your model for evaluation, please follow these steps:

	1. Prepare your model's predictions: Ensure your model's predictions are in the same format as the COCO Captions [dataset](https://cocodataset.org/#format-data).
	2. Submit via our form: Fill out the [submission form](https://forms.gle/your_form_link_here) with your model details and prediction file.
	3. Wait for results: The evaluation may take some time. Results will be updated on the leaderboard.

	For any issues or questions, please open an issue on our [GitHub repository](https://github.com/lotus-benchmark/lotus).
	"""

	CITATION_BUTTON_LABEL: str = "BibTeX"  # Label for the citation text; keep or change as needed

	# BibTeX entry for citing the LOTUS paper (ACL 2025). The braces are literal
	# BibTeX syntax in a plain string, not format placeholders.
	CITATION_BUTTON_TEXT = """
	@inproceedings{hirota2025lotus,
	author = {Yusuke Hirota and Boyi Li and Ryo Hachiuma and Yueh-Hua Wu and Boris Ivanovic and Yuta Nakashima and Marco Pavone and Yejin Choi and Yu-Chiang Frank Wang and Huck Yang},
	title = {LOTUS: {A} Leaderboard for Detailed Image Captioning from Quality to Societal Bias and User Preferences},
	booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (ACL)},
	year = {2025}
	}
	"""