Spaces:

intelli-zen
/

OpenGeminiAPI

Sleeping

App Files Files Community

OpenGeminiAPI / examples /api_eval /eval_gemini.py

HoneyTian

update

0ec61d2 about 2 months ago

raw

history blame contribute delete

3.1 kB

	#!/usr/bin/python3
	# -*- coding: utf-8 -*-
	import argparse
	import json

	from openai import OpenAI

	from project_settings import environment, project_path


	def get_args():
	    """Parse the command-line options of the evaluation script.

	    Every option is a string. The defaults are the API key read from
	    ``environment`` under ``GEMINI_API_KEY``, the ``gemini-2.5-flash``
	    model name, and input/output JSONL paths under ``project_path``.

	    Returns:
	        argparse.Namespace with ``gemini_api_key``, ``model_name``,
	        ``eval_data`` and ``eval_result`` attributes.
	    """
	    data_file = project_path / "data/arc-easy.jsonl"
	    result_file = project_path / "data/eval_math_result.jsonl"

	    # Flag -> default value; insertion order is the registration order.
	    option_defaults = {
	        "--gemini_api_key": environment.get(key="GEMINI_API_KEY"),
	        "--model_name": "gemini-2.5-flash",
	        "--eval_data": data_file.as_posix(),
	        "--eval_result": result_file.as_posix(),
	    }

	    parser = argparse.ArgumentParser()
	    for flag, fallback in option_defaults.items():
	        parser.add_argument(flag, default=fallback, type=str)
	    return parser.parse_args()


	def _build_prompt(question, choices):
	    """Return the single-choice prompt sent to the model for one question.

	    Args:
	        question: The question text.
	        choices: Iterable of dicts, each with a "label" and a "text" key.

	    Returns:
	        The prompt string with surrounding whitespace stripped.
	    """
	    instruct = "Complete this single-choice question."
	    # Every choice line keeps its trailing newline, so the block is followed
	    # by the same blank lines as when the lines were accumulated one by one.
	    choices_str = "".join(
	        f"If you think the answer is `{choice['text']}` output: `{choice['label']}`\n"
	        for choice in choices
	    )
	    # Assembled from explicit pieces so the prompt text never depends on how
	    # this source file happens to be indented.
	    prompt = (
	        f"{instruct}\n"
	        "\n"
	        "Question:\n"
	        f"{question}\n"
	        "\n"
	        "Choices:\n"
	        f"{choices_str}\n"
	        "\n"
	        "Remember to output ONLY the corresponding letter.\n"
	        "Your output is:"
	    )
	    return prompt.strip()


	def _normalize_prediction(content):
	    """Return the model reply reduced to a bare answer label.

	    Surrounding whitespace and the backticks that the prompt shows around
	    each label are removed. A missing reply (``None``) becomes ``""``.
	    """
	    if content is None:
	        return ""
	    return content.strip().strip("`").strip()


	def main():
	    """Evaluate a Gemini model on a JSONL file of single-choice questions.

	    Each non-blank line of ``--eval_data`` is parsed as a JSON object with
	    the keys ``id``, ``question``, ``choices`` and ``answerkey``. The model
	    named by ``--model_name`` is asked to answer with a choice label, the
	    reply is compared with ``answerkey``, and one JSON line holding the
	    question, the raw reply and the running totals is appended to
	    ``--eval_result``. The running score is printed after every question.
	    """
	    args = get_args()

	    # NOTE(review): Google's documentation gives ".../v1beta/openai/" as the
	    # OpenAI-compatible base URL - confirm this shorter form is accepted.
	    client = OpenAI(
	        api_key=args.gemini_api_key,
	        base_url="https://generativelanguage.googleapis.com/v1beta"
	    )

	    total = 0
	    total_correct = 0
	    with open(args.eval_data, "r", encoding="utf-8") as fin, \
	            open(args.eval_result, "a+", encoding="utf-8") as fout:
	        for line in fin:
	            # Tolerate blank lines (e.g. a trailing newline at end of file).
	            if not line.strip():
	                continue

	            row = json.loads(line)
	            idx = row["id"]
	            question = row["question"]
	            choices = row["choices"]
	            answer_key = row["answerkey"]

	            response = client.chat.completions.create(
	                # Honour --model_name instead of a hard-coded model.
	                model=args.model_name,
	                messages=[
	                    {"role": "user", "content": _build_prompt(question, choices)}
	                ],
	                stream=False,
	                temperature=0.0,
	            )

	            prediction = response.choices[0].message.content

	            # Compare normalized text so stray whitespace or backticks around
	            # the label do not turn a right answer into a miss.
	            is_match = _normalize_prediction(prediction) == str(answer_key).strip()
	            correct = 1 if is_match else 0

	            total += 1
	            total_correct += correct
	            score = total_correct / total

	            row_ = {
	                "id": idx,
	                "question": question,
	                "choices": choices,
	                "ground_true": answer_key,
	                # The raw reply is stored so it can be inspected later.
	                "prediction": prediction,
	                "correct": correct,
	                "total": total,
	                "total_correct": total_correct,
	                "score": score,
	            }
	            row_ = json.dumps(row_, ensure_ascii=False)
	            fout.write(f"{row_}\n")

	            print(f"score: {score}")


	# Script entry point: run the evaluation only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	main()