Spaces:

intelli-zen
/

OpenGeminiAPI

Sleeping

File size: 4,850 Bytes

#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import json
import os
import sys
import time

# Extend the import search path with the directory two levels above this
# script (presumably so that `project_settings` can be imported when the
# script is run directly -- confirm against the repository layout).
pwd = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(pwd, "../../"))

from google import genai
from google.genai import types

from project_settings import environment, project_path


def get_args():
    """
    Parse the command-line options of the evaluation script.

    Example invocations:

    python3 eval_gemini_google.py --model_name gemini-2.5-pro --eval_result eval_math_result_gemini-2.5-pro.jsonl
    python3 eval_gemini_google.py --model_name gemini-2.5-flash --eval_result eval_math_result_gemini-2.5-flash.jsonl
    python3 eval_gemini_google.py --model_name gemini-2.5-flash-lite-preview-06-17 --eval_result eval_math_result_gemini-2.5-flash-lite-preview-06-17.jsonl

    :return: argparse.Namespace with the string attributes
        google_application_credentials, model_name, eval_data and eval_result.
    """
    # (flag, default) pairs; every option is a plain string.
    # Other model names used with this script: gemini-2.5-pro, gemini-2.5-flash.
    option_defaults = (
        (
            "--google_application_credentials",
            (project_path / "dotenv/potent-veld-462405-t3-8091a29b2894.json").as_posix(),
        ),
        ("--model_name", "gemini-2.5-flash-lite-preview-06-17"),
        ("--eval_data", (project_path / "data/arc-easy.jsonl").as_posix()),
        ("--eval_result", (project_path / "data/eval_math_result.jsonl").as_posix()),
    )

    arg_parser = argparse.ArgumentParser()
    for flag, default_value in option_defaults:
        arg_parser.add_argument(flag, default=default_value, type=str)
    return arg_parser.parse_args()


def _load_progress(result_path):
    """
    Load the progress recorded by a previous run.

    :param result_path: path of the JSONL result file; it may not exist yet.
    :return: tuple (finished_ids, total, total_correct). The two counters are
        taken from the last record in the file and are 0 when the file is
        missing or contains no record.
    """
    finished_ids = set()
    total = 0
    total_correct = 0
    if not os.path.exists(result_path):
        return finished_ids, total, total_correct

    with open(result_path, "r", encoding="utf-8") as f:
        for line in f:
            # A blank line (e.g. a stray trailing newline) is not a record;
            # skip it instead of letting json.loads raise.
            if not line.strip():
                continue
            record = json.loads(line)
            finished_ids.add(record["id"])
            total = record["total"]
            total_correct = record["total_correct"]
    return finished_ids, total, total_correct


def _build_prompt(question, choices):
    """
    Build the single-choice prompt sent to the model.

    :param question: question text.
    :param choices: iterable of dicts, each with a "label" and a "text" key.
    :return: the prompt string.
    """
    instruct = "Complete this single-choice question."
    choices_str = "".join(
        f"If you think the answer is `{choice['text']}` output: `{choice['label']}`\n"
        for choice in choices
    )
    # Assembled line by line so the significant trailing spaces after
    # "Question:" and "Choices:" cannot be lost to editor whitespace trimming.
    return (
        f"{instruct}\n"
        "\n"
        "Question: \n"
        f"{question}\n"
        "\n"
        "Choices: \n"
        f"{choices_str}\n"
        "\n"
        "Remember to output ONLY the corresponding letter.\n"
        "Your output is:"
    )


def _is_correct(prediction, answer_key):
    """
    Decide whether the model output matches the answer key.

    An exact match always counts. Otherwise surrounding whitespace and
    backticks are removed from the prediction first, because the prompt shows
    the labels wrapped in backticks and replies often end with a newline.

    :param prediction: raw text returned by the model; may be None.
    :param answer_key: expected label.
    :return: True when the prediction matches the answer key.
    """
    if prediction == answer_key:
        return True
    if not isinstance(prediction, str):
        return False
    return prediction.strip().strip("`").strip() == answer_key


def main():
    """
    Evaluate a Gemini model on a JSONL file of single-choice questions.

    Each input row needs the keys "id", "question", "choices" and "answerkey".
    One JSON record per answered question is appended to the result file,
    carrying the running totals and score. Rows whose id is already in the
    result file are skipped, so an interrupted run can be resumed.

    :return: None
    """
    args = get_args()

    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = args.google_application_credentials

    client = genai.Client(
        vertexai=True,
        project="potent-veld-462405-t3",
        location="global",
    )
    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        max_output_tokens=8192,
        response_modalities=["TEXT"],
    )

    # Resume from whatever a previous run already wrote.
    finished_idx_set, total, total_correct = _load_progress(args.eval_result)
    print(f"finished count: {len(finished_idx_set)}")

    with open(args.eval_data, "r", encoding="utf-8") as fin, \
            open(args.eval_result, "a", encoding="utf-8") as fout:
        for row in fin:
            # Hard cap: stop once more than 20 rows have been scored
            # (counting rows recorded by earlier runs).
            if total > 20:
                break
            if not row.strip():
                continue

            row = json.loads(row)
            idx = row["id"]
            question = row["question"]
            choices = row["choices"]
            answer_key = row["answerkey"]

            if idx in finished_idx_set:
                continue
            finished_idx_set.add(idx)

            prompt = _build_prompt(question, choices)

            contents = [
                types.Content(
                    role="user",
                    parts=[
                        types.Part.from_text(text=prompt)
                    ]
                )
            ]
            time_begin = time.time()
            response: types.GenerateContentResponse = client.models.generate_content(
                model=args.model_name,
                contents=contents,
                config=generate_content_config,
            )
            time_cost = time.time() - time_begin
            print(time_cost)

            try:
                prediction = response.candidates[0].content.parts[0].text
            except (TypeError, AttributeError, IndexError):
                # No candidate / content / part came back (any link of the
                # chain may be None or empty). The row is not recorded, so a
                # later run will retry it.
                print(f"skip id {idx}: response contains no text part", file=sys.stderr)
                continue
            correct = 1 if _is_correct(prediction, answer_key) else 0

            total += 1
            total_correct += correct
            score = total_correct / total

            row_ = {
                "id": idx,
                "question": question,
                "choices": choices,
                "ground_true": answer_key,
                "prediction": prediction,
                "correct": correct,
                "total": total,
                "total_correct": total_correct,
                "score": score,
                "time_cost": time_cost,
            }
            row_ = json.dumps(row_, ensure_ascii=False)
            fout.write(f"{row_}\n")
            # Flush per record so finished rows survive a crash in a later
            # API call and the resume logic can pick them up.
            fout.flush()


# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()