#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
from datetime import datetime
import json
import os
from pathlib import Path
import re
import sys
import time
import tempfile
from zoneinfo import ZoneInfo  # bundled with Python 3.9+, no extra install needed

pwd = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(pwd, "../"))

from google import genai
from google.genai import types

from project_settings import environment, project_path


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name",
        # default="gemini-2.5-pro",  # this model does not support setting thinking_budget to 0
        default="gemini-2.5-flash",
        # default="gemini-2.5-flash-lite-preview-06-17",
        # default="llama-4-maverick-17b-128e-instruct-maas",
        # default="llama-4-scout-17b-16e-instruct-maas",
        type=str
    )
    parser.add_argument(
        "--eval_dataset_name",
        # default="agent-lingoace-zh-80-chat.jsonl",
        # default="agent-bingoplus-ph-200-chat.jsonl",
        default="agent-cod-zh-70-chat.jsonl",
        type=str
    )
    parser.add_argument(
        "--eval_dataset_dir",
        default=(project_path / "data/dataset").as_posix(),
        type=str
    )
    parser.add_argument(
        "--eval_data_dir",
        default=(project_path / "data/eval_data").as_posix(),
        type=str
    )
    parser.add_argument(
        "--client",
        default="shenzhen_sase",
        type=str
    )
    parser.add_argument(
        "--service",
        default="google_potent_veld_462405_t3",
        type=str
    )
    parser.add_argument(
        "--create_time_str",
        default="null",
        # default="20250731_162116",
        type=str
    )
    parser.add_argument(
        "--interval",
        default=1,
        type=int
    )
    args = parser.parse_args()
    return args


def main():
    args = get_args()

    # Load the service-account credentials from the environment config and write
    # them to a temp file so the google-genai / Vertex AI client can pick them up.
    service = environment.get(args.service, dtype=json.loads)
    project_id = service["project_id"]

    google_application_credentials = Path(tempfile.gettempdir()) / f"llm_eval_system/{project_id}.json"
    google_application_credentials.parent.mkdir(parents=True, exist_ok=True)
    with open(google_application_credentials.as_posix(), "w", encoding="utf-8") as f:
        content = json.dumps(service, ensure_ascii=False, indent=4)
        f.write(f"{content}\n")
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = google_application_credentials.as_posix()

    eval_dataset_dir = Path(args.eval_dataset_dir)
    eval_dataset_dir.mkdir(parents=True, exist_ok=True)
    eval_data_dir = Path(args.eval_data_dir)
    eval_data_dir.mkdir(parents=True, exist_ok=True)

    if args.create_time_str == "null":
        tz = ZoneInfo("Asia/Shanghai")
        now = datetime.now(tz)
        create_time_str = now.strftime("%Y%m%d_%H%M%S")
        # create_time_str = "20250729-interval-5"
    else:
        create_time_str = args.create_time_str

    eval_dataset = eval_dataset_dir / args.eval_dataset_name
    output_file = eval_data_dir / f"gemini_google/google/{args.model_name}/{args.client}/{args.service}/{create_time_str}/{args.eval_dataset_name}.raw"
    output_file.parent.mkdir(parents=True, exist_ok=True)

    client = genai.Client(
        vertexai=True,
        project=project_id,
        # location="global",
        location="us-east5",
    )

    generate_content_config = types.GenerateContentConfig(
        top_p=0.95,
        temperature=0.6,
        # max_output_tokens=1,
        response_modalities=["TEXT"],
        thinking_config=types.ThinkingConfig(
            thinking_budget=0
        )
    )

    total = 0

    # Indices already finished in a previous run; used to resume without repeating requests.
    finished_idx_set = set()
    if os.path.exists(output_file.as_posix()):
        with open(output_file.as_posix(), "r", encoding="utf-8") as f:
            for row in f:
                row = json.loads(row)
                idx = row["idx"]
                total = row["total"]
                finished_idx_set.add(idx)
    print(f"finished count: {len(finished_idx_set)}")

    with open(eval_dataset.as_posix(), "r", encoding="utf-8") as fin, \
            open(output_file.as_posix(), "a+", encoding="utf-8") as fout:
        for row in fin:
            row = json.loads(row)
            idx = row["idx"]
            prompt = row["prompt"]
            response = row["response"]

            if idx in finished_idx_set:
                continue
            finished_idx_set.add(idx)

            # Split the prompt at the last blank line: everything before it is the
            # system prompt, everything after it is the conversation transcript.
            splits = prompt.rsplit("\n\n", maxsplit=1)
            system_prompt = splits[0].strip()
            conversation = splits[1].strip()

            # Capture each "Client:" / "Assistant:" turn; the lookahead ends a match
            # at the start of the next speaker line.
            pattern = r"^(Client|Assistant): (.*?)(?=\n(?:Client|Assistant):)"
            match = re.findall(pattern=pattern, string=conversation, flags=re.I | re.DOTALL | re.MULTILINE)

            messages_ = list()
            for m in match:
                role = m[0].lower()
                content = m[1]
                if role == "client":
                    role = "user"
                elif role == "assistant":
                    role = "assistant"
                else:
                    raise AssertionError(f"unexpected role: {role}")
                messages_.append({
                    "role": role,
                    "content": content
                })

            messages = [
                {"role": "system", "content": system_prompt},
                *messages_
            ]
            # print(json.dumps(messages, ensure_ascii=False, indent=4))
            # exit(0)

            # Map to the google-genai content format: the system prompt and all
            # assistant turns are sent with the "model" role, client turns as "user".
            contents = [
                types.Content(
                    role="user" if m["role"] == "user" else "model",
                    parts=[
                        types.Part.from_text(text=m["content"])
                    ]
                )
                for m in messages
            ]

            time.sleep(args.interval)
            print(f"sleep: {args.interval}")

            time_begin = time.time()
            llm_response: types.GenerateContentResponse = client.models.generate_content(
                model=args.model_name,
                contents=contents,
                config=generate_content_config,
            )
            time_cost = time.time() - time_begin
            print(f"time_cost: {time_cost}")

            try:
                prediction = llm_response.candidates[0].content.parts[0].text
            except TypeError as e:
                print(f"request failed, error type: {type(e)}, error text: {str(e)}")
                continue

            total += 1

            row_ = {
                "idx": idx,
                "prompt": prompt,
                "response": response,
                "prediction": prediction,
                "total": total,
                "time_cost": time_cost,
            }
            row_ = json.dumps(row_, ensure_ascii=False)
            fout.write(f"{row_}\n")
            fout.flush()
    return


if __name__ == "__main__":
    main()
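# A minimal usage sketch. The script filename below is hypothetical; the flags are
# the ones defined in get_args(), and the values shown simply repeat its defaults:
#
#   python3 run_gemini_eval.py \
#       --model_name gemini-2.5-flash \
#       --eval_dataset_name agent-cod-zh-70-chat.jsonl \
#       --client shenzhen_sase \
#       --service google_potent_veld_462405_t3 \
#       --interval 1
#
# Re-running with --create_time_str set to an earlier run's timestamp appends to
# that run's output file and skips the idx values it already contains.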