#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Query a chat model over a directory of prompt samples and save the outputs.

Every sub-directory of ``--dataset_dir`` is treated as one sample and must
contain ``system_prompt.txt``, ``user_prompt.txt`` and ``response.txt``.
The two prompts are sent to an OpenAI-compatible chat-completions endpoint at
``http://<ollama_host>:<ollama_port>/v1``; the model output is stored next to
the content of ``response.txt`` in ``result.xlsx``.
"""
import argparse
import json
import os
from pathlib import Path
import sys

pwd = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(pwd, "../"))

import openai
from openai import OpenAI
import pandas as pd

from project_settings import environment, project_path


# Upper bound on the number of sample directories processed in one run.
_MAX_SAMPLES = 1000

# Spreadsheet written to the current working directory.
_OUTPUT_FILE = "result.xlsx"

# Column order of the output spreadsheet (also used when there are no rows).
_COLUMNS = ["system_prompt", "user_prompt", "response", "predict"]


def get_args():
    """Parse the command-line arguments.

    Returns:
        argparse.Namespace with ``dataset_dir``, ``ollama_host``,
        ``ollama_port``, ``model_id`` and ``api_key``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset_dir",
        default=(project_path / "data/llm-log/eval-llm-dataset/choice").as_posix(),
        type=str,
    )
    parser.add_argument("--ollama_host", default="10.75.27.247", type=str)
    parser.add_argument("--ollama_port", default=11434, type=int)
    parser.add_argument(
        "--model_id",
        # Alternative values previously listed here:
        #   qwen2:7b-instruct-fp16, llama3:8b-instruct-fp16, gemma3:4b
        default="gemma3:1b",
        type=str,
    )
    parser.add_argument("--api_key", default="ollama", type=str)
    args = parser.parse_args()
    return args


def _read_sample(sample_dir):
    """Return ``(system_prompt, user_prompt, response)`` read from *sample_dir*.

    Raises:
        FileNotFoundError: if one of the three expected files is missing.
    """
    names = ("system_prompt.txt", "user_prompt.txt", "response.txt")
    return tuple(
        (sample_dir / name).read_text(encoding="utf-8") for name in names
    )


def main():
    """Run the model on every sample directory and write ``result.xlsx``.

    At most ``_MAX_SAMPLES`` samples are processed. Any error raised while
    reading a sample or calling the endpoint propagates and aborts the run.
    """
    args = get_args()

    dataset_dir = Path(args.dataset_dir)
    client = OpenAI(
        api_key=args.api_key,
        base_url=f"http://{args.ollama_host}:{args.ollama_port}/v1",
    )

    # glob("*") also yields plain files and returns entries in an unspecified
    # order: keep directories only and sort them so that the capped subset
    # and the row order are the same on every run.
    sample_dirs = sorted(p for p in dataset_dir.glob("*") if p.is_dir())

    result = []
    for count, sample_dir in enumerate(sample_dirs[:_MAX_SAMPLES]):
        print(f"count: {count}, process: {sample_dir.as_posix()}")

        system_prompt, user_prompt, response = _read_sample(sample_dir)

        completion = client.chat.completions.create(
            model=args.model_id,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.8,
            top_p=0.8,
        )

        # The message content can be None; treat that as an empty prediction
        # instead of failing on .strip().
        predict = completion.choices[0].message.content or ""

        result.append({
            "system_prompt": system_prompt,
            "user_prompt": user_prompt,
            "response": response,
            "predict": predict.strip(),
        })

    # Explicit columns keep the header row even when no sample was processed.
    df = pd.DataFrame(result, columns=_COLUMNS)
    df.to_excel(_OUTPUT_FILE, index=False)


if __name__ == "__main__":
    main()