# HoneyTian's picture
# first commit
# 4464055
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Evaluate an ollama-hosted LLM over a directory of prompt/response samples
# and export the model's predictions to an Excel sheet.
import argparse
import json
import os
from pathlib import Path
import sys
# Make the project root importable so `project_settings` resolves when this
# script is run directly from its own directory.
pwd = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(pwd, "../"))
import openai
from openai import OpenAI
import pandas as pd
from project_settings import environment, project_path
def get_args(argv=None):
    """Parse command-line arguments for the ollama-backed LLM evaluation run.

    :param argv: optional list of argument strings; ``None`` (the default)
        keeps the original behavior of reading ``sys.argv[1:]``. Passing a
        list makes the parser drivable programmatically.
    :return: the populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset_dir",
        default=(project_path / "data/llm-log/eval-llm-dataset/choice").as_posix(),
        type=str,
    )
    # Host/port of the local ollama server exposing an OpenAI-compatible API.
    parser.add_argument("--ollama_host", default="10.75.27.247", type=str)
    parser.add_argument("--ollama_port", default=11434, type=int)
    # Model ids used in earlier runs: "qwen2:7b-instruct-fp16",
    # "llama3:8b-instruct-fp16", "gemma3:4b".
    parser.add_argument("--model_id", default="gemma3:1b", type=str)
    # ollama ignores the key, but the OpenAI client requires a non-empty one.
    parser.add_argument("--api_key", default="ollama", type=str)
    args = parser.parse_args(argv)
    return args
def main():
    """Run each eval sample against the configured model and dump results.

    For every sample directory under ``--dataset_dir`` (up to 1000), reads
    ``system_prompt.txt`` / ``user_prompt.txt`` / ``response.txt``, queries
    the OpenAI-compatible ollama endpoint, and writes one row per sample to
    ``result.xlsx`` in the current working directory.
    """
    args = get_args()
    dataset_dir = Path(args.dataset_dir)
    base_url = f"http://{args.ollama_host}:{args.ollama_port}/v1"
    client = OpenAI(
        api_key=args.api_key,
        base_url=base_url,
    )
    result = list()
    count = 0
    # sorted() makes the sample order deterministic across runs.
    for sample_dir in sorted(dataset_dir.glob("*")):
        # Skip stray files; each sample is expected to be a directory
        # containing the three prompt/response text files.
        if not sample_dir.is_dir():
            continue
        if count >= 1000:
            break
        print(f"count: {count}, process: {sample_dir.as_posix()}")
        count += 1
        system_prompt = (sample_dir / "system_prompt.txt").read_text(encoding="utf-8")
        user_prompt = (sample_dir / "user_prompt.txt").read_text(encoding="utf-8")
        response = (sample_dir / "response.txt").read_text(encoding="utf-8")
        completion = client.chat.completions.create(
            model=args.model_id,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.8,
            top_p=0.8
        )
        # Read the message content directly instead of round-tripping the
        # completion object through model_dump_json() / json.loads().
        predict = completion.choices[0].message.content
        row = {
            "system_prompt": system_prompt,
            "user_prompt": user_prompt,
            "response": response,
            "predict": predict.strip(),
        }
        result.append(row)
    df = pd.DataFrame(result)
    df.to_excel("result.xlsx", index=False)
    return
# Script entry point: run main() only when executed directly.
if __name__ == "__main__":
    main()