# llm_eval_system / llm_eval_script / siliconflow_chat.py
# (uploaded by HoneyTian, commit adb1e77)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
https://cloud.siliconflow.cn/sft-d1rosn8o8n4s73ftpa1g/playground/chat/17885302852
https://docs.siliconflow.cn/cn/userguide/capabilities/reasoning
Model Name:
Pro/deepseek-ai/DeepSeek-R1
Tips:
(1)thinking_budget: Must be greater than or equal to 1
(2)The selected model requires paid balance. Your paid balance is insufficient. Please top up and try again.
Model Name:
tencent/Hunyuan-A13B-Instruct
Tips:
(1) It always "thinks" first and only gives the answer at the end. That suits knowledge Q&A, but not our Agent use case. We later forced it to emit exactly one character from A-E (max_tokens=4) to complete the evaluation.
max_tokens=4,
logit_bias={
32: 100,
33: 100,
34: 100,
35: 100,
36: 100,
37: 100,
},
Model Name:
deepseek-ai/DeepSeek-R1
Tips:
(1) To make it output only a single character, set max_tokens=3.
Model Name:
Qwen/Qwen3-8B
deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
Tips:
(1) To make these output only a single character, set max_tokens=1.
Model Name:
baidu/ERNIE-4.5-300B-A47B
Tips:
(1) It probably uses BPE tokenization, so logit_bias is commented out.
"""
import argparse
from datetime import datetime
import json
import os
from pathlib import Path
import sys
import time
from zoneinfo import ZoneInfo  # bundled with Python 3.9+, no extra install needed
pwd = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(pwd, "../"))
from openai import OpenAI
from project_settings import environment, project_path
def get_args():
    """Build and parse the command-line options for this evaluation script.

    Every option is a plain string except ``--interval`` (seconds to sleep
    between consecutive API requests, int).  Returns the parsed
    ``argparse.Namespace``.
    """
    arg_parser = argparse.ArgumentParser()

    # (flag, default) pairs for the string-typed options.
    string_options = [
        # Previously used alternatives: "Pro/deepseek-ai/DeepSeek-R1",
        # "tencent/Hunyuan-A13B-Instruct", "Qwen/Qwen3-8B",
        # "deepseek-ai/DeepSeek-R1", "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
        # "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "baidu/ERNIE-4.5-300B-A47B".
        ("--model_name", "deepseek-ai/DeepSeek-V3"),
        # Alternative dataset: "agent-lingoace-zh-80-chat.jsonl".
        ("--eval_dataset_name", "agent-bingoplus-ph-200-chat.jsonl"),
        ("--eval_dataset_dir", (project_path / "data/dataset").as_posix()),
        ("--eval_data_dir", (project_path / "data/eval_data").as_posix()),
        ("--client", "shenzhen_sase"),
        ("--service", "siliconflow_api_key"),
        # "null" means a fresh timestamp is generated at run time; pass an
        # explicit value such as "20250728_113641" to resume an earlier run.
        ("--create_time_str", "null"),
    ]
    for flag, default_value in string_options:
        arg_parser.add_argument(flag, default=default_value, type=str)

    # Pause (seconds) inserted before every API request.
    arg_parser.add_argument("--interval", default=1, type=int)

    return arg_parser.parse_args()
def main():
    """Run the SiliconFlow chat-completion evaluation loop.

    Reads JSONL eval samples (each with "idx", "prompt", "response"),
    sends each prompt to the SiliconFlow OpenAI-compatible chat API, and
    appends one JSON line per finished sample (idx, prompt, reference
    response, model prediction, running total, request latency) to a
    ``.raw`` output file.  idx values already present in an existing
    output file are skipped, so the script can be re-run with the same
    --create_time_str to resume an interrupted evaluation.
    """
    args = get_args()

    # Ensure both the dataset directory and the output root exist.
    eval_dataset_dir = Path(args.eval_dataset_dir)
    eval_dataset_dir.mkdir(parents=True, exist_ok=True)
    eval_data_dir = Path(args.eval_data_dir)
    eval_data_dir.mkdir(parents=True, exist_ok=True)

    # "null" means: stamp a fresh run with the current Shanghai time;
    # any other value reuses that timestamp (and thus resumes the run
    # directory created by an earlier invocation).
    if args.create_time_str == "null":
        tz = ZoneInfo("Asia/Shanghai")
        now = datetime.now(tz)
        create_time_str = now.strftime("%Y%m%d_%H%M%S")
        # create_time_str = "20250724_090615"
    else:
        create_time_str = args.create_time_str

    eval_dataset = eval_dataset_dir / args.eval_dataset_name

    # "/" inside model names (e.g. "deepseek-ai/DeepSeek-V3") would create
    # nested directories, so it is replaced with "#" in the path component.
    model_name_ = args.model_name.replace("/", "#")
    output_file = eval_data_dir / f"siliconflow/siliconflow/{model_name_}/{args.client}/{args.service}/{create_time_str}/{args.eval_dataset_name}.raw"
    output_file.parent.mkdir(parents=True, exist_ok=True)

    # API key is looked up from project settings by service name.
    api_key = environment.get(args.service, dtype=str)

    client = OpenAI(
        base_url="https://api.siliconflow.cn/v1/",
        # Read your Ark API Key from the environment variable.
        api_key=api_key
    )

    total = 0

    # Collect idx values already finished in a previous (partial) run so
    # they are skipped below; "total" resumes from the last written row.
    finished_idx_set = set()
    if os.path.exists(output_file.as_posix()):
        with open(output_file.as_posix(), "r", encoding="utf-8") as f:
            for row in f:
                row = json.loads(row)
                idx = row["idx"]
                total = row["total"]
                finished_idx_set.add(idx)
    print(f"finished count: {len(finished_idx_set)}")

    # Stream the dataset and append results; "a+" keeps prior rows intact.
    with open(eval_dataset.as_posix(), "r", encoding="utf-8") as fin, open(output_file.as_posix(), "a+", encoding="utf-8") as fout:
        for row in fin:
            row = json.loads(row)
            idx = row["idx"]
            prompt = row["prompt"]
            response = row["response"]
            if idx in finished_idx_set:
                continue
            # NOTE(review): idx is marked finished before the request, so a
            # failed request is not retried within this run (only on re-run).
            finished_idx_set.add(idx)
            try:
                # Throttle requests to stay under the provider's rate limit.
                time.sleep(args.interval)
                print(f"sleep: {args.interval}")
                time_begin = time.time()
                completion = client.chat.completions.create(
                    model=args.model_name,
                    messages=[
                        {"role": "user", "content": prompt},
                    ],
                    stream=False,
                    max_tokens=4096,
                    # max_tokens=1,
                    temperature=0.6,
                    top_p=0.95,
                    # logit_bias restricting output to single letters; left
                    # disabled (see module docstring for per-model notes).
                    # logit_bias={
                    #     32: 100,
                    #     33: 100,
                    #     34: 100,
                    #     35: 100,
                    #     36: 100,
                    #     37: 100,
                    #     38: 100,
                    #     39: 100,
                    # },
                    extra_body={
                        # minimum allowed value; effectively disables thinking
                        "thinking_budget": 1
                    }
                )
                time_cost = time.time() - time_begin
                print(f"time_cost: {time_cost}")
            except Exception as e:
                # Best-effort: log the failure and move on to the next sample.
                print(f"request failed, error type: {type(e)}, error text: {str(e)}")
                continue

            prediction = completion.choices[0].message.content

            total += 1
            row_ = {
                "idx": idx,
                "prompt": prompt,
                "response": response,
                "prediction": prediction,
                "total": total,
                "time_cost": time_cost,
            }
            row_ = json.dumps(row_, ensure_ascii=False)
            fout.write(f"{row_}\n")
            fout.flush()
    return
# Script entry point.
if __name__ == "__main__":
    main()