OraCatQAQ committed on
Commit
f48a33c
·
1 Parent(s): e40cdb9

Add reasoning_content field to ChatMessage model and update response handling in generate_openai_response and stream_openai_response functions to include reasoning content. Enhance create_chat_completion to accumulate and return reasoning content in responses.

Browse files
Files changed (1) hide show
  1. app.py +56 -18
app.py CHANGED
@@ -103,6 +103,7 @@ class ChatMessage(BaseModel):
103
  role: str
104
  content: str
105
  name: Optional[str] = None
 
106
 
107
  class ChatCompletionRequest(BaseModel):
108
  model: str
@@ -217,10 +218,10 @@ def format_messages_for_deepsider(messages: List[ChatMessage]) -> str:
217
 
218
  return prompt.strip()
219
 
220
- async def generate_openai_response(full_response: str, request_id: str, model: str) -> Dict:
221
  """生成符合OpenAI API响应格式的完整响应"""
222
  timestamp = int(time.time())
223
- return {
224
  "id": f"chatcmpl-{request_id}",
225
  "object": "chat.completion",
226
  "created": timestamp,
@@ -236,19 +237,26 @@ async def generate_openai_response(full_response: str, request_id: str, model: s
236
  }
237
  ],
238
  "usage": {
239
- "prompt_tokens": 0, # 无法准确计算
240
- "completion_tokens": 0, # 无法准确计算
241
- "total_tokens": 0 # 无法准确计算
242
  }
243
  }
 
 
 
 
 
 
244
 
245
  async def stream_openai_response(response, request_id: str, model: str, api_key, token_index):
246
  """流式返回OpenAI API格式的响应"""
247
  timestamp = int(time.time())
248
  full_response = ""
 
249
 
250
  try:
251
- # 修改1:使用iter_content替代iter_lines
252
  buffer = bytearray()
253
  for chunk in response.iter_content(chunk_size=None):
254
  if chunk:
@@ -259,14 +267,14 @@ async def stream_openai_response(response, request_id: str, model: str, api_key,
259
 
260
  for line in lines[:-1]:
261
  if line.startswith('data: '):
262
- # 修改2:增加异常捕获和日志
263
  try:
264
  data = json.loads(line[6:])
265
- # 修改3:增加调试日志
266
  logger.debug(f"Received data: {data}")
267
 
268
  if data.get('code') == 202 and data.get('data', {}).get('type') == "chat":
269
  content = data.get('data', {}).get('content', '')
 
 
270
  if content:
271
  full_response += content
272
  chunk = {
@@ -285,6 +293,26 @@ async def stream_openai_response(response, request_id: str, model: str, api_key,
285
  ]
286
  }
287
  yield f"data: {json.dumps(chunk)}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
 
289
  elif data.get('code') == 203:
290
  # 生成完成信号
@@ -316,12 +344,6 @@ async def stream_openai_response(response, request_id: str, model: str, api_key,
316
  except Exception as e:
317
  logger.error(f"流式响应处理出错: {str(e)}")
318
 
319
- # 尝试使用下一个Token
320
- tokens = api_key.split(',')
321
- if len(tokens) > 1:
322
- logger.info(f"尝试使用下一个Token重试请求")
323
- # 目前我们不在这里实现自动重试,只记录错误
324
-
325
  # 返回错误信息
326
  error_chunk = {
327
  "id": f"chatcmpl-{request_id}",
@@ -366,7 +388,7 @@ async def list_models(api_key: str = Depends(verify_api_key)):
366
  @app.post("/v1/chat/completions")
367
  async def create_chat_completion(
368
  request: Request,
369
- api_key: str = Depends(verify_api_key) # 这里返回的是 DEEPSIDER_TOKEN
370
  ):
371
  """创建聊天完成API - 支持普通请求和流式请求"""
372
  # 解析请求体
@@ -390,8 +412,16 @@ async def create_chat_completion(
390
  "timezone": "Asia/Shanghai"
391
  }
392
 
393
- # 获取请求头(使用 DEEPSIDER_TOKEN)
394
- headers = get_headers(api_key) # api_key 现在是 DEEPSIDER_TOKEN
 
 
 
 
 
 
 
 
395
 
396
  try:
397
  response = requests.post(
@@ -432,6 +462,8 @@ async def create_chat_completion(
432
  else:
433
  # 收集完整响应
434
  full_response = ""
 
 
435
  for line in response.iter_lines():
436
  if not line:
437
  continue
@@ -442,14 +474,20 @@ async def create_chat_completion(
442
 
443
  if data.get('code') == 202 and data.get('data', {}).get('type') == "chat":
444
  content = data.get('data', {}).get('content', '')
 
 
445
  if content:
446
  full_response += content
 
 
 
 
447
 
448
  except json.JSONDecodeError:
449
  pass
450
 
451
  # 返回OpenAI格式的完整响应
452
- return await generate_openai_response(full_response, request_id, chat_request.model)
453
 
454
  except requests.Timeout as e:
455
  logger.error(f"请求超时: {str(e)}")
 
103
  role: str
104
  content: str
105
  name: Optional[str] = None
106
+ reasoning_content: Optional[str] = None # 添加思维链内容字段
107
 
108
  class ChatCompletionRequest(BaseModel):
109
  model: str
 
218
 
219
  return prompt.strip()
220
 
221
+ async def generate_openai_response(full_response: str, request_id: str, model: str, reasoning_content: str = None) -> Dict:
222
  """生成符合OpenAI API响应格式的完整响应"""
223
  timestamp = int(time.time())
224
+ response_data = {
225
  "id": f"chatcmpl-{request_id}",
226
  "object": "chat.completion",
227
  "created": timestamp,
 
237
  }
238
  ],
239
  "usage": {
240
+ "prompt_tokens": 0,
241
+ "completion_tokens": 0,
242
+ "total_tokens": 0
243
  }
244
  }
245
+
246
+ # 如果有思维链内容,添加到响应中
247
+ if reasoning_content:
248
+ response_data["choices"][0]["message"]["reasoning_content"] = reasoning_content
249
+
250
+ return response_data
251
 
252
  async def stream_openai_response(response, request_id: str, model: str, api_key, token_index):
253
  """流式返回OpenAI API格式的响应"""
254
  timestamp = int(time.time())
255
  full_response = ""
256
+ full_reasoning = "" # 添加思维链内容累积变量
257
 
258
  try:
259
+ # 使用iter_content替代iter_lines
260
  buffer = bytearray()
261
  for chunk in response.iter_content(chunk_size=None):
262
  if chunk:
 
267
 
268
  for line in lines[:-1]:
269
  if line.startswith('data: '):
 
270
  try:
271
  data = json.loads(line[6:])
 
272
  logger.debug(f"Received data: {data}")
273
 
274
  if data.get('code') == 202 and data.get('data', {}).get('type') == "chat":
275
  content = data.get('data', {}).get('content', '')
276
+ reasoning_content = data.get('data', {}).get('reasoning_content', '')
277
+
278
  if content:
279
  full_response += content
280
  chunk = {
 
293
  ]
294
  }
295
  yield f"data: {json.dumps(chunk)}\n\n"
296
+
297
+ # 处理思维链内容
298
+ if reasoning_content:
299
+ full_reasoning += reasoning_content
300
+ reasoning_chunk = {
301
+ "id": f"chatcmpl-{request_id}",
302
+ "object": "chat.completion.chunk",
303
+ "created": timestamp,
304
+ "model": model,
305
+ "choices": [
306
+ {
307
+ "index": 0,
308
+ "delta": {
309
+ "reasoning_content": reasoning_content
310
+ },
311
+ "finish_reason": None
312
+ }
313
+ ]
314
+ }
315
+ yield f"data: {json.dumps(reasoning_chunk)}\n\n"
316
 
317
  elif data.get('code') == 203:
318
  # 生成完成信号
 
344
  except Exception as e:
345
  logger.error(f"流式响应处理出错: {str(e)}")
346
 
 
 
 
 
 
 
347
  # 返回错误信息
348
  error_chunk = {
349
  "id": f"chatcmpl-{request_id}",
 
388
  @app.post("/v1/chat/completions")
389
  async def create_chat_completion(
390
  request: Request,
391
+ api_key: str = Depends(verify_api_key)
392
  ):
393
  """创建聊天完成API - 支持普通请求和流式请求"""
394
  # 解析请求体
 
412
  "timezone": "Asia/Shanghai"
413
  }
414
 
415
+ # 添加其他可选参数
416
+ if chat_request.temperature is not None:
417
+ payload["temperature"] = chat_request.temperature
418
+ if chat_request.top_p is not None:
419
+ payload["top_p"] = chat_request.top_p
420
+ if chat_request.max_tokens is not None:
421
+ payload["max_tokens"] = chat_request.max_tokens
422
+
423
+ # 获取请求头
424
+ headers = get_headers(api_key)
425
 
426
  try:
427
  response = requests.post(
 
462
  else:
463
  # 收集完整响应
464
  full_response = ""
465
+ full_reasoning = "" # 思维链内容累积变量
466
+
467
  for line in response.iter_lines():
468
  if not line:
469
  continue
 
474
 
475
  if data.get('code') == 202 and data.get('data', {}).get('type') == "chat":
476
  content = data.get('data', {}).get('content', '')
477
+ reasoning_content = data.get('data', {}).get('reasoning_content', '')
478
+
479
  if content:
480
  full_response += content
481
+
482
+ # 收集思维链内容
483
+ if reasoning_content:
484
+ full_reasoning += reasoning_content
485
 
486
  except json.JSONDecodeError:
487
  pass
488
 
489
  # 返回OpenAI格式的完整响应
490
+ return await generate_openai_response(full_response, request_id, chat_request.model, full_reasoning)
491
 
492
  except requests.Timeout as e:
493
  logger.error(f"请求超时: {str(e)}")