Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -361,10 +361,56 @@ def two_stage_qa(question, candidate_paragraphs_str, max_seq_len_mc=512, max_seq
|
|
361 |
logger.error(f"從 qa_features_dataset 選擇列時出錯: {e}. Features: {qa_features_dataset.features}")
|
362 |
return f"錯誤: 準備模型輸入時出錯 (列選擇)。 Error: {e}", "N/A", "N/A"
|
363 |
|
364 |
-
logger.info(
|
365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
366 |
qa_dataloader = DataLoader(
|
367 |
-
|
|
|
|
|
368 |
)
|
369 |
|
370 |
all_start_logits = []
|
|
|
361 |
logger.error(f"從 qa_features_dataset 選擇列時出錯: {e}. Features: {qa_features_dataset.features}")
|
362 |
return f"錯誤: 準備模型輸入時出錯 (列選擇)。 Error: {e}", "N/A", "N/A"
|
363 |
|
364 |
+
logger.info("--- 手動檢查 features_for_dataloader 以模擬 default_data_collator ---")
|
365 |
+
if len(features_for_dataloader) > 0:
|
366 |
+
# default_data_collator 會接收一個 features 列表,這裡我們模擬只有一個 feature 的情況
|
367 |
+
# 因為對於第一個 test_item,qa_features_dataset (以及 features_for_dataloader) 只有一行
|
368 |
+
|
369 |
+
# features_list_for_collator 將是 [features_for_dataloader[0]]
|
370 |
+
# 如果 qa_batch_size > 1 且 features_for_dataloader 行數也 > 1,這裡會更複雜
|
371 |
+
# 但錯誤發生在第一個批次,所以檢查第一個特徵就夠了。
|
372 |
+
|
373 |
+
single_feature_to_collate = features_for_dataloader[0]
|
374 |
+
keys_to_tensorize_by_collator = ["input_ids", "attention_mask", "token_type_ids"]
|
375 |
+
|
376 |
+
for k_collate in keys_to_tensorize_by_collator:
|
377 |
+
if k_collate in single_feature_to_collate:
|
378 |
+
value_to_tensorize = single_feature_to_collate[k_collate]
|
379 |
+
logger.info(f" 準備轉換鍵 '{k_collate}' 的值: {str(value_to_tensorize)[:100]}...") # 打印部分值
|
380 |
+
if value_to_tensorize is None:
|
381 |
+
logger.error(f" !!!!!! CRITICAL (Pre-Collate): {k_collate} 的值是 None!")
|
382 |
+
return f"錯誤: 預整理時發現 {k_collate} 為 None", "N/A", "N/A"
|
383 |
+
if not isinstance(value_to_tensorize, list):
|
384 |
+
logger.error(f" !!!!!! CRITICAL (Pre-Collate): {k_collate} 的值不是列表,類型為 {type(value_to_tensorize)}!")
|
385 |
+
return f"錯誤: 預整理時發現 {k_collate} 不是列表", "N/A", "N/A"
|
386 |
+
if not value_to_tensorize: # 空列表
|
387 |
+
logger.warning(f" Pre-Collate: {k_collate} 的值是空列表。")
|
388 |
+
|
389 |
+
problem_found_in_list = False
|
390 |
+
for elem_idx, elem_val in enumerate(value_to_tensorize):
|
391 |
+
if elem_val is None:
|
392 |
+
logger.error(f" !!!!!! CRITICAL (Pre-Collate): {k_collate}[{elem_idx}] 是 None!")
|
393 |
+
problem_found_in_list = True
|
394 |
+
break
|
395 |
+
if not isinstance(elem_val, int):
|
396 |
+
logger.error(f" !!!!!! CRITICAL (Pre-Collate): {k_collate}[{elem_idx}] 不是整數,值: {elem_val}, 類型: {type(elem_val)}!")
|
397 |
+
problem_found_in_list = True
|
398 |
+
break
|
399 |
+
if problem_found_in_list:
|
400 |
+
return f"錯誤: 預整理時在 {k_collate} 內部發現問題", "N/A", "N/A"
|
401 |
+
|
402 |
+
logger.info(f" 鍵 '{k_collate}' 的預整理檢查通過。")
|
403 |
+
else:
|
404 |
+
logger.warning(f" 鍵 '{k_collate}' 不在 features_for_dataloader[0] 中。")
|
405 |
+
else:
|
406 |
+
logger.error("features_for_dataloader 為空,無法進行手動檢查。")
|
407 |
+
return "錯誤: features_for_dataloader 為空", "N/A", "N/A"
|
408 |
+
|
409 |
+
|
410 |
qa_dataloader = DataLoader(
|
411 |
+
features_for_dataloader,
|
412 |
+
collate_fn=default_data_collator,
|
413 |
+
batch_size=8 # 或者 args.qa_batch_size
|
414 |
)
|
415 |
|
416 |
all_start_logits = []
|