Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -188,7 +188,31 @@ def prepare_features_for_qa_inference(examples, tokenizer, pad_on_right, max_seq
|
|
188 |
raise ValueError(f"在 prepare_features_for_qa_inference 中,{key_to_check} 的第 {i} 個特徵列表為 None!")
|
189 |
if any(x is None for x in lst):
|
190 |
raise ValueError(f"在 prepare_features_for_qa_inference 中,{key_to_check} 的第 {i} 個特徵列表內部包含 None!內容: {lst[:20]}")
|
191 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
return final_batch
|
193 |
|
194 |
# postprocess_qa_predictions 函數也需要從 utils_qa.py 複製或導入
|
|
|
188 |
raise ValueError(f"在 prepare_features_for_qa_inference 中,{key_to_check} 的第 {i} 個特徵列表為 None!")
|
189 |
if any(x is None for x in lst):
|
190 |
raise ValueError(f"在 prepare_features_for_qa_inference 中,{key_to_check} 的第 {i} 個特徵列表內部包含 None!內容: {lst[:20]}")
|
191 |
+
|
192 |
+
for key_to_check in ["input_ids", "attention_mask", "token_type_ids"]:
|
193 |
+
if key_to_check in final_batch:
|
194 |
+
new_list_of_lists = []
|
195 |
+
for single_feature_list in final_batch[key_to_check]:
|
196 |
+
if single_feature_list is None: # 如果整個特徵的這個字段是 None
|
197 |
+
# logger.error(f"Critical error: {key_to_check} list for a feature is None. Reconstructing a default.")
|
198 |
+
# 根據 key_to_check 類型創建一個安全的默認值
|
199 |
+
if key_to_check == "input_ids":
|
200 |
+
safe_list = [tokenizer.cls_token_id or 101, tokenizer.sep_token_id or 102] + \
|
201 |
+
[tokenizer.pad_token_id or 0] * (max_seq_len - 2)
|
202 |
+
new_list_of_lists.append(safe_list[:max_seq_len])
|
203 |
+
elif key_to_check == "attention_mask":
|
204 |
+
safe_list = [1,1] + [0] * (max_seq_len-2)
|
205 |
+
new_list_of_lists.append(safe_list[:max_seq_len])
|
206 |
+
elif key_to_check == "token_type_ids":
|
207 |
+
new_list_of_lists.append([0] * max_seq_len)
|
208 |
+
elif not all(isinstance(x, int) for x in single_feature_list): # 如果列表內包含非整數
|
209 |
+
# logger.error(f"Critical error: {key_to_check} list for a feature contains non-integers: {single_feature_list[:10]}. Fixing.")
|
210 |
+
default_val = tokenizer.pad_token_id if key_to_check == "input_ids" else 0
|
211 |
+
new_list_of_lists.append([default_val if not isinstance(x, int) else x for x in single_feature_list])
|
212 |
+
else:
|
213 |
+
new_list_of_lists.append(single_feature_list) # 原本就是好的
|
214 |
+
final_batch[key_to_check] = new_list_of_lists
|
215 |
+
|
216 |
return final_batch
|
217 |
|
218 |
# postprocess_qa_predictions 函數也需要從 utils_qa.py 複製或導入
|