TheWeeeed committed
Commit 525e038 · verified · 1 Parent(s): 7c3e363

Update app.py

Files changed (1)
  1. app.py +25 -1
app.py CHANGED
@@ -188,7 +188,31 @@ def prepare_features_for_qa_inference(examples, tokenizer, pad_on_right, max_seq
                 raise ValueError(f"In prepare_features_for_qa_inference, feature list {i} for {key_to_check} is None!")
             if any(x is None for x in lst):
                 raise ValueError(f"In prepare_features_for_qa_inference, feature list {i} for {key_to_check} contains None values! Content: {lst[:20]}")
-
+
+    for key_to_check in ["input_ids", "attention_mask", "token_type_ids"]:
+        if key_to_check in final_batch:
+            new_list_of_lists = []
+            for single_feature_list in final_batch[key_to_check]:
+                if single_feature_list is None:  # this field is None for the entire feature
+                    # logger.error(f"Critical error: {key_to_check} list for a feature is None. Reconstructing a default.")
+                    # Build a safe default value based on the key_to_check type
+                    if key_to_check == "input_ids":
+                        safe_list = [tokenizer.cls_token_id or 101, tokenizer.sep_token_id or 102] + \
+                                    [tokenizer.pad_token_id or 0] * (max_seq_len - 2)
+                        new_list_of_lists.append(safe_list[:max_seq_len])
+                    elif key_to_check == "attention_mask":
+                        safe_list = [1, 1] + [0] * (max_seq_len - 2)
+                        new_list_of_lists.append(safe_list[:max_seq_len])
+                    elif key_to_check == "token_type_ids":
+                        new_list_of_lists.append([0] * max_seq_len)
+                elif not all(isinstance(x, int) for x in single_feature_list):  # the list contains non-integer values
+                    # logger.error(f"Critical error: {key_to_check} list for a feature contains non-integers: {single_feature_list[:10]}. Fixing.")
+                    default_val = tokenizer.pad_token_id if key_to_check == "input_ids" else 0
+                    new_list_of_lists.append([default_val if not isinstance(x, int) else x for x in single_feature_list])
+                else:
+                    new_list_of_lists.append(single_feature_list)  # already valid as-is
+            final_batch[key_to_check] = new_list_of_lists
+
     return final_batch
 
 # The postprocess_qa_predictions function also needs to be copied from utils_qa.py or imported
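
The net effect of the added loop is a last-chance sanitization of final_batch before it is returned: a feature whose list is None is rebuilt as a CLS/SEP/pad skeleton, and non-integer entries are overwritten with the pad id (or 0). Below is a minimal, self-contained sketch of that idea for illustration only; the DummyTokenizer class, the sanitize_batch helper, and the sample batch are assumptions, not code from this repo, and unlike the commit it truncates every list to max_seq_len.

# Illustrative sketch only: DummyTokenizer and the sample batch are hypothetical.
class DummyTokenizer:
    cls_token_id = 101
    sep_token_id = 102
    pad_token_id = 0

def sanitize_batch(final_batch, tokenizer, max_seq_len):
    # Same idea as the loop added in this commit: rebuild None entries and
    # overwrite non-integer values before returning the batch.
    for key in ["input_ids", "attention_mask", "token_type_ids"]:
        if key not in final_batch:
            continue
        fixed = []
        for lst in final_batch[key]:
            if lst is None:
                if key == "input_ids":
                    lst = ([tokenizer.cls_token_id, tokenizer.sep_token_id]
                           + [tokenizer.pad_token_id] * (max_seq_len - 2))
                elif key == "attention_mask":
                    lst = [1, 1] + [0] * (max_seq_len - 2)
                else:  # token_type_ids
                    lst = [0] * max_seq_len
            elif not all(isinstance(x, int) for x in lst):
                default = tokenizer.pad_token_id if key == "input_ids" else 0
                lst = [x if isinstance(x, int) else default for x in lst]
            fixed.append(lst[:max_seq_len])
        final_batch[key] = fixed
    return final_batch

batch = {
    "input_ids": [None, [101, 2054, None, 102]],
    "attention_mask": [[1, 1, 1, 1], None],
    "token_type_ids": [[0, 0, 0, 0], [0, 0, 0, 0]],
}
print(sanitize_batch(batch, DummyTokenizer(), max_seq_len=4))
# -> input_ids:      [[101, 102, 0, 0], [101, 2054, 0, 102]]
#    attention_mask: [[1, 1, 1, 1], [1, 1, 0, 0]]
#    token_type_ids: unchanged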