darpanaswal committed on
Commit
80473ca
·
verified ·
1 Parent(s): 92e8f21

Update cross_encoder_reranking_train.py

Browse files
Files changed (1) hide show
  1. cross_encoder_reranking_train.py +5 -5
cross_encoder_reranking_train.py CHANGED
@@ -145,19 +145,19 @@ def extract_text(content_dict, text_type="full"):
145
  filtered_dict = process_single_patent(content_dict)
146
  all_text = []
147
  # Start with abstract for better context at the beginning
148
- if "pa01" in content_dict:
149
- all_text.append(content_dict["pa01"])
150
 
151
  # For claims, paragraphs and features, we take only the top-10 most relevant
152
  # Add claims
153
  for claim in filtered_dict["claims"][:10]:
154
  all_text.append(claim)
155
- # Add paragraphs
156
- for paragraph in filtered_dict["paragraphs"][:10]:
157
- all_text.append(paragraph)
158
  # Add features
159
  for feature in filtered_dict["features"][:10]:
160
  all_text.append(feature)
 
 
 
161
 
162
  return " ".join(all_text)
163
 
 
145
  filtered_dict = process_single_patent(content_dict)
146
  all_text = []
147
  # Start with abstract for better context at the beginning
148
+ # if "pa01" in content_dict:
149
+ # all_text.append(content_dict["pa01"])
150
 
151
  # For claims, paragraphs and features, we take only the top-10 most relevant
152
  # Add claims
153
  for claim in filtered_dict["claims"][:10]:
154
  all_text.append(claim)
 
 
 
155
  # Add features
156
  for feature in filtered_dict["features"][:10]:
157
  all_text.append(feature)
158
+ # Add paragraphs
159
+ for paragraph in filtered_dict["paragraphs"][:10]:
160
+ all_text.append(paragraph)
161
 
162
  return " ".join(all_text)
163