Spaces:
Configuration error
Configuration error
Update cross_encoder_reranking_train.py
Browse files
cross_encoder_reranking_train.py
CHANGED
@@ -145,19 +145,19 @@ def extract_text(content_dict, text_type="full"):
|
|
145 |
filtered_dict = process_single_patent(content_dict)
|
146 |
all_text = []
|
147 |
# Start with abstract for better context at the beginning
|
148 |
-
if "pa01" in content_dict:
|
149 |
-
all_text.append(content_dict["pa01"])
|
150 |
|
151 |
# For claims, paragraphs and features, we take only the top-10 most relevant
|
152 |
# Add claims
|
153 |
for claim in filtered_dict["claims"][:10]:
|
154 |
all_text.append(claim)
|
155 |
-
# Add paragraphs
|
156 |
-
for paragraph in filtered_dict["paragraphs"][:10]:
|
157 |
-
all_text.append(paragraph)
|
158 |
# Add features
|
159 |
for feature in filtered_dict["features"][:10]:
|
160 |
all_text.append(feature)
|
|
|
|
|
|
|
161 |
|
162 |
return " ".join(all_text)
|
163 |
|
|
|
145 |
filtered_dict = process_single_patent(content_dict)
|
146 |
all_text = []
|
147 |
# Start with abstract for better context at the beginning
|
148 |
+
# if "pa01" in content_dict:
|
149 |
+
# all_text.append(content_dict["pa01"])
|
150 |
|
151 |
# For claims, paragraphs and features, we take only the top-10 most relevant
|
152 |
# Add claims
|
153 |
for claim in filtered_dict["claims"][:10]:
|
154 |
all_text.append(claim)
|
|
|
|
|
|
|
155 |
# Add features
|
156 |
for feature in filtered_dict["features"][:10]:
|
157 |
all_text.append(feature)
|
158 |
+
# Add paragraphs
|
159 |
+
for paragraph in filtered_dict["paragraphs"][:10]:
|
160 |
+
all_text.append(paragraph)
|
161 |
|
162 |
return " ".join(all_text)
|
163 |
|