Spaces:
Configuration error
Configuration error
Update cross_encoder_reranking_train.py
Browse files
cross_encoder_reranking_train.py
CHANGED
@@ -32,7 +32,7 @@ def rank_by_centrality(texts):
|
|
32 |
ranked = sorted(zip(texts, centrality_scores), key=lambda x: x[1], reverse=True)
|
33 |
return [text for text, _ in ranked]
|
34 |
|
35 |
-
def cluster_and_rank(texts, threshold=0.
|
36 |
if len(texts) < 2:
|
37 |
return texts
|
38 |
|
@@ -145,8 +145,8 @@ def extract_text(content_dict, text_type="full"):
|
|
145 |
filtered_dict = process_single_patent(content_dict)
|
146 |
all_text = []
|
147 |
# Start with abstract for better context at the beginning
|
148 |
-
if "pa01" in content_dict:
|
149 |
-
|
150 |
|
151 |
# For claims, paragraphs and features, we take only the top-10 most relevant
|
152 |
# Add claims
|
|
|
32 |
ranked = sorted(zip(texts, centrality_scores), key=lambda x: x[1], reverse=True)
|
33 |
return [text for text, _ in ranked]
|
34 |
|
35 |
+
def cluster_and_rank(texts, threshold=0.75):
|
36 |
if len(texts) < 2:
|
37 |
return texts
|
38 |
|
|
|
145 |
filtered_dict = process_single_patent(content_dict)
|
146 |
all_text = []
|
147 |
# Start with abstract for better context at the beginning
|
148 |
+
# if "pa01" in content_dict:
|
149 |
+
# all_text.append(content_dict["pa01"])
|
150 |
|
151 |
# For claims, paragraphs and features, we take only the top-10 most relevant
|
152 |
# Add claims
|