darpanaswal committed on
Commit
101821f
·
verified ·
1 Parent(s): f7855d2

Update cross_encoder_reranking_train.py

Browse files
Files changed (1) hide show
  1. cross_encoder_reranking_train.py +3 -3
cross_encoder_reranking_train.py CHANGED
@@ -32,7 +32,7 @@ def rank_by_centrality(texts):
32
  ranked = sorted(zip(texts, centrality_scores), key=lambda x: x[1], reverse=True)
33
  return [text for text, _ in ranked]
34
 
35
- def cluster_and_rank(texts, threshold=0.6):
36
  if len(texts) < 2:
37
  return texts
38
 
@@ -145,8 +145,8 @@ def extract_text(content_dict, text_type="full"):
145
  filtered_dict = process_single_patent(content_dict)
146
  all_text = []
147
  # Start with abstract for better context at the beginning
148
- if "pa01" in content_dict:
149
- all_text.append(content_dict["pa01"])
150
 
151
  # For claims, paragraphs and features, we take only the top-10 most relevant
152
  # Add claims
 
32
  ranked = sorted(zip(texts, centrality_scores), key=lambda x: x[1], reverse=True)
33
  return [text for text, _ in ranked]
34
 
35
+ def cluster_and_rank(texts, threshold=0.75):
36
  if len(texts) < 2:
37
  return texts
38
 
 
145
  filtered_dict = process_single_patent(content_dict)
146
  all_text = []
147
  # Start with abstract for better context at the beginning
148
+ # if "pa01" in content_dict:
149
+ # all_text.append(content_dict["pa01"])
150
 
151
  # For claims, paragraphs and features, we take only the top-10 most relevant
152
  # Add claims