Spaces:

Shuu12121
/

CodeSearch-ModernBERT-Owl-Demo

Sleeping

App Files Files Community

Shuu12121 committed on Apr 16

Commit

00bd88d

verified ·

1 Parent(s): 6396265

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -17

app.py CHANGED Viewed

@@ -14,29 +14,32 @@ model.eval()
 dataset = load_dataset("code_x_glue_tc_nl_code_search_adv", trust_remote_code=True, split="test")
 # --- Query & Candidate Generator ---
-def get_random_query(seed: int = 42):
     random.seed(seed)
     idx = random.randint(0, len(dataset) - 1)
-    sample = dataset[idx]
-    return sample["code"], sample["docstring"]
 @GPU
 def code_search_demo(seed: int):
-    code_str, doc_str = get_random_query(seed)
-    query_emb = model.encode(doc_str, convert_to_tensor=True)
-    # ランダムに10件取得し、正解 index を含めるようにする（※現実には全件評価がおすすめ）
-    candidates = dataset.shuffle(seed=seed).select(range(10))
-    correct_label = dataset[seed]["label"]  # 正解 index（全体に対する）
-    correct_code = dataset[correct_label]["code"]
-    candidate_codes = [c["code"] for c in candidates]
-    candidate_embeddings = model.encode(candidate_codes, convert_to_tensor=True)
     cos_scores = util.cos_sim(query_emb, candidate_embeddings)[0]
-    results = sorted(zip(candidate_codes, cos_scores), key=lambda x: x[1], reverse=True)
-    # 正解コードが Top-K に含まれているかを確認
     top_k = 10
     correct_in_top_k = any(code.strip() == correct_code.strip() for code, _ in results[:top_k])
     mrr = 0.0
@@ -45,7 +48,6 @@ def code_search_demo(seed: int):
             mrr = 1.0 / rank
             break
-    # 出力構築
     output = f"### 🔍 Query Docstring\n\n{doc_str}\n\n"
     output += f"**✅ 正解は Top-{top_k} に含まれているか？**: {'🟢 Yes' if correct_in_top_k else '🔴 No'}\n\n"
     output += f"**📈 MRR@{top_k}**: {mrr:.4f}\n\n"
@@ -60,8 +62,6 @@ def code_search_demo(seed: int):
     return output
-    return output
 # --- Gradio UI ---
 demo = gr.Interface(
     fn=code_search_demo,

 dataset = load_dataset("code_x_glue_tc_nl_code_search_adv", trust_remote_code=True, split="test")
 # --- Query & Candidate Generator ---
def get_query_and_candidates(seed: int = 42, source=None):
    """Pick one query and build a shuffled candidate list containing its code.

    A single example is drawn as the query (the positive).  Up to nine other
    examples are drawn as negatives, never including the positive's own row,
    and the positive code plus the negative codes are shuffled together.

    Args:
        seed: Seed for the ``random`` module; the same seed and data yield
            the same query and the same candidate order.
        source: Optional indexable collection of records exposing ``"code"``
            and ``"docstring"`` keys.  Defaults to the module-level
            ``dataset`` when omitted.

    Returns:
        A ``(doc_str, correct_code, candidates)`` tuple: the query docstring,
        the code belonging to that docstring, and the shuffled list of
        candidate code strings (the positive plus the negatives).

    Raises:
        ValueError: If the data collection is empty.
    """
    data = dataset if source is None else source
    total = len(data)
    if total == 0:
        raise ValueError("dataset is empty; cannot draw a query")

    random.seed(seed)
    idx = random.randint(0, total - 1)
    query = data[idx]
    correct_code = query["code"]
    doc_str = query["docstring"]

    # Positive + random negatives (positive row excluded).  Sample positions
    # from a range one shorter than the data and shift those at or past `idx`
    # by one: this skips the positive without iterating over every record.
    num_negatives = min(9, total - 1)
    picks = random.sample(range(total - 1), k=num_negatives)
    negative_codes = [data[i if i < idx else i + 1]["code"] for i in picks]

    candidates = [correct_code, *negative_codes]
    random.shuffle(candidates)
    return doc_str, correct_code, candidates
 @GPU
 def code_search_demo(seed: int):
+    doc_str, correct_code, candidates = get_query_and_candidates(seed)
+    query_emb = model.encode(doc_str, convert_to_tensor=True)
+    candidate_embeddings = model.encode(candidates, convert_to_tensor=True)
     cos_scores = util.cos_sim(query_emb, candidate_embeddings)[0]
+    results = sorted(zip(candidates, cos_scores), key=lambda x: x[1], reverse=True)
     top_k = 10
     correct_in_top_k = any(code.strip() == correct_code.strip() for code, _ in results[:top_k])
     mrr = 0.0
             mrr = 1.0 / rank
             break
     output = f"### 🔍 Query Docstring\n\n{doc_str}\n\n"
     output += f"**✅ 正解は Top-{top_k} に含まれているか？**: {'🟢 Yes' if correct_in_top_k else '🔴 No'}\n\n"
     output += f"**📈 MRR@{top_k}**: {mrr:.4f}\n\n"
     return output
 # --- Gradio UI ---
 demo = gr.Interface(
     fn=code_search_demo,