Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -7,27 +7,27 @@ from huggingface_hub import hf_hub_download
|
|
7 |
from sentence_transformers import SentenceTransformer
|
8 |
import os
|
9 |
|
10 |
-
HF_TOKEN = os.environ.get(
|
11 |
if not HF_TOKEN:
|
12 |
-
raise ValueError(
|
13 |
|
14 |
-
EMBEDDING_MODEL =
|
15 |
embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)
|
16 |
|
17 |
-
db_filename =
|
18 |
-
db_repo =
|
19 |
db_path = os.path.join(os.getcwd(), db_filename)
|
20 |
|
21 |
if not os.path.exists(db_path):
|
22 |
-
db_path = hf_hub_download(repo_id=db_repo, filename=db_filename, repo_type
|
23 |
|
24 |
def find_best_hpo_match(finding, region, threshold):
|
25 |
-
query = f
|
26 |
query_embedding = embedder.encode(query)
|
27 |
|
28 |
conn = sqlite3.connect(db_path)
|
29 |
cursor = conn.cursor()
|
30 |
-
cursor.execute(
|
31 |
|
32 |
best_match, best_score = None, -1
|
33 |
|
@@ -37,7 +37,7 @@ def find_best_hpo_match(finding, region, threshold):
|
|
37 |
|
38 |
if similarity > best_score:
|
39 |
best_score = similarity
|
40 |
-
best_match = {
|
41 |
|
42 |
conn.close()
|
43 |
|
@@ -46,7 +46,7 @@ def find_best_hpo_match(finding, region, threshold):
|
|
46 |
def get_genes_for_hpo(hpo_id):
|
47 |
conn = sqlite3.connect(db_path)
|
48 |
cursor = conn.cursor()
|
49 |
-
cursor.execute(
|
50 |
result = cursor.fetchone()
|
51 |
conn.close()
|
52 |
|
@@ -54,39 +54,40 @@ def get_genes_for_hpo(hpo_id):
|
|
54 |
|
55 |
def hpo_mapper_ui(finding, region, threshold):
|
56 |
if not finding:
|
57 |
-
return
|
58 |
|
59 |
match = find_best_hpo_match(finding, region, threshold)
|
60 |
if match:
|
61 |
genes = get_genes_for_hpo(match['hpo_id'])
|
62 |
-
return match['hpo_id'], match['hpo_name'],
|
63 |
else:
|
64 |
-
return
|
65 |
|
66 |
-
image_path =
|
67 |
|
68 |
demo = gr.Interface(
|
69 |
fn=hpo_mapper_ui,
|
70 |
inputs=[
|
71 |
-
gr.Textbox(label
|
72 |
-
gr.Textbox(label
|
73 |
-
gr.Slider(0.5, 1.0, 0.01, value=0.74, label
|
74 |
],
|
75 |
outputs=[
|
76 |
-
gr.Textbox(label
|
77 |
-
gr.Textbox(label
|
78 |
-
gr.Textbox(label
|
79 |
],
|
80 |
-
title
|
81 |
description=(
|
82 |
-
f
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
88 |
)
|
89 |
)
|
90 |
|
91 |
-
if __name__ ==
|
92 |
-
demo.launch()
|
|
|
7 |
from sentence_transformers import SentenceTransformer
|
8 |
import os
|
9 |
|
10 |
+
# Hugging Face access token, read from the environment; the app refuses to
# start without it because it is passed to the database download below.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("Missing Hugging Face API token. Please set HF_TOKEN as an environment variable.")

# Sentence-embedding model used to encode the user's query text.
EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)

# SQLite database file name and the Hub dataset repo it is fetched from.
db_filename = "hpo_genes.db"
db_repo = "UoS-HGIG/hpo_genes"
db_path = os.path.join(os.getcwd(), db_filename)

# Prefer a copy in the working directory; otherwise download it from the Hub
# and use the path returned by hf_hub_download.
if not os.path.exists(db_path):
    # `token` is the documented authentication parameter of hf_hub_download;
    # `use_auth_token` is its legacy spelling.
    db_path = hf_hub_download(
        repo_id=db_repo,
        filename=db_filename,
        repo_type="dataset",
        token=HF_TOKEN,
    )
|
23 |
|
24 |
def find_best_hpo_match(finding, region, threshold):
|
25 |
+
query = f"{finding} {region}" if region else finding
|
26 |
query_embedding = embedder.encode(query)
|
27 |
|
28 |
conn = sqlite3.connect(db_path)
|
29 |
cursor = conn.cursor()
|
30 |
+
cursor.execute("SELECT hpo_id, hpo_name, embedding FROM hpo_embeddings")
|
31 |
|
32 |
best_match, best_score = None, -1
|
33 |
|
|
|
37 |
|
38 |
if similarity > best_score:
|
39 |
best_score = similarity
|
40 |
+
best_match = {"hpo_id": hpo_id, "hpo_name": hpo_name}
|
41 |
|
42 |
conn.close()
|
43 |
|
|
|
46 |
def get_genes_for_hpo(hpo_id):
|
47 |
conn = sqlite3.connect(db_path)
|
48 |
cursor = conn.cursor()
|
49 |
+
cursor.execute("SELECT genes FROM hpo_gene WHERE hpo_id = ?", (hpo_id,))
|
50 |
result = cursor.fetchone()
|
51 |
conn.close()
|
52 |
|
|
|
54 |
|
55 |
def hpo_mapper_ui(finding, region, threshold):
    """Map a clinical finding to an HPO term and its genes for the UI.

    Args:
        finding: Free-text clinical pathological finding entered by the user.
        region: Optional anatomical region, forwarded to the matcher as-is.
        threshold: Similarity threshold, forwarded to the matcher as-is.

    Returns:
        A 3-tuple of strings ``(hpo_id, hpo_name, genes)``:

        * no usable finding -> ``("Please enter a clinical pathological
          finding.", "", "")``
        * no match          -> ``("", "No match found.", "")``
        * match             -> the matched id and name, plus the genes
          joined with ``", "``.
    """
    # A blank or whitespace-only finding carries nothing to match on; without
    # this check it would be embedded and could yield a spurious HPO term.
    if not finding or not finding.strip():
        return "Please enter a clinical pathological finding.", "", ""

    match = find_best_hpo_match(finding, region, threshold)
    if not match:
        return "", "No match found.", ""

    # NOTE(review): assumes get_genes_for_hpo returns an iterable of gene
    # name strings -- confirm against that function's return statement.
    genes = get_genes_for_hpo(match['hpo_id'])
    return match['hpo_id'], match['hpo_name'], ", ".join(genes)
|
65 |
|
66 |
+
# NOTE(review): image_path is not referenced anywhere in the interface
# definition below; the description starts with two bare newlines, which
# suggests an image tag may have been intended there -- confirm.
image_path = "https://huggingface.co/UoS-HGIG/MIMIC/resolve/main/images/hpo.png"

# Gradio front end: two text inputs and a threshold slider are passed to
# hpo_mapper_ui, whose three returned strings fill the three output boxes.
demo = gr.Interface(
    fn=hpo_mapper_ui,
    inputs=[
        gr.Textbox(label="Clinical Pathological Finding"),
        gr.Textbox(label="Anatomical Region (optional)"),
        # Explicit keywords: Slider's third positional parameter is `value`
        # (and `step` is keyword-only), so passing the step positionally
        # collides with the `value=` keyword.
        gr.Slider(
            minimum=0.5,
            maximum=1.0,
            step=0.01,
            value=0.74,
            label="Similarity Threshold",
        ),
    ],
    outputs=[
        gr.Textbox(label="HPO ID"),
        gr.Textbox(label="HPO Term"),
        gr.Textbox(label="Associated Genes"),
    ],
    title="Human Phenotype Ontology (HPO) Mapper",
    description=(
        "\n\n"
        "## HPO to gene mappings obtained from [jax](https://hpo.jax.org/data/annotations)\n"
        "Enter a clinical pathological finding and optionally a region to map it to the closest Human Phenotype Ontology (HPO) term and retrieve associated genes.\n\n"
        "**Reference:**\n"
        "Application of Generative Artificial Intelligence to Utilise Unstructured Clinical Data for Acceleration of Inflammatory Bowel Disease Research\n"
        "Alex Z Kadhim, Zachary Green, Iman Nazari, Jonathan Baker, Michael George, Ashley Heinson, Matt Stammers, Christopher M Kipps, R Mark Beattie, James J Ashton, Sarah Ennis\n"
        "medRxiv 2025.03.07.25323569; doi: [https://doi.org/10.1101/2025.03.07.25323569](https://doi.org/10.1101/2025.03.07.25323569)"
    ),
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|