akadhim committed on
Commit
c3ec480
·
verified ·
1 Parent(s): f0f8dd4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -29
app.py CHANGED
@@ -7,27 +7,27 @@ from huggingface_hub import hf_hub_download
7
  from sentence_transformers import SentenceTransformer
8
  import os
9
 
10
- HF_TOKEN = os.environ.get(\"HF_TOKEN\")
11
  if not HF_TOKEN:
12
- raise ValueError(\"Missing Hugging Face API token. Please set HF_TOKEN as an environment variable.\")
13
 
14
- EMBEDDING_MODEL = \"nomic-ai/nomic-embed-text-v1.5\"
15
  embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)
16
 
17
- db_filename = \"hpo_genes.db\"
18
- db_repo = \"UoS-HGIG/hpo_genes\"
19
  db_path = os.path.join(os.getcwd(), db_filename)
20
 
21
  if not os.path.exists(db_path):
22
- db_path = hf_hub_download(repo_id=db_repo, filename=db_filename, repo_type=\"dataset\", use_auth_token=HF_TOKEN)
23
 
24
  def find_best_hpo_match(finding, region, threshold):
25
- query = f\"{finding} {region}\" if region else finding
26
  query_embedding = embedder.encode(query)
27
 
28
  conn = sqlite3.connect(db_path)
29
  cursor = conn.cursor()
30
- cursor.execute(\"SELECT hpo_id, hpo_name, embedding FROM hpo_embeddings\")
31
 
32
  best_match, best_score = None, -1
33
 
@@ -37,7 +37,7 @@ def find_best_hpo_match(finding, region, threshold):
37
 
38
  if similarity > best_score:
39
  best_score = similarity
40
- best_match = {\"hpo_id\": hpo_id, \"hpo_name\": hpo_name}
41
 
42
  conn.close()
43
 
@@ -46,7 +46,7 @@ def find_best_hpo_match(finding, region, threshold):
46
  def get_genes_for_hpo(hpo_id):
47
  conn = sqlite3.connect(db_path)
48
  cursor = conn.cursor()
49
- cursor.execute(\"SELECT genes FROM hpo_gene WHERE hpo_id = ?\", (hpo_id,))
50
  result = cursor.fetchone()
51
  conn.close()
52
 
@@ -54,39 +54,40 @@ def get_genes_for_hpo(hpo_id):
54
 
55
  def hpo_mapper_ui(finding, region, threshold):
56
  if not finding:
57
- return \"Please enter a clinical pathological finding.\", \"\", \"\"
58
 
59
  match = find_best_hpo_match(finding, region, threshold)
60
  if match:
61
  genes = get_genes_for_hpo(match['hpo_id'])
62
- return match['hpo_id'], match['hpo_name'], \", \".join(genes)
63
  else:
64
- return \"\", \"No match found.\", \"\"
65
 
66
- image_path = \"https://huggingface.co/UoS-HGIG/MIMIC/resolve/main/images/hpo.png\"
67
 
68
  demo = gr.Interface(
69
  fn=hpo_mapper_ui,
70
  inputs=[
71
- gr.Textbox(label=\"Clinical Pathological Finding\"),
72
- gr.Textbox(label=\"Anatomical Region (optional)\"),
73
- gr.Slider(0.5, 1.0, 0.01, value=0.74, label=\"Similarity Threshold\")
74
  ],
75
  outputs=[
76
- gr.Textbox(label=\"HPO ID\"),
77
- gr.Textbox(label=\"HPO Term\"),
78
- gr.Textbox(label=\"Associated Genes\")
79
  ],
80
- title=\"Human Phenotype Ontology (HPO) Mapper\",
81
  description=(
82
- f\"![]({image_path})\\n\\n\"
83
- \"## HPO to gene mappings obtained from [https://hpo.jax.org/data/annotations](https://hpo.jax.org/data/annotations)\\n\"
84
- \"Enter a clinical pathological finding and optionally a region to map it to the closest Human Phenotype Ontology (HPO) term and retrieve associated genes.\\n\\n\"
85
- \"**Reference:**\\n\"
86
- \"Application of Generative Artificial Intelligence to Utilise Unstructured Clinical Data for Acceleration of Inflammatory Bowel Disease Research\\n\"
87
- \"Alex Z Kadhim et al., [doi:10.1101/2025.03.07.25323569](https://doi.org/10.1101/2025.03.07.25323569)\"
 
88
  )
89
  )
90
 
91
- if __name__ == \"__main__\":
92
- demo.launch()
 
7
  from sentence_transformers import SentenceTransformer
8
  import os
9
 
# --- Module-level setup: credentials, embedding model, and database location ---

# Access token read from the environment; startup is aborted without it
# because the database download below passes it to the Hub.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("Missing Hugging Face API token. Please set HF_TOKEN as an environment variable.")

# Sentence-embedding model used to encode user queries.
# NOTE(review): trust_remote_code=True lets the model repository run its own
# Python code at load time -- confirm this repository is trusted.
EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)

# SQLite database file: prefer a copy in the current working directory,
# otherwise fetch it from the dataset repository on the Hub.
db_filename = "hpo_genes.db"
db_repo = "UoS-HGIG/hpo_genes"
db_path = os.path.join(os.getcwd(), db_filename)

if not os.path.exists(db_path):
    # `token` is the documented authentication parameter of hf_hub_download;
    # `use_auth_token` is its legacy spelling.
    db_path = hf_hub_download(repo_id=db_repo, filename=db_filename, repo_type="dataset", token=HF_TOKEN)
23
 
24
  def find_best_hpo_match(finding, region, threshold):
25
+ query = f"{finding} {region}" if region else finding
26
  query_embedding = embedder.encode(query)
27
 
28
  conn = sqlite3.connect(db_path)
29
  cursor = conn.cursor()
30
+ cursor.execute("SELECT hpo_id, hpo_name, embedding FROM hpo_embeddings")
31
 
32
  best_match, best_score = None, -1
33
 
 
37
 
38
  if similarity > best_score:
39
  best_score = similarity
40
+ best_match = {"hpo_id": hpo_id, "hpo_name": hpo_name}
41
 
42
  conn.close()
43
 
 
46
  def get_genes_for_hpo(hpo_id):
47
  conn = sqlite3.connect(db_path)
48
  cursor = conn.cursor()
49
+ cursor.execute("SELECT genes FROM hpo_gene WHERE hpo_id = ?", (hpo_id,))
50
  result = cursor.fetchone()
51
  conn.close()
52
 
 
54
 
def hpo_mapper_ui(finding, region, threshold):
    """Callback for the UI: map a finding to an HPO term and its genes.

    Args:
        finding: Free-text clinical pathological finding; a falsy value
            short-circuits with a prompt message.
        region: Optional anatomical region, forwarded to the matcher.
        threshold: Similarity threshold, forwarded to the matcher.

    Returns:
        A 3-tuple of strings ``(hpo_id, hpo_name, genes)``. When no finding
        is given the first element carries a prompt and the others are empty;
        when nothing matches the second element is ``"No match found."``.
    """
    if finding:
        best = find_best_hpo_match(finding, region, threshold)
        if not best:
            return "", "No match found.", ""
        # presumably an iterable of gene-symbol strings -- verify against
        # get_genes_for_hpo, whose return statement is not visible here
        gene_list = get_genes_for_hpo(best['hpo_id'])
        return best['hpo_id'], best['hpo_name'], ", ".join(gene_list)
    return "Please enter a clinical pathological finding.", "", ""
65
 
# Raw-file URL of the workflow figure. It must use `/resolve/main/`: the
# `/blob/main/` form is the Hub's HTML file-viewer page and cannot be
# embedded as a markdown image.
image_path = "https://huggingface.co/UoS-HGIG/MIMIC/resolve/main/images/hpo.png"

# Gradio interface: three inputs (finding, optional region, threshold slider)
# feed hpo_mapper_ui, whose three returned strings fill the output boxes.
demo = gr.Interface(
    fn=hpo_mapper_ui,
    inputs=[
        gr.Textbox(label="Clinical Pathological Finding"),
        gr.Textbox(label="Anatomical Region (optional)"),
        gr.Slider(0.5, 1.0, 0.01, value=0.74, label="Similarity Threshold")
    ],
    outputs=[
        gr.Textbox(label="HPO ID"),
        gr.Textbox(label="HPO Term"),
        gr.Textbox(label="Associated Genes")
    ],
    title="Human Phenotype Ontology (HPO) Mapper",
    description=(
        f"![Workflow]({image_path})\n\n"
        "## HPO to gene mappings obtained from [jax](https://hpo.jax.org/data/annotations)\n"
        "Enter a clinical pathological finding and optionally a region to map it to the closest Human Phenotype Ontology (HPO) term and retrieve associated genes.\n\n"
        "**Reference:**\n"
        "Application of Generative Artificial Intelligence to Utilise Unstructured Clinical Data for Acceleration of Inflammatory Bowel Disease Research\n"
        "Alex Z Kadhim, Zachary Green, Iman Nazari, Jonathan Baker, Michael George, Ashley Heinson, Matt Stammers, Christopher M Kipps, R Mark Beattie, James J Ashton, Sarah Ennis\n"
        "medRxiv 2025.03.07.25323569; doi: [https://doi.org/10.1101/2025.03.07.25323569](https://doi.org/10.1101/2025.03.07.25323569)"
    )
)

# Start the web UI only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()