roni committed on
Commit
743e6bd
·
1 Parent(s): 626d43b

getting the missing gene names from uniprot

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. protein_viz.py +24 -8
app.py CHANGED
@@ -49,7 +49,7 @@ def format_search_result(raw_result):
49
  value = raw_result["score"]
50
  gene_name, species = get_gene_name(pdb_id=prot, chain_id=chain)
51
  key = f"PDB: {prot}.{chain}"
52
- if gene_name != "Unknown":
53
  key += f" | Gene: {gene_name} | Organism: {species}"
54
  return key, value
55
 
 
49
  value = raw_result["score"]
50
  gene_name, species = get_gene_name(pdb_id=prot, chain_id=chain)
51
  key = f"PDB: {prot}.{chain}"
52
+ if gene_name is not None:
53
  key += f" | Gene: {gene_name} | Organism: {species}"
54
  return key, value
55
 
protein_viz.py CHANGED
@@ -55,19 +55,35 @@ def get_polymer_entity_id(chain_id, pdb_id):
55
 
56
 
def get_gene_name_from_polymer_entity(pdb_id, entity_id):
    """Return the gene name and source organism of an RCSB polymer entity.

    Queries the RCSB ``core/polymer_entity`` REST endpoint for the given
    PDB entry and entity, and reads the first source organism listed in
    the response.

    Args:
        pdb_id: PDB entry identifier, interpolated into the request URL.
        entity_id: Polymer entity identifier; a falsy value skips the
            lookup entirely.

    Returns:
        A ``(gene_name, species)`` tuple. Both elements are the string
        ``"Unknown"`` when ``entity_id`` is falsy, the HTTP response is
        not OK, or the response lacks the expected fields.

    Note:
        Exceptions raised by ``requests.get`` (for example on timeout) are
        not handled here and propagate to the caller.
    """
    if not entity_id:
        return "Unknown", "Unknown"

    url = f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}"
    response = requests.get(url, timeout=1)
    if response.ok:
        res_data = response.json()
        try:
            source_organism = res_data["rcsb_entity_source_organism"][0]
            gene_name = source_organism["rcsb_gene_name"][0]["value"]
            species = source_organism["scientific_name"]
        except (KeyError, IndexError, TypeError):
            # Best-effort lookup: a missing key, an empty list or a null
            # value all mean the annotation is unavailable, so fall
            # through to the "Unknown" result instead of crashing.
            pass
        else:
            return gene_name, species
    return "Unknown", "Unknown"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
 
73
  def get_protein_name(pdb_id: str):
 
55
 
56
 
def get_gene_name_from_polymer_entity(pdb_id, entity_id):
    """Return the gene name and source organism of an RCSB polymer entity.

    Queries the RCSB ``core/polymer_entity`` REST endpoint and reads the
    first source organism in the response. When that record carries no
    gene name but the entity lists a UniProt accession, the gene name is
    looked up through ``get_gene_name_from_uniprot`` instead.

    Args:
        pdb_id: PDB entry identifier, interpolated into the request URL.
        entity_id: Polymer entity identifier; a falsy value skips the
            lookup entirely.

    Returns:
        A ``(gene_name, species)`` tuple. Either element is ``None`` when
        the corresponding value could not be determined.

    Note:
        Exceptions raised by ``requests.get`` (for example on timeout) are
        not handled here and propagate to the caller.
    """
    gene_name, species = None, None
    if not entity_id:
        return gene_name, species

    url = f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}"
    response = requests.get(url, timeout=1)
    if not response.ok:
        return gene_name, species

    res_data = response.json()
    uniprot_id = _extract_uniprot_id(res_data)

    # `or` fallbacks (rather than .get defaults) also cover keys that are
    # present but hold null or an empty list, which would otherwise raise
    # TypeError / IndexError on the [0] subscript.
    organisms = res_data.get("rcsb_entity_source_organism") or [{}]
    source_organism = organisms[0] or {}
    gene_entries = source_organism.get("rcsb_gene_name") or [{}]
    gene_name = (gene_entries[0] or {}).get("value")
    species = source_organism.get("scientific_name")

    # Fall back to UniProt only when RCSB itself has no gene name.
    if gene_name is None and uniprot_id is not None:
        gene_name = get_gene_name_from_uniprot(uniprot_id)
    return gene_name, species
71
+
72
+
def get_gene_name_from_uniprot(uniprot_id):
    """Return the gene name of a UniProtKB entry, or ``None``.

    Fetches the entry from the UniProt REST endpoint and reads the
    ``geneName.value`` field of the first item in its ``genes`` list.

    Args:
        uniprot_id: UniProt accession, interpolated into the request URL.

    Returns:
        The gene name, or ``None`` when the HTTP response is not OK or
        the entry has no gene name in the expected place.

    Note:
        Exceptions raised by ``requests.get`` (for example on timeout) are
        not handled here and propagate to the caller.
    """
    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}"
    response = requests.get(url, timeout=1.0)
    if not response.ok:
        return None

    uniprot_data = response.json()
    # `or` fallbacks also cover an empty or null "genes" list and a null
    # "geneName", which a .get default alone would not.
    genes = uniprot_data.get("genes") or [{}]
    first_gene = genes[0] or {}
    gene_name_field = first_gene.get("geneName") or {}
    return gene_name_field.get("value")
81
+
82
+
83
+ def _extract_uniprot_id(res_data):
84
+ ids = res_data.get("rcsb_polymer_entity_container_identifiers", {})
85
+ uniprot_id = ids.get("uniprot_ids", [None])[0]
86
+ return uniprot_id
87
 
88
 
89
  def get_protein_name(pdb_id: str):