Spaces:
Running
Running
roni
committed on
Commit
·
743e6bd
1
Parent(s):
626d43b
getting the missing gene names from uniprot
Browse files- app.py +1 -1
- protein_viz.py +24 -8
app.py
CHANGED
@@ -49,7 +49,7 @@ def format_search_result(raw_result):
|
|
49 |
value = raw_result["score"]
|
50 |
gene_name, species = get_gene_name(pdb_id=prot, chain_id=chain)
|
51 |
key = f"PDB: {prot}.{chain}"
|
52 |
-
if gene_name
|
53 |
key += f" | Gene: {gene_name} | Organism: {species}"
|
54 |
return key, value
|
55 |
|
|
|
49 |
value = raw_result["score"]
|
50 |
gene_name, species = get_gene_name(pdb_id=prot, chain_id=chain)
|
51 |
key = f"PDB: {prot}.{chain}"
|
52 |
+
if gene_name is not None:
|
53 |
key += f" | Gene: {gene_name} | Organism: {species}"
|
54 |
return key, value
|
55 |
|
protein_viz.py
CHANGED
@@ -55,19 +55,35 @@ def get_polymer_entity_id(chain_id, pdb_id):
|
|
55 |
|
56 |
|
57 |
def get_gene_name_from_polymer_entity(pdb_id, entity_id):
|
|
|
58 |
if entity_id:
|
59 |
url = f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}"
|
60 |
response = requests.get(url, timeout=1)
|
61 |
if response.ok:
|
62 |
res_data = response.json()
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
|
73 |
def get_protein_name(pdb_id: str):
|
|
|
55 |
|
56 |
|
57 |
def get_gene_name_from_polymer_entity(pdb_id, entity_id):
    """Look up the gene name and species of one polymer entity via RCSB.

    Queries the RCSB ``core/polymer_entity`` REST endpoint and reads the
    first source organism's first gene name and its scientific name. If
    RCSB has no gene name but lists a UniProt accession, the gene name is
    fetched from UniProt instead.

    Args:
        pdb_id: PDB entry identifier, interpolated into the request URL.
        entity_id: Polymer entity identifier; when falsy, no request is made.

    Returns:
        A ``(gene_name, species)`` tuple. Either element is ``None`` when it
        could not be determined (falsy ``entity_id``, non-OK response, or
        the field is absent from the payload).

    Note:
        Exceptions raised by ``requests.get`` (e.g. on the 1-second timeout)
        and by ``response.json()`` are not caught here and propagate.
    """
    gene_name, species = None, None
    if not entity_id:
        return gene_name, species

    url = f"https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}"
    response = requests.get(url, timeout=1)
    if not response.ok:
        return gene_name, species

    res_data = response.json()
    uniprot_id = _extract_uniprot_id(res_data)

    # ``dict.get(key, default)`` only applies the default when the key is
    # missing; a key that is present but null or an empty list would make
    # ``[0]`` / ``.get`` raise. The ``or`` fallbacks cover those cases too.
    organisms = res_data.get("rcsb_entity_source_organism") or [{}]
    source_organism = organisms[0] or {}
    gene_entries = source_organism.get("rcsb_gene_name") or [{}]
    gene_name = (gene_entries[0] or {}).get("value")
    species = source_organism.get("scientific_name")

    # Fall back to UniProt only when RCSB itself gave no gene name.
    if gene_name is None and uniprot_id is not None:
        gene_name = get_gene_name_from_uniprot(uniprot_id)
    return gene_name, species
|
71 |
+
|
72 |
+
|
73 |
+
def get_gene_name_from_uniprot(uniprot_id):
    """Fetch the primary gene name of a UniProtKB entry.

    Args:
        uniprot_id: UniProt accession, interpolated into the request URL.

    Returns:
        The ``value`` of the first gene's ``geneName`` in the response, or
        ``None`` when the response is not OK or the entry carries no such
        field.

    Note:
        Exceptions raised by ``requests.get`` (e.g. on the 1-second timeout)
        and by ``response.json()`` are not caught here and propagate.
    """
    url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}"
    response = requests.get(url, timeout=1.0)
    if not response.ok:
        return None

    uniprot_data = response.json()
    # ``or`` fallbacks guard against keys that are present but null/empty,
    # which ``dict.get`` defaults do not cover (would raise on ``[0]``).
    genes = uniprot_data.get("genes") or [{}]
    primary_name = (genes[0] or {}).get("geneName") or {}
    return primary_name.get("value")
|
81 |
+
|
82 |
+
|
83 |
+
def _extract_uniprot_id(res_data):
    """Return the first UniProt accession listed in a polymer-entity payload.

    Args:
        res_data: Decoded JSON dict; the accession list is read from
            ``rcsb_polymer_entity_container_identifiers.uniprot_ids``.

    Returns:
        The first entry of ``uniprot_ids``, or ``None`` when the container
        or the list is missing, null, or empty.
    """
    # ``or`` fallbacks handle keys that exist but hold null / an empty list,
    # where indexing ``[0]`` on the raw value would raise.
    ids = res_data.get("rcsb_polymer_entity_container_identifiers") or {}
    uniprot_ids = ids.get("uniprot_ids") or [None]
    return uniprot_ids[0]
|
87 |
|
88 |
|
89 |
def get_protein_name(pdb_id: str):
|