Spaces:
Sleeping
Sleeping
from typing import Any, Optional, List | |
import numpy as np | |
from rdkit import Chem, DataStructs | |
from rdkit.Chem import rdFingerprintGenerator | |
def get_fp(
    smiles: str,
    fp_generator: Optional[Any] = None,
    return_np: bool = True,
) -> Optional[np.ndarray]:
    """
    Compute a fingerprint for the molecule described by a SMILES string.

    Parameters:
        smiles (str): SMILES string of the molecule to fingerprint.
        fp_generator (Any, optional): Fingerprint generator exposing
            ``GetFingerprintAsNumPy`` and ``GetFingerprint``. When None, a
            Morgan generator is created with radius=16, fpSize=1024,
            useBondTypes=True and includeChirality=True.
        return_np (bool): If true (the default), return the result of
            ``GetFingerprintAsNumPy``; otherwise return the result of
            ``GetFingerprint``.

    Returns:
        Optional[np.ndarray]: The fingerprint, or None when
        ``Chem.MolFromSmiles`` returns None for ``smiles``. Note that with
        ``return_np=False`` the value is whatever ``GetFingerprint`` returns
        rather than a NumPy array.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is None:
        # RDKit could not build a molecule from this SMILES string.
        return None

    # Only build the default generator when the caller did not supply one.
    generator = (
        fp_generator
        if fp_generator is not None
        else rdFingerprintGenerator.GetMorganGenerator(
            radius=16,
            fpSize=1024,
            useBondTypes=True,
            includeChirality=True,
        )
    )

    if not return_np:
        return generator.GetFingerprint(molecule)
    return generator.GetFingerprintAsNumPy(molecule)
def average_tanimoto_distance(
    smiles: str,
    fingerprints: List[DataStructs.ExplicitBitVect],
    morgan_fp_generator: Optional[Any] = None,
) -> float:
    """
    Compute the average Tanimoto distance between a query SMILES and a list of RDKit fingerprints.

    Parameters:
        smiles (str): SMILES string of the query molecule.
        fingerprints (list): List of RDKit fingerprint objects (e.g., ExplicitBitVect).
        morgan_fp_generator: Fingerprint generator forwarded to ``get_fp`` for
            the query molecule. When None, ``get_fp`` falls back to its
            default generator.

    Returns:
        float: Average Tanimoto distance (1 - similarity) between the query
        and the fingerprints. NaN when there are no distances to average
        (e.g. ``fingerprints`` is empty).

    Raises:
        ValueError: If ``get_fp`` returns None for ``smiles`` (invalid SMILES).
    """
    # The bulk comparison needs an RDKit fingerprint object, not a NumPy array.
    query_fp = get_fp(smiles, morgan_fp_generator, return_np=False)
    if query_fp is None:
        raise ValueError(f"Invalid SMILES string: {smiles}")

    distances = DataStructs.BulkTanimotoSimilarity(
        query_fp, fingerprints, returnDistance=True
    )
    if len(distances) == 0:
        # The mean of nothing is undefined. NumPy would also yield NaN here,
        # but only after emitting a "Mean of empty slice" RuntimeWarning;
        # return the same value explicitly and quietly.
        return np.float64(np.nan)
    return np.array(distances).mean()
def numpy_to_rdkit_fp(arr: np.ndarray) -> DataStructs.ExplicitBitVect:
    """
    Convert a NumPy array to an RDKit ExplicitBitVect.

    Every element is binarised before conversion: non-zero values become
    '1' and zeros become '0'. For integer arrays that already contain only
    0 and 1 the result is unchanged from stringifying the values directly.

    Parameters:
        arr (np.ndarray): One-dimensional array with one element per bit.
            Integer, boolean and floating-point dtypes are accepted.

    Returns:
        DataStructs.ExplicitBitVect: The object produced by
        ``DataStructs.CreateFromBitString`` for the resulting '0'/'1' string.
    """
    # Stringifying raw values only works for integer 0/1 data: a float 1.0
    # becomes "1.0" and a bool becomes "True", which corrupts both the length
    # and the bit positions of the string. Normalise to uint8 0/1 first.
    bits = (np.asarray(arr) != 0).astype(np.uint8)
    return DataStructs.CreateFromBitString("".join(bits.astype(str)))