Spaces:
Sleeping
Sleeping
File size: 2,333 Bytes
9dd777e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
from typing import Any, Optional, List
import numpy as np
from rdkit import Chem, DataStructs
from rdkit.Chem import rdFingerprintGenerator
def get_fp(
    smiles: str,
    fp_generator: Optional[Any] = None,
    return_np: bool = True,
) -> Optional[Any]:
    """
    Get the Morgan fingerprint of a molecule from its SMILES representation.

    Parameters:
        smiles (str): The SMILES string of the molecule.
        fp_generator (Any, optional): The fingerprint generator to use. It must
            provide ``GetFingerprintAsNumPy`` and ``GetFingerprint``. If None, a
            Morgan generator (radius=16, fpSize=1024, bond types and chirality
            included) is built on every call, so pass a generator in when
            fingerprinting many molecules.
        return_np (bool): Whether to return the fingerprint as a NumPy array.
            Defaults to True.

    Returns:
        Optional[Any]: None if RDKit cannot parse the SMILES. Otherwise the
        fingerprint as a NumPy array when ``return_np`` is True, or the object
        returned by ``fp_generator.GetFingerprint`` (an RDKit bit vector for
        the default generator) when ``return_np`` is False.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit signals an unparsable SMILES by returning None, not by raising.
        return None

    if fp_generator is None:
        # NOTE(review): radius=16 is far larger than the radius 2-3 usually
        # used for Morgan fingerprints -- confirm this is intentional.
        fp_generator = rdFingerprintGenerator.GetMorganGenerator(
            radius=16,
            fpSize=1024,
            useBondTypes=True,
            includeChirality=True,
        )

    if return_np:
        return fp_generator.GetFingerprintAsNumPy(mol)
    return fp_generator.GetFingerprint(mol)
def average_tanimoto_distance(
    smiles: str,
    fingerprints: List[DataStructs.ExplicitBitVect],
    morgan_fp_generator: Optional[Any] = None,
) -> float:
    """
    Compute the average Tanimoto distance between a query SMILES and a list of RDKit fingerprints.

    Parameters:
        smiles (str): SMILES string of the query molecule.
        fingerprints (list): List of RDKit fingerprint objects (e.g., ExplicitBitVect).
        morgan_fp_generator: Fingerprint generator forwarded to ``get_fp``. If
            None, ``get_fp`` falls back to its default generator. The reference
            fingerprints should come from a compatible generator.

    Returns:
        float: Average Tanimoto distance (1 - similarity) between the query and
        the fingerprints, or NaN when ``fingerprints`` is empty.

    Raises:
        ValueError: If ``smiles`` cannot be parsed into a molecule.
    """
    # The bulk similarity call needs an RDKit bit vector, not a NumPy array.
    query_fp = get_fp(smiles, morgan_fp_generator, return_np=False)
    if query_fp is None:
        raise ValueError(f"Invalid SMILES string: {smiles}")

    # The mean of zero distances is undefined. Return NaN explicitly (the value
    # np.mean would give) instead of triggering NumPy's empty-slice RuntimeWarning.
    if len(fingerprints) == 0:
        return float("nan")

    distances = DataStructs.BulkTanimotoSimilarity(
        query_fp, fingerprints, returnDistance=True
    )
    # Cast so callers get a builtin float, as the annotation promises.
    return float(np.mean(distances))
def numpy_to_rdkit_fp(arr: np.ndarray) -> DataStructs.ExplicitBitVect:
    """
    Convert a NumPy array to an RDKit ExplicitBitVect.

    Every non-zero element becomes a set bit and every zero element an unset
    bit, so integer, boolean and float arrays all produce a bit string with one
    character per element.

    Parameters:
        arr (np.ndarray): One-dimensional array of bit values.

    Returns:
        DataStructs.ExplicitBitVect: Bit vector with one bit per element of ``arr``.

    Raises:
        ValueError: If ``arr`` is not one-dimensional.
    """
    bits = np.asarray(arr)
    if bits.ndim != 1:
        raise ValueError(
            f"Expected a 1-D array of bits, got an array with shape {bits.shape}"
        )
    # Build the string from truthiness rather than astype(str): stringifying
    # bool or float arrays yields 'True'/'1.0' tokens, not single '0'/'1' chars.
    bit_string = "".join("1" if is_set else "0" for is_set in (bits != 0).tolist())
    return DataStructs.CreateFromBitString(bit_string)