import logging import random from typing import List, Tuple, Callable, Any, Union, Dict, Optional, Literal from functools import lru_cache from rdkit import Chem from rdkit.Chem import AllChem from rdkit.Chem import rdchem from rdkit import RDLogger from rdkit.Chem import CanonSmiles from .chemoinformatics import ( canonize, smiles2mol, ) RDLogger.DisableLog("rdApp.*") @lru_cache(maxsize=None) def get_mol(smiles: str) -> rdchem.Mol: return Chem.MolFromSmiles(smiles) def find_atom_idx_of_map_atoms( mol: rdchem.Mol, find_poi: True, find_e3: True, poi_attachment_id: int = 1, e3_attachment_id: int = 2, ) -> Union[int, Tuple[int, int]]: """ Find the indices of the attachment points in the given molecule. Args: mol (rdkit.Chem.rdchem.Mol): The molecule. find_poi (bool): Whether to find the POI attachment point. find_e3 (bool): Whether to find the E3 attachment point. poi_attachment_id (int): The label of the attachment point for the POI ligand, i.e., "[*:{poi_attachment_id}]". e3_attachment_id (int): The label of the attachment point for the E3 binder, i.e., "[*:{e3_attachment_id}]". Returns: int | Tuple[int, int]: The index of the attachment point for the POI ligand if find_poi is True, the index of the attachment point for the E3 binder if find_e3 is True, or a tuple containing POI and E3 indices (in this order) if both find_poi and find_e3 are True. """ if find_poi and find_e3: poi_idx = None e3_idx = None for atom in mol.GetAtoms(): if atom.GetAtomMapNum() == poi_attachment_id: poi_idx = atom.GetIdx() elif atom.GetAtomMapNum() == e3_attachment_id: e3_idx = atom.GetIdx() if poi_idx is not None and e3_idx is not None: break return poi_idx, e3_idx elif find_poi: for atom in mol.GetAtoms(): if atom.GetAtomMapNum() == poi_attachment_id: return atom.GetIdx() elif find_e3: for atom in mol.GetAtoms(): if atom.GetAtomMapNum() == e3_attachment_id: return atom.GetIdx() def reassemble_protac( ligands_smiles: Optional[str] = None, poi_smiles: Optional[str] = None, linker_smiles: Optional[str] = None, e3_smiles: Optional[str] = None, e3_bond_type: Literal['single', 'double', 'triple', 'rand_uniform'] = 'single', poi_bond_type: Literal['single', 'double', 'triple', 'rand_uniform'] = 'single', poi_attachment_id: int = 1, e3_attachment_id: int = 2, rand_generator = None, ) -> Tuple[str, Chem.rdchem.Mol]: """ Reassemble a PROTAC molecule from its substructures. The SMILES must contain attachment points. In case the bond type cannot be formed an error will be raised. Example of usage: ```python e3_smiles = '[*:2]NC(C(=O)N1CC(O)CC1C(=O)NCc1ccc(-c2scnc2C)cc1)C(C)(C)C' linker_smiles = '[*:2]C(=O)CCCCCCCCCC[*:1]' poi_smiles = '[*:1]CN1CCN(c2ccc(Nc3ncc4c(C)cc(=O)n(-c5cccc(NC(=O)C=C)c5)c4n3)c(OC)c2)CC1' merged_smiles, _ = reassemble_protac(poi_smiles, linker_smiles, e3_smiles, 'single', 'single') print(merged_smiles) ``` Args: poi_smiles (str): The SMILES notation for the POI ligand. linker_smiles (str): The SMILES notation for the linker. e3_smiles (str): The SMILES notation for the E3 binder. e3_bond_type (str): The type of bond to be added between the E3 binder and the linker. Can be 'single', 'double', 'triple', or 'rand_uniform'. poi_bond_type (str): The type of bond to be added between the POI ligand and the linker. Can be 'single', 'double', 'triple', or 'rand_uniform'. poi_attachment_id (int): The label of the attachment point for the POI ligand, i.e., "[*:{poi_attachment_id}]". e3_attachment_id (int): The label of the attachment point for the E3 binder, i.e., "[*:{e3_attachment_id}]". rand_generator: A random number generator for 'rand_uniform' bond types. Defaults to None, i.e., standard library random. Returns: Tuple[str, Chem.rdchem.Mol]: The SMILES notation and RDKit molecule object for the reassembled PROTAC molecule. """ if ligands_smiles is None: if None in [poi_smiles, linker_smiles, e3_smiles]: raise ValueError("Missing substructures SMILES: either provide ligands_smiles or all of poi_smiles, linker_smiles, and e3_smiles") ligands_smiles = f'{e3_smiles}.{linker_smiles}.{poi_smiles}' if None in [poi_smiles, linker_smiles, e3_smiles]: if ligands_smiles is None: raise ValueError("Missing substructures SMILES: either provide ligands_smiles or all of poi_smiles, linker_smiles, and e3_smiles") ligands_mol = canonize(smiles2mol(ligands_smiles)) if ligands_mol is None: return None, None try: protac_mol = Chem.molzip(ligands_mol) except ValueError as e: logging.error(f"Failed to reassemble PROTAC: {e}") return None, None