Spaces:

ailab-bio
/

PROTAC-Splitter-App

Sleeping

File size: 5,137 Bytes

9dd777e

import logging
import random
from typing import List, Tuple, Callable, Any, Union, Dict, Optional, Literal
from functools import lru_cache

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdchem
from rdkit import RDLogger
from rdkit.Chem import CanonSmiles

from .chemoinformatics import (
    canonize,
    smiles2mol,
)

# Turn off RDKit logging for every channel matching "rdApp.*" at import time,
# so RDKit messages do not clutter the application output.
RDLogger.DisableLog("rdApp.*")


@lru_cache(maxsize=None)
def get_mol(smiles: str) -> rdchem.Mol:
    """ Parse a SMILES string with RDKit, memoizing the result per string.

    The cache is unbounded (``maxsize=None``) and keyed on the SMILES text, so
    repeated calls with the same string hand back the same cached object.

    Args:
        smiles (str): The SMILES string to parse.

    Returns:
        rdkit.Chem.rdchem.Mol: Whatever ``Chem.MolFromSmiles`` returns for ``smiles``.
    """
    parsed_mol = Chem.MolFromSmiles(smiles)
    return parsed_mol


def find_atom_idx_of_map_atoms(
        mol: "rdchem.Mol",
        find_poi: bool = True,
        find_e3: bool = True,
        poi_attachment_id: int = 1,
        e3_attachment_id: int = 2,
) -> Union[Optional[int], Tuple[Optional[int], Optional[int]]]:
    """ Find the indices of the attachment points in the given molecule.

    Attachment points are located by comparing each atom's atom-map number
    against the requested attachment ids.

    Args:
        mol (rdkit.Chem.rdchem.Mol): The molecule.
        find_poi (bool): Whether to find the POI attachment point.
        find_e3 (bool): Whether to find the E3 attachment point.
        poi_attachment_id (int): The label of the attachment point for the POI ligand, i.e., "[*:{poi_attachment_id}]".
        e3_attachment_id (int): The label of the attachment point for the E3 binder, i.e., "[*:{e3_attachment_id}]".

    Returns:
        int | None | Tuple[int | None, int | None]:
            - If both find_poi and find_e3 are True: a tuple with the POI and
              E3 indices (in this order); an entry is None when no atom
              carries the corresponding map number.
            - If only one of them is True: the index of the first atom with
              the requested map number, or None when there is none.
            - If both are False: None.
    """
    if not (find_poi or find_e3):
        # Nothing was requested; the molecule is not inspected at all.
        return None

    if find_poi and find_e3:
        poi_idx = None
        e3_idx = None
        for atom in mol.GetAtoms():
            map_num = atom.GetAtomMapNum()
            # The POI id is checked first, so it takes precedence if both ids
            # happen to be equal.
            if map_num == poi_attachment_id:
                poi_idx = atom.GetIdx()
            elif map_num == e3_attachment_id:
                e3_idx = atom.GetIdx()
            if poi_idx is not None and e3_idx is not None:
                # Both attachment points located; no need to scan further.
                break
        return poi_idx, e3_idx

    # Exactly one attachment point was requested.
    wanted_id = poi_attachment_id if find_poi else e3_attachment_id
    for atom in mol.GetAtoms():
        if atom.GetAtomMapNum() == wanted_id:
            return atom.GetIdx()
    return None


def reassemble_protac(
        ligands_smiles: Optional[str] = None,
        poi_smiles: Optional[str] = None,
        linker_smiles: Optional[str] = None,
        e3_smiles: Optional[str] = None,
        e3_bond_type: Literal['single', 'double', 'triple', 'rand_uniform'] = 'single',
        poi_bond_type: Literal['single', 'double', 'triple', 'rand_uniform'] = 'single',
        poi_attachment_id: int = 1,
        e3_attachment_id: int = 2,
        rand_generator = None,
) -> Tuple[Optional[str], Optional[Chem.rdchem.Mol]]:
    """ Reassemble a PROTAC molecule from its substructures. The SMILES must contain attachment points.

    The substructures are joined with ``Chem.molzip``. If the fragments
    cannot be parsed or zipped together, the failure is logged (for zipping
    errors) and ``(None, None)`` is returned instead of raising.

    Example of usage:

    ```python
    e3_smiles = '[*:2]NC(C(=O)N1CC(O)CC1C(=O)NCc1ccc(-c2scnc2C)cc1)C(C)(C)C'
    linker_smiles = '[*:2]C(=O)CCCCCCCCCC[*:1]'
    poi_smiles = '[*:1]CN1CCN(c2ccc(Nc3ncc4c(C)cc(=O)n(-c5cccc(NC(=O)C=C)c5)c4n3)c(OC)c2)CC1'

    merged_smiles, _ = reassemble_protac(
        poi_smiles=poi_smiles,
        linker_smiles=linker_smiles,
        e3_smiles=e3_smiles,
    )
    print(merged_smiles)
    ```

    Args:
        ligands_smiles (str): Dot-separated SMILES containing all substructures. When given, it takes precedence over poi_smiles, linker_smiles and e3_smiles.
        poi_smiles (str): The SMILES notation for the POI ligand.
        linker_smiles (str): The SMILES notation for the linker.
        e3_smiles (str): The SMILES notation for the E3 binder.
        e3_bond_type (str): The type of bond to be added between the E3 binder and the linker. Can be 'single', 'double', 'triple', or 'rand_uniform'. Not used by the current implementation; kept for interface compatibility.
        poi_bond_type (str): The type of bond to be added between the POI ligand and the linker. Can be 'single', 'double', 'triple', or 'rand_uniform'. Not used by the current implementation; kept for interface compatibility.
        poi_attachment_id (int): The label of the attachment point for the POI ligand, i.e., "[*:{poi_attachment_id}]". Not used by the current implementation.
        e3_attachment_id (int): The label of the attachment point for the E3 binder, i.e., "[*:{e3_attachment_id}]". Not used by the current implementation.
        rand_generator: A random number generator for 'rand_uniform' bond types. Not used by the current implementation.

    Returns:
        Tuple[str, Chem.rdchem.Mol]: The SMILES notation and RDKit molecule object for the reassembled PROTAC molecule, or (None, None) if the substructures could not be parsed or joined.

    Raises:
        ValueError: If ligands_smiles is not given and any of poi_smiles, linker_smiles, e3_smiles is missing.
    """
    if ligands_smiles is None:
        if any(part is None for part in (poi_smiles, linker_smiles, e3_smiles)):
            raise ValueError("Missing substructures SMILES: either provide ligands_smiles or all of poi_smiles, linker_smiles, and e3_smiles")
        ligands_smiles = f'{e3_smiles}.{linker_smiles}.{poi_smiles}'

    # canonize/smiles2mol are project helpers; a None result is treated as
    # "could not build the multi-fragment molecule".
    ligands_mol = canonize(smiles2mol(ligands_smiles))
    if ligands_mol is None:
        return None, None

    try:
        protac_mol = Chem.molzip(ligands_mol)
    except ValueError as e:
        logging.error("Failed to reassemble PROTAC: %s", e)
        return None, None

    if protac_mol is None:
        return None, None

    # Success path: hand back both the SMILES and the molecule, as promised
    # by the return annotation (callers unpack the result as a 2-tuple).
    return Chem.MolToSmiles(protac_mol), protac_mol