File size: 5,137 Bytes
9dd777e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import logging
import random
from typing import List, Tuple, Callable, Any, Union, Dict, Optional, Literal
from functools import lru_cache

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdchem
from rdkit import RDLogger
from rdkit.Chem import CanonSmiles

from .chemoinformatics import (
    canonize,
    smiles2mol,
)

# Module-level side effect: disable RDKit logging for every channel matching
# "rdApp.*" as soon as this module is imported.
RDLogger.DisableLog("rdApp.*")


@lru_cache(maxsize=None)
def get_mol(smiles: str) -> rdchem.Mol:
    """ Parse a SMILES string with RDKit, memoizing the result per distinct string.

    The cache is unbounded (``maxsize=None``), so every distinct SMILES seen
    stays cached for the lifetime of the process, and repeated calls with the
    same string hand back the very same object.
    NOTE(review): because the cached object is shared between callers, it
    should presumably not be modified in place -- confirm against call sites.

    Args:
        smiles (str): The SMILES string to parse.

    Returns:
        rdkit.Chem.rdchem.Mol: Whatever ``Chem.MolFromSmiles`` returns for the given string.
    """
    parsed_mol = Chem.MolFromSmiles(smiles)
    return parsed_mol


def _first_atom_idx_with_map_num(mol: rdchem.Mol, map_num: int) -> Optional[int]:
    """ Return the index of the first atom of `mol` whose atom-map number equals `map_num`, or None if no atom matches. """
    for atom in mol.GetAtoms():
        if atom.GetAtomMapNum() == map_num:
            return atom.GetIdx()
    return None


def find_atom_idx_of_map_atoms(
        mol: rdchem.Mol,
        find_poi: bool,
        find_e3: bool,
        poi_attachment_id: int = 1,
        e3_attachment_id: int = 2,
) -> Union[Optional[int], Tuple[Optional[int], Optional[int]]]:
    """ Find the indices of the attachment points in the given molecule.

    An attachment point is identified by its atom-map number. When several
    atoms carry the same label, the first one in atom order is reported,
    regardless of which combination of flags is requested.

    Args:
        mol (rdkit.Chem.rdchem.Mol): The molecule.
        find_poi (bool): Whether to find the POI attachment point.
        find_e3 (bool): Whether to find the E3 attachment point.
        poi_attachment_id (int): The label of the attachment point for the POI ligand, i.e., "[*:{poi_attachment_id}]".
        e3_attachment_id (int): The label of the attachment point for the E3 binder, i.e., "[*:{e3_attachment_id}]".

    Returns:
        Optional[int] | Tuple[Optional[int], Optional[int]]: If both find_poi and find_e3 are True, a tuple (POI index, E3 index), in this order. If only one of them is True, the single corresponding index. An index is None when no atom carries the requested label. None is also returned when both flags are False.
    """
    if find_poi and find_e3:
        # Each label is looked up independently so that the result does not
        # depend on the relative order of the two attachment atoms.
        return (
            _first_atom_idx_with_map_num(mol, poi_attachment_id),
            _first_atom_idx_with_map_num(mol, e3_attachment_id),
        )
    if find_poi:
        return _first_atom_idx_with_map_num(mol, poi_attachment_id)
    if find_e3:
        return _first_atom_idx_with_map_num(mol, e3_attachment_id)
    # Nothing was requested.
    return None


def reassemble_protac(
        ligands_smiles: Optional[str] = None,
        poi_smiles: Optional[str] = None,
        linker_smiles: Optional[str] = None,
        e3_smiles: Optional[str] = None,
        e3_bond_type: Literal['single', 'double', 'triple', 'rand_uniform'] = 'single',
        poi_bond_type: Literal['single', 'double', 'triple', 'rand_uniform'] = 'single',
        poi_attachment_id: int = 1,
        e3_attachment_id: int = 2,
        rand_generator = None,
) -> Tuple[Optional[str], Optional[Chem.rdchem.Mol]]:
    """ Reassemble a PROTAC molecule from its substructures. The SMILES must contain attachment points.

    The substructures are joined with `Chem.molzip`. If the substructures
    cannot be parsed or zipped together, the failure is logged (for molzip
    errors) and (None, None) is returned instead of raising.

    Example of usage:

    ```python
    e3_smiles = '[*:2]NC(C(=O)N1CC(O)CC1C(=O)NCc1ccc(-c2scnc2C)cc1)C(C)(C)C'
    linker_smiles = '[*:2]C(=O)CCCCCCCCCC[*:1]'
    poi_smiles = '[*:1]CN1CCN(c2ccc(Nc3ncc4c(C)cc(=O)n(-c5cccc(NC(=O)C=C)c5)c4n3)c(OC)c2)CC1'

    merged_smiles, _ = reassemble_protac(
        poi_smiles=poi_smiles,
        linker_smiles=linker_smiles,
        e3_smiles=e3_smiles,
    )
    print(merged_smiles)
    ```

    Args:
        ligands_smiles (str): Dot-separated SMILES holding all the substructures at once. When given, it takes precedence and poi_smiles, linker_smiles, and e3_smiles are ignored.
        poi_smiles (str): The SMILES notation for the POI ligand.
        linker_smiles (str): The SMILES notation for the linker.
        e3_smiles (str): The SMILES notation for the E3 binder.
        e3_bond_type (str): The type of bond to be added between the E3 binder and the linker. Can be 'single', 'double', 'triple', or 'rand_uniform'. Currently not used by this implementation.
        poi_bond_type (str): The type of bond to be added between the POI ligand and the linker. Can be 'single', 'double', 'triple', or 'rand_uniform'. Currently not used by this implementation.
        poi_attachment_id (int): The label of the attachment point for the POI ligand, i.e., "[*:{poi_attachment_id}]". Currently not used by this implementation.
        e3_attachment_id (int): The label of the attachment point for the E3 binder, i.e., "[*:{e3_attachment_id}]". Currently not used by this implementation.
        rand_generator: A random number generator for 'rand_uniform' bond types. Currently not used by this implementation.

    Returns:
        Tuple[Optional[str], Optional[Chem.rdchem.Mol]]: The SMILES notation and RDKit molecule object for the reassembled PROTAC molecule, or (None, None) if the reassembly failed.

    Raises:
        ValueError: If ligands_smiles is not given and any of poi_smiles, linker_smiles, or e3_smiles is missing.
    """
    if ligands_smiles is None:
        if poi_smiles is None or linker_smiles is None or e3_smiles is None:
            raise ValueError("Missing substructures SMILES: either provide ligands_smiles or all of poi_smiles, linker_smiles, and e3_smiles")
        ligands_smiles = f'{e3_smiles}.{linker_smiles}.{poi_smiles}'

    # canonize/smiles2mol are project helpers; presumably they parse the SMILES
    # and return an RDKit molecule, or None on failure -- verify in .chemoinformatics.
    ligands_mol = canonize(smiles2mol(ligands_smiles))
    if ligands_mol is None:
        return None, None

    try:
        protac_mol = Chem.molzip(ligands_mol)
    except ValueError as e:
        logging.error("Failed to reassemble PROTAC: %s", e)
        return None, None
    if protac_mol is None:
        return None, None

    # Success path: hand back the documented (SMILES, molecule) pair. Without
    # this the function would implicitly return a bare None, which callers
    # cannot unpack into two values.
    return Chem.MolToSmiles(protac_mol), protac_mol