File size: 2,855 Bytes
a887ffc
 
 
 
 
80b6a2c
a887ffc
 
 
 
 
 
 
 
 
80b6a2c
 
bc0d37c
29899b4
 
a887ffc
 
 
 
 
 
 
 
 
 
80b6a2c
a887ffc
 
 
 
 
 
 
 
80b6a2c
a887ffc
 
 
 
 
 
80b6a2c
a887ffc
 
 
 
 
 
 
 
 
 
80b6a2c
 
 
 
 
 
29899b4
bc0d37c
80b6a2c
 
 
 
 
 
bc0d37c
29899b4
bc0d37c
 
29899b4
 
bc0d37c
 
29899b4
80b6a2c
a887ffc
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import rootutils
import hydra
from omegaconf import DictConfig
import logging

# Resolve the project root using the ".project-root" marker file and, because
# pythonpath=True, ask rootutils to put it on the Python path.
# NOTE(review): the `dpacman` imports below presumably rely on this path setup,
# which would explain why this runs before them -- confirm before reordering.
root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# import your processing entry points here
from dpacman.data_tasks.download.genome import main as download_genome_main
from dpacman.data_tasks.download.remap import main as download_remap_main
from dpacman.data_tasks.clean.remap import main as clean_remap_main
from dpacman.data_tasks.fimo.pre_fimo import main as pre_fimo_main
from dpacman.data_tasks.fimo.run_fimo import main as run_fimo_main
from dpacman.data_tasks.fimo.post_fimo import main as post_fimo_main
from dpacman.data_tasks.cluster.remap import main as cluster_remap_main
from dpacman.data_tasks.split.remap import main as split_remap_main
from dpacman.data_tasks.embeddings.dna import main as embed_dna_main
from dpacman.data_tasks.embeddings.protein import main as embed_protein_main


@hydra.main(
    config_path=str(root / "configs"), config_name="preprocess", version_base="1.3"
)
def main(cfg: DictConfig) -> None:
    """Run the data task selected by ``cfg.data_task``.

    The task is chosen by two config values: ``cfg.data_task.type`` (the
    pipeline family, matched exactly) and ``cfg.data_task.name`` (the specific
    pipeline, matched case-insensitively). The full ``cfg`` is forwarded
    unchanged to the selected entry point.

    Args:
        cfg: Configuration composed by Hydra from ``configs/preprocess``.

    Raises:
        ValueError: If ``cfg.data_task.type`` is not a known task type, or if
            ``cfg.data_task.name`` has no pipeline registered under that type.
    """
    task_type = cfg.data_task.type
    task_name = cfg.data_task.name.lower()

    logger.info(f"Running {task_type} task: {task_name}")

    # task type -> task name -> entry point. Registering a new pipeline is a
    # one-line change here instead of another if/elif branch.
    pipelines = {
        "download": {
            "genome": download_genome_main,
            "remap": download_remap_main,
        },
        "clean": {
            "remap": clean_remap_main,
        },
        "fimo": {
            "pre_fimo": pre_fimo_main,
            "run_fimo": run_fimo_main,
            "post_fimo": post_fimo_main,
        },
        "cluster": {
            "remap": cluster_remap_main,
        },
        "split": {
            "remap": split_remap_main,
        },
        "embeddings": {
            "dna": embed_dna_main,
            "protein": embed_protein_main,
        },
    }

    if task_type not in pipelines:
        raise ValueError(f"Unknown task type: {task_type}")

    handlers = pipelines[task_type]
    if task_name not in handlers:
        # Build the message from the actual task type; the previous code
        # reported every non-download failure as a "clean" pipeline error.
        raise ValueError(f"No {task_type} pipeline defined for: {task_name}")

    handlers[task_name](cfg)


# Script entry point. main() is called without arguments here; the
# @hydra.main decorator on main is expected to build and pass in `cfg`.
if __name__ == "__main__":
    main()