File size: 2,855 Bytes
a887ffc 80b6a2c a887ffc 80b6a2c bc0d37c 29899b4 a887ffc 80b6a2c a887ffc 80b6a2c a887ffc 80b6a2c a887ffc 80b6a2c 29899b4 bc0d37c 80b6a2c bc0d37c 29899b4 bc0d37c 29899b4 bc0d37c 29899b4 80b6a2c a887ffc | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | import rootutils
import hydra
from omegaconf import DictConfig
import logging
# Resolve the project root by searching upward from this file for the
# ".project-root" marker; pythonpath=True asks rootutils to add that root to
# the import path.
# NOTE(review): presumably this must run before the `dpacman` imports below so
# that they resolve — confirm before reordering imports.
root = rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# import your processing entry points here
from dpacman.data_tasks.download.genome import main as download_genome_main
from dpacman.data_tasks.download.remap import main as download_remap_main
from dpacman.data_tasks.clean.remap import main as clean_remap_main
from dpacman.data_tasks.fimo.pre_fimo import main as pre_fimo_main
from dpacman.data_tasks.fimo.run_fimo import main as run_fimo_main
from dpacman.data_tasks.fimo.post_fimo import main as post_fimo_main
from dpacman.data_tasks.cluster.remap import main as cluster_remap_main
from dpacman.data_tasks.split.remap import main as split_remap_main
from dpacman.data_tasks.embeddings.dna import main as embed_dna_main
from dpacman.data_tasks.embeddings.protein import main as embed_protein_main
@hydra.main(
    config_path=str(root / "configs"), config_name="preprocess", version_base="1.3"
)
def main(cfg: DictConfig) -> None:
    """Dispatch a preprocessing run to the pipeline selected in the config.

    The pipeline is chosen by two config fields: ``cfg.data_task.type``
    (matched exactly) and ``cfg.data_task.name`` (lower-cased before
    matching). The selected entry point is called with the full ``cfg``.

    Args:
        cfg: Hydra/OmegaConf configuration; must provide ``data_task.type``
            and ``data_task.name``.

    Raises:
        ValueError: If the task type is unknown, or if no pipeline with the
            given name is registered under that task type.
    """
    task_type = cfg.data_task.type
    task_name = cfg.data_task.name.lower()
    logger.info("Running %s task: %s", task_type, task_name)

    # task type -> task name -> entry point. Adding a pipeline means adding
    # one entry here; the error messages below are derived from the keys, so
    # they cannot drift out of sync with the branch that raised them.
    pipelines = {
        "download": {
            "genome": download_genome_main,
            "remap": download_remap_main,
        },
        "clean": {
            "remap": clean_remap_main,
        },
        "fimo": {
            "pre_fimo": pre_fimo_main,
            "run_fimo": run_fimo_main,
            "post_fimo": post_fimo_main,
        },
        "cluster": {
            "remap": cluster_remap_main,
        },
        "split": {
            "remap": split_remap_main,
        },
        "embeddings": {
            "dna": embed_dna_main,
            "protein": embed_protein_main,
        },
    }

    by_name = pipelines.get(task_type)
    if by_name is None:
        raise ValueError(f"Unknown task type: {task_type}")

    pipeline = by_name.get(task_name)
    if pipeline is None:
        # Report the actual task type (previously every non-download branch
        # claimed to be the "clean" pipeline).
        raise ValueError(f"No {task_type} pipeline defined for: {task_name}")

    pipeline(cfg)
# Script entry point: invoke the Hydra-decorated dispatcher only when this file
# is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|