"""Configuration for the ``turing`` package (turing-space/turing/config.py).

Deployed with the FastAPI ML service to Hugging Face Spaces (commit 5fc6e5d).
"""
from pathlib import Path
from dotenv import load_dotenv
from loguru import logger
# Pull environment variables in from a .env file when one exists;
# a missing file is silently ignored by python-dotenv.
load_dotenv()

# Project root: two levels up from this file (the directory containing
# the ``turing`` package).
PROJ_ROOT = Path(__file__).resolve().parent.parent
logger.info(f"PROJ_ROOT path is: {PROJ_ROOT}")
# Data directory layout (follows the cookiecutter-data-science convention).
DATA_DIR = PROJ_ROOT / "data"
RAW_DATA_DIR = DATA_DIR / "raw"  # unmodified source data
INTERIM_DATA_DIR = DATA_DIR / "interim"  # intermediate transformation outputs
PROCESSED_DATA_DIR = DATA_DIR / "processed"  # final datasets for modeling
EXTERNAL_DATA_DIR = DATA_DIR / "external"  # third-party data
MODELS_DIR = PROJ_ROOT / "models"  # trained / serialized model artifacts
REPORTS_DIR = PROJ_ROOT / "reports"  # generated analysis outputs
FIGURES_DIR = REPORTS_DIR / "figures"  # plots belonging to reports
# Dataset: NLBSE'26 code-comment classification (multi-label, three languages).
DATASET_HF_ID = "NLBSE/nlbse26-code-comment-classification"
LANGS = ["java", "python", "pharo"]
INPUT_COLUMN = "combo"  # dataset column holding the model input text
LABEL_COLUMN = "labels"  # dataset column holding the target labels
# Per-language label sets.
# NOTE(review): the java list mixes lowercase and capitalized names
# ("summary" vs "Ownership") — presumably these mirror the dataset's
# exact label strings; confirm against the HF dataset before changing.
LABELS_MAP = {
"java": ["summary", "Ownership", "Expand", "usage", "Pointer", "deprecation", "rational"],
"python": ["Usage", "Parameters", "DevelopmentNotes", "Expand", "Summary"],
"pharo": [
"Keyimplementationpoints",
"Example",
"Responsibilities",
"Intent",
"Keymessages",
"Collaborators",
],
}
# Total number of label categories across all languages (7 + 5 + 6 = 18).
TOTAL_CATEGORIES = sum(len(v) for v in LABELS_MAP.values())
# Score parameters — presumably normalization caps used when computing the
# submission score; TODO confirm against the scoring code that consumes them.
MAX_AVG_RUNTIME = 5.0  # seconds
MAX_AVG_FLOPS = 5000.0  # GFLOPS
# Training parameters
DEFAULT_BATCH_SIZE = 32  # default mini-batch size when none is specified
# Model registry: maps a short CLI/API key to the information needed to
# locate and dynamically import each model implementation.
#   model_name         - display / artifact name of the model
#   exp_name           - experiment (run) name used for tracking outputs
#   model_class_module - dotted path of the module defining the class
#   model_class_name   - class name to load from that module
# NOTE(review): for "codeberta" model_name equals exp_name
# ("fine-tuned-CodeBERTa"), unlike the other entries where model_name is the
# bare model name — confirm this is intentional before normalizing.
MODEL_CONFIG = {
"codeberta": {
"model_name": "fine-tuned-CodeBERTa",
"exp_name": "fine-tuned-CodeBERTa",
"model_class_module": "turing.modeling.models.codeBerta",
"model_class_name": "CodeBERTa",
},
"graphcodebert": {
"model_name": "GraphCodeBERT",
"exp_name": "fine-tuned-GraphCodeBERT",
"model_class_module": "turing.modeling.models.graphCodeBert",
"model_class_name": "GraphCodeBERTClassifier",
},
"tinybert": {
"model_name": "TinyBERT",
"exp_name": "fine-tuned-TinyBERT",
"model_class_module": "turing.modeling.models.tinyBert",
"model_class_name": "TinyBERTClassifier",
},
"randomforest": {
"model_name": "RandomForest-TfIdf",
"exp_name": "RandomForest-TfIdf",
"model_class_module": "turing.modeling.models.randomForestTfIdf",
"model_class_name": "RandomForestTfIdf",
},
}
# Default number of iterations (consumer not visible in this file).
DEFAULT_NUM_ITERATIONS = 20
# Existing model modules (module basenames under turing.modeling.models).
# NOTE(review): only 2 of the 4 modules referenced in MODEL_CONFIG appear
# here (graphCodeBert and tinyBert are missing) — confirm whether this list
# is intentionally restricted or simply out of date.
EXISTING_MODELS = [
"randomForestTfIdf",
"codeBerta",
]
# When tqdm is available, route loguru output through tqdm.write so log
# lines do not clobber active progress bars.
# https://github.com/Delgan/loguru/issues/135
try:
    from tqdm import tqdm
except ModuleNotFoundError:
    # tqdm not installed — keep loguru's default stderr handler.
    pass
else:
    try:
        # Drop the default handler (id 0) and re-add one that writes via
        # tqdm.write; end="" because loguru messages already end in newline.
        logger.remove(0)
        logger.add(lambda msg: tqdm.write(msg, end=""), colorize=True)
    except ValueError:
        # Handler 0 was already removed (e.g. on module re-import).
        pass