import os

# --- Paths ---
BASE_DIR = os.path.abspath(os.path.dirname(__file__))

# Path to dataset
DATA_PATH = os.path.join(BASE_DIR, "data", "synthetic_transactions_samples_5000.csv")

# Directory to save models
MODEL_SAVE_DIR = os.path.join(BASE_DIR, "models")
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# Save paths for Logistic Regression model + artifacts
MODEL_PATH = os.path.join(MODEL_SAVE_DIR, "logreg_model.pkl")  # Logistic Regression model
TFIDF_VECTORIZER_PATH = os.path.join(MODEL_SAVE_DIR, "tfidf_vectorizer.pkl")
LABEL_ENCODERS_PATH = os.path.join(MODEL_SAVE_DIR, "label_encoders.pkl")

# --- Text & Label Columns ---
TEXT_COLUMN = "Sanction_Context"
LABEL_COLUMNS = [
    "Red_Flag_Reason",
    "Maker_Action",
    "Escalation_Level",
    "Risk_Category",
    "Risk_Drivers",
    "Investigation_Outcome",
]

# --- TF-IDF Settings ---
TFIDF_MAX_FEATURES = 5000
NGRAM_RANGE = (1, 2)
USE_STOPWORDS = True  # English stopwords will be removed if True

# --- Train/Test Split ---
RANDOM_STATE = 42
TEST_SIZE = 0.2
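
# --- Illustrative usage sketch (assumption, not the project's actual training code) ---
# A minimal sketch of how a training script might consume these settings: load the
# CSV at DATA_PATH, vectorize TEXT_COLUMN with TF-IDF, fit one Logistic Regression
# per label in LABEL_COLUMNS, and persist artifacts to the paths above. It assumes
# pandas, scikit-learn, and joblib are installed and that the CSV contains
# TEXT_COLUMN plus every LABEL_COLUMNS column; the loop structure and the choice of
# joblib for the .pkl files are assumptions.
if __name__ == "__main__":
    import joblib
    import pandas as pd
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder

    df = pd.read_csv(DATA_PATH)

    # Vectorize the free-text column using the TF-IDF settings above.
    vectorizer = TfidfVectorizer(
        max_features=TFIDF_MAX_FEATURES,
        ngram_range=NGRAM_RANGE,
        stop_words="english" if USE_STOPWORDS else None,
    )
    X = vectorizer.fit_transform(df[TEXT_COLUMN].astype(str))

    models, encoders = {}, {}
    for label in LABEL_COLUMNS:
        # Encode string labels to integers and fit one classifier per label column.
        enc = LabelEncoder()
        y = enc.fit_transform(df[label].astype(str))
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
        )
        clf = LogisticRegression(max_iter=1000)
        clf.fit(X_train, y_train)
        print(f"{label}: test accuracy = {clf.score(X_test, y_test):.3f}")
        models[label], encoders[label] = clf, enc

    # Persist the fitted artifacts to the save paths defined above.
    joblib.dump(models, MODEL_PATH)
    joblib.dump(vectorizer, TFIDF_VECTORIZER_PATH)
    joblib.dump(encoders, LABEL_ENCODERS_PATH)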