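# Spaces: Sleeping  (page-status text captured along with this file; it appears to be extraction residue, not part of the module)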
"""Central configuration constants for the training pipeline.

Defines the filesystem locations of the dataset and of the saved model
artifacts, the names of the text and label columns, the TF-IDF settings,
and the train/test split parameters.

Note: importing this module creates ``MODEL_SAVE_DIR`` on disk if it does
not already exist.
"""

import os

# --- Paths ---
# Directory containing this file; every other path is resolved relative to it.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))

# Path to dataset
DATA_PATH = os.path.join(BASE_DIR, "data", "synthetic_transactions_samples_5000.csv")

# Directory to save models (created at import time so later saves cannot
# fail on a missing directory; exist_ok=True makes repeated imports safe).
MODEL_SAVE_DIR = os.path.join(BASE_DIR, "models")
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# Save paths for Logistic Regression model + artifacts
MODEL_PATH = os.path.join(MODEL_SAVE_DIR, "logreg_model.pkl")  # Logistic Regression model
TFIDF_VECTORIZER_PATH = os.path.join(MODEL_SAVE_DIR, "tfidf_vectorizer.pkl")
LABEL_ENCODERS_PATH = os.path.join(MODEL_SAVE_DIR, "label_encoders.pkl")

# --- Text & Label Columns ---
TEXT_COLUMN = "Sanction_Context"
LABEL_COLUMNS = [
    "Red_Flag_Reason",
    "Maker_Action",
    "Escalation_Level",
    "Risk_Category",
    "Risk_Drivers",
    "Investigation_Outcome",
]

# --- TF-IDF Settings ---
TFIDF_MAX_FEATURES = 5000
NGRAM_RANGE = (1, 2)
USE_STOPWORDS = True  # English stopwords will be removed if True

# --- Train/Test Split ---
RANDOM_STATE = 42
TEST_SIZE = 0.2