# NOTE: header text captured from a hosted file viewer; not part of the module.
# Spaces: Sleeping | File size: 1,043 Bytes | 4fcdb47
import os
# --- Filesystem layout ---
# Every path below is anchored to the directory that contains this module.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))

# Dataset CSV, located under <BASE_DIR>/data.
DATA_PATH = os.path.join(
    BASE_DIR, "data", "synthetic_transactions_samples_5000.csv"
)

# Output directory for models; it is created at import time when missing.
MODEL_SAVE_DIR = os.path.join(BASE_DIR, "models")
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# Artifact files for the Logistic Regression setup, all inside MODEL_SAVE_DIR:
# the model itself, the TF-IDF vectorizer, and the label encoders (in that order).
MODEL_PATH, TFIDF_VECTORIZER_PATH, LABEL_ENCODERS_PATH = (
    os.path.join(MODEL_SAVE_DIR, filename)
    for filename in (
        "logreg_model.pkl",
        "tfidf_vectorizer.pkl",
        "label_encoders.pkl",
    )
)
# --- Dataset columns ---
# Name of the column that carries the text field.
TEXT_COLUMN = "Sanction_Context"

# Names of the label columns, in this fixed order.
# NOTE(review): presumably one prediction target per column; confirm against
# the training code.
LABEL_COLUMNS = [
    "Red_Flag_Reason",
    "Maker_Action",
    "Escalation_Level",
    "Risk_Category",
    "Risk_Drivers",
    "Investigation_Outcome",
]
# --- TF-IDF configuration ---
# NOTE(review): presumably forwarded to the TF-IDF vectorizer that is built
# elsewhere; confirm the exact parameter mapping at the call site.

# When True, English stopwords are removed.
USE_STOPWORDS = True

# N-gram bounds; presumably (min_n, max_n), i.e. unigrams and bigrams (confirm).
NGRAM_RANGE = (1, 2)

# Upper bound on the number of TF-IDF features.
TFIDF_MAX_FEATURES = 5000
# --- Train/test split configuration ---
# Split size setting; presumably the fraction of rows held out for testing
# (confirm at the call site).
TEST_SIZE = 0.2

# Fixed seed value, presumably used to make the split reproducible (confirm).
RANDOM_STATE = 42
# (stray "|" table delimiter left by the file viewer; not part of the module)