File size: 1,043 Bytes
4fcdb47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os

# =============================================================================
# Filesystem layout
# =============================================================================
# Every path below is anchored at the directory that contains this file.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))

# Location of the dataset CSV, under <BASE_DIR>/data.
DATA_PATH = os.path.join(
    BASE_DIR,
    "data",
    "synthetic_transactions_samples_5000.csv",
)

# Directory that holds saved models. NOTE: it is created as a side effect of
# importing this module (no error is raised if it already exists).
MODEL_SAVE_DIR = os.path.join(BASE_DIR, "models")
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# Save paths for the Logistic Regression model and its companion artifacts,
# all placed directly inside MODEL_SAVE_DIR. The order of the file names must
# match the order of the names on the left-hand side.
MODEL_PATH, TFIDF_VECTORIZER_PATH, LABEL_ENCODERS_PATH = (
    os.path.join(MODEL_SAVE_DIR, artifact_file)
    for artifact_file in (
        "logreg_model.pkl",       # Logistic Regression model
        "tfidf_vectorizer.pkl",
        "label_encoders.pkl",
    )
)

# =============================================================================
# Dataset columns
# =============================================================================
# Name of the text column.
TEXT_COLUMN = "Sanction_Context"

# Names of the label columns (presumably one prediction target each --
# confirm against the training code).
LABEL_COLUMNS = [
    "Red_Flag_Reason",
    "Maker_Action",
    "Escalation_Level",
    "Risk_Category",
    "Risk_Drivers",
    "Investigation_Outcome",
]

# =============================================================================
# TF-IDF settings
# =============================================================================
TFIDF_MAX_FEATURES = 5_000
NGRAM_RANGE = (1, 2)
# When True, English stopwords are meant to be removed; the code that consumes
# this flag is not part of this module.
USE_STOPWORDS = True

# =============================================================================
# Train/test split
# =============================================================================
RANDOM_STATE = 42
TEST_SIZE = 0.2