|
import sys |
|
import os |
|
import pandas as pd |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.preprocessing import LabelEncoder, StandardScaler |
|
from sklearn.ensemble import RandomForestClassifier |
|
import joblib |
|
|
|
|
|
# Put the parent of this file's directory on the import search path.
_parent_dir = os.path.join(os.path.dirname(__file__), '..')
sys.path.append(os.path.abspath(_parent_dir))
|
|
|
|
|
class DataIngestion:
    """Reads tabular data from CSV files."""

    @staticmethod
    def load_data(file_path):
        """Read the CSV at *file_path* and return it as a pandas DataFrame."""
        frame = pd.read_csv(file_path)
        return frame
|
|
|
|
|
class DataTransformation:
    """Label-encode categorical columns and standardise numerical columns.

    After ``fit_transform`` the fitted ``LabelEncoder`` objects are available
    in ``label_encoders`` (keyed by column name) and the fitted
    ``StandardScaler`` in ``scaler``.
    """

    # Label-encoded only when the column is present in the input frame.
    CATEGORICAL_COLUMNS = ('statute', 'offense_category', 'penalty')
    # Scaled together by one scaler; every one of these must be present.
    NUMERICAL_COLUMNS = (
        'imprisonment_duration_served',
        'risk_score',
        'penalty_severity',
    )

    def __init__(self):
        self.label_encoders = {}
        self.scaler = StandardScaler()

    def fit_transform(self, data):
        """Fit the encoders and the scaler on *data* and return the result.

        Args:
            data: DataFrame containing the raw columns.

        Returns:
            A new DataFrame in which the categorical columns that are present
            have been label-encoded and the numerical columns standardised.
            The frame passed in by the caller is left unmodified.

        Raises:
            KeyError: If any column in ``NUMERICAL_COLUMNS`` is missing.
        """
        # Work on a copy so the caller's DataFrame is not silently overwritten.
        transformed = data.copy()

        for col in self.CATEGORICAL_COLUMNS:
            if col not in transformed.columns:
                continue
            # One encoder per column, kept so the mapping can be reused later.
            encoder = LabelEncoder()
            transformed[col] = encoder.fit_transform(transformed[col])
            self.label_encoders[col] = encoder

        numerical_columns = list(self.NUMERICAL_COLUMNS)
        transformed[numerical_columns] = self.scaler.fit_transform(
            transformed[numerical_columns]
        )

        return transformed
|
|
|
|
|
class ModelTrainer:
    """Train, evaluate and persist a ``RandomForestClassifier``."""

    def __init__(self):
        # random_state is pinned so repeated runs build the same forest.
        self.model = RandomForestClassifier(n_estimators=100, random_state=42)

    def train(self, X_train, y_train):
        """Fit the classifier on the training features and labels."""
        self.model.fit(X_train, y_train)

    def evaluate(self, X_test, y_test):
        """Score the model on held-out data, print the result and return it.

        Args:
            X_test: Feature matrix to score on.
            y_test: True labels for ``X_test``.

        Returns:
            The value returned by ``self.model.score`` (a fraction, printed
            as a percentage).
        """
        accuracy = self.model.score(X_test, y_test)
        print(f"Model Accuracy: {accuracy * 100:.2f}%")
        # Hand the metric back so callers can log or compare it
        # instead of having to parse stdout.
        return accuracy

    def save_model(self, model_path):
        """Serialise the model to *model_path* with joblib."""
        joblib.dump(self.model, model_path)
|
|
|
|
|
# --- Output locations -------------------------------------------------------
models_dir = "ipc_vector_db"
os.makedirs(models_dir, exist_ok=True)
model_path = os.path.join(models_dir, 'bail_reckoner_model.pkl')
preprocessing_path = os.path.join(models_dir, 'preprocessing_objects.pkl')

# --- Load and preprocess ----------------------------------------------------
# NOTE(review): the encoders and scaler are fitted on the full dataset here,
# before the train/test split below, so test rows influence preprocessing.
transformer = DataTransformation()
data = transformer.fit_transform(DataIngestion.load_data('data/a.csv'))

# --- Features / target ------------------------------------------------------
y = data['bail_eligibility']
X = data.drop(['case_id', 'bail_eligibility'], axis=1)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Train and evaluate -----------------------------------------------------
trainer = ModelTrainer()
trainer.train(X_train, y_train)
trainer.evaluate(X_test, y_test)

# --- Persist the model and the fitted preprocessing objects -----------------
trainer.save_model(model_path)
joblib.dump(
    dict(label_encoders=transformer.label_encoders, scaler=transformer.scaler),
    preprocessing_path,
)

print(f"Model and preprocessing objects saved to {models_dir}")
|
|