"""Sanity checks for the HR churn data set and the persisted model.

The module loads a fixed subset of columns from the churn CSV, encodes the
``Attrition`` label as 0/1, carves out a 20% hold-out split and then verifies:

* the loaded model reaches a minimum macro-averaged F1 on the hold-out split,
* the loaded frame has no missing values,
* the training split is not empty.
"""

import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

# Only these columns are read from the CSV; everything else is ignored.
df = pd.read_csv(
    "./data/hr_churn_data.csv",
    usecols=[
        'JobRole',
        'MaritalStatus',
        'OverTime',
        'EducationField',
        'BusinessTravel',
        'JobLevel',
        'StockOptionLevel',
        'Department',
        'Attrition',
    ],
)

# Encode the label as an integer. Any value other than 'No'/'Yes' is mapped
# to NaN by ``Series.map`` and is therefore caught by test_missing_values.
df['Attrition'] = df['Attrition'].map({'No': 0, 'Yes': 1})

# Features are every loaded column except the label.
X = df.drop("Attrition", axis=1)
y = df["Attrition"]

# Fixed seed so the hold-out split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=125,
)

# NOTE: joblib.load unpickles the file, so model.pkl must be a trusted artifact.
model = joblib.load("./model.pkl")


def test_f1():
    """Require a macro-averaged F1 above 0.60 on the hold-out split."""
    predicted = model.predict(X_test)
    macro_f1 = f1_score(y_test, predicted, average="macro")
    assert macro_f1 > 0.60, "f1_score is below acceptable threshold"


def test_missing_values():
    """Require that no cell of the loaded frame is missing."""
    per_column_missing = df.isna().sum()
    total_missing = per_column_missing.sum()
    assert total_missing == 0, "Dataset contains missing values"


def test_pipeline_execution():
    """Require that the training features and labels are both non-empty."""
    n_train_rows = len(X_train)
    n_train_labels = len(y_train)
    assert n_train_rows > 0, "Training data is empty!"
    assert n_train_labels > 0, "Labels are empty!"