Spaces:
Sleeping
Sleeping
""" | |
This module provides the CancerPredictor class for training and predicting breast cancer diagnosis | |
using logistic regression. It leverages scikit-learn for model training, evaluation, and prediction, | |
and pandas for data manipulation. The predictor expects input features such as radius_mean, | |
texture_mean, symmetry_mean, and fractal_dimension_mean, and outputs a diagnosis prediction. | |
Classes: | |
CancerPredictor: Handles training on a CSV dataset and making predictions | |
based on input features. | |
""" | |
import pandas as pd | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.preprocessing import LabelEncoder | |
from sklearn.metrics import accuracy_score | |
class CancerPredictor: | |
""" | |
CancerPredictor is a class for training and making predictions on breast cancer diagnosis | |
using logistic regression. | |
""" | |
def __init__(self): | |
self.model = LogisticRegression() | |
self.le_diagnosis = LabelEncoder() | |
def train(self, csv_train, csv_test): | |
""" | |
Trains the logistic regression model using a CSV file containing breast cancer data. | |
The CSV must include columns: 'radius_mean', 'texture_mean', 'symmetry_mean', | |
'fractal_dimension_mean', and 'diagnosis'. | |
Prints the model accuracy after training. | |
""" | |
# Load the train data | |
data_train = pd.read_csv(csv_train) | |
# Encode categorical variables | |
data_train['diagnosis'] = self.le_diagnosis.fit_transform(data_train['diagnosis']) | |
# Split features and target | |
X_train = data_train[ | |
['radius_mean', 'texture_mean', 'symmetry_mean', 'fractal_dimension_mean']] | |
y_train = data_train['diagnosis'] | |
# Train the model | |
self.model.fit(X_train, y_train) | |
# Load the test data | |
data_test = pd.read_csv(csv_test) | |
# Encode categorical variables | |
data_test['diagnosis'] = self.le_diagnosis.fit_transform(data_test['diagnosis']) | |
# Split features and target | |
X_test = data_test[ | |
['radius_mean', 'texture_mean', 'symmetry_mean', 'fractal_dimension_mean']] | |
y_test = data_test['diagnosis'] | |
# Evaluate the model | |
y_pred = self.model.predict(X_test) | |
accuracy = accuracy_score(y_test, y_pred) | |
print(f"Model accuracy: {accuracy:.2f}") | |
def predict(self, radius_mean, texture_mean, symmetry_mean, fractal_dimension_mean): | |
""" | |
Predicts the diagnosis ('M' for malignant or 'B' for benign) based on the provided | |
feature values. | |
Returns the predicted diagnosis as a string. | |
""" | |
# Create a DataFrame with the same feature names as the training data | |
input_data = pd.DataFrame( | |
[[radius_mean, texture_mean, symmetry_mean, fractal_dimension_mean]], | |
columns=['radius_mean', 'texture_mean', 'symmetry_mean', 'fractal_dimension_mean']) | |
# Make prediction | |
prediction = self.model.predict(input_data) | |
# Decode prediction | |
diagnosis = self.le_diagnosis.inverse_transform(prediction)[0] | |
return diagnosis | |