Spaces:
Sleeping
Sleeping
File size: 3,109 Bytes
216b413 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
"""
This module provides the CancerPredictor class for training and predicting breast cancer diagnosis
using logistic regression. It leverages scikit-learn for model training, evaluation, and prediction,
and pandas for data manipulation. The predictor expects input features such as radius_mean,
texture_mean, symmetry_mean, and fractal_dimension_mean, and outputs a diagnosis prediction.
Classes:
CancerPredictor: Handles training on a CSV dataset and making predictions
based on input features.
"""
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
class CancerPredictor:
    """
    Train a logistic-regression classifier on breast cancer data and use it
    to predict a diagnosis label from four mean-valued features.

    Attributes:
        model: The scikit-learn ``LogisticRegression`` estimator.
        le_diagnosis: ``LabelEncoder`` mapping the textual diagnosis labels
            to the integer classes the model is fitted on, and back again.
    """

    # Feature columns, in the order used for both fitting and prediction.
    FEATURE_COLUMNS = [
        'radius_mean', 'texture_mean', 'symmetry_mean', 'fractal_dimension_mean']
    # Column holding the label to be predicted.
    TARGET_COLUMN = 'diagnosis'

    def __init__(self):
        self.model = LogisticRegression()
        self.le_diagnosis = LabelEncoder()

    def train(self, csv_train, csv_test):
        """
        Fit the model on a training CSV and report accuracy on a test CSV.

        Both CSVs must include the columns 'radius_mean', 'texture_mean',
        'symmetry_mean', 'fractal_dimension_mean', and 'diagnosis'.

        Args:
            csv_train: Path or file-like object accepted by ``pandas.read_csv``
                holding the training data.
            csv_test: Path or file-like object accepted by ``pandas.read_csv``
                holding the evaluation data.

        Raises:
            KeyError: If a required column is missing from either CSV.
            ValueError: If the test CSV contains a diagnosis label that was
                not present in the training CSV.

        Prints the model accuracy on the test data after training.
        """
        # Load the training data and fit the label encoder on it. This is the
        # only place the encoder is fitted, so the label <-> integer mapping
        # stays the one the model was trained with.
        data_train = pd.read_csv(csv_train)
        X_train = data_train[self.FEATURE_COLUMNS]
        y_train = self.le_diagnosis.fit_transform(data_train[self.TARGET_COLUMN])

        # Train the model
        self.model.fit(X_train, y_train)

        # Load the test data and encode its labels with the ALREADY-FITTED
        # encoder. Re-fitting here (fit_transform) would remap the classes
        # whenever the test label set differs from the training one, which
        # corrupts the accuracy and the decoding done later in predict().
        data_test = pd.read_csv(csv_test)
        X_test = data_test[self.FEATURE_COLUMNS]
        y_test = self.le_diagnosis.transform(data_test[self.TARGET_COLUMN])

        # Evaluate the model
        y_pred = self.model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)
        print(f"Model accuracy: {accuracy:.2f}")

    def predict(self, radius_mean, texture_mean, symmetry_mean, fractal_dimension_mean):
        """
        Predict the diagnosis label for a single sample.

        Args:
            radius_mean: Value of the 'radius_mean' feature.
            texture_mean: Value of the 'texture_mean' feature.
            symmetry_mean: Value of the 'symmetry_mean' feature.
            fractal_dimension_mean: Value of the 'fractal_dimension_mean' feature.

        Returns:
            The predicted diagnosis, decoded back to the label used in the
            training CSV (e.g. 'M' for malignant or 'B' for benign).

        ``train`` must have been called first so that both the model and the
        label encoder are fitted.
        """
        # Build a one-row DataFrame with the same feature names as the
        # training data so the model sees matching column names.
        input_data = pd.DataFrame(
            [[radius_mean, texture_mean, symmetry_mean, fractal_dimension_mean]],
            columns=self.FEATURE_COLUMNS)

        # Make prediction
        prediction = self.model.predict(input_data)

        # Decode the integer class back to its original label
        diagnosis = self.le_diagnosis.inverse_transform(prediction)[0]
        return diagnosis
|