| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import argparse |
| import warnings |
| import train_utilities as TU |
|
|
| |
# Silence every warning for the lifetime of the process: with no category,
# message or module given, the "ignore" action applies to all warnings.
# NOTE(review): presumably meant to hide noisy library warnings during
# training -- this also hides deprecation warnings; confirm that is intended.
warnings.filterwarnings("ignore")
|
|
def main():
    """
    Run the command-line entry point of the model training utility.

    Two positional arguments are parsed and then dispatched to the
    ``train_utilities`` module (imported as ``TU``):

    * ``filename`` -- path to the training dataset; passed through unchanged.
    * ``model`` -- one of DT, LR, kNN, SVM, RF, NN or LSTM.

    For the six classical models the dataset is loaded and split with
    ``TU.load_prepare_split_df`` and the training portion is handed to
    ``TU.classification``. For LSTM the dataset path is handed directly to
    ``TU.LSTM``.

    The object returned by ``TU.classification`` is not used here.
    NOTE(review): any persistence of the trained model presumably happens
    inside ``train_utilities`` -- confirm; nothing in this function saves it.

    Raises:
        SystemExit: raised by argparse (exit status 2, usage and message on
            stderr) when an argument is missing or ``model`` is not one of
            the supported architectures.
    """
    classical_models = ("DT", "LR", "kNN", "SVM", "RF", "NN")
    supported_models = classical_models + ("LSTM",)

    parser = argparse.ArgumentParser(
        description="Twitter Depression Detection: Model Training Utility"
    )
    parser.add_argument(
        "filename",
        help="Path to the training dataset (TSV/CSV format with 'label' and 'clean_text')",
    )
    # Validating through ``choices`` makes an unknown architecture a real CLI
    # error (non-zero exit status, message on stderr) instead of a message on
    # stdout followed by a "successful" exit.
    parser.add_argument(
        "model",
        choices=supported_models,
        help="Target model architecture for training",
    )
    args = parser.parse_args()

    model_type = args.model
    dataset_path = args.filename

    if model_type == "LSTM":
        # The LSTM routine receives the raw dataset path rather than a
        # pre-split dataset.
        print("Initializing LSTM deep learning pipeline...")
        TU.LSTM(dataset_path)
        return

    print(f"Initializing {model_type} training pipeline...")

    # Only the training portion of the split is consumed by this script.
    X_train, _X_test, Y_train, _Y_test = TU.load_prepare_split_df(dataset_path)
    TU.classification(X_train=X_train, Y_train=Y_train, model=model_type)

    print(f"Training for {model_type} successful.")
|
|
# Start the training CLI only when this file is executed directly as a
# script; importing it as a module must not trigger a training run.
if __name__ == "__main__":
    main()