| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
# Standard library
import argparse
import pickle
import warnings

# Third-party
import numpy as np
import pandas as pd
import spacy
import en_core_web_lg

# Local
import clean_utilities as CU

# Silence third-party deprecation/runtime warnings so the CLI output stays
# readable. NOTE(review): this also hides numpy's "mean of empty slice"
# warning, which previously masked the NaN-feature bug in main().
warnings.filterwarnings("ignore")
|
|
def main():
    """
    Main entry point for the prediction utility.

    This script encapsulates the end-to-end inference pipeline:
    1. Argument Parsing: Captures input text file and model selection.
    2. Text Preprocessing: Normalization via clean_utilities.
    3. Feature Extraction: Generating centroid embeddings via spaCy.
    4. Classification: Binary sentiment analysis via pre-trained SVM.

    Side effects: reads the tweet file and the pickled SVM from disk,
    prints progress and the classification verdict to stdout.
    """
    parser = argparse.ArgumentParser(
        description="Twitter Depression Detection: Machine Learning Inference Utility"
    )
    parser.add_argument(
        'filename',
        help="Path to the text file containing the tweet for classification"
    )
    parserer_model = parser.add_argument(
        'model',
        help="Target model architecture (currently optimized for 'SVM')"
    )
    args = parser.parse_args()

    # Guard clause: 'filename' is a required positional, so it is never None;
    # the only meaningful precondition is the model selector.
    if args.model != "SVM":
        print("Usage Error: Please provide an input file and specify 'SVM' as the target model.")
        return

    print(f"Loading input source: {args.filename}")
    try:
        with open(args.filename, 'r', encoding='utf-8') as file:
            raw_test_tweet = file.read()
        print(f"Captured Content: \"{raw_test_tweet}\"")

        print("Executing linguistic cleaning pipeline...")
        cleaned_input = [CU.tweets_cleaner(raw_test_tweet)]
        print(f"Normalized Form: {cleaned_input}")

        print("Transforming text to 300-dimensional semantic vectors...")
        nlp_engine = en_core_web_lg.load()

        # Centroid embedding: mean of the per-token 300-d vectors.
        # Fix 1: the original multiplied the mean by np.ones((300,)) — a
        # no-op that only obscured the intent; removed.
        # Fix 2: if cleaning strips the tweet to zero tokens, the mean of an
        # empty array is NaN (silently, since warnings are suppressed) and
        # would be fed to the classifier. Fall back to a zero vector instead.
        semantic_features = np.array([
            _centroid_vector(nlp_engine, s) for s in cleaned_input
        ])

        # SECURITY NOTE(review): pickle.load executes arbitrary code from the
        # artifact file — only load model files from a trusted source.
        model_artifact_path = "../assets/models/model_svm1.pkl"
        with open(model_artifact_path, 'rb') as model_file:
            classifier = pickle.load(model_file)

        print("Performing binary classification...")
        prediction_bin = classifier.predict(semantic_features)

        # Label convention (per the printed verdicts): 1 == depressive.
        is_depressive = prediction_bin[0]
        if is_depressive == 1:
            print("\n>>> CLASSIFICATION RESULT: The analyzed content exhibits depressive characteristics.")
        else:
            print("\n>>> CLASSIFICATION RESULT: The analyzed content is classified as non-depressive.")

    except FileNotFoundError:
        print(f"Error: The input file {args.filename} could not be located.")
    except Exception as e:
        # Broad catch is acceptable at this top-level CLI boundary: report
        # and exit rather than dump a traceback at the end user.
        print(f"An error occurred during the inference process: {e}")


def _centroid_vector(nlp_engine, text):
    """Return the 300-d mean token vector for *text*, or zeros if no tokens."""
    vectors = np.array([token.vector for token in nlp_engine(text)])
    if vectors.size == 0:
        return np.zeros(300)
    return vectors.mean(axis=0)
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
|
|
|
|
|