| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import argparse |
| import warnings |
| import clean_utilities as CU |
|
|
| |
# Install a blanket "ignore" filter: from this statement onward every warning
# (any category, message, module or line number) is suppressed process-wide.
# Imports that ran before this line are not affected by it.
warnings.simplefilter("ignore")
|
|
def main(argv=None):
    """
    Run the tweet cleaning command-line utility.

    Reads the whole input file as UTF-8 text, passes it through
    ``CU.tweets_cleaner`` and writes the result to ``clean_tweet.txt`` in the
    current working directory, printing progress messages along the way.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None`` (the default) argparse falls back to
            ``sys.argv[1:]``, so existing ``main()`` callers are unaffected.

    Returns:
        ``0`` when the cleaned text was written, ``1`` when reading, cleaning
        or writing failed (the error is reported on standard output).

    Raises:
        SystemExit: raised by argparse itself when the positional
            ``filename`` argument is missing or ``-h`` is given.
    """
    parser = argparse.ArgumentParser(
        description="Twitter Depression Detection: Text Cleaning Utility"
    )
    parser.add_argument(
        'filename',
        help="Path to the raw text file containing the tweet to be sanitized"
    )
    args = parser.parse_args(argv)

    # No ``is not None`` guard: a positional argument is mandatory, so
    # argparse has already exited with a usage error if it was omitted.
    print(f"Targeting file for preprocessing: {args.filename}")

    try:
        with open(args.filename, 'r', encoding='utf-8') as file:
            raw_tweet = file.read()

        print("Linguistic cleaning in progress...")
        sanitized_tweet = CU.tweets_cleaner(raw_tweet)

        with open('clean_tweet.txt', 'w', encoding='utf-8') as output_file:
            output_file.write(sanitized_tweet)

        # Announce success only after the file has been written and closed,
        # so the message is never printed for a write that failed.
        print("Sanitization complete. Persistence target: clean_tweet.txt")
    except FileNotFoundError:
        print(f"Error: The specified file '{args.filename}' was not discovered.")
        return 1
    except Exception as e:
        # Top-level boundary of the CLI: report anything unexpected and
        # signal failure instead of showing a traceback.
        print(f"An unexpected analytical error occurred: {e}")
        return 1

    return 0


if __name__ == '__main__':
    # Propagate main()'s status so shells and pipelines can detect failure.
    raise SystemExit(main())
|
|
|
|
|
|
|
|