#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Command-line demo: split a text into sentences with ``nltk.sent_tokenize``."""
import argparse
import os
import time

from project_settings import project_path

# NLTK_DATA is assigned before ``import nltk`` on purpose, so the variable is
# already present in the environment when nltk is imported. Keep this order.
os.environ["NLTK_DATA"] = (project_path / "data/nltk_data").as_posix()

import nltk


# Language names offered for ``--language``. The list is informational only:
# it is shown in the help text and is not enforced, so any value accepted by
# the installed nltk data still works.
nltk_sent_tokenize_languages = [
    "czech", "danish", "dutch", "english", "estonian", "finnish",
    "french", "german", "greek", "italian", "norwegian", "polish",
    "portuguese", "russian", "slovene", "spanish", "swedish", "turkish",
]


def get_args():
    """Parse the command-line arguments.

    Returns:
        argparse.Namespace with two string attributes:
        ``text`` (the text to split) and ``language`` (the language name
        passed on to ``nltk.sent_tokenize``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--text",
        type=str,
        default="Mr. Honey Tian. How are you.",
        help="text to split into sentences",
    )
    parser.add_argument(
        "--language",
        type=str,
        default="english",
        help="language name passed to nltk.sent_tokenize, e.g. one of: "
             + ", ".join(nltk_sent_tokenize_languages),
    )
    args = parser.parse_args()
    return args


def main():
    """Tokenize ``--text`` into sentences and print the timing and the result."""
    args = get_args()

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock changes.
    begin_time = time.perf_counter()
    sent_list = nltk.sent_tokenize(args.text, args.language)
    cost = time.perf_counter() - begin_time

    print(f"time cost: {cost}")
    print(sent_list)


if __name__ == "__main__":
    main()