import json
import sys
import time
from typing import Dict, List, Sequence

from docx import Document
from pdfminer.high_level import extract_text

import abc_1


# Fallback category -> keywords mapping, used when no JSON argument is given.
DEFAULT_CATEGORIES_KEYWORDS = {
    'AI': ['Artificial', 'Intelligence'],
    'Automata': ['finite', 'state', 'machines'],
    'DT': ['game', 'theory'],
}

# Locations handed to abc_1.multi_process_categorizer.
# NOTE(review): presumably directory paths relative to the working directory;
# confirm against abc_1.
INPUT_PATH = 'input'
OUTPUT_PATH = 'output'

# Worker count passed to abc_1.multi_process_categorizer.
NUM_PROCESSES = 8


def load_categories_keywords(argv: Sequence[str]) -> Dict[str, List[str]]:
    """Return the category -> keywords mapping selected by the command line.

    Args:
        argv: Argument vector in ``sys.argv`` layout; ``argv[1]``, when
            present, is parsed as a JSON object.

    Returns:
        The mapping parsed from ``argv[1]``, or a fresh copy of
        ``DEFAULT_CATEGORIES_KEYWORDS`` when no argument was supplied.

    Raises:
        ValueError: If ``argv[1]`` is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass) or does
            not decode to a JSON object.
    """
    if len(argv) > 1:
        parsed = json.loads(argv[1])
        if not isinstance(parsed, dict):
            raise ValueError(
                "expected a JSON object mapping categories to keyword lists, "
                f"got {type(parsed).__name__}"
            )
        return parsed

    print("No data provided.")
    # Copy the lists so callers cannot mutate the module-level defaults.
    return {
        category: list(keywords)
        for category, keywords in DEFAULT_CATEGORIES_KEYWORDS.items()
    }


def main() -> None:
    """Run the categorizer once and report the elapsed wall-clock time."""
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()

    try:
        categories_keywords = load_categories_keywords(sys.argv)
    except ValueError as exc:
        # Exit with a readable message and non-zero status instead of a
        # traceback when the caller passes malformed data.
        sys.exit(f"Invalid category data: {exc}")

    # Use the mapping that was actually selected (command line or defaults);
    # previously the parsed command-line data was discarded.
    compiled_keywords = abc_1.compile_keywords(categories_keywords)
    abc_1.multi_process_categorizer(
        INPUT_PATH, OUTPUT_PATH, compiled_keywords, num_processes=NUM_PROCESSES
    )

    elapsed = time.perf_counter() - start
    print(f"Categorization completed in {elapsed:.2f} seconds")


# Keep the guard: importing this module must not start the categorization run.
if __name__ == '__main__':
    main()