Commit
·
5d396ac
1
Parent(s):
be37136
new
Browse files
main.py
CHANGED
@@ -1,10 +1,27 @@
|
|
1 |
import os
|
2 |
import tempfile
|
|
|
3 |
|
4 |
-
# Set up cache
|
5 |
cache_dir = tempfile.mkdtemp()
|
|
|
6 |
os.environ['HF_HOME'] = cache_dir
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
from flask import Flask, request, jsonify, render_template
|
9 |
import json
|
10 |
import torch
|
|
|
1 |
import os
|
2 |
import tempfile
|
3 |
+
import nltk
|
4 |
|
5 |
+
# Set up cache directories
|
6 |
cache_dir = tempfile.mkdtemp()
|
7 |
+
nltk_data_dir = os.path.join(cache_dir, 'nltk_data')
|
8 |
os.environ['HF_HOME'] = cache_dir
|
9 |
|
10 |
+
# Create NLTK data directory
|
11 |
+
os.makedirs(nltk_data_dir, exist_ok=True)
|
12 |
+
|
13 |
+
# Add the custom directory to NLTK's search path BEFORE downloading
|
14 |
+
nltk.data.path.insert(0, nltk_data_dir) # Insert at beginning of search path
|
15 |
+
|
16 |
+
# Download required NLTK data
|
17 |
+
try:
|
18 |
+
print("Downloading required NLTK data...")
|
19 |
+
nltk.download('stopwords', download_dir=nltk_data_dir)
|
20 |
+
nltk.download('punkt', download_dir=nltk_data_dir)
|
21 |
+
nltk.download('wordnet', download_dir=nltk_data_dir)
|
22 |
+
except Exception as e:
|
23 |
+
print(f"Error downloading NLTK data: {e}")
|
24 |
+
|
25 |
from flask import Flask, request, jsonify, render_template
|
26 |
import json
|
27 |
import torch
|
similarity_check/semantic_meaning_check/semantic.py
CHANGED
@@ -16,6 +16,7 @@ from all_models import models
|
|
16 |
# Keep fasttext as is
|
17 |
fasttext = load('fasttext-wiki-news-subwords-300')
|
18 |
|
|
|
19 |
# nltk.download('punkt')
|
20 |
# nltk.download('stopwords')
|
21 |
|
|
|
16 |
# Keep fasttext as is
|
17 |
fasttext = load('fasttext-wiki-news-subwords-300')
|
18 |
|
19 |
+
# NLTK data downloads are now handled in main.py, so the calls below stay commented out (safe to delete)
|
20 |
# nltk.download('punkt')
|
21 |
# nltk.download('stopwords')
|
22 |
|
similarity_check/tf_idf/tf_idf_score.py
CHANGED
@@ -5,14 +5,7 @@ from nltk.corpus import wordnet
|
|
5 |
from collections import Counter
|
6 |
import string
|
7 |
|
8 |
-
|
9 |
-
try:
|
10 |
-
stopwords.words('english')
|
11 |
-
except LookupError:
|
12 |
-
print("Downloading required NLTK data...")
|
13 |
-
nltk.download('stopwords')
|
14 |
-
nltk.download('punkt')
|
15 |
-
nltk.download('wordnet')
|
16 |
|
17 |
def remove_stopwords(sentence):
|
18 |
|
|
|
5 |
from collections import Counter
|
6 |
import string
|
7 |
|
8 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
def remove_stopwords(sentence):
|
11 |
|