yamanavijayavardhan committed on
Commit
5d396ac
·
1 Parent(s): be37136
main.py CHANGED
@@ -1,10 +1,27 @@
1
  import os
2
  import tempfile
 
3
 
4
- # Set up cache directory for transformers
5
  cache_dir = tempfile.mkdtemp()
 
6
  os.environ['HF_HOME'] = cache_dir
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  from flask import Flask, request, jsonify, render_template
9
  import json
10
  import torch
 
import os
import tempfile

import nltk

# Set up cache directories.
# A fresh mkdtemp per process avoids writing to a possibly read-only default
# cache location (common on hosted deployments); models/corpora are
# re-downloaded on every restart as a consequence.
cache_dir = tempfile.mkdtemp()
nltk_data_dir = os.path.join(cache_dir, 'nltk_data')
os.environ['HF_HOME'] = cache_dir  # HF_HOME redirects all Hugging Face caches

# Create NLTK data directory
os.makedirs(nltk_data_dir, exist_ok=True)

# Add the custom directory to NLTK's search path BEFORE downloading, and
# export NLTK_DATA so child processes and late imports resolve the same
# location (nltk.data.path alone only covers this interpreter).
nltk.data.path.insert(0, nltk_data_dir)  # Insert at beginning of search path
os.environ['NLTK_DATA'] = nltk_data_dir

# Download required NLTK data. Each resource gets its own try/except so one
# failure (e.g. a transient network error) does not abort the remaining
# downloads; a failure is logged but must not prevent the app from starting.
print("Downloading required NLTK data...")
for _resource in ('stopwords', 'punkt', 'wordnet'):
    try:
        nltk.download(_resource, download_dir=nltk_data_dir)
    except Exception as e:  # best-effort: keep the server bootable
        print(f"Error downloading NLTK data: {e}")

from flask import Flask, request, jsonify, render_template
import json
import torch
similarity_check/semantic_meaning_check/semantic.py CHANGED
@@ -16,6 +16,7 @@ from all_models import models
# Keep fasttext as is
fasttext = load('fasttext-wiki-news-subwords-300')

# NLTK resources (punkt, stopwords, wordnet) are downloaded centrally in
# main.py at startup; no per-module nltk.download calls are needed here.
similarity_check/tf_idf/tf_idf_score.py CHANGED
@@ -5,14 +5,7 @@ from nltk.corpus import wordnet
5
  from collections import Counter
6
  import string
7
 
8
- # Check and download required NLTK packages
9
- try:
10
- stopwords.words('english')
11
- except LookupError:
12
- print("Downloading required NLTK data...")
13
- nltk.download('stopwords')
14
- nltk.download('punkt')
15
- nltk.download('wordnet')
16
 
17
  def remove_stopwords(sentence):
18
 
 
5
  from collections import Counter
6
  import string
7
 
8
+
 
 
 
 
 
 
 
9
 
10
  def remove_stopwords(sentence):
11