yamanavijayavardhan committed on
Commit
be37136
·
1 Parent(s): 9282835

Switched to pyspellchecker for simpler spell checking

Browse files
Files changed (3) hide show
  1. HTR/spell_and_gramer_check.py +10 -34
  2. main.py +1 -4
  3. requirements.txt +1 -1
HTR/spell_and_gramer_check.py CHANGED
@@ -1,46 +1,22 @@
1
- import os
2
- import language_tool_python
3
- from pathlib import Path
4
  from spellchecker import SpellChecker
5
-
6
- # Use the cache directory from environment variable
7
- cache_dir = os.getenv('LANGUAGE_TOOL_CACHE_DIR', '/tmp/language_tool')
8
- os.makedirs(cache_dir, exist_ok=True)
9
 
10
- # Initialize LanguageTool with the cache directory
11
- tool = language_tool_python.LanguageTool('en-US', cache_dir=cache_dir)
12
 
13
- def check_grammar(answer):
14
-
15
- my_matches = tool.check(answer)
16
- corrected_text = tool.correct(answer)
17
- return corrected_text
18
-
19
- def correct_spelling(text):
20
- spell = SpellChecker()
21
  words = text.split()
22
-
23
  # Find misspelled words
24
  misspelled = spell.unknown(words)
25
-
26
  # Correct misspelled words
27
- corrected_text = []
28
  for word in words:
29
  if word in misspelled:
30
- correction = spell.correction(word)
31
- # If no correction found or correction is None, keep the original word
32
- if correction is None:
33
- corrected_text.append(word)
34
- else:
35
- corrected_text.append(correction)
36
  else:
37
- corrected_text.append(word)
38
-
39
- return " ".join(map(str, corrected_text))
40
-
41
- def spell_grammer(text):
42
- spell_check_text = correct_spelling(text)
43
-
44
- corrected_text = check_grammar(spell_check_text)
45
 
 
 
46
  return corrected_text
 
 
 
 
1
  from spellchecker import SpellChecker
 
 
 
 
2
 
3
+ spell = SpellChecker()
 
4
 
5
def spell_grammer(text):
    """Return *text* with misspelled words replaced by a suggested correction.

    The text is split on whitespace, every token reported by
    ``spell.unknown`` is replaced with ``spell.correction(token)``, and the
    tokens are joined back together with single spaces, so runs of
    whitespace in the input are collapsed.

    Args:
        text: The string to spell-check.

    Returns:
        The corrected string. A token for which the spell checker offers no
        correction is kept unchanged.
    """
    words = text.split()

    # Ask the checker once for all tokens it does not recognise.
    misspelled = spell.unknown(words)

    corrected_words = []
    for word in words:
        if word in misspelled:
            # correction() can return None when it has no candidate; keep
            # the original token in that case, otherwise str.join() below
            # raises TypeError on the None entry.
            suggestion = spell.correction(word)
            corrected_words.append(word if suggestion is None else suggestion)
        else:
            corrected_words.append(word)

    # NOTE(review): pyspellchecker appears to lowercase tokens inside
    # unknown() by default, so capitalised tokens may never match the
    # membership test above -- confirm against the library docs.
    return ' '.join(corrected_words)
main.py CHANGED
@@ -1,12 +1,9 @@
1
  import os
2
  import tempfile
3
 
4
- # Create a writable cache directory in /tmp
5
  cache_dir = tempfile.mkdtemp()
6
- os.environ['TRANSFORMERS_CACHE'] = cache_dir
7
  os.environ['HF_HOME'] = cache_dir
8
- # Set language tool cache directory
9
- os.environ['LANGUAGE_TOOL_CACHE_DIR'] = cache_dir
10
 
11
  from flask import Flask, request, jsonify, render_template
12
  import json
 
1
  import os
2
  import tempfile
3
 
4
+ # Set up cache directory for transformers
5
  cache_dir = tempfile.mkdtemp()
 
6
  os.environ['HF_HOME'] = cache_dir
 
 
7
 
8
  from flask import Flask, request, jsonify, render_template
9
  import json
requirements.txt CHANGED
@@ -9,6 +9,6 @@ chromadb==0.4.6
9
  PyMuPDF==1.24.12
10
  nltk==3.8.1
11
  gensim==4.3.2
12
- language-tool-python==2.8
13
  pyspellchecker==0.8.1
14
  werkzeug==3.0.0
 
9
  PyMuPDF==1.24.12
10
  nltk==3.8.1
11
  gensim==4.3.2
12
+ # language-tool-python==2.8
13
  pyspellchecker==0.8.1
14
  werkzeug==3.0.0