Spaces:

dev100rabh
/

FakeNews

Sleeping

100rabhsah committed on Jun 7

Commit

3269b9d

1 Parent(s): b0bc9cf

app.py analyse function fix 3

Files changed (2) hide show

src/app.py CHANGED Viewed

@@ -18,6 +18,14 @@ try:
     nltk.data.find('corpora/stopwords')
 except LookupError:
     nltk.download('stopwords')
 # Add project root to Python path
 project_root = Path(__file__).parent.parent

     nltk.data.find('corpora/stopwords')
 except LookupError:
     nltk.download('stopwords')
+try:
+    nltk.data.find('tokenizers/punkt_tab')
+except LookupError:
+    nltk.download('punkt_tab')
+try:
+    nltk.data.find('corpora/wordnet')
+except LookupError:
+    nltk.download('wordnet')
 # Add project root to Python path
 project_root = Path(__file__).parent.parent

src/data/preprocessor.py CHANGED Viewed

@@ -37,12 +37,14 @@ class TextPreprocessor:
     def lemmatize_text(self, text: str) -> str:
         """Lemmatize text."""
-        tokens = word_tokenize(text)
         return ' '.join([self.lemmatizer.lemmatize(token) for token in tokens])
     def remove_stopwords(self, text: str) -> str:
         """Remove stopwords from text."""
-        tokens = word_tokenize(text)
         return ' '.join([token for token in tokens if token.lower() not in self.stop_words])
     def correct_spelling(self, text: str) -> str:

     def lemmatize_text(self, text: str) -> str:
         """Lemmatize text."""
+        # Simple word tokenization using split
+        tokens = text.split()
         return ' '.join([self.lemmatizer.lemmatize(token) for token in tokens])
     def remove_stopwords(self, text: str) -> str:
         """Remove stopwords from text."""
+        # Simple word tokenization using split
+        tokens = text.split()
         return ' '.join([token for token in tokens if token.lower() not in self.stop_words])
     def correct_spelling(self, text: str) -> str: