# Spaces: Runtime error
import re

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK resources used below: the stop-word lists and the WordNet
# data consulted by the lemmatizer.
nltk.download('stopwords')
nltk.download('wordnet')

# English stop words held in a set so the per-word membership test in
# clean_text is a hash lookup.
stop_words = set(stopwords.words('english'))

# One shared lemmatizer instance, reused for every word.
lemmatizer = WordNetLemmatizer()
def clean_text(text: object) -> str:
    """Normalize a piece of free text for downstream processing.

    The text is lowercased, every character that is not an ASCII letter or
    whitespace is deleted, the remainder is split on whitespace, words found
    in the module-level ``stop_words`` set are dropped, and each surviving
    word is passed through the module-level ``lemmatizer``.

    Args:
        text: The raw text. Non-string values (for example ``None`` or the
            float NaN that pandas uses for an empty CSV cell) are treated as
            missing text.

    Returns:
        The cleaned words joined by single spaces; an empty string when
        ``text`` is not a string or nothing survives the cleaning.
    """
    # Missing cells arrive as NaN/None rather than str; calling .lower() on
    # them would raise AttributeError and abort the whole DataFrame.apply.
    if not isinstance(text, str):
        return ''

    text = text.lower()
    # Characters are deleted (not replaced by a space), so "well-known"
    # becomes "wellknown".
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    words = text.split()
    # Stop-word filtering is applied to the raw word, before lemmatization.
    words = [lemmatizer.lemmatize(w) for w in words if w not in stop_words]
    return ' '.join(words)
# Load the input table; 'data.csv' is a placeholder path.
df = pd.read_csv('data.csv')  # Replace with your actual CSV file

# Run clean_text over every value of the 'purpose_text' column and store the
# result in a new 'cleaned_text' column.
df['cleaned_text'] = df['purpose_text'].apply(clean_text)

# Write the augmented table, omitting the DataFrame index column.
df.to_csv('cleaned_data.csv', index=False)