Spaces:

leynessa
/

bank_transaction

Runtime error

bank_transaction / preprocess.py

Upload 8 files

ff52cdd verified about 2 months ago

657 Bytes

	import pandas as pd
	import re
	import nltk
	from nltk.corpus import stopwords
	from nltk.stem import WordNetLemmatizer

	# Download the NLTK resources the objects below rely on.
	for _corpus in ('stopwords', 'wordnet'):
	    nltk.download(_corpus)

	# Module-level helpers read by clean_text(): the lemmatizer instance and
	# the English stop-word list held as a set for fast membership tests.
	lemmatizer = WordNetLemmatizer()
	stop_words = set(stopwords.words('english'))

	def clean_text(text):
	    """Normalize free text into a space-separated string of lemmatized tokens.

	    Steps: lowercase the input, delete every character that is not an
	    ASCII letter or whitespace, split on whitespace, drop tokens found in
	    the module-level ``stop_words`` set, lemmatize the rest with the
	    module-level ``lemmatizer``, and join the result with single spaces.

	    Removed characters are deleted rather than replaced by a space, so
	    pieces separated only by punctuation or digits are merged
	    (``"co-op"`` becomes ``"coop"``).

	    Args:
	        text: The raw text. Any non-string value (for example ``None`` or
	            the float NaN that pandas uses for an empty CSV cell) is
	            treated as missing text.

	    Returns:
	        The cleaned string, or ``''`` when the input is not a string or
	        nothing survives the cleaning.
	    """
	    # Missing cells arrive as float NaN / None; calling .lower() on them
	    # would raise AttributeError and abort a whole DataFrame.apply().
	    if not isinstance(text, str):
	        return ''

	    text = text.lower()
	    text = re.sub(r'[^a-zA-Z\s]', '', text)
	    words = text.split()
	    words = [lemmatizer.lemmatize(w) for w in words if w not in stop_words]
	    return ' '.join(words)

	# Pipeline: read the raw CSV, add a 'cleaned_text' column produced by
	# running clean_text over each 'purpose_text' value, then write the
	# frame to a new CSV without the index column.
	df = pd.read_csv('data.csv')  # Replace with your actual CSV file
	df['cleaned_text'] = df['purpose_text'].map(clean_text)
	df.to_csv('cleaned_data.csv', index=False)