# zenith04's picture
# Upload 75 files
# 6bc88c9 verified
import os
import sys
from recommendationSystem.logging import logger
from recommendationSystem.utils.common import CustomException
from recommendationSystem.config.configuration import DataIngestionConfig
import pandas as pd
#from sklearn.model_selection import train_test_split
class DataIngestion:
    """Read the anime CSV, derive a ``tags`` text column and save the result.

    The destination file is taken from ``DataIngestionConfig().data_path``.
    """

    # Input CSV, resolved relative to the current working directory.
    SOURCE_CSV = os.path.join("data", "anime_data_7490.csv")

    # Columns joined (in this order, separated by one space) to form ``tags``.
    # "sypnopsis" is spelled exactly as the column is looked up in the data;
    # do not "correct" it without renaming the column in the CSV as well.
    TAG_COLUMNS = (
        "sypnopsis",
        "type",
        "episodes",
        "status",
        "studios",
        "source",
        "genres",
        "demographic",
    )

    def __init__(self):
        """Create the ingestion step with a default ``DataIngestionConfig``."""
        self.ingestion_config = DataIngestionConfig()

    def _build_tags(self, anime):
        """Return one space-joined string per row built from ``TAG_COLUMNS``.

        Non-missing values are converted with ``str`` first so that a column
        pandas parsed as numeric (``episodes`` is a likely candidate) can be
        concatenated with the text columns instead of raising ``TypeError``.
        Missing values are left untouched, so a row with a missing value in
        any of the columns still ends up with a missing ``tags`` value.
        """
        parts = [
            anime[column].map(str, na_action="ignore")
            for column in self.TAG_COLUMNS
        ]
        tags = parts[0]
        for part in parts[1:]:
            tags = tags + " " + part
        return tags

    def initiate_data_ingestion(self):
        """Load ``SOURCE_CSV``, add the ``tags`` column and write it out.

        Returns:
            ``self.ingestion_config.data_path``, the path the CSV was
            written to.

        Raises:
            CustomException: wraps any exception raised while reading,
                transforming or writing the data; the original exception is
                attached as ``__cause__``.
        """
        logger.info("Entered the Data Ingestion method")
        try:
            # NOTE(review): earlier steps (merging anime_data_24.csv with
            # anime_links.csv, selecting columns, dropping NA rows and
            # filtering on synopsis length) were disabled in this method;
            # presumably SOURCE_CSV is already preprocessed -- TODO confirm.
            anime = pd.read_csv(self.SOURCE_CSV)
            logger.info("Read the dataset as a dataframe")

            target_path = self.ingestion_config.data_path

            # dirname() is "" for a bare file name and os.makedirs("") raises
            # FileNotFoundError, so only create a directory when there is one.
            target_dir = os.path.dirname(target_path)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)
            logger.info("created directory for datframe")

            anime["tags"] = self._build_tags(anime)

            anime.to_csv(target_path, index=False, header=True)
            logger.info("Ingestion is completed")
            logger.info("---------------x DIRECTORY CHANGE x------------------")
            return target_path
        except Exception as e:
            # Chain explicitly so the original traceback stays attached.
            raise CustomException(e, sys) from e