File size: 2,258 Bytes
6bc88c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
import sys

from recommendationSystem.logging import logger
from recommendationSystem.utils.common import CustomException
from recommendationSystem.config.configuration import DataIngestionConfig

import pandas as pd

#from sklearn.model_selection import train_test_split

class DataIngestion:
    def __init__(self):
        self.ingestion_config = DataIngestionConfig()

    def initiate_data_ingestion(self):
 
        logger.info("Entered the Data Ingestion method")


        try:
            # Importing Data
            #df_info = pd.read_csv('data/anime_data_24.csv')
            #df_links = pd.read_csv('data/anime_links.csv')
            anime = pd.read_csv(os.path.join("data","anime_data_7490.csv"))
            logger.info("Read the dataset as a dataframe")

            # Making Directory to store the data
            os.makedirs(os.path.dirname(self.ingestion_config.data_path),exist_ok=True)
            logger.info("created directory for datframe")
            
            # Merging the dataset
            #anime = df_info.merge(df_links,on='name')

            # Selecting the required columns
            #anime = anime[['name','sypnopsis','image','type','episodes','status','studios','source','genres','demographic','links']]
            #anime.dropna(inplace=True)
            #logger.info("Filtered out necessary columns and dropped NA rows")

            # Formatting the data to get clean data
            #anime['sypnopsis_length'] = [len(i) for i in anime.sypnopsis]
            #anime = anime[anime['sypnopsis_length'] > 300]
            anime['tags'] = anime['sypnopsis']+" " + anime['type']+" " + anime['episodes']+" " \
               + anime['status'] +" "+ anime['studios'] +" "+ anime['source']+" " + anime['genres']+" " + anime['demographic']   

            # Saving the clean data py
            anime.to_csv(self.ingestion_config.data_path,index=False,header=True)
            logger.info("Ingestion is completed")
            logger.info("---------------x DIRECTORY CHANGE x------------------")


            return(
                self.ingestion_config.data_path
            )

        except Exception as e:
            raise CustomException(e,sys)