File size: 3,358 Bytes
1a364ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# --------------------------------- CUSTOM EXCEPTION CLASS --------------------------------------------

# Importing Libraries
import sys  


# Defining structure of exception message
# Defining structure of exception message
def error_message_detail(error: Exception, error_detail) -> str:
    """Build a detailed error string with the script name and line number.

    Args:
        error: The original exception (or its message) being reported.
        error_detail: The ``sys`` module (callers pass ``sys``); only its
            ``exc_info()`` is used to locate the active traceback.

    Returns:
        A formatted message naming the file, line number and error text.
    """
    _, _, exec_tb = error_detail.exc_info()
    if exec_tb is None:
        # No exception is being handled right now, so there is no traceback
        # to inspect; fall back to the bare error text instead of crashing.
        return f"Error occurred error message [{error}]"
    file_name = exec_tb.tb_frame.f_code.co_filename
    line_number = exec_tb.tb_lineno
    # Fixed typo: "occured" -> "occurred".
    error_message = f"Error occurred in python script name [{file_name}] " \
        f"line number [{line_number}] error message [{error}]"
    return error_message

# Getting the exception message from sys
# Getting the exception message from sys
class CustomException(Exception):
    """Project-wide exception that enriches the original error with the
    script name and line number where it occurred.

    The detailed text is produced by ``error_message_detail`` and returned
    from ``__str__`` so logs show the full context.
    """

    def __init__(self, error_message:Exception, error_detail:sys):
        # Preserve the plain message on the Exception base class, then
        # compute and store the enriched file/line description.
        super().__init__(str(error_message))
        self.error_message = error_message_detail(error_message, error_detail)

    def __str__(self) -> str:
        # Show the enriched message rather than the raw error text.
        return self.error_message
    




# --------------------------------- PREPROCESSING THE TEXT IN THE DATAFRAME --------------------------------------------

# Importing Libraries
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity



def removing_blank_lines(text):
    """Flatten *text* onto one line by turning each newline into a space."""
    return " ".join(text.split("\n"))

def removing_pre_suff_ix(text):
    """Stem every whitespace-separated token of *text* with Porter stemming.

    Args:
        text: Raw text whose tokens should be reduced to their stems.

    Returns:
        The stemmed tokens re-joined with single spaces.
    """
    # Build the stemmer once instead of once per token — the original
    # constructed a fresh PorterStemmer() inside the loop.
    stemmer = PorterStemmer()
    return " ".join(stemmer.stem(token) for token in text.split())

def converting_into_vectors(text):
    """Turn an iterable of documents into a dense bag-of-words count matrix.

    Uses at most 5000 features and drops English stop words.
    """
    vectorizer = CountVectorizer(max_features=5000, stop_words='english')
    counts = vectorizer.fit_transform(text)
    return counts.toarray()

def finding_similarity(vec):
    """Return the pairwise cosine-similarity matrix of the row vectors in *vec*."""
    return cosine_similarity(vec)

# --------------------------------- SAVING FILES --------------------------------------------

import os
import dill

from recommendationSystem.logging import logger


def save_object(file_path, obj):
    """Serialize *obj* to *file_path* with dill, creating parent directories.

    Args:
        file_path: Destination path for the pickled object.
        obj: Any dill-serializable object.

    Raises:
        CustomException: wrapping any error raised during directory creation
            or serialization.
    """
    try:
        dir_path = os.path.dirname(file_path)
        # A bare filename has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError — only create directories when one is present.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        with open(file_path, 'wb') as file_obj:
            dill.dump(obj, file_obj)

    except Exception as e:
        raise CustomException(e, sys)
    

# --------------------------------- Prediction File --------------------------------------------

import numpy as np

def recommend(data, matrix, anime):
    """Return the top-10 most similar titles to *anime*.

    Args:
        data: DataFrame with at least ``title``, ``image`` and ``links`` columns.
        matrix: Square similarity matrix aligned with *data*'s row order.
        anime: Exact title string to look up.

    Returns:
        Four parallel lists: titles, poster images, links, and similarity
        scores formatted as percentage strings.

    NOTE(review): the slice starts at 0, so the query anime itself (usually
    similarity 1.0) appears as the first result — confirm this is intended.
    """
    anime_index = data[data.title == anime].index[0]
    distances = np.around(matrix[anime_index], 2)
    top_matches = sorted(enumerate(distances), key=lambda pair: pair[1], reverse=True)[:10]

    recommended_anime = []
    recommended_anime_poster = []
    recommended_anime_link = []
    recommended_similarity_score = []

    for position, score in top_matches:
        row = data.iloc[position]
        recommended_anime.append(row.title)
        recommended_anime_poster.append(row.image)
        recommended_anime_link.append(row.links)
        recommended_similarity_score.append(f'{np.around(score*100,2)} %')

    return recommended_anime, recommended_anime_poster, recommended_anime_link, recommended_similarity_score

# --------------------------------- Find Story of the anime --------------------------------------------

import pandas as pd

def find_anime(label, dataframe_path=None):
    """Return the synopsis text for the anime named *label*.

    Args:
        label: Value matched against the ``name`` column.
        dataframe_path: Optional CSV path; defaults to the original
            hard-coded ``/tmp/artifact/data.csv`` so existing callers are
            unaffected.

    Returns:
        The matching ``sypnopsis`` values joined with newlines ("" when no
        row matches). Column name "sypnopsis" is kept as spelled in the data.
    """
    if dataframe_path is None:
        dataframe_path = os.path.join("/tmp/artifact", "data.csv")
    df = pd.read_csv(dataframe_path)
    story = '\n'.join(df[df.name == label].sypnopsis.to_list())
    return story