|
|
|
|
|
|
|
import sys |
|
|
|
|
|
|
|
def error_message_detail(error: Exception, error_detail) -> str:
    """Build a detailed message for `error`, including the script name and
    line number where it was raised. `error_detail` is expected to be the
    `sys` module, so the active traceback can be read via `exc_info()`."""
    _, _, exc_tb = error_detail.exc_info()
    file_name = exc_tb.tb_frame.f_code.co_filename
    line_number = exc_tb.tb_lineno
    error_message = (
        f"Error occurred in python script name [{file_name}] "
        f"line number [{line_number}] error message [{error}]"
    )
    return error_message
|
|
|
|
|
class CustomException(Exception):
    """Exception that carries a detailed message (script name, line number,
    original error) built by `error_message_detail`."""

    def __init__(self, error_message: Exception, error_detail):
        super().__init__(str(error_message))
        self.error_message = error_message_detail(error_message, error_detail=error_detail)

    def __str__(self):
        return self.error_message
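
# Usage sketch (illustrative, not part of the original module): re-raise any
# failure as CustomException so the message records where it happened.
#
#     try:
#         1 / 0
#     except Exception as e:
#         raise CustomException(e, sys)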
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from nltk.stem.porter import PorterStemmer |
|
from sklearn.feature_extraction.text import CountVectorizer |
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
|
|
|
|
|
def removing_blank_lines(text):
    # Collapse newlines into spaces so each synopsis becomes a single line of text.
    return text.replace('\n', " ")
|
|
|
def removing_pre_suff_ix(text):
    # Stem every token so inflected variants of a word map to the same root;
    # the stemmer is created once instead of once per word.
    stemmer = PorterStemmer()
    return " ".join(stemmer.stem(word) for word in text.split())
|
|
|
def converting_into_vectors(text):
    # Bag-of-words encoding over the corpus: keep the 5000 most frequent tokens
    # and drop English stop words.
    vec = CountVectorizer(max_features=5000, stop_words='english').fit_transform(text).toarray()
    return vec
|
|
|
def finding_similarity(vec):
    # Pairwise cosine similarity between every pair of count vectors.
    similarity = cosine_similarity(vec)
    return similarity
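
# Pipeline sketch (illustrative; `df` and its `tags` column are assumptions,
# not part of the original module):
#
#     df['tags'] = df['tags'].apply(removing_blank_lines).apply(removing_pre_suff_ix)
#     vectors = converting_into_vectors(df['tags'])
#     similarity = finding_similarity(vectors)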
|
|
|
|
|
|
|
import os |
|
import dill |
|
|
|
from recommendationSystem.logging import logger |
|
|
|
|
|
def save_object(file_path, obj):
    """Serialise `obj` to `file_path` with dill, creating parent directories if needed."""
    try:
        dir_path = os.path.dirname(file_path)
        os.makedirs(dir_path, exist_ok=True)

        with open(file_path, 'wb') as file_obj:
            dill.dump(obj, file_obj)

    except Exception as e:
        raise CustomException(e, sys)
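
# Usage sketch (the file name is illustrative):
#
#     save_object(os.path.join("/tmp/artifact", "similarity.pkl"), similarity)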
|
|
|
|
|
|
|
|
|
import numpy as np |
|
|
|
def recommend(data, matrix, anime):
    # Locate the queried title and rank every title by its cosine similarity to it.
    anime_index = data[data.title == anime].index[0]
    distances = np.around(matrix[anime_index], 2)
    # Top 10 entries; the queried title itself (similarity 1.0) usually sorts first.
    anime_list = sorted(list(enumerate(distances)), reverse=True, key=lambda x: x[1])[0:10]

    recommended_anime = []
    recommended_anime_poster = []
    recommended_anime_link = []
    recommended_similarity_score = []

    for i in anime_list:
        recommended_anime.append(data.iloc[i[0]].title)
        recommended_anime_poster.append(data.iloc[i[0]].image)
        recommended_anime_link.append(data.iloc[i[0]].links)
        recommended_similarity_score.append(f'{np.around(i[1] * 100, 2)} %')

    return recommended_anime, recommended_anime_poster, recommended_anime_link, recommended_similarity_score
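
# Usage sketch (the title is illustrative; `df` and `similarity` come from the
# preprocessing steps above):
#
#     titles, posters, links, scores = recommend(df, similarity, "Naruto")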
|
|
|
|
|
|
|
import pandas as pd |
|
|
|
def find_anime(label):
    # Read the processed dataframe and return the synopsis text for the given title.
    dataframe_path = os.path.join("/tmp/artifact", "data.csv")
    df = pd.read_csv(dataframe_path)
    # `sypnopsis` is kept as spelled in the source CSV's column header.
    story = '\n'.join(df[df.name == label].sypnopsis.to_list())
    return story
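
# Usage sketch (the title is illustrative):
#
#     synopsis = find_anime("Naruto")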
|
|
|
|
|
|