File size: 4,514 Bytes
1ed1542 9d48458 1ed1542 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import numpy as np
import pickle
import streamlit as st
import pandas as pd
import requests
# Load the movie catalogue and the user ratings from the working directory.
movies = pd.read_csv('./movies (1).csv')
ratings = pd.read_csv('./user_ratings.csv')

# Remove a trailing release year such as " (1995)" from each title.
# NOTE(review): assumes titles end with " (YYYY)" -- confirm against the CSV.
# A pattern is used instead of slicing off the last 7 characters so that a
# title without a year suffix is left intact rather than truncated.
movies['title'] = (
    movies['title']
    .str.strip()
    .str.replace(r'\s*\(\d{4}\)$', '', regex=True)
)

# Replace the '|' genre separator with a space so each genre becomes a token.
# regex=False is explicit because '|' is the regex alternation operator and
# the default of ``regex`` differs between pandas versions.  Missing genres
# become an empty document, which CountVectorizer accepts (NaN is rejected).
movies['genres'] = movies['genres'].fillna('').str.replace('|', ' ', regex=False)

from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words encoder used to turn the genre string into a count vector.
cv = CountVectorizer()

# One row per movie, one column per genre token.
genres_tokens = cv.fit_transform(movies['genres'].values)
genres_features = cv.get_feature_names_out()
genres_tokens = pd.DataFrame(
    genres_tokens.toarray(),
    columns=genres_features.tolist(),
)

# Collapse each row of counts into a single list and store that list as the
# movie's 'genres' value; dist_rec compares these vectors.
genres_tokens['combined'] = genres_tokens.values.tolist()
movies['genres'] = genres_tokens['combined']
# Preprocessing of the ratings table.
# Rows are movies, columns are users; a missing rating is stored as 0.
pivot_mat = ratings.pivot(index='movieId', columns='userId', values='rating').fillna(0)
sh = pivot_mat.shape

# Count the real ratings (non-zero cells) per movie and per user in one
# vectorised pass.  Counting non-zero cells directly is also correct for a
# row/column that contains no zero at all, where "drop the first unique
# value" would have discarded genuine ratings.
_rated = pivot_mat.values != 0
_votes_per_movie = _rated.sum(axis=1)
_votes_per_user = _rated.sum(axis=0)

# Two-column arrays of (count, id), kept under their original names.
# NOTE: the names are historical -- user_votes holds the number of ratings
# each movie received, vote_movie the number of ratings each user gave.
# vote_movie labels users 1..N by column position, as the original did.
user_votes = np.column_stack([_votes_per_movie, pivot_mat.index.to_numpy()])
vote_movie = np.column_stack([_votes_per_user, np.arange(1, sh[1] + 1)])

# Keep only movies that received more than 10 ratings.
pivot_mat = pivot_mat.loc[_votes_per_movie > 10, :]

# Number of empty (zero) cells left in the filtered matrix.
zc = int(np.count_nonzero(pivot_mat.values == 0))

from scipy.sparse import csr_matrix

# Build the sparse matrix BEFORE reset_index so that the movieId column is not
# part of the feature matrix; after the reset, the positional row number of
# pivot_mat matches the row number in csr_data.
csr_data = csr_matrix(pivot_mat.values)
pivot_mat = pivot_mat.reset_index()
def dist_rec(movie_name, rec):
    """Recommend the movies whose genre vector is closest to ``movie_name``.

    Closeness is the Euclidean distance between the per-movie genre count
    vectors stored in ``movies['genres']`` (a nearest-neighbour lookup).

    Args:
        movie_name: Exact title to look up in ``movies['title']``.
        rec: Maximum number of recommendations to return.

    Returns:
        A list of up to ``rec`` titles ordered from nearest to farthest (ties
        keep catalogue order), or the string ``"Movie not found"`` when no
        movie has exactly that title.  Fewer than ``rec`` titles are returned
        when the catalogue does not contain that many other movies.
    """
    matches = movies[movies['title'] == movie_name]
    if matches.empty:
        return "Movie not found"
    target = np.asarray(matches['genres'].iloc[0], dtype=float)

    # Every movie with a different title is a candidate.
    others = movies[movies['title'] != movie_name]
    rec = max(int(rec), 0)  # a negative count must not slice from the end
    if rec == 0 or others.empty:
        return []

    # One row per candidate; compute all distances in a single array operation.
    genre_matrix = np.array(others['genres'].tolist(), dtype=float)
    dis = np.sqrt(((genre_matrix - target) ** 2).sum(axis=1))

    # A stable sort reproduces "repeatedly take the first minimum" without
    # overwriting distances with a sentinel value.
    nearest = np.argsort(dis, kind='stable')[:rec]
    return others['title'].to_numpy()[nearest].tolist()
# Nearest-neighbour index over the sparse movie-by-user rating matrix:
# brute-force search with cosine distance, 20 neighbours by default, using
# all available cores.
from sklearn.neighbors import NearestNeighbors

knn = NearestNeighbors(
    n_neighbors=20,
    algorithm='brute',
    metric='cosine',
    n_jobs=-1,
).fit(csr_data)
def knn_reccomendation(movie_name, n_movies_to_reccomend):
    """Recommend movies with rating patterns similar to a title match.

    The first movie whose title contains ``movie_name`` (case-sensitive,
    literal substring) and that has a row in the rating matrix is used as the
    query for the fitted ``knn`` index.

    Args:
        movie_name: Text to search for inside ``movies['title']``.
        n_movies_to_reccomend: Number of neighbours to return.

    Returns:
        A DataFrame with columns ``'Title'`` and ``'Distance'`` indexed from
        1, ordered from the farthest to the nearest of the selected
        neighbours (the historical ordering of this function), or the string
        ``"No movies found. Please check your input"`` when no usable match
        exists.
    """
    not_found = "No movies found. Please check your input"

    # regex=False: the user's text is matched literally, so characters such as
    # '(' or '[' cannot raise a regex error.  na=False treats missing titles
    # as non-matching.
    title_hits = movies['title'].str.contains(movie_name, regex=False, na=False)
    candidate_ids = movies.loc[title_hits, 'movieId']

    # Only movies that survived the rating filter have a row in pivot_mat (and
    # therefore in csr_data); skip matches that do not.
    rated_ids = pivot_mat['movieId']
    usable_ids = candidate_ids[candidate_ids.isin(rated_ids)]
    if usable_ids.empty:
        return not_found
    query_pos = int(np.flatnonzero((rated_ids == usable_ids.iloc[0]).to_numpy())[0])

    n_wanted = max(int(n_movies_to_reccomend), 0)
    # Ask for one extra neighbour because the query movie is normally returned
    # as its own nearest neighbour; never ask for more rows than were fitted.
    n_query = min(n_wanted + 1, csr_data.shape[0])
    distances, indices = knn.kneighbors(csr_data[query_pos], n_neighbors=n_query)

    # ravel() (unlike squeeze()) still yields a sequence for a single result.
    neighbours = sorted(
        zip(indices.ravel().tolist(), distances.ravel().tolist()),
        key=lambda pair: pair[1],
    )
    # Drop the query movie explicitly instead of assuming it sorts first.
    neighbours = [pair for pair in neighbours if pair[0] != query_pos][:n_wanted]
    neighbours.reverse()  # farthest first, as this function always returned

    recommend_frame = []
    for row_pos, distance in neighbours:
        movie_id = rated_ids.iloc[row_pos]
        titles = movies.loc[movies['movieId'] == movie_id, 'title']
        if titles.empty:
            # Rated movie missing from the catalogue: nothing to display.
            continue
        recommend_frame.append({'Title': titles.iloc[0], 'Distance': distance})

    # Explicit columns keep 'Title' available even when the result is empty.
    df = pd.DataFrame(recommend_frame, columns=['Title', 'Distance'])
    df.index = range(1, len(df) + 1)
    return df
# ---- Streamlit user interface ----
st.header('Movie Recommender System')
option = st.selectbox(
    'Which model would you like to use?',
    ('Genre based', 'KNN-based'))
selected_movie = st.text_input(
    "Type a movie name to get recommendations"
)
# Integer bounds/step make the widget return an int, so the message below does
# not read "5.0 recommendations" and a count of zero cannot be requested.
number_of_recommendations = st.number_input(
    "Type the number of recommendations to get",
    min_value=1,
    step=1,
)
if st.button('Show Recommendations'):
    n_recs = int(number_of_recommendations)
    movie_query = selected_movie.strip()
    if not movie_query:
        st.warning("Please type a movie name first")
    elif option == 'Genre based':
        movie_recommendations = dist_rec(movie_query, n_recs)
        # dist_rec returns a plain message string when the title is unknown;
        # show it instead of indexing into it character by character.
        if isinstance(movie_recommendations, str):
            st.error(movie_recommendations)
        else:
            st.text(f"Here are {len(movie_recommendations)} recommendations for {movie_query}")
            for rank, title in enumerate(movie_recommendations, start=1):
                st.text(f"{rank}. {title}")
    elif option == 'KNN-based':
        movie_recommendations = knn_reccomendation(movie_query, n_recs)
        # knn_reccomendation also signals "no match" with a message string.
        if isinstance(movie_recommendations, str):
            st.error(movie_recommendations)
        else:
            st.text(f"Here are {len(movie_recommendations)} recommendations for {movie_query}")
            for title in movie_recommendations['Title']:
                st.text(title)
|