|
import pickle

import numpy as np
import pandas as pd
import requests
import streamlit as st
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import CountVectorizer
|
|
|
|
|
# ---------------------------------------------------------------------------
# Load the movie catalogue and the user ratings, then replace each movie's
# genre string with a bag-of-words count vector (one list of ints per movie).
# ---------------------------------------------------------------------------
movies = pd.read_csv('./movies (1).csv')
ratings = pd.read_csv('./user_ratings.csv')

# Remove a trailing release year such as " (1995)" from the title.
# NOTE(review): presumably titles look like "Name (YYYY)" -- confirm against
# the CSV.  Anchoring on the year pattern (instead of blindly slicing off the
# last seven characters) leaves titles that carry no year suffix untouched.
movies['title'] = (
    movies['title']
    .str.strip()
    .str.replace(r'\s*\(\d{4}(?:[-\u2013]\d{4})?\)$', '', regex=True)
)

# Turn the '|' separators into spaces so the vectorizer sees separate words.
# regex=False makes '|' a literal on every pandas version (the default for
# `regex` differs between pandas 1.x and 2.x); missing genres become an empty
# document instead of a NaN the vectorizer cannot process.
movies['genres'] = (
    movies['genres'].fillna('').str.replace('|', ' ', regex=False)
)

# Count-vectorize the genre text: one column per token, one row per movie.
cv = CountVectorizer()
genres_tokens = cv.fit_transform(movies['genres'].values)
genres_features = cv.get_feature_names_out()
genres_tokens = pd.DataFrame(
    genres_tokens.toarray(), columns=genres_features.tolist()
)

# Collapse every row of counts into a single list and store that list as the
# movie's new 'genres' value (assignment aligns on the row index).
genres_tokens['combined'] = genres_tokens.values.tolist()
movies['genres'] = genres_tokens['combined']
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Build the movie x user rating matrix used by the kNN recommender.
# Rows are movieIds, columns are userIds, and a missing rating is stored as 0.
# ---------------------------------------------------------------------------
pivot_mat = ratings.pivot(index='movieId', columns='userId', values='rating')
pivot_mat.fillna(0, inplace=True)

sh = pivot_mat.shape

# True wherever a rating exists.  Counting non-zero cells directly is correct
# even for a row/column without any zero; the previous
# `np.unique(..., return_counts=True)[1][1:]` approach silently dropped the
# count of the smallest rating in that case.
rated = pivot_mat.values != 0

# NOTE: the names are historical and read the "wrong way round":
#   user_votes -> one row per MOVIE: [number of ratings received, movieId]
#   vote_movie -> one row per USER:  [number of ratings given,    userId]
# vote_movie now carries the real userId column labels instead of the
# positional `i + 1`, which was only right for ids numbered 1..N without gaps.
user_votes = np.column_stack((rated.sum(axis=1), pivot_mat.index.to_numpy()))
vote_movie = np.column_stack((rated.sum(axis=0), pivot_mat.columns.to_numpy()))

# Keep only the movies that received more than 10 ratings.
pivot_mat = pivot_mat.loc[user_votes[:, 1][user_votes[:, 0] > 10], :]

# Number of empty (zero) cells left in the filtered matrix.  Vectorized
# replacement for a per-cell `iloc` double loop; the value is not referenced
# anywhere else in the visible code.
zc = int((pivot_mat.values == 0).sum())

# Sparse copy for the neighbour search.  After reset_index, row position k of
# pivot_mat corresponds to row k of csr_data and 'movieId' is a normal column.
csr_data = csr_matrix(pivot_mat.values)
pivot_mat.reset_index(inplace=True)
|
|
|
|
|
|
|
def dist_rec(movie_name, rec, catalog=None):
    """Recommend the movies whose genre vectors are closest to *movie_name*.

    Closeness is the Euclidean distance between the per-movie genre count
    vectors stored in the ``'genres'`` column.

    Args:
        movie_name: Exact title to look up in the ``'title'`` column.
        rec: Maximum number of recommendations to return.
        catalog: DataFrame with ``'title'`` and ``'genres'`` columns, where
            each ``'genres'`` value is a list of numbers of equal length.
            Defaults to the module-level ``movies`` frame.

    Returns:
        A list of at most ``rec`` titles ordered from nearest to farthest
        (ties keep catalogue order), or the string ``"Movie not found"``
        when no row has that exact title.
    """
    if catalog is None:
        catalog = movies

    is_query = catalog['title'] == movie_name
    if not is_query.any():
        return "Movie not found"

    # Use the first matching row as the reference vector.
    query_vec = np.asarray(catalog.loc[is_query, 'genres'].iloc[0])

    # Every row with a different title is a candidate.
    others = catalog.loc[~is_query]
    rec = max(int(rec), 0)
    if others.empty or rec == 0:
        return []

    vectors = np.array(others['genres'].tolist())
    distances = np.sqrt(((vectors - query_vec) ** 2).sum(axis=1))

    # A stable sort reproduces "repeatedly take the first minimum" without a
    # magic sentinel value, and never yields the same title twice when `rec`
    # exceeds the number of candidates.
    nearest = np.argsort(distances, kind='stable')[:rec]
    return others['title'].to_numpy()[nearest].tolist()
|
|
|
|
|
|
|
from sklearn.neighbors import NearestNeighbors

# Brute-force cosine-distance neighbour index over the rows of csr_data,
# using all available cores.  `fit` returns the estimator itself, so the
# model can be constructed and fitted in a single expression.
knn = NearestNeighbors(
    n_neighbors=20,
    metric='cosine',
    algorithm='brute',
    n_jobs=-1,
).fit(csr_data)
|
|
|
def knn_reccomendation(movie_name, n_movies_to_reccomend):
    """Recommend movies whose rating pattern is similar to *movie_name*.

    The first catalogue entry whose title contains ``movie_name`` (literal,
    case-sensitive substring) and that has a row in the rating matrix is used
    as the query for the fitted ``knn`` model.

    Args:
        movie_name: Text to search for inside the movie titles.
        n_movies_to_reccomend: Maximum number of recommendations wanted.

    Returns:
        A DataFrame with columns ``'Title'`` and ``'Distance'`` indexed from
        1, or the string ``"No movies found. Please check your input"`` when
        no usable title matches.

        NOTE(review): rows are ordered from the largest distance to the
        smallest, i.e. the most similar movie is LAST.  That ordering is kept
        as-is for existing callers -- confirm whether it is intended.
    """
    not_found = "No movies found. Please check your input"

    # regex=False: treat the user's text literally, so characters such as
    # '(' or '+' cannot raise a regex error.  na=False: a missing title never
    # matches instead of poisoning the boolean mask.
    matches = movies[
        movies['title'].str.contains(movie_name, regex=False, na=False)
    ]
    # Only movies that survived the rating-count filter have a matrix row.
    matches = matches[matches['movieId'].isin(pivot_mat['movieId'])]
    if matches.empty:
        return not_found

    n_wanted = max(int(n_movies_to_reccomend), 0)
    query_id = matches.iloc[0]['movieId']
    query_row = pivot_mat[pivot_mat['movieId'] == query_id].index[0]

    # Ask for one extra neighbour because the query movie itself comes back;
    # kneighbors cannot return more neighbours than there are fitted rows.
    n_neighbors = min(n_wanted + 1, csr_data.shape[0])
    distances, indices = knn.kneighbors(
        csr_data[query_row], n_neighbors=n_neighbors
    )

    # ravel() (rather than squeeze()) still yields lists for a single hit.
    ranked = sorted(
        zip(indices.ravel().tolist(), distances.ravel().tolist()),
        key=lambda pair: pair[1],
    )
    # Drop the query movie by identity instead of assuming it is always the
    # first hit (another movie with identical ratings may tie at distance 0).
    neighbours = [pair for pair in ranked if pair[0] != query_row][:n_wanted]
    neighbours.reverse()  # keep the historical farthest-first ordering

    recommend_frame = []
    for row_pos, distance in neighbours:
        neighbour_id = pivot_mat['movieId'].iloc[row_pos]
        titles = movies.loc[movies['movieId'] == neighbour_id, 'title']
        if titles.empty:
            # Rated movie that is missing from the catalogue: nothing to show.
            continue
        recommend_frame.append({'Title': titles.iloc[0], 'Distance': distance})

    # Index by the number of rows actually found, which may be fewer than
    # requested.
    return pd.DataFrame(
        recommend_frame,
        columns=['Title', 'Distance'],
        index=range(1, len(recommend_frame) + 1),
    )
|
|
|
# ---------------------------------------------------------------------------
# Streamlit front end: choose a model, type a title, show recommendations.
# ---------------------------------------------------------------------------
st.header('Movie Recommender System')

option = st.selectbox(
    'Which model would you like to use?',
    ('Genre based', 'KNN-based'))

selected_movie = st.text_input(
    "Type a movie name to get recommendations"
)

# Integer min/value/step make the widget return an int (the bare widget
# returns a float starting at 0.0, which rendered as "Here are 5.0 ...").
number_of_recommendations = st.number_input(
    "Type the number of recommendations to get",
    min_value=1,
    value=5,
    step=1,
)

if st.button('Show Recommendations'):
    n_requested = int(number_of_recommendations)
    query = selected_movie.strip()

    if not query:
        st.warning("Please type a movie name first.")

    elif option == 'Genre based':
        movie_recommendations = dist_rec(query, n_requested)

        if isinstance(movie_recommendations, str):
            # dist_rec reports a failed lookup as a plain message string;
            # show it instead of indexing into it character by character.
            st.warning(movie_recommendations)
        else:
            st.text(f"Here are {len(movie_recommendations)} recommendations for {query}")
            # Iterate over what was actually returned, which may be fewer
            # titles than requested.
            for rank, title in enumerate(movie_recommendations, start=1):
                st.text(f"{rank}. {title}")

    elif option == 'KNN-based':
        movie_recommendations = knn_reccomendation(query, n_requested)

        if isinstance(movie_recommendations, str):
            # knn_reccomendation also signals "no match" with a string.
            st.warning(movie_recommendations)
        elif len(movie_recommendations) == 0:
            st.warning("No recommendations available for this movie.")
        else:
            st.text(f"Here are {len(movie_recommendations)} recommendations for {query}")
            for title in movie_recommendations['Title']:
                st.text(title)
|
|
|
|
|
|
|
|