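# Vageesh1's picture
# Update app.py
# 9d48458
# (The three lines above are page-header text captured with the file; they are
# kept as comments because they are not part of the program.)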
import numpy as np
import pickle
import streamlit as st
import pandas as pd
import requests
# ---------------------------------------------------------------------------
# Movie metadata: load the CSV files and turn each movie's genre string into
# a fixed-length count vector.
# ---------------------------------------------------------------------------
movies = pd.read_csv('./movies (1).csv')
ratings = pd.read_csv('./user_ratings.csv')

# Remove a trailing release year such as " (1995)" (or a year range) from the
# title.  The suffix is stripped only when it is really there: cutting the
# last seven characters unconditionally would mangle titles without a year.
movies['title'] = (
    movies['title']
    .str.strip()
    .str.replace(r'\s*\(\d{4}(?:\s*[-\u2013]\s*\d{4})?\)$', '', regex=True)
)

# Genres are separated by '|'; replace the separator with a space so that the
# vectorizer sees one token per genre.  regex=False keeps '|' a literal
# character regardless of the pandas version's default.
movies['genres'] = movies['genres'].str.replace('|', ' ', regex=False)

from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words over the genre tokens: one column per token, one row per movie.
cv = CountVectorizer()
genres_tokens = cv.fit_transform(movies['genres'].values)
genres_features = cv.get_feature_names_out()
genres_tokens = pd.DataFrame(
    genres_tokens.toarray(),
    columns=genres_features.tolist(),
    index=movies.index,
)

# Collapse each row of counts into a single list and store that list back in
# movies['genres'], so every movie carries its own genre vector.
genres_tokens['combined'] = genres_tokens.values.tolist()
movies['genres'] = genres_tokens['combined']
# ---------------------------------------------------------------------------
# Ratings: build the movie x user rating matrix used by the KNN recommender.
# ---------------------------------------------------------------------------
# Rows are movieIds, columns are userIds; a missing rating becomes 0.
pivot_mat = ratings.pivot(index='movieId', columns='userId', values='rating')
pivot_mat.fillna(0, inplace=True)
sh = pivot_mat.shape

# Count the non-zero ratings directly.  (Counting "everything except the
# smallest unique value" is only right when a 0 is present in the row/column.)
#
# NOTE: the two names below are historical and read the wrong way round:
#   user_votes[:, 0] = number of ratings each *movie* received,
#   user_votes[:, 1] = that movie's movieId;
#   vote_movie[:, 0] = number of ratings each *user* gave,
#   vote_movie[:, 1] = that user's userId (taken from the column labels rather
#                      than assuming the ids run 1..n).
rating_values = pivot_mat.values
user_votes = np.column_stack(
    (np.count_nonzero(rating_values, axis=1), pivot_mat.index.to_numpy())
)
vote_movie = np.column_stack(
    (np.count_nonzero(rating_values, axis=0), pivot_mat.columns.to_numpy())
)

# Keep only the movies that received more than 10 ratings.
pivot_mat = pivot_mat.loc[user_votes[:, 1][user_votes[:, 0] > 10], :]

# Number of empty (zero) cells left in the filtered matrix, computed in one
# vectorised pass instead of one .iloc lookup per cell.
zc = int(np.count_nonzero(pivot_mat.values == 0))

from scipy.sparse import csr_matrix

# The sparse matrix is built BEFORE reset_index so that it holds ratings only;
# afterwards 'movieId' becomes an ordinary column and row i of pivot_mat
# corresponds to row i of csr_data.
csr_data = csr_matrix(pivot_mat.values)
pivot_mat.reset_index(inplace=True)
def dist_rec(movie_name, rec, catalog=None):
    """Recommend the movies whose genre vector is closest to *movie_name*'s.

    Closeness is the Euclidean distance between genre vectors; ties keep
    catalogue order.

    Args:
        movie_name: Exact title to look up in the ``'title'`` column.
        rec: Number of recommendations wanted.  Values below 1 give an empty
            list; values larger than the number of other movies give every
            other movie once (never duplicates).
        catalog: Optional DataFrame with a ``'title'`` column and a
            ``'genres'`` column holding equal-length numeric vectors.
            Defaults to the module-level ``movies`` frame.

    Returns:
        A list of titles ordered from most to least similar, or the string
        ``"Movie not found"`` when no row has that exact title.
    """
    if catalog is None:
        catalog = movies

    is_query = catalog['title'] == movie_name
    if not is_query.any():
        return "Movie not found"

    wanted = max(int(rec), 0)
    # Every row carrying the queried title is excluded from the candidates.
    others = catalog[~is_query]
    if wanted == 0 or others.empty:
        return []

    target = np.asarray(catalog.loc[is_query, 'genres'].iloc[0])
    genre_matrix = np.array(others['genres'].tolist())

    # Euclidean distance from the target vector to every candidate at once.
    dis = np.sqrt(np.sum((genre_matrix - target) ** 2, axis=1))

    # A stable sort keeps the first-seen movie ahead on equal distances, which
    # is the order repeated argmin calls would produce, without a sentinel.
    nearest = np.argsort(dis, kind='stable')[:wanted]
    return others['title'].to_numpy()[nearest].tolist()
# k-nearest-neighbours index over the rating matrix (this is KNN, not
# K-means): cosine distance between rows of csr_data, found by brute-force
# search.  n_neighbors=20 is only the default; callers may pass their own
# value to kneighbors().
from sklearn.neighbors import NearestNeighbors

knn = NearestNeighbors(
    n_neighbors=20,
    metric='cosine',
    algorithm='brute',
    n_jobs=-1,
)
knn.fit(csr_data)
def knn_reccomendation(movie_name, n_movies_to_reccomend):
    """Recommend movies whose rating pattern resembles that of *movie_name*.

    The first catalogue title that contains *movie_name* (as literal text) and
    is present in the rating matrix is used as the query; its nearest rows in
    ``csr_data`` according to the fitted ``knn`` model are returned.

    Args:
        movie_name: Text to search for inside the movie titles.
        n_movies_to_reccomend: Number of recommendations wanted.

    Returns:
        A DataFrame with columns ``'Title'`` and ``'Distance'`` indexed from
        1.  Rows are ordered from the largest distance to the smallest, so
        the most similar movie comes last.  It may hold fewer rows than
        requested when not enough neighbours exist.  The string
        ``"No movies found. Please check your input"`` is returned when
        nothing usable matches.
    """
    not_found = "No movies found. Please check your input"

    query = str(movie_name).strip()
    if not query:
        # An empty pattern would match every title.
        return not_found

    # regex=False: the user's text is matched literally, so characters such
    # as '(' or '*' cannot raise a regular-expression error.
    movie_list = movies[movies['title'].str.contains(query, regex=False, na=False)]

    # Only movies that survived the rating filter have a row in csr_data.
    rated_ids = pivot_mat['movieId'].to_numpy()
    rated_matches = movie_list.loc[movie_list['movieId'].isin(rated_ids), 'movieId']
    if rated_matches.empty:
        return not_found

    wanted = max(int(n_movies_to_reccomend), 0)
    if wanted == 0:
        return pd.DataFrame(columns=['Title', 'Distance'])

    query_row = int(np.flatnonzero(rated_ids == rated_matches.iloc[0])[0])

    # One extra neighbour because the query movie is normally its own nearest
    # neighbour; never ask for more rows than the matrix has.
    n_neighbors = min(wanted + 1, csr_data.shape[0])
    distances, indices = knn.kneighbors(csr_data[query_row], n_neighbors=n_neighbors)

    # Drop the query movie by row number instead of assuming it is first.
    neighbours = sorted(
        (
            (int(row), float(dist))
            for row, dist in zip(indices[0], distances[0])
            if int(row) != query_row
        ),
        key=lambda pair: pair[1],
    )[:wanted]

    recommend_frame = []
    # reversed(): keep the established output order (largest distance first).
    for row, dist in reversed(neighbours):
        titles = movies.loc[movies['movieId'] == rated_ids[row], 'title'].values
        if len(titles):
            recommend_frame.append({'Title': titles[0], 'Distance': dist})

    return pd.DataFrame(
        recommend_frame,
        columns=['Title', 'Distance'],
        index=range(1, len(recommend_frame) + 1),
    )
# ---------------------------------------------------------------------------
# Streamlit user interface
# ---------------------------------------------------------------------------
st.header('Movie Recommender System')

option = st.selectbox(
    'Which model would you like to use?',
    ('Genre based', 'KNN-based'))
selected_movie = st.text_input(
    "Type a movie name to get recommendations"
)
# Integer min/value/step make the widget return an int, so the message below
# does not read "5.0 recommendations".
number_of_recommendations = st.number_input(
    "Type the number of recommendations to get",
    min_value=1,
    value=5,
    step=1,
)

if st.button('Show Recommendations'):
    wanted = int(number_of_recommendations)

    if option == 'Genre based':
        movie_recommendations = dist_rec(selected_movie, wanted)
        # Both recommenders signal "nothing found" with a plain string; show
        # it as a message instead of indexing into it.
        if isinstance(movie_recommendations, str):
            st.warning(movie_recommendations)
        else:
            st.text(f"Here are {len(movie_recommendations)} recommendations for {selected_movie}")
            # Iterate over what was actually returned; the list can be
            # shorter than the number requested.
            for position, title in enumerate(movie_recommendations, start=1):
                st.text(f"{position}. {title}")

    elif option == 'KNN-based':
        movie_recommendations = knn_reccomendation(selected_movie, wanted)
        if isinstance(movie_recommendations, str):
            st.warning(movie_recommendations)
        else:
            titles = movie_recommendations['Title']
            st.text(f"Here are {len(titles)} recommendations for {selected_movie}")
            for title in titles:
                st.text(title)