"""Streamlit movie recommender with a genre-distance model and a rating-based KNN model."""
import pickle

import numpy as np
import pandas as pd
import requests
import streamlit as st
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import NearestNeighbors

# ---------------------------------------------------------------------------
# Data loading and genre features
# ---------------------------------------------------------------------------
movies = pd.read_csv('./movies (1).csv')
ratings = pd.read_csv('./user_ratings.csv')

# Drop a trailing " (YYYY)" release year from each title. A pattern is used
# instead of a fixed 7-character slice so that titles without a year suffix
# are left intact rather than losing their last characters.
movies['title'] = (
    movies['title'].str.strip().str.replace(r'\s*\(\d{4}\)$', '', regex=True)
)

# Genres are '|'-separated; turn the separator into a space so that
# CountVectorizer can tokenise them. regex=False makes '|' a literal on every
# pandas version.
movies['genres'] = movies['genres'].str.replace('|', ' ', regex=False)

# Bag-of-words encoding of the genre string; each movie ends up with a list of
# token counts stored back in the 'genres' column.
cv = CountVectorizer()
genres_tokens = cv.fit_transform(movies['genres'].values)
genres_features = cv.get_feature_names_out()
genres_tokens = pd.DataFrame(genres_tokens.toarray(), columns=genres_features.tolist())
genres_tokens['combined'] = genres_tokens.values.tolist()
movies['genres'] = genres_tokens['combined']

# ---------------------------------------------------------------------------
# Rating matrix (rows: movieId, columns: userId, missing rating -> 0)
# ---------------------------------------------------------------------------
pivot_mat = ratings.pivot(index='movieId', columns='userId', values='rating').fillna(0)
sh = pivot_mat.shape

_has_rating = pivot_mat.values != 0
# NOTE: despite its name, user_votes holds (number of ratings, movieId) per
# movie row, and vote_movie holds (number of ratings, 1-based column number)
# per user column.
user_votes = np.column_stack([_has_rating.sum(axis=1), pivot_mat.index.to_numpy()])
vote_movie = np.column_stack([_has_rating.sum(axis=0), np.arange(1, sh[1] + 1)])

# Keep only movies that received more than 10 ratings.
pivot_mat = pivot_mat.loc[user_votes[:, 1][user_votes[:, 0] > 10], :]

# Number of empty cells left in the filtered matrix (vectorised count).
zc = int((pivot_mat.values == 0).sum())

csr_data = csr_matrix(pivot_mat.values)
# After this, row position i of csr_data corresponds to pivot_mat row label i,
# and 'movieId' becomes a regular column.
pivot_mat.reset_index(inplace=True)


def dist_rec(movie_name, rec):
    """Recommend movies whose genre vectors are closest to *movie_name*'s.

    Args:
        movie_name: Exact title (without the release year) to look up.
        rec: Maximum number of recommendations to return.

    Returns:
        A list of up to ``rec`` titles ordered from nearest to farthest by
        Euclidean distance between genre count vectors, or the string
        ``"Movie not found"`` when no movie has exactly that title.
    """
    matches = movies[movies['title'] == movie_name]
    if matches.empty:
        return "Movie not found"

    others = movies[movies['title'] != movie_name]
    if rec <= 0 or others.empty:
        return []

    target = np.asarray(matches['genres'].iloc[0])
    genre_matrix = np.array(others['genres'].tolist())
    dis = np.sqrt(((genre_matrix - target) ** 2).sum(axis=1))

    # A stable sort keeps ties in file order, matching a repeated "take the
    # first minimum" selection.
    nearest = np.argsort(dis, kind='stable')[:rec]
    return others['title'].to_numpy()[nearest].tolist()


# Item-based nearest-neighbour model over the rating matrix (cosine distance).
knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)
knn.fit(csr_data)


def knn_reccomendation(movie_name, n_movies_to_reccomend):
    """Recommend movies rated similarly to the first title containing *movie_name*.

    Args:
        movie_name: Text searched for (literally, case-sensitively) in titles.
        n_movies_to_reccomend: Maximum number of recommendations to return.

    Returns:
        A DataFrame with columns ``Title`` and ``Distance`` indexed from 1,
        ordered from the farthest to the nearest neighbour, or a message
        string when no usable movie matches the input.
    """
    # regex=False: the text comes straight from the user and may contain
    # characters such as '(' that are invalid as a regular expression.
    movie_list = movies[movies['title'].str.contains(movie_name, regex=False, na=False)]
    if movie_list.empty:
        return "No movies found. Please check your input"

    # Only movies that survived the rating-count filter exist in the matrix.
    in_matrix = movie_list['movieId'].isin(pivot_mat['movieId'])
    candidates = movie_list.loc[in_matrix, 'movieId']
    if candidates.empty:
        return "Not enough ratings for this movie. Please try another one"
    movie_row = pivot_mat.index[pivot_mat['movieId'] == candidates.iloc[0]][0]

    # One extra neighbour is requested because the query movie itself comes
    # back as its own nearest neighbour; never ask for more rows than exist.
    n_wanted = max(int(n_movies_to_reccomend), 0)
    n_neighbors = min(n_wanted + 1, csr_data.shape[0])
    distances, indices = knn.kneighbors(csr_data[movie_row], n_neighbors=n_neighbors)

    ranked = sorted(
        zip(indices[0].tolist(), distances[0].tolist()),
        key=lambda pair: pair[1],
    )
    # Drop the closest entry (the query itself) and reverse the rest, so the
    # farthest neighbour is listed first.
    neighbours = ranked[:0:-1]

    recommend_frame = []
    for row, distance in neighbours:
        neighbour_id = pivot_mat['movieId'].iloc[row]
        titles = movies.loc[movies['movieId'] == neighbour_id, 'title']
        if titles.empty:
            # Rated movie that is missing from the movies table.
            continue
        recommend_frame.append({'Title': titles.iloc[0], 'Distance': distance})

    return pd.DataFrame(
        recommend_frame,
        columns=['Title', 'Distance'],
        index=range(1, len(recommend_frame) + 1),
    )


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.header('Movie Recommender System')

option = st.selectbox(
    'Which model would you like to use?',
    ('Genre based', 'KNN-based'))

selected_movie = st.text_input(
    "Type a movie name to get recommendations"
)

# Integer bounds make the widget return an int instead of a float.
number_of_recommendations = st.number_input(
    "Type the number of recommendations to get",
    min_value=1,
    step=1,
)

if st.button('Show Recommendations'):
    n_recs = int(number_of_recommendations)
    query = selected_movie.strip()

    if not query:
        st.warning("Please type a movie name first")
    elif option == 'Genre based':
        movie_recommendations = dist_rec(query, n_recs)
        if isinstance(movie_recommendations, str):
            # The recommender reports failures as a message string.
            st.warning(movie_recommendations)
        else:
            st.text(f"Here are {len(movie_recommendations)} recommendations for {selected_movie}")
            for position, title in enumerate(movie_recommendations, start=1):
                st.text(f"{position}. {title}")
    elif option == 'KNN-based':
        movie_recommendations = knn_reccomendation(query, n_recs)
        if isinstance(movie_recommendations, str):
            st.warning(movie_recommendations)
        else:
            st.text(f"Here are {len(movie_recommendations)} recommendations for {selected_movie}")
            for title in movie_recommendations['Title']:
                st.text(title)