Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import numpy as np | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import tensorflow as tf | |
| import gradio as gr | |
class AnimeRecommender:
    """Recommend anime from item embeddings learned by a Keras model.

    The constructor loads three CSV files and a saved Keras model, encodes
    the user/anime ids found in the ratings table as contiguous indices,
    extracts the row-normalised weights of the ``anime_embedding`` layer and
    prepares a compact metadata frame (``df_anime``) used for lookups.

    Columns read by this class:
        * ratings CSV: ``user_id``, ``anime_id``
        * anime CSV: ``MAL_ID``, ``English name``, ``Score``, ``Genres``,
          ``Episodes``, ``Type``, ``Premiered``, ``Members`` (and ``Name``
          as a fallback title)
        * synopsis CSV: ``MAL_ID``, ``Name``, ``Genres``, ``sypnopsis``
    """

    # Characters replaced by "_" when building the MyAnimeList URL slug.
    _URL_SLUG_TABLE = str.maketrans({" ": "_", ":": "_", "!": "_"})

    # Columns of the frame returned by find_similar_animes_combined.
    _RESULT_COLUMNS = ["anime_id", "name", "similarity", "genre", "sypnopsis"]

    def __init__(self, rating_path, anime_path, synopsis_path, model_path):
        """Load the data files and the model, then run the preprocessing.

        Args:
            rating_path: Path of the ratings CSV.
            anime_path: Path of the anime metadata CSV.
            synopsis_path: Path of the synopsis CSV.
            model_path: Path of the saved Keras model.
        """
        self.rating_df = pd.read_csv(rating_path)
        self.df_anime = pd.read_csv(anime_path, low_memory=True)
        self.sypnopsis_df = pd.read_csv(
            synopsis_path, usecols=["MAL_ID", "Name", "Genres", "sypnopsis"]
        )
        self.model = tf.keras.models.load_model(model_path)
        self._preprocess_data()

    def _preprocess_data(self):
        """Build id encodings, embedding weights and the metadata frame."""
        # User and anime ID encoding: ids are numbered in order of first
        # appearance in the ratings table.
        user_ids = self.rating_df["user_id"].unique().tolist()
        user2user_encoded = {x: i for i, x in enumerate(user_ids)}
        anime_ids = self.rating_df["anime_id"].unique().tolist()
        anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
        self.rating_df["user"] = self.rating_df["user_id"].map(user2user_encoded)
        self.rating_df["anime"] = self.rating_df["anime_id"].map(anime2anime_encoded)
        self.n_users = len(user2user_encoded)
        self.n_animes = len(anime2anime_encoded)
        self.anime2anime_encoded = anime2anime_encoded
        self.anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}

        # Normalize anime weights
        self.anime_weights = self._extract_weights('anime_embedding')

        # Fix anime names: prefer the English title, fall back to "Name".
        self.df_anime['anime_id'] = self.df_anime['MAL_ID']
        self.df_anime["eng_version"] = self.df_anime['English name']
        self.df_anime['eng_version'] = self.df_anime.anime_id.apply(self._get_anime_name)
        self.df_anime.sort_values(
            by=['Score'],
            inplace=True,
            ascending=False,
            kind='quicksort',
            na_position='last',
        )
        self.df_anime = self.df_anime[
            [
                "anime_id",
                "eng_version",
                "Score",
                "Genres",
                "Episodes",
                "Type",
                "Premiered",
                "Members",
            ]
        ]

    def _extract_weights(self, name):
        """Return the first weight array of layer ``name``, rows L2-normalised.

        NOTE(review): a row whose norm is zero would produce NaN/inf values
        here; the original code has the same behaviour.
        """
        weight_layer = self.model.get_layer(name)
        weights = weight_layer.get_weights()[0]
        weights = weights / np.linalg.norm(weights, axis=1).reshape((-1, 1))
        return weights

    def _get_anime_name(self, anime_id):
        """Return the display title for ``anime_id``.

        Uses ``eng_version`` of the first matching row; when that value is
        missing (NaN/None) the ``Name`` column is used instead. Returns
        ``'Unknown'`` when no row matches or a needed column is absent.
        """
        try:
            rows = self.df_anime[self.df_anime.anime_id == anime_id]
            name = rows.eng_version.values[0]
            # pd.isna instead of ``is np.nan``: a missing value is not
            # guaranteed to be the np.nan singleton object.
            if pd.isna(name):
                name = rows.Name.values[0]
        except (AttributeError, IndexError, KeyError, TypeError):
            name = 'Unknown'
        return name

    def get_anime_frame(self, anime):
        """Return the ``df_anime`` rows matching an id or a title.

        Args:
            anime: An integer id (Python or NumPy integer), matched against
                ``anime_id``, or a string, matched against ``eng_version``.

        Returns:
            The matching rows (possibly empty), or ``None`` when ``anime``
            is neither an integer nor a string.
        """
        if isinstance(anime, (int, np.integer)):
            return self.df_anime[self.df_anime.anime_id == anime]
        if isinstance(anime, str):
            return self.df_anime[self.df_anime.eng_version == anime]
        return None

    def get_sypnopsis(self, anime):
        """Return the synopsis text for an id or a name.

        Args:
            anime: An integer id (Python or NumPy integer), matched against
                ``MAL_ID``, or a string, matched against ``Name``.

        Returns:
            The ``sypnopsis`` value of the first matching row, or ``None``
            when ``anime`` is neither an integer nor a string.

        Raises:
            IndexError: If no row matches.
        """
        if isinstance(anime, (int, np.integer)):
            return self.sypnopsis_df[self.sypnopsis_df.MAL_ID == anime].sypnopsis.values[0]
        if isinstance(anime, str):
            return self.sypnopsis_df[self.sypnopsis_df.Name == anime].sypnopsis.values[0]
        return None

    def find_similar_animes_combined(self, anime_names, n=3, return_dist=False, neg=False):
        """Find the anime closest to the mean embedding of several titles.

        The embeddings of the given titles are averaged, re-normalised and
        compared with every anime embedding by dot product.

        Args:
            anime_names: Iterable of titles (or ids) accepted by
                ``get_anime_frame``.
            n: Number of recommendations to return.
            return_dist: When true, return ``(dists, closest)`` instead of a
                frame, where ``closest`` holds ``n + len(anime_names)``
                encoded indices taken from the sorted similarities.
            neg: When true, pick the least similar anime instead.

        Returns:
            A DataFrame with columns ``anime_id``, ``name``, ``similarity``,
            ``genre`` and ``sypnopsis``, holding at most ``n`` rows sorted by
            descending similarity and indexed ``0..len-1``. The input titles
            are never part of the result. An empty DataFrame is returned
            when a title cannot be resolved or anything else fails; the
            reason is printed.
        """
        try:
            encoded_indices = []
            input_anime_ids = []
            for name in anime_names:
                frame = self.get_anime_frame(name)
                if frame is None or frame.empty:
                    raise LookupError(f"no anime titled {name!r}")
                anime_id = frame.anime_id.values[0]
                encoded_index = self.anime2anime_encoded.get(anime_id)
                if encoded_index is None:
                    # Present in the metadata but never rated, so the model
                    # has no embedding row for it.
                    raise LookupError(f"no embedding available for {name!r}")
                input_anime_ids.append(anime_id)
                encoded_indices.append(encoded_index)
            if not encoded_indices:
                raise LookupError("no input anime given")

            combined_weights = np.mean(self.anime_weights[encoded_indices], axis=0)
            combined_weights = combined_weights / np.linalg.norm(combined_weights)
            dists = np.dot(self.anime_weights, combined_weights)
            sorted_dists = np.argsort(dists)

            if return_dist:
                window = n + len(input_anime_ids)
                closest = sorted_dists[:window] if neg else sorted_dists[-window:]
                return dists, closest

            # Walk the ranking from the preferred end and stop once n usable
            # candidates are collected. A fixed-size window would yield a
            # varying number of rows depending on how many input titles (or
            # candidates without metadata) happen to fall inside it.
            ranked = sorted_dists if neg else sorted_dists[::-1]
            rows = []
            for close in ranked:
                if len(rows) >= n:
                    break
                decoded_id = self.anime_encoded2anime.get(int(close))
                if decoded_id is None or decoded_id in input_anime_ids:
                    continue
                anime_frame = self.get_anime_frame(decoded_id)
                if anime_frame is None or anime_frame.empty:
                    # Rated anime without a metadata row: nothing to show.
                    continue
                try:
                    sypnopsis = self.get_sypnopsis(decoded_id)
                except IndexError:
                    # A missing synopsis must not discard the recommendation.
                    sypnopsis = ""
                rows.append(
                    {
                        "anime_id": decoded_id,
                        "name": anime_frame.eng_version.values[0],
                        "similarity": dists[close],
                        "genre": anime_frame.Genres.values[0],
                        'sypnopsis': sypnopsis,
                    }
                )

            result = pd.DataFrame(rows, columns=self._RESULT_COLUMNS)
            result = result.sort_values(by="similarity", ascending=False)
            return result.reset_index(drop=True)
        except Exception as e:
            # Deliberate best-effort boundary: callers get an empty frame
            # instead of an exception, and the cause is printed.
            print('{}!, Not Found in Anime list'.format(anime_names))
            print(str(e))
            return pd.DataFrame()

    def get_anime_url(self, name):
        """Build the MyAnimeList URL for the anime titled ``name``.

        Spaces, ``:`` and ``!`` in the title are replaced by ``_`` to form
        the URL slug.

        Returns:
            The URL string, or ``None`` (after printing a message) when no
            row of ``df_anime`` has this ``eng_version``.
        """
        anime = self.df_anime[self.df_anime['eng_version'] == name]
        if anime.empty:
            # NOTE(review): this message looks mis-encoded (probably UTF-8
            # Korean decoded with the wrong codec); restore it from the
            # original source file.
            print(f"{name}์ ํด๋นํ๋ ์ ๋๋ฉ์ด์ ์ ์ฐพ์ ์ ์์ต๋๋ค.")
            return None
        mal_id = anime['anime_id'].values[0]
        anime_name = anime['eng_version'].values[0].translate(self._URL_SLUG_TABLE)
        return f"https://myanimelist.net/anime/{mal_id}/{anime_name}"

    def extract_image_url(self, url):
        """Fetch ``url`` and return the ``data-src`` of its first such ``<img>``.

        Returns:
            The attribute value, or ``None`` (after printing a message) when
            the request fails or no ``<img>`` with ``data-src`` exists.
        """
        try:
            # A timeout keeps a stalled request from blocking the caller
            # forever; timeouts are RequestException subclasses.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as e:
            # NOTE(review): message looks mis-encoded; see get_anime_url.
            print(f"ํ์ด์ง๋ฅผ ๊ฐ์ ธ์ฌ ์ ์์ต๋๋ค: {e}")
            return None
        soup = BeautifulSoup(response.text, 'html.parser')
        # NOTE(review): takes the first <img data-src=...> in the document;
        # presumably that is the poster image - confirm against the page.
        image_tag = soup.find('img', {'data-src': True})
        if image_tag:
            return image_tag['data-src']
        # NOTE(review): message looks mis-encoded; see get_anime_url.
        print("์ด๋ฏธ์ง๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.")
        return None

    def NCF_Recommendation(self, a, b, c):
        """Recommend three anime for three favourite titles.

        Args:
            a, b, c: Titles of the user's favourite anime.

        Returns:
            A 9-tuple ``(name1, synopsis1, image1, name2, synopsis2, image2,
            name3, synopsis3, image3)`` ordered from most to least similar.
            Slots for which no recommendation is available are ``None``;
            an image slot is also ``None`` when no image URL can be found.
        """
        anime_result = self.find_similar_animes_combined([a, b, c], n=3)
        outputs = []
        for rank in range(3):
            if rank < len(anime_result):
                # Positional access: the slot count must not depend on the
                # index labels of the result frame.
                row = anime_result.iloc[rank]
                name = row["name"]
                explain = row["sypnopsis"]
                url = self.get_anime_url(name)
                image = self.extract_image_url(url) if url else None
            else:
                name = explain = image = None
            outputs.extend((name, explain, image))
        return tuple(outputs)
# File path settings
rating_path = 'data/rating_complete.csv'
anime_path = 'data/anime.csv'
synopsis_path = 'data/anime_with_synopsis.csv'
model_path = 'data/anime_model.h5'

# Create the recommender object (reads the CSV files and loads the model).
recommender = AnimeRecommender(
    rating_path=rating_path,
    anime_path=anime_path,
    synopsis_path=synopsis_path,
    model_path=model_path,
)

# NOTE(review): the label strings below look mis-encoded (probably UTF-8
# Korean decoded with the wrong codec); they are kept exactly as found.
# NOTE(review): the nesting is reconstructed from the `with` statements; the
# button is assumed to sit directly inside the Blocks container - confirm.
with gr.Blocks() as app:
    # Input row: the user's three favourite titles.
    with gr.Row():
        a = gr.Textbox(label="๋์ ์ต์ ์ ๋ ์ฒซ ๋ฒ์งธ๋ฅผ ์์ฑํด๋ด!")
        b = gr.Textbox(label="๋์ ์ต์ ์ ๋ ๋ ๋ฒ์งธ๋ฅผ ์์ฑํด๋ด!")
        c = gr.Textbox(label="๋์ ์ต์ ์ ๋ ์ธ ๋ฒ์งธ๋ฅผ ์์ฑํด๋ด!")

    # Output row: one column (image, title, synopsis) per recommendation.
    with gr.Row():
        with gr.Column():
            img1 = gr.Image(label="1๋ฒ์งธ ์ ๋ ์ถ์ฒ")
            output1 = gr.Textbox(label="1๏ธโฃ ์ฒซ๋ฒ์งธ ์ ๋ ์ถ์ฒ!")
            output2 = gr.Textbox(label="์ฒซ ๋ฒ์งธ ์ ๋ ์ค๋ช ", interactive=False)
        with gr.Column():
            img2 = gr.Image(label="2๋ฒ์งธ ์ ๋ ์ถ์ฒ")
            output3 = gr.Textbox(label="2๏ธโฃ ๋๋ฒ์งธ ์ ๋ ์ถ์ฒ!")
            output4 = gr.Textbox(label="๋ ๋ฒ์งธ ์ ๋ ์ค๋ช ", interactive=False)
        with gr.Column():
            img3 = gr.Image(label="3๋ฒ์งธ ์ ๋ ์ถ์ฒ")
            output5 = gr.Textbox(label="3๏ธโฃ ์ธ๋ฒ์งธ ์ ๋ ์ถ์ฒ!")
            output6 = gr.Textbox(label="์ธ ๋ฒ์งธ ์ ๋ ์ค๋ช ", interactive=False)

    btn = gr.Button("์ถ์ฒ์ ๋ฐ์๋ด ์๋ค!")
    # The output order matches the tuple returned by NCF_Recommendation:
    # (title, synopsis, image) for each of the three recommendations.
    btn.click(
        recommender.NCF_Recommendation,
        [a, b, c],
        [output1, output2, img1, output3, output4, img2, output5, output6, img3],
    )

app.launch(share=True)