"""Content-based recipe recommendation helpers.

Recipes are filtered by ingredient, their feature columns are standardised,
and the nearest neighbours (cosine distance) of a query vector are returned.
"""

import re

import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler

# Defaults forwarded to ``NearestNeighbors.kneighbors``. ``return_distance``
# must stay False for ``apply_pipeline`` to receive row indices.
_DEFAULT_PARAMS = {'n_neighbors': 5, 'return_distance': False}


def scaling(dataframe):
    """Standardise the feature columns of *dataframe*.

    The features are the columns at positional indices 6 through 14
    (presumably the numeric nutrition columns -- confirm against the
    dataset schema).

    Returns:
        A ``(prep_data, scaler)`` tuple: the scaled feature matrix and the
        fitted ``StandardScaler``.
    """
    features = dataframe.iloc[:, 6:15].to_numpy()
    scaler = StandardScaler()
    prep_data = scaler.fit_transform(features)
    return prep_data, scaler


def nn_predictor(prep_data):
    """Fit and return a brute-force, cosine-metric ``NearestNeighbors`` model."""
    neigh = NearestNeighbors(metric='cosine', algorithm='brute')
    neigh.fit(prep_data)
    return neigh


def build_pipeline(neigh, scaler, params):
    """Chain the fitted scaler with a neighbour lookup.

    Args:
        neigh: Fitted ``NearestNeighbors`` model.
        scaler: Fitted ``StandardScaler``.
        params: Keyword arguments forwarded to ``neigh.kneighbors``.

    Returns:
        A ``Pipeline`` whose ``transform`` scales its input and then calls
        ``neigh.kneighbors`` on the result.
    """
    transformer = FunctionTransformer(neigh.kneighbors, kw_args=params)
    return Pipeline([('std_scaler', scaler), ('NN', transformer)])


def extract_data(dataframe, ingredients):
    """Return a new DataFrame holding the rows that match *ingredients*."""
    return extract_ingredient_filtered_data(dataframe, ingredients)


def extract_ingredient_filtered_data(dataframe, ingredients):
    """Keep the rows whose ``RecipeIngredientParts`` mention every ingredient.

    Matching is case-insensitive. Each ingredient is interpolated into the
    pattern as a regular-expression fragment, so regex metacharacters in an
    ingredient are interpreted rather than matched literally.

    Args:
        dataframe: Recipes with a string column ``RecipeIngredientParts``.
        ingredients: Iterable of ingredient patterns; empty means no filter.

    Returns:
        A new DataFrame; the input is never modified.
    """
    ingredients = list(ingredients)
    if not ingredients:
        # Nothing to filter on: hand back an independent copy of every row.
        return dataframe.copy()
    # One lookahead per ingredient, so all must occur, in any order.
    pattern = ''.join(f'(?=.*{ingredient})' for ingredient in ingredients)
    # na=False: a missing ingredient list cannot match, and a NaN in the
    # mask would otherwise make the boolean indexing below raise.
    mask = dataframe['RecipeIngredientParts'].str.contains(
        pattern, regex=True, flags=re.IGNORECASE, na=False
    )
    return dataframe[mask]


def apply_pipeline(pipeline, _input, extracted_data):
    """Return the rows of *extracted_data* nearest to the query *_input*.

    *_input* is reshaped to a single-row matrix. The pipeline output is used
    as positional row indices, which requires ``return_distance=False`` in
    the ``kneighbors`` arguments.
    """
    query = np.array(_input).reshape(1, -1)
    neighbour_rows = pipeline.transform(query)[0]
    return extracted_data.iloc[neighbour_rows]


def recommend(dataset, _input, ingredients=None, params=None) -> pd.DataFrame:
    """Recommend the recipes most similar to the feature vector *_input*.

    Args:
        dataset: Full recipe DataFrame.
        _input: Query feature values, one per column used by ``scaling``.
        ingredients: Optional ingredient patterns every result must contain.
        params: Optional ``kneighbors`` keyword arguments. Missing keys fall
            back to ``n_neighbors=5`` and ``return_distance=False``.

    Returns:
        The ``n_neighbors`` nearest rows of the ingredient-filtered dataset.

    Raises:
        ValueError: Fewer recipes match the ingredients than ``n_neighbors``.
    """
    ingredients = [] if ingredients is None else ingredients
    # Caller values win; defaults only fill in what was left out.
    params = {**_DEFAULT_PARAMS, **(params or {})}

    extracted_data = extract_data(dataset, ingredients)
    n_neighbors = params['n_neighbors']
    available = extracted_data.shape[0]
    if available < n_neighbors:
        raise ValueError(
            f"only {available} recipe(s) match the requested ingredients, "
            f"but n_neighbors={n_neighbors} are required"
        )

    prep_data, scaler = scaling(extracted_data)
    neigh = nn_predictor(prep_data)
    pipeline = build_pipeline(neigh, scaler, params)
    return apply_pipeline(pipeline, _input, extracted_data)


def extract_quoted_strings(s):
    """Return every substring of *s* enclosed in double quotes, as a list."""
    return re.findall(r'"([^"]*)"', s)


def output_recommended_recipes(dataframe):
    """Convert recommended recipes into a list of plain dicts.

    ``RecipeIngredientParts`` and ``RecipeInstructions`` are replaced by the
    list of double-quoted strings found in their original text.

    Returns:
        One dict per row, or ``None`` when *dataframe* is ``None``.
    """
    if dataframe is None:
        return None
    output = dataframe.to_dict("records")
    for recipe in output:
        recipe['RecipeIngredientParts'] = extract_quoted_strings(
            recipe['RecipeIngredientParts']
        )
        recipe['RecipeInstructions'] = extract_quoted_strings(
            recipe['RecipeInstructions']
        )
    return output