Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| from itertools import combinations | |
| from collections import defaultdict | |
| from load_data import items | |
# Flatten every transaction into one long list of items.
# NOTE(review): flat_items is not referenced again in the visible code;
# it is kept because other modules may import it.
flat_items = [item for sublist in items for item in sublist]

# Count how often each unordered pair of products shares a transaction.
# Each pair is stored as a sorted tuple so (a, b) and (b, a) share one key.
product_co_occurrence = defaultdict(int)
for sublist in items:
    for combination in combinations(sublist, 2):
        product_co_occurrence[tuple(sorted(combination))] += 1

# Tabulate the pair counts, most frequently co-occurring pairs first.
co_occurrence_df = pd.DataFrame(
    list(product_co_occurrence.items()),
    columns=["Pair", "Frequency"],
).sort_values(by="Frequency", ascending=False)
# Function to calculate confidence
def calculate_confidence(item1, item2, df):
    """Return the confidence of the rule ``item1 -> item2``.

    Confidence is the fraction of rows containing ``item1`` that also
    contain ``item2``. A row "contains" an item when the value in that
    item's column is greater than zero.

    Args:
        item1: Column label of the antecedent item.
        item2: Column label of the consequent item.
        df: DataFrame with one column per item; values are compared
            against zero to decide presence.

    Returns:
        A float in ``[0.0, 1.0]``. Returns ``0.0`` when no row contains
        ``item1`` (including an empty ``df``) instead of dividing by zero.

    Raises:
        KeyError: If ``item1`` or ``item2`` is not a column of ``df``.
    """
    has_item1 = df[item1] > 0
    has_item2 = df[item2] > 0

    # Counting True values in the masks avoids building filtered copies
    # of the frame; int() keeps the result a plain Python float.
    item1_transactions = int(has_item1.sum())
    if item1_transactions == 0:
        # No support for item1: the rule never fires, so confidence is 0.
        return 0.0

    both_transactions = int((has_item1 & has_item2).sum())
    return both_transactions / item1_transactions
# Function to get recommendations based on a product
def get_recommendations(product_name, co_occurrence_df, df, confidence_threshold=0.1, top_n=3):
    """Return up to ``top_n`` products recommended alongside ``product_name``.

    Pairs are visited in the row order of ``co_occurrence_df``, so the
    caller controls ranking by how that table is sorted. For every pair
    containing ``product_name``, the other member is recommended when the
    confidence of ``product_name -> other`` exceeds the threshold.

    Args:
        product_name: The product to find companions for.
        co_occurrence_df: DataFrame with a ``"Pair"`` column holding
            2-tuples of products.
        df: Transaction DataFrame passed through to
            ``calculate_confidence``.
        confidence_threshold: Minimum confidence (exclusive) for a product
            to be recommended.
        top_n: Maximum number of recommendations to return. Defaults to 3.

    Returns:
        A list of at most ``top_n`` recommended products, in the order
        their pairs appear in ``co_occurrence_df``.
    """
    recommended_products = []
    if top_n <= 0:
        return recommended_products

    # Iterate the pairs directly rather than building a boolean mask, so
    # an empty table simply yields an empty result.
    for pair in co_occurrence_df["Pair"]:
        if product_name not in pair:
            continue

        other_product = pair[0] if pair[1] == product_name else pair[1]
        if other_product == product_name:
            # A (p, p) pair arises when an item repeats within one
            # transaction; never recommend the product to itself.
            continue

        confidence = calculate_confidence(product_name, other_product, df)
        if confidence > confidence_threshold:
            recommended_products.append(other_product)
            # Stop as soon as enough recommendations are collected; the
            # remaining pairs would be discarded anyway.
            if len(recommended_products) == top_n:
                break

    return recommended_products