Spaces:

student-abdullah
/

NextGrocery

Sleeping

App Files Files Community

NextGrocery / cooccurance.py

student-abdullah

Initial commit with full project

f8c0ae2 about 1 year ago

raw

history blame contribute delete

1.71 kB

	import pandas as pd
	from itertools import combinations
	from collections import defaultdict
	from load_data import items

	# Gather every item from every transaction into one flat list
	flat_items = []
	for sublist in items:
	    flat_items.extend(sublist)

	# Count, for each unordered pair of products, how many times the two appear
	# together in a transaction. Sorting the pair before using it as a key makes
	# (a, b) and (b, a) accumulate into the same counter.
	product_co_occurrence = defaultdict(int)
	for sublist in items:
	    for combination in combinations(sublist, 2):
	        product_co_occurrence[tuple(sorted(combination))] += 1

	# Tabulate the counts as one row per pair, most frequent pairs first
	co_occurrence_df = pd.DataFrame(
	    list(product_co_occurrence.items()),
	    columns=["Pair", "Frequency"],
	).sort_values(by="Frequency", ascending=False)


	# Function to calculate confidence
	def calculate_confidence(item1, item2, df):
	    """Return the confidence of the association ``item1 -> item2``.

	    A row of ``df`` is treated as containing a product when the value in
	    that product's column is greater than zero. Confidence is the number of
	    rows containing both products divided by the number of rows containing
	    ``item1``.

	    Args:
	        item1: Column label of the product the rule starts from.
	        item2: Column label of the product being predicted.
	        df: DataFrame with a column for each of the two labels.

	    Returns:
	        The ratio as a float, or ``0.0`` when no row contains ``item1``
	        (the ratio is undefined there; returning 0.0 avoids a
	        ZeroDivisionError and means "no evidence for the rule").

	    Raises:
	        KeyError: If ``item1`` or ``item2`` is not a column of ``df``.
	    """
	    # Build both masks up front so an unknown label always raises KeyError,
	    # regardless of how many rows contain item1.
	    has_item1 = df[item1] > 0
	    has_item2 = df[item2] > 0

	    # Counting True values in the mask avoids copying the filtered frame.
	    item1_transactions = int(has_item1.sum())
	    if item1_transactions == 0:
	        return 0.0

	    both_transactions = int((has_item1 & has_item2).sum())
	    return both_transactions / item1_transactions


	# Function to get recommendations based on a product
	def get_recommendations(product_name, co_occurrence_df, df, confidence_threshold=0.1, top_n=3):
	    """Recommend products that co-occur with ``product_name``.

	    Walks the rows of ``co_occurrence_df`` in their existing order, keeps
	    the pairs that contain ``product_name``, and recommends the other
	    member of each pair whose confidence (as computed by
	    ``calculate_confidence(product_name, other, df)``) is strictly greater
	    than ``confidence_threshold``.

	    Args:
	        product_name: Product to find companions for.
	        co_occurrence_df: DataFrame with a "Pair" column holding 2-tuples of
	            product names. Results follow this frame's row order, so it
	            should already be sorted by preference (presumably descending
	            frequency -- confirm against the caller).
	        df: DataFrame forwarded unchanged to ``calculate_confidence``.
	        confidence_threshold: Exclusive lower bound on confidence.
	        top_n: Maximum number of recommendations to return (default 3,
	            matching the previous hard-coded limit).

	    Returns:
	        A list of at most ``top_n`` product names; empty when nothing
	        qualifies or ``top_n`` is not positive.
	    """
	    recommended_products = []
	    if top_n <= 0:
	        return recommended_products

	    for pair in co_occurrence_df["Pair"]:
	        if product_name not in pair:
	            continue

	        other_product = pair[0] if pair[1] == product_name else pair[1]
	        # A pair like (A, A) arises when one basket lists a product twice;
	        # never recommend the product the caller asked about.
	        if other_product == product_name:
	            continue

	        confidence = calculate_confidence(product_name, other_product, df)
	        if confidence > confidence_threshold:
	            recommended_products.append(other_product)
	            # Stop as soon as the limit is reached instead of scoring
	            # every remaining pair only to discard the result.
	            if len(recommended_products) >= top_n:
	                break

	    return recommended_products