import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from faker import Faker

fake = Faker()


class SAPDataGenerator:
    """Generate synthetic procurement data sets as pandas DataFrames.

    Three independent data sets are available: purchase orders, per-supplier
    performance figures, and a month-by-category spend table. All values are
    random draws from the global ``random`` module and the module-level
    ``fake`` Faker instance; nothing is read from or written to disk.

    The ``suppliers``, ``categories``, ``plant_codes`` and ``company_codes``
    attributes are plain lists and may be replaced after construction to
    change the pools the generators sample from.
    """

    # Value pools for the categorical purchase-order / supplier columns.
    _PO_STATUSES = ('Open', 'Delivered', 'Partially Delivered', 'Cancelled')
    _PAYMENT_TERMS = ('Net 30', 'Net 60', '2/10 Net 30', 'Immediate')
    _RISK_LEVELS = ('Low', 'Medium', 'High')
    _CERTIFICATIONS = ('ISO 9001', 'ISO 14001', 'Multiple', 'None')
    _MONTHS = (
        'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
    )

    # Explicit column schemas. Passing these to the DataFrame constructor
    # keeps the columns present even when no rows are generated, so callers
    # can index columns on an empty result without a KeyError.
    _PURCHASE_ORDER_COLUMNS = (
        'PO_Number', 'Supplier', 'Category', 'Plant', 'Company_Code',
        'PO_Date', 'Delivery_Date', 'Material', 'Quantity', 'Unit_Price',
        'Total_Value', 'Currency', 'Status', 'Buyer', 'Payment_Terms',
        'Delivery_Performance',
    )
    _SUPPLIER_PERFORMANCE_COLUMNS = (
        'Supplier', 'On_Time_Delivery', 'Quality_Score', 'Cost_Performance',
        'Total_Spend_YTD', 'Active_Contracts', 'Risk_Score',
        'Certification_Status',
    )
    _SPEND_ANALYSIS_COLUMNS = ('Month', 'Category', 'Spend', 'Budget', 'Variance')

    def __init__(self):
        """Set up the default supplier, category, plant and company pools."""
        self.suppliers = [
            "Acme Corp", "Global Supplies Inc", "Tech Solutions Ltd",
            "Industrial Partners", "Premium Materials Co", "Swift Logistics",
            "Quality Components", "Reliable Vendors", "Innovative Systems",
            "Professional Services",
        ]
        self.categories = [
            "Raw Materials", "IT Equipment", "Office Supplies",
            "Professional Services", "Maintenance", "Transportation",
            "Marketing", "Facilities", "Security", "Consulting",
        ]
        self.plant_codes = ["1000", "2000", "3000", "4000", "5000"]
        self.company_codes = ["US01", "DE02", "IN03", "UK04", "SG05"]

    def generate_purchase_orders(self, n: int = 1000) -> pd.DataFrame:
        """Return a DataFrame of ``n`` random purchase orders.

        Args:
            n: Number of rows to generate. Zero or a negative value yields
                an empty frame that still carries every column.

        Returns:
            One row per purchase order. ``PO_Number`` is ``"PO"`` followed by
            the 1-based row number zero-padded to eight digits,
            ``Delivery_Date`` is 7-90 days after ``PO_Date``, and
            ``Total_Value`` is ``Unit_Price * Quantity`` rounded to two
            decimals. ``Currency`` is always ``'USD'``.
        """
        rows = []
        for number in range(1, n + 1):
            # The draw order below is kept stable so that runs seeded
            # externally (random.seed / Faker.seed) stay reproducible.
            po_date = fake.date_between(start_date='-2y', end_date='today')
            delivery_date = po_date + timedelta(days=random.randint(7, 90))
            unit_price = round(random.uniform(10, 10000), 2)
            quantity = random.randint(1, 1000)
            total_value = round(unit_price * quantity, 2)

            rows.append({
                'PO_Number': f"PO{str(number).zfill(8)}",
                'Supplier': random.choice(self.suppliers),
                'Category': random.choice(self.categories),
                'Plant': random.choice(self.plant_codes),
                'Company_Code': random.choice(self.company_codes),
                'PO_Date': po_date,
                'Delivery_Date': delivery_date,
                'Material': fake.catch_phrase(),
                'Quantity': quantity,
                'Unit_Price': unit_price,
                'Total_Value': total_value,
                'Currency': 'USD',
                'Status': random.choice(self._PO_STATUSES),
                'Buyer': fake.name(),
                'Payment_Terms': random.choice(self._PAYMENT_TERMS),
                'Delivery_Performance': round(random.uniform(85, 99), 1),
            })

        return pd.DataFrame(rows, columns=list(self._PURCHASE_ORDER_COLUMNS))

    def generate_supplier_performance(self) -> pd.DataFrame:
        """Return one row of random performance figures per supplier.

        Returns:
            A DataFrame with one row for each entry in ``self.suppliers``
            (in that order). The frame keeps its columns even when the
            supplier list is empty.
        """
        rows = [
            {
                'Supplier': supplier,
                'On_Time_Delivery': round(random.uniform(85, 98), 1),
                'Quality_Score': round(random.uniform(80, 99), 1),
                'Cost_Performance': round(random.uniform(90, 99), 1),
                'Total_Spend_YTD': round(random.uniform(100000, 5000000), 2),
                'Active_Contracts': random.randint(5, 50),
                'Risk_Score': random.choice(self._RISK_LEVELS),
                'Certification_Status': random.choice(self._CERTIFICATIONS),
            }
            for supplier in self.suppliers
        ]
        return pd.DataFrame(rows, columns=list(self._SUPPLIER_PERFORMANCE_COLUMNS))

    def generate_spend_analysis(self) -> pd.DataFrame:
        """Return random spend and budget figures per month and category.

        Returns:
            A DataFrame with one row for every (month, category) pair,
            months ``'Jan'``..``'Dec'`` outermost and ``self.categories``
            innermost. The frame keeps its columns even when the category
            list is empty.
        """
        # NOTE(review): 'Variance' is an independent random draw; it is not
        # derived from the 'Spend' and 'Budget' values in the same row.
        rows = [
            {
                'Month': month,
                'Category': category,
                'Spend': round(random.uniform(50000, 500000), 2),
                'Budget': round(random.uniform(60000, 550000), 2),
                'Variance': round(random.uniform(-10, 15), 1),
            }
            for month in self._MONTHS
            for category in self.categories
        ]
        return pd.DataFrame(rows, columns=list(self._SPEND_ANALYSIS_COLUMNS))