File size: 5,133 Bytes
aea7790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
"""
Ledger analysis utilities for SmartLedger
Contains core functions for CSV processing and analysis
"""

import pandas as pd
from typing import Optional, Tuple
import io

def process_ledger_csv(csv_file) -> Tuple[Optional[pd.DataFrame], str]:
    """Process uploaded CSV ledger file and return DataFrame and status message"""
    if csv_file is None:
        return None, "Please upload a CSV file"
    
    try:
        # Read the CSV file
        df = pd.read_csv(csv_file.name)
        
        # Validate required columns
        required_columns = ['date', 'vendor', 'amount']
        missing_columns = [col for col in required_columns if col not in df.columns]
        
        if missing_columns:
            return None, f"Missing required columns: {', '.join(missing_columns)}"
        
        # Basic data validation
        if df.empty:
            return None, "CSV file is empty"
        
        # Convert amount to numeric if it's not already
        try:
            df['amount'] = pd.to_numeric(df['amount'], errors='coerce')
        except:
            return None, "Amount column contains invalid values"
        
        # Convert date column
        try:
            df['date'] = pd.to_datetime(df['date'], errors='coerce')
            # Check if any dates were successfully converted
            if df['date'].isna().all():
                return None, "No valid dates found in date column"
        except Exception as e:
            return None, f"Date column processing failed: {str(e)}"
        
        # Remove rows with invalid data
        initial_rows = len(df)
        df = df.dropna(subset=['date', 'vendor', 'amount'])
        
        if df.empty:
            return None, "No valid rows found after data cleaning"
        
        rows_removed = initial_rows - len(df)
        status_msg = f"βœ… Successfully loaded {len(df)} transactions"
        if rows_removed > 0:
            status_msg += f" ({rows_removed} invalid rows removed)"
        
        return df, status_msg
        
    except Exception as e:
        return None, f"Error reading CSV file: {str(e)}"

def analyze_ledger_data(df: pd.DataFrame) -> str:
    """Generate basic analysis of the ledger data"""
    if df is None or df.empty:
        return "No data to analyze"
    
    try:
        # Basic statistics
        total_transactions = len(df)
        total_amount = df['amount'].sum()
        avg_amount = df['amount'].mean()
        
        # Date range
        try:
            min_date = df['date'].min()
            max_date = df['date'].max()
            if hasattr(min_date, 'strftime'):
                date_range = f"{min_date.strftime('%Y-%m-%d')} to {max_date.strftime('%Y-%m-%d')}"
            else:
                date_range = f"{min_date} to {max_date}"
        except:
            date_range = "Date range unavailable"
        
        # Top vendors
        top_vendors = df['vendor'].value_counts().head(3)
        
        # Categories if available
        categories_info = ""
        if 'category' in df.columns:
            category_totals = df.groupby('category')['amount'].sum().sort_values(ascending=False).head(3)
            categories_info = f"\n\n**Top Categories:**\n"
            for cat, amount in category_totals.items():
                categories_info += f"β€’ {cat}: ${amount:,.2f}\n"
        
        analysis = f"""
**πŸ“Š Ledger Analysis:**

**Summary:**
β€’ Total Transactions: {total_transactions:,}
β€’ Total Amount: ${total_amount:,.2f}
β€’ Average Transaction: ${avg_amount:.2f}
β€’ Date Range: {date_range}

**Top Vendors:**
"""
        for vendor, count in top_vendors.items():
            vendor_total = df[df['vendor'] == vendor]['amount'].sum()
            analysis += f"β€’ {vendor}: {count} transactions (${vendor_total:,.2f})\n"
        
        analysis += categories_info
        
        return analysis
        
    except Exception as e:
        return f"Error analyzing data: {str(e)}"

def load_sample_data() -> Tuple[str, pd.DataFrame, str]:
    """Load sample data for testing"""
    sample_csv = """date,vendor,amount,category,description
2024-01-15,Coffee Shop Downtown,4.50,Meals & Entertainment,Morning coffee
2024-01-16,Shell Gas Station,45.00,Vehicle Expenses,Regular gasoline
2024-01-17,Office Depot,23.99,Office Supplies,Printer paper
2024-01-18,Uber Technologies,18.75,Travel,Ride to meeting
2024-01-19,Microsoft Corporation,99.99,Software,Office 365 subscription"""
    
    # Create a temporary file-like object
    temp_file = io.StringIO(sample_csv)
    df = pd.read_csv(temp_file)
    
    # Process the data like we would for uploaded CSV
    df['amount'] = pd.to_numeric(df['amount'], errors='coerce')
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    
    analysis = analyze_ledger_data(df)
    return "βœ… Sample data loaded successfully", df, analysis

def handle_csv_upload(csv_file):
    """Handle CSV file upload and analysis"""
    df, status = process_ledger_csv(csv_file)
    
    if df is not None:
        analysis = analyze_ledger_data(df)
        return status, df, analysis
    else:
        return status, None, ""