# Source: Hugging Face Spaces - shukdevdattaEX / AnalytixPro-v2.0 (status: Sleeping)
# File size: 31,708 Bytes
# Revision: 64609c5

import gradio as gr
import pandas as pd
import aiohttp
import asyncio
import json
import os
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from typing import Optional, Tuple, Dict, Any
import logging
from datetime import datetime
import re
from jinja2 import Template
import markdown  # Requires 'markdown' package: install via `pip install markdown`

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class EnhancedDataAnalyzer:
    """Load a tabular file, summarise it, chart it and query an LLM about it.

    An instance remembers the most recently processed DataFrame
    (``current_df``) and the HTML fragments of its charts (``current_charts``)
    so that ``generate_report_html`` can embed them after an analysis.
    """

    def __init__(self):
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"
        self.max_file_size = 50 * 1024 * 1024  # 50MB limit
        self.conversation_history = []
        self.current_df = None
        self.current_charts = None

    def validate_api_key(self, api_key: str) -> bool:
        """Return True when *api_key* is a string of at least 10 non-blank characters.

        The threshold matches the "minimum 10 characters" wording shown to the
        user when validation fails.
        """
        return isinstance(api_key, str) and len(api_key.strip()) >= 10

    def validate_file(self, file) -> Tuple[bool, str]:
        """Check that an uploaded file exists, is small enough and has a supported extension.

        Args:
            file: Either an object exposing the path as ``.name`` or a plain
                path string.

        Returns:
            ``(is_valid, message)``; the message explains the failure or is
            ``"File valid"``.
        """
        if not file:
            return False, "No file uploaded"

        # Accept both file-like upload objects (path in ``.name``) and raw paths.
        path = getattr(file, "name", file)

        try:
            file_size = os.path.getsize(path)
        except (OSError, TypeError) as exc:
            # A vanished or unreadable upload is a validation failure, not a crash.
            return False, f"Could not read uploaded file: {exc}"

        if file_size > self.max_file_size:
            return False, f"File too large. Maximum size: {self.max_file_size // (1024*1024)}MB"

        file_extension = os.path.splitext(path)[1].lower()
        if file_extension not in ['.csv', '.xlsx', '.xls']:
            return False, "Unsupported format. Please upload CSV or Excel files only."

        return True, "File valid"

    async def analyze_with_chutes(self, api_token: str, data_summary: str, user_question: Optional[str] = None) -> str:
        """Send the data summary to the chat-completions endpoint and collect the streamed reply.

        Args:
            api_token: Bearer token sent in the ``Authorization`` header.
            data_summary: Text description of the dataset placed in the prompt.
            user_question: Optional question; when blank a general analysis
                prompt is used instead.

        Returns:
            The concatenated streamed content, or a user-facing error message.
            This method does not raise: all failures are reported as text.
        """
        headers = {
            "Authorization": f"Bearer {api_token.strip()}",
            "Content-Type": "application/json"
        }

        # Create context-aware prompt (a whitespace-only question counts as "no question")
        if user_question and user_question.strip():
            prompt = f"""You are a data analyst expert. Based on this dataset:
{data_summary}
User's specific question: {user_question}
Provide a detailed, actionable answer with specific data points and recommendations."""
        else:
            prompt = f"""You are a senior data analyst. Analyze this dataset thoroughly:
{data_summary}
Provide a comprehensive analysis including:
1. **Key Statistical Insights**: Most important numbers and what they mean
2. **Patterns & Trends**: Notable patterns, correlations, or anomalies
3. **Data Quality Assessment**: Missing values, outliers, data consistency
4. **Business Intelligence**: Actionable insights and opportunities
5. **Recommendations**: Specific next steps or areas to investigate
Format your response with clear sections and bullet points for readability."""

        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [
                {
                    "role": "system",
                    "content": "You are an expert data analyst who provides clear, actionable insights from datasets. Always structure your responses with clear headings and specific data points."
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "stream": True,
            "max_tokens": 3000,
            "temperature": 0.2,
            "top_p": 0.9
        }

        # Kept outside the try block so a timeout can still return what arrived.
        pieces = []

        try:
            timeout = aiohttp.ClientTimeout(total=30)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    if response.status == 401:
                        return "❌ **Authentication Error**: Invalid API key. Please check your Chutes API token."
                    elif response.status == 429:
                        return "⏳ **Rate Limit**: Too many requests. Please wait a moment and try again."
                    elif response.status != 200:
                        return f"❌ **API Error**: Request failed with status {response.status}"

                    # The body is a server-sent-event stream: "data: {json}" lines
                    # terminated by a "data: [DONE]" sentinel.
                    async for raw_line in response.content:
                        line = raw_line.decode("utf-8", errors="replace").strip()
                        if not line.startswith("data: "):
                            continue
                        data = line[6:]
                        if data == "[DONE]":
                            break
                        try:
                            chunk_data = json.loads(data)
                        except json.JSONDecodeError:
                            continue
                        choices = chunk_data.get("choices") if isinstance(chunk_data, dict) else None
                        if not choices:
                            continue
                        # "delta" may be present but null; treat that as empty.
                        delta = choices[0].get("delta") or {}
                        content = delta.get("content")
                        if content:
                            pieces.append(content)

                    return "".join(pieces) if pieces else "⚠️ No response received from the model."

        except asyncio.TimeoutError:
            if pieces:
                # Do not throw away text that was already streamed.
                return "".join(pieces) + "\n\n⏰ **Timeout**: The response was cut off because the request took too long."
            return "⏰ **Timeout Error**: Request took too long. Please try again."
        except Exception as e:
            logger.error("API Error: %s", e)
            return f"❌ **Connection Error**: {str(e)}"

    @staticmethod
    def _read_csv_with_fallback(file_path: str) -> pd.DataFrame:
        """Read a CSV, trying UTF-8, then cp1252, then latin-1."""
        # latin-1 accepts every byte sequence, so it must come last; otherwise
        # the cp1252 attempt could never be reached.
        for encoding in ['utf-8', 'cp1252', 'latin-1']:
            try:
                return pd.read_csv(file_path, encoding=encoding)
            except UnicodeDecodeError:
                continue
        raise ValueError("Could not decode CSV file. Please check file encoding.")

    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str, str]:
        """Read a CSV/Excel file and build its summary and charts.

        Args:
            file_path: Path of the file; the extension selects the reader.

        Returns:
            ``(dataframe, markdown_summary, charts_html)``.

        Raises:
            Exception: Wrapping any failure, with the original error chained.
        """
        try:
            file_extension = os.path.splitext(file_path)[1].lower()

            if file_extension == '.csv':
                df = self._read_csv_with_fallback(file_path)
            elif file_extension in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

            # Headers can be numbers (or otherwise non-string); cast before
            # using the .str accessor, then normalise whitespace.
            df.columns = df.columns.astype(str).str.strip().str.replace(r'\s+', ' ', regex=True)
            self.current_df = df
            data_summary = self.generate_enhanced_summary(df)
            charts_html = self.generate_visualizations(df)

            return df, data_summary, charts_html

        except Exception as e:
            raise Exception(f"Error processing file: {str(e)}") from e

    def generate_enhanced_summary(self, df: pd.DataFrame) -> str:
        """Build a Markdown summary of *df*.

        The summary covers shape, memory use, dtype counts, missing values,
        per-column statistics for up to 10 numeric and 10 categorical columns,
        and the first three rows.

        Returns:
            The summary as a single newline-joined string.
        """
        summary = []
        summary.append("# 📊 Dataset Analysis Report")
        summary.append(f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        summary.append(f"**File Size**: {df.shape[0]:,} rows × {df.shape[1]} columns")
        memory_usage = df.memory_usage(deep=True).sum() / 1024**2
        summary.append(f"**Memory Usage**: {memory_usage:.2f} MB\n")

        type_counts = df.dtypes.value_counts()
        summary.append("## 📋 Column Types:")
        for dtype, count in type_counts.items():
            summary.append(f"- **{dtype}**: {count} columns")

        missing_data = df.isnull().sum()
        missing_pct = (missing_data / len(df) * 100).round(2)
        missing_summary = missing_data[missing_data > 0].sort_values(ascending=False)

        if len(missing_summary) > 0:
            summary.append("\n## ⚠️ Missing Data:")
            for col, count in missing_summary.head(10).items():
                pct = missing_pct[col]
                summary.append(f"- **{col}**: {count:,} missing ({pct}%)")
        else:
            summary.append("\n## ✅ Data Quality: No missing values detected!")

        numeric_cols = df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) > 0:
            summary.append(f"\n## 📈 Numerical Columns Analysis ({len(numeric_cols)} columns):")
            for col in numeric_cols[:10]:
                values = df[col]
                stats = values.describe()
                q1, q3 = stats['25%'], stats['75%']
                iqr = q3 - q1
                # Count values beyond BOTH 1.5*IQR fences, not only the upper one.
                outside = (values < q1 - 1.5 * iqr) | (values > q3 + 1.5 * iqr)
                outliers = int(outside.sum())
                summary.append(f"- **{col}**: μ={stats['mean']:.2f}, σ={stats['std']:.2f}, outliers={outliers}")

        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        if len(categorical_cols) > 0:
            summary.append(f"\n## 📝 Categorical Columns Analysis ({len(categorical_cols)} columns):")
            for col in categorical_cols[:10]:
                unique_count = df[col].nunique()
                cardinality = "High" if unique_count > len(df) * 0.9 else "Medium" if unique_count > 10 else "Low"
                modes = df[col].mode()
                most_common = modes.iloc[0] if len(modes) > 0 else "N/A"
                summary.append(f"- **{col}**: {unique_count:,} unique values ({cardinality} cardinality), Top: '{most_common}'")

        summary.append("\n## 🔍 Data Sample (First 3 Rows):")
        # Number rows by position: index labels are not guaranteed to be integers.
        for position, (_, row) in enumerate(df.head(3).iterrows(), start=1):
            summary.append(f"\n**Row {position}:**")
            for col, val in row.items():
                summary.append(f"  - {col}: {val}")

        return "\n".join(summary)

    def generate_visualizations(self, df: pd.DataFrame) -> str:
        """Render Plotly charts for *df* as embeddable HTML fragments.

        Charts produced: missing values (if any), correlation matrix (two or
        more numeric columns), histograms for up to three numeric columns, bar
        charts for up to two low-cardinality categorical columns, and a dataset
        overview. The fragments are stored in ``self.current_charts``.

        Returns:
            The joined HTML, or an error paragraph if chart generation failed
            (in which case ``self.current_charts`` is left as ``None``).
        """
        charts_html = []
        # Forget charts from a previously processed file so a failure here can
        # never leak them into the next report.
        self.current_charts = None
        plotly_js_loaded = False

        def render(fig, div_id):
            """Return an HTML fragment for *fig*, loading plotly.js only once."""
            nonlocal plotly_js_loaded
            fragment = fig.to_html(
                full_html=False,  # fragments are concatenated inside one page
                include_plotlyjs=False if plotly_js_loaded else 'cdn',
                div_id=div_id
            )
            plotly_js_loaded = True
            return fragment

        try:
            missing_data = df.isnull().sum()
            total_missing = missing_data.sum()
            if total_missing > 0:
                fig = px.bar(
                    x=missing_data.index,
                    y=missing_data.values,
                    title="🔍 Missing Data Analysis",
                    labels={'x': 'Columns', 'y': 'Missing Values Count'},
                    color=missing_data.values,
                    color_continuous_scale='Reds'
                )
                fig.update_layout(
                    height=400,
                    showlegend=False,
                    title_x=0.5,
                    xaxis_tickangle=-45
                )
                charts_html.append("<h3>📊 Data Quality Overview</h3>")
                charts_html.append(render(fig, "missing_data_chart"))

            numeric_cols = df.select_dtypes(include=[np.number]).columns
            if len(numeric_cols) > 1:
                corr_matrix = df[numeric_cols].corr()
                fig = px.imshow(
                    corr_matrix,
                    title="🔗 Correlation Matrix - Numerical Variables",
                    color_continuous_scale='RdBu_r',
                    aspect="auto",
                    text_auto=True
                )
                fig.update_layout(height=500, title_x=0.5)
                charts_html.append("<h3>📈 Correlation Analysis</h3>")
                charts_html.append(render(fig, "correlation_chart"))

            for i, col in enumerate(numeric_cols[:3]):
                fig = px.histogram(
                    df,
                    x=col,
                    title=f"📊 Distribution: {col}",
                    marginal="box",
                    nbins=30
                )
                fig.update_layout(height=400, title_x=0.5)
                if i == 0:
                    charts_html.append("<h3>📈 Data Distributions</h3>")
                charts_html.append(render(fig, f"dist_chart_{i}"))

            categorical_cols = df.select_dtypes(include=['object', 'category']).columns
            # Tracked separately from the loop index: the first column may be
            # skipped for high cardinality while a later one is still charted.
            category_heading_added = False
            for i, col in enumerate(categorical_cols[:2]):
                if df[col].nunique() > 20:
                    continue
                value_counts = df[col].value_counts().head(10)
                fig = px.bar(
                    x=value_counts.values,
                    y=value_counts.index,
                    orientation='h',
                    title=f"📊 Top 10 Values: {col}",
                    labels={'x': 'Count', 'y': col}
                )
                fig.update_layout(height=400, title_x=0.5)
                if not category_heading_added:
                    charts_html.append("<h3>📝 Categorical Data Analysis</h3>")
                    category_heading_added = True
                charts_html.append(render(fig, f"cat_chart_{i}"))

            summary_data = {
                'Metric': ['Total Rows', 'Total Columns', 'Numeric Columns', 'Categorical Columns', 'Missing Values'],
                'Count': [
                    len(df),
                    len(df.columns),
                    len(numeric_cols),
                    len(categorical_cols),
                    total_missing
                ]
            }

            fig = px.bar(
                summary_data,
                x='Metric',
                y='Count',
                title="📋 Dataset Overview",
                color='Count',
                color_continuous_scale='Blues'
            )
            fig.update_layout(height=400, title_x=0.5, showlegend=False)
            charts_html.append("<h3>📊 Dataset Overview</h3>")
            charts_html.append(render(fig, "overview_chart"))

            self.current_charts = charts_html
            return "\n".join(charts_html) if charts_html else "<p>No charts could be generated for this dataset.</p>"

        except Exception as e:
            logger.error("Chart generation error: %s", e)
            return f"<p>❌ Chart generation failed: {str(e)}</p>"

    def generate_report_html(self, analysis_text: str, data_summary: str, file_name: str = "Unknown") -> str:
        """Render a standalone HTML report with a print button.

        Args:
            analysis_text: Markdown text of the AI analysis; converted to HTML.
            data_summary: Plain-text summary shown in a ``<pre>`` block.
            file_name: Name displayed in the report metadata.

        Returns:
            The complete HTML document as a string.
        """
        # file_name and data_summary originate from the uploaded file, so they
        # are HTML-escaped with the ``e`` filter. ai_analysis and charts_html
        # are already HTML and are inserted as-is.
        html_template = """
        <!DOCTYPE html>
        <html>
        <head>
            <meta charset="UTF-8">
            <title>Data Analysis Report</title>
            <style>
                body {
                    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
                    line-height: 1.6;
                    color: #333;
                    max-width: 1200px;
                    margin: 0 auto;
                    padding: 20px;
                    background: #f8f9fa;
                }
                .header {
                    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                    color: white;
                    padding: 30px;
                    border-radius: 10px;
                    margin-bottom: 30px;
                    text-align: center;
                }
                .section {
                    background: white;
                    padding: 25px;
                    margin-bottom: 20px;
                    border-radius: 8px;
                    box-shadow: 0 2px 10px rgba(0,0,0,0.1);
                }
                .chart-container {
                    margin: 20px 0;
                    padding: 15px;
                    background: #f8f9ff;
                    border-radius: 8px;
                    border-left: 4px solid #667eea;
                }
                h1, h2, h3 { 
                    color: #2c3e50;
                    margin-top: 20px;
                    margin-bottom: 15px;
                }
                .metadata {
                    background: #e8f4f8;
                    padding: 15px;
                    border-radius: 5px;
                    margin-bottom: 20px;
                }
                .footer {
                    text-align: center;
                    color: #666;
                    margin-top: 40px;
                    padding: 20px;
                    background: #f1f1f1;
                    border-radius: 5px;
                }
                pre {
                    background: #f4f4f4;
                    padding: 15px;
                    border-radius: 5px;
                    overflow-x: auto;
                    white-space: pre-wrap;
                    font-size: 14px;
                }
                strong {
                    color: #2c3e50;
                    font-weight: 600;
                }
                table {
                    width: 100%;
                    border-collapse: collapse;
                    margin: 20px 0;
                }
                th, td {
                    border: 1px solid #ddd;
                    padding: 8px;
                    text-align: left;
                }
                th {
                    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                    color: white;
                }
                tr:nth-child(even) {
                    background-color: #f2f2f2;
                }
                .print-button {
                    background: #667eea;
                    color: white;
                    padding: 10px 20px;
                    border: none;
                    border-radius: 5px;
                    cursor: pointer;
                    font-size: 16px;
                    margin: 10px 0;
                    display: inline-block;
                }
                .print-button:hover {
                    background: #764ba2;
                }
                @media print {
                    .print-button {
                        display: none;
                    }
                    body {
                        background: white;
                    }
                    .section, .metadata, .footer {
                        box-shadow: none;
                    }
                }
            </style>
            <script>
                function printReport() {
                    window.print();
                }
            </script>
        </head>
        <body>
            <div class="header">
                <h1>🚀 Smart Data Analysis Report</h1>
                <p>Comprehensive AI-Powered Data Insights</p>
            </div>
            
            <div class="metadata">
                <strong>📁 File:</strong> {{ file_name | e }}<br>
                <strong>📅 Generated:</strong> {{ timestamp }}<br>
                <strong>🤖 Model:</strong> OpenAI gpt-oss-20b
            </div>
            
            <div class="section">
                <h2>🎯 AI Analysis & Insights</h2>
                <button class="print-button" onclick="printReport()">🖨️ Print as PDF</button>
                <div>{{ ai_analysis }}</div>
            </div>
            
            <div class="section">
                <h2>📊 Visualizations</h2>
                <div class="chart-container">
                    {{ charts_html }}
                </div>
            </div>
            
            <div class="section">
                <h2>📋 Technical Data Summary</h2>
                <pre>{{ data_summary | e }}</pre>
            </div>
            
            <div class="footer">
                <p>Report generated by Smart Data Analyzer Pro • Powered by Smart AI</p>
                <p>For questions or support, contact +8801719296601 (via Whatsapp)</p>
            </div>
        </body>
        </html>
        """

        template = Template(html_template)
        # NOTE(review): Markdown passes raw HTML through, so model output that
        # contains markup is embedded verbatim. Sanitising it needs an HTML
        # sanitiser that this file does not currently depend on.
        ai_analysis_html = markdown.markdown(analysis_text, extensions=['extra', 'tables'])
        charts_content = "\n".join(self.current_charts) if self.current_charts else "<p>No visualizations available</p>"

        return template.render(
            file_name=file_name or "Unknown",
            timestamp=datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            ai_analysis=ai_analysis_html,
            charts_html=charts_content,
            data_summary=data_summary
        )

# Single module-level analyzer used by the handler functions below. Its
# current_df / current_charts attributes are overwritten each time a file is
# processed and reset by clear_all().
# NOTE(review): one instance is shared by every caller of this module - confirm
# that is acceptable if several users can run analyses at the same time.
analyzer = EnhancedDataAnalyzer()

async def analyze_data(file, api_key, user_question="", progress=gr.Progress()):
    """Validate the inputs, process the uploaded file and request an AI analysis.

    Args:
        file: Uploaded file object whose path is available as ``file.name``.
        api_key: Chutes API token.
        user_question: Optional question; empty means a general analysis.
        progress: Gradio progress tracker used to report the current stage.

    Returns:
        A 5-tuple ``(response_markdown, data_summary, preview_html,
        charts_html, file_path)``. On any failure the first element is an
        error message, the next three are empty strings and the last is None.
    """
    if not file:
        return "❌ Please upload a CSV or Excel file.", "", "", "", None

    if not analyzer.validate_api_key(api_key):
        return "❌ Please enter a valid Chutes API key (minimum 10 characters).", "", "", "", None

    is_valid, validation_msg = analyzer.validate_file(file)
    if not is_valid:
        return f"❌ {validation_msg}", "", "", "", None

    progress(0.1, desc="📁 Reading file...")
    try:
        df, data_summary, charts_html = analyzer.process_file(file.name)
        progress(0.3, desc="📊 Processing data...")
        progress(0.5, desc="🤖 Generating AI insights...")
        ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)
        progress(0.9, desc="✨ Finalizing results...")

        response = f"""# 🎯 Analysis Complete!
{ai_analysis}
---
*Analysis powered by OpenAI gpt-oss-20b via Chutes • Generated at {datetime.now().strftime('%H:%M:%S')}*
"""
        # Cell values come from the uploaded file, so they must be HTML-escaped
        # before being shown in an HTML component (escape=True is the default).
        data_preview_html = df.head(15).to_html(
            classes="table table-striped table-hover",
            table_id="data-preview-table",
            escape=True
        )
        styled_preview = f"""
        <style>
            #data-preview-table {{
                width: 100%;
                border-collapse: collapse;
                margin: 20px 0;
                font-size: 14px;
            }}
            #data-preview-table th {{
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                color: white;
                padding: 12px 8px;
                text-align: left;
                font-weight: bold;
            }}
            #data-preview-table td {{
                padding: 10px 8px;
                border-bottom: 1px solid #ddd;
            }}
            #data-preview-table tr:hover {{
                background-color: #f5f5f5;
            }}
        </style>
        {data_preview_html}
        """

        progress(1.0, desc="✅ Done!")
        return response, data_summary, styled_preview, charts_html, file.name

    except Exception as e:
        # Boundary handler: report the failure in the UI instead of raising.
        logger.error("Analysis error: %s", e)
        return f"❌ **Error**: {str(e)}", "", "", "", None

def sync_analyze_data(file, api_key, user_question="", progress=gr.Progress()):
    """Blocking wrapper: run the ``analyze_data`` coroutine to completion and return its result."""
    pending = analyze_data(file, api_key, user_question, progress)
    return asyncio.run(pending)

def clear_all():
    """Drop the analyzer's cached data and return blank values for the eight cleared outputs."""
    for cached_attr in ("current_df", "current_charts"):
        setattr(analyzer, cached_attr, None)
    # First and last outputs are reset to None, the six in between to "".
    empty_texts = ("",) * 6
    return (None, *empty_texts, None)

def download_report(analysis_text, data_summary, file_name, format_choice):
    """Write the analysis to a report file and return its path.

    Args:
        analysis_text: Markdown text of the analysis; nothing is written when empty.
        data_summary: Plain-text data summary to include.
        file_name: Name or path of the analysed file; only its base name is
            used for the report name and shown inside the report.
        format_choice: ``"HTML"`` for an HTML report, anything else for Markdown.

    Returns:
        ``(report_path, status_message)``; ``report_path`` is None on failure.
    """
    if not analysis_text:
        return None, "❌ No analysis data available for download."

    import tempfile  # local import keeps this function self-contained

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    # Use only the base name: the incoming value may be a full server-side
    # path, which must not decide where the report is written nor be shown.
    display_name = os.path.basename(file_name) if file_name else ""
    file_base_name = os.path.splitext(display_name)[0] if display_name else "data_analysis"

    try:
        # A fresh temp directory avoids writing into the working directory or
        # depending on the upload directory still existing.
        output_dir = tempfile.mkdtemp(prefix="analysis_report_")

        if format_choice == "HTML":
            html_content = analyzer.generate_report_html(analysis_text, data_summary, display_name or "Unknown")
            filename = os.path.join(output_dir, f"{file_base_name}_analysis_report_{timestamp}.html")
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(html_content)
            return filename, f"✅ HTML report generated successfully! File: {os.path.basename(filename)}"

        else:  # Markdown
            report = f"""# Data Analysis Report
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
File: {display_name}
## AI Analysis:
{analysis_text}
## Raw Data Summary:
{data_summary}
"""
            filename = os.path.join(output_dir, f"{file_base_name}_analysis_report_{timestamp}.md")
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(report)
            return filename, f"✅ Markdown report generated successfully! File: {os.path.basename(filename)}"

    except Exception as e:
        logger.error("Report generation error: %s", e)
        return None, f"❌ Error generating report: {str(e)}"

# Build the Gradio UI: configuration column, results column, result tabs,
# the handler functions and the event wiring that connects them.
with gr.Blocks(
    title="🚀 Smart Data Analyzer Pro", 
    theme=gr.themes.Ocean(),
    css="""
    .gradio-container {
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }
    .tab-nav {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    }
    .upload-area {
        border: 2px dashed #667eea;
        border-radius: 10px;
        padding: 20px;
        text-align: center;
        background: #f8f9ff;
    }
    """
) as app:
    # Path of the last analysed file; passed to download_report for naming.
    current_file_name = gr.State("")
    
    gr.Markdown("""
    # 🚀 Smart Data Analyzer Pro
    ### AI-Powered Excel & CSV Analysis with OpenAI gpt-oss-20b
    
    Upload your data files and get instant professional insights and downloadable reports!
    """)
    
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Configuration")
            api_key_input = gr.Textbox(
                label="🔑 Chutes API Key",
                placeholder="sk-chutes-your-api-key-here...",
                type="password",
                lines=1,
                info="Get your free API key from chutes.ai"
            )
            file_input = gr.File(
                label="📁 Upload Data File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single",
                elem_classes=["upload-area"]
            )
            with gr.Row():
                analyze_btn = gr.Button("🚀 Analyze Data", variant="primary", size="lg")
                clear_btn = gr.Button("🗑️ Clear All", variant="secondary")
            with gr.Group():
                gr.Markdown("### 📊 Quick Stats")
                file_stats = gr.Textbox(
                    label="File Information",
                    lines=3,
                    interactive=False,
                    placeholder="Upload a file to see statistics..."
                )
        
        with gr.Column(scale=2):
            gr.Markdown("### 🎯 Analysis Results")
            analysis_output = gr.Markdown(
                value="📋 **Ready to analyze your data!**\n\nUpload a CSV or Excel file and click 'Analyze Data' to get started.",
                show_label=False
            )
    
    with gr.Tabs():
        with gr.Tab("💬 Ask Questions"):
            question_input = gr.Textbox(
                label="❓ Ask Specific Questions About Your Data",
                placeholder="Examples:\n• What are the top 5 customers by revenue?\n• Are there any seasonal trends?\n• Which products have the highest margins?\n• What anomalies do you see in this data?",
                lines=3
            )
            ask_btn = gr.Button("🔍 Get Answer", variant="primary")
            question_output = gr.Markdown()
        
        with gr.Tab("📊 Data Preview"):
            data_preview = gr.HTML(
                label="Dataset Preview",
                value="<p>Upload a file to see data preview...</p>"
            )
        
        with gr.Tab("🔍 Raw Summary"):
            raw_summary = gr.Textbox(
                label="Detailed Data Summary",
                lines=15,
                max_lines=20,
                show_copy_button=True
            )
        
        with gr.Tab("💾 Export Reports"):
            gr.Markdown("### 📥 Download Your Analysis Report")
            with gr.Row():
                format_choice = gr.Radio(
                    choices=["HTML", "Markdown"],
                    value="HTML",
                    label="📄 Report Format",
                    info="Choose your preferred download format"
                )
            download_btn = gr.Button("📥 Generate & Download Report", variant="primary", size="lg")
            download_status = gr.Textbox(label="Download Status", interactive=False)
            download_file = gr.File(label="📄 Download Link", visible=True)

    def update_file_stats(file):
        """Return a short description (name, size, upload time) of the selected file."""
        if not file:
            return "No file uploaded"
        try:
            file_size = os.path.getsize(file.name) / (1024 * 1024)
            file_name = os.path.basename(file.name)
            return f"📄 **File**: {file_name}\n📏 **Size**: {file_size:.2f} MB\n⏰ **Uploaded**: {datetime.now().strftime('%H:%M:%S')}"
        except (OSError, AttributeError):
            # Only a missing path or unreadable file is expected here; other
            # errors are allowed to surface instead of being swallowed.
            return "File information unavailable"
    
    def handle_analysis(file, api_key, user_question="", progress=gr.Progress()):
        """Run the analysis and return the four values shown by the Analyze button."""
        # sync_analyze_data always yields a 5-tuple; the charts HTML (4th item)
        # has no output component here and is dropped.
        response, summary, preview, _charts, source_name = sync_analyze_data(
            file, api_key, user_question, progress
        )
        return response, summary, preview, source_name
    
    def handle_question_analysis(file, api_key, question, progress=gr.Progress()):
        """Answer a specific question about the uploaded file."""
        if not question or not question.strip():
            return "❓ Please enter a specific question about your data."
        result = sync_analyze_data(file, api_key, question, progress)
        return result[0]
    
    analyze_btn.click(
        fn=handle_analysis,
        # The hidden textbox supplies an empty question for the general analysis.
        inputs=[file_input, api_key_input, gr.Textbox(value="", visible=False)],
        outputs=[analysis_output, raw_summary, data_preview, current_file_name],
        show_progress=True
    )
    
    ask_btn.click(
        fn=handle_question_analysis,
        inputs=[file_input, api_key_input, question_input],
        outputs=[question_output],
        show_progress=True
    )
    
    file_input.change(
        fn=update_file_stats,
        inputs=[file_input],
        outputs=[file_stats]
    )
    
    # clear_all returns one value per component listed here, in this order.
    clear_btn.click(
        fn=clear_all,
        outputs=[file_input, api_key_input, question_input, analysis_output, 
                question_output, data_preview, raw_summary, current_file_name]
    )
    
    download_btn.click(
        fn=download_report,
        inputs=[analysis_output, raw_summary, current_file_name, format_choice],
        outputs=[download_file, download_status]
    )
    
    gr.Markdown("""
    ---
    ### 💡 Pro Tips for Better Analysis:
    
    **🎯 For Best Results:**
    - Clean your data before upload (remove extra headers, format dates consistently)
    - Use descriptive column names
    - Ask specific questions like "What drives the highest profits?" instead of "Analyze this data"
    
    **📥 Export Options:**
    - **HTML**: Interactive report with embedded charts and print-to-PDF option
    - **Markdown**: Simple text format for documentation
    
    **⚡ Speed Optimization:**
    - Files under 10MB process fastest
    - CSV files typically load faster than Excel
    - Limit to essential columns for quicker analysis
    
    **🔧 Supported Formats:** CSV, XLSX, XLS | **📏 Max Size:** 50MB | **🚀 Response Time:** ~3-5 seconds
    """)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    # Configure the app's queue with max_size=10 before launching it.
    app.queue(max_size=10)
    app.launch()