import gradio as gr
import pandas as pd
import aiohttp
import asyncio
import json
import io
import os
from typing import Optional, Tuple


class DataAnalyzer:
    def __init__(self):
        self.api_base_url = "https://llm.chutes.ai/v1/chat/completions"

    async def analyze_with_chutes(self, api_token: str, data_summary: str, user_question: Optional[str] = None) -> str:
"""Send data to Chutes API for analysis""" | |
headers = { | |
"Authorization": f"Bearer {api_token}", | |
"Content-Type": "application/json" | |
} | |
# Create the prompt based on whether it's initial analysis or follow-up question | |
if user_question: | |
prompt = f"""Based on this dataset summary: | |
{data_summary} | |
User question: {user_question} | |
Please provide a detailed answer based on the data.""" | |
else: | |
prompt = f"""Analyze the following dataset and provide comprehensive insights: | |
{data_summary} | |
Please provide: | |
1. Key statistical insights | |
2. Notable patterns or trends | |
3. Data quality observations | |
4. Business recommendations | |
5. Potential areas for further analysis | |
Keep the analysis clear, actionable, and data-driven.""" | |

        body = {
            "model": "openai/gpt-oss-20b",
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "stream": True,
            "max_tokens": 2048,
            "temperature": 0.3  # Lower temperature for more consistent analysis
        }
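
        # Each streamed line is expected to follow the OpenAI-compatible SSE format that
        # the loop below parses, roughly (exact fields may vary by provider):
        #   data: {"choices": [{"delta": {"content": "..."}}, ...]}
        #   data: [DONE]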
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(self.api_base_url, headers=headers, json=body) as response:
                    if response.status != 200:
                        return f"Error: API request failed with status {response.status}"

                    full_response = ""
                    async for line in response.content:
                        line = line.decode("utf-8").strip()
                        if line.startswith("data: "):
                            data = line[6:]
                            if data == "[DONE]":
                                break
                            try:
                                chunk_data = json.loads(data)
                                if "choices" in chunk_data and len(chunk_data["choices"]) > 0:
                                    delta = chunk_data["choices"][0].get("delta", {})
                                    content = delta.get("content", "")
                                    if content:
                                        full_response += content
                            except json.JSONDecodeError:
                                continue

                    return full_response if full_response else "No response received from the model."

        except Exception as e:
            return f"Error connecting to Chutes API: {str(e)}"

    def process_file(self, file_path: str) -> Tuple[pd.DataFrame, str]:
        """Process uploaded CSV or Excel file"""
        try:
            file_extension = os.path.splitext(file_path)[1].lower()

            if file_extension == '.csv':
                df = pd.read_csv(file_path)
            elif file_extension in ['.xlsx', '.xls']:
                df = pd.read_excel(file_path)
            else:
                raise ValueError("Unsupported file format. Please upload CSV or Excel files.")

            # Generate comprehensive data summary
            summary = self.generate_data_summary(df)
            return df, summary

        except Exception as e:
            raise Exception(f"Error processing file: {str(e)}")

    def generate_data_summary(self, df: pd.DataFrame) -> str:
        """Generate a comprehensive summary of the dataset"""
        summary = []

        # Basic info
        summary.append("Dataset Overview:")
        summary.append(f"- Shape: {df.shape[0]} rows × {df.shape[1]} columns")
        summary.append(f"- Total cells: {df.shape[0] * df.shape[1]:,}")

        # Column information
        summary.append("\nColumn Information:")
        for col, dtype in df.dtypes.items():
            null_count = df[col].isnull().sum()
            null_pct = (null_count / len(df)) * 100
            summary.append(f"- {col} ({dtype}): {null_count} nulls ({null_pct:.1f}%)")

        # Numerical columns statistics
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 0:
            summary.append("\nNumerical Columns Summary:")
            for col in numeric_cols:
                stats = df[col].describe()
                summary.append(f"- {col}: Mean={stats['mean']:.2f}, Std={stats['std']:.2f}, Range=[{stats['min']:.2f}, {stats['max']:.2f}]")

        # Categorical columns
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns
        if len(categorical_cols) > 0:
            summary.append("\nCategorical Columns Summary:")
            for col in categorical_cols:
                unique_count = df[col].nunique()
                most_common = df[col].mode().iloc[0] if len(df[col].mode()) > 0 else "N/A"
                summary.append(f"- {col}: {unique_count} unique values, Most common: '{most_common}'")

        # Sample data
        summary.append("\nFirst 5 rows preview:")
        summary.append(df.head().to_string())

        return "\n".join(summary)

# Initialize the analyzer
analyzer = DataAnalyzer()


async def analyze_data(file, api_key, user_question=""):
    """Main function to analyze uploaded data"""
    if not file:
        return "Please upload a CSV or Excel file.", "", ""

    if not api_key:
        return "Please enter your Chutes API key.", "", ""

    try:
        # Process the uploaded file (gr.File may pass a path string or a tempfile-like
        # object with a .name attribute, depending on the Gradio version)
        file_path = file.name if hasattr(file, "name") else file
        df, data_summary = analyzer.process_file(file_path)

        # Get AI analysis
        ai_analysis = await analyzer.analyze_with_chutes(api_key, data_summary, user_question)

        # Format the complete response
response = f"""## π Data Analysis Complete! | |
### π Dataset Overview: | |
{data_summary} | |
### π€ AI Insights & Recommendations: | |
{ai_analysis} | |
""" | |
        return response, data_summary, df.head(10).to_html()

    except Exception as e:
        return f"Error: {str(e)}", "", ""


def sync_analyze_data(file, api_key, user_question=""):
    """Synchronous wrapper for the async analyze function"""
    return asyncio.run(analyze_data(file, api_key, user_question))
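
# Note: recent Gradio versions also accept async functions directly as event handlers,
# so analyze_data could be passed to .click() without this wrapper; the synchronous
# wrapper is an alternative that works where handlers must stay sync.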


# Create the Gradio interface
with gr.Blocks(title="📊 Smart Data Analyzer", theme=gr.themes.Ocean()) as app:
    gr.Markdown("""
    # 📊 Smart Data Analyzer
    ### Upload your CSV/Excel file and get instant AI-powered insights using OpenAI's gpt-oss-20b model via Chutes!
    """)

    with gr.Row():
        with gr.Column(scale=1):
            # File upload
            file_input = gr.File(
                label="📁 Upload CSV or Excel File",
                file_types=[".csv", ".xlsx", ".xls"],
                file_count="single"
            )

            # API key input
            api_key_input = gr.Textbox(
                label="🔑 Chutes API Key",
                placeholder="Enter your Chutes API token here...",
                type="password",
                lines=1
            )

            # Optional question input
            question_input = gr.Textbox(
                label="❓ Ask a Specific Question (Optional)",
                placeholder="e.g., What are the sales trends? Which region performs best?",
                lines=2
            )

            # Analyze button
            analyze_btn = gr.Button("🚀 Analyze Data", variant="primary", size="lg")

        with gr.Column(scale=2):
            # Results display
            analysis_output = gr.Markdown(
                label="📊 Analysis Results",
                value="Upload a file and click 'Analyze Data' to see insights..."
            )

    # Additional outputs (hidden by default)
    with gr.Accordion("📋 Data Preview", open=False):
        data_preview = gr.HTML(label="First 10 Rows")

    with gr.Accordion("📄 Raw Data Summary", open=False):
        raw_summary = gr.Textbox(label="Dataset Summary", lines=10)

    # Event handlers
    analyze_btn.click(
        fn=sync_analyze_data,
        inputs=[file_input, api_key_input, question_input],
        outputs=[analysis_output, raw_summary, data_preview]
    )
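
    # The three outputs correspond positionally to the tuple returned by analyze_data:
    # the formatted markdown report, the raw text summary, and the HTML preview of the
    # first 10 rows.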

    # Example section
    gr.Markdown("""
    ### 💡 Tips for Best Results:
    - **File Size**: Keep files under 10MB for fastest processing
    - **API Key**: Get your free Chutes API key from [chutes.ai](https://chutes.ai)
    - **Questions**: Be specific! Ask about trends, patterns, outliers, or recommendations
    - **Formats**: Supports CSV, XLSX, and XLS files

    ### 🎯 Example Questions to Ask:
    - "What are the key trends in this sales data?"
    - "Which products are underperforming?"
    - "Are there any seasonal patterns?"
    - "What recommendations do you have based on this data?"
    """)


# Launch the application
if __name__ == "__main__":
    app.launch(
        share=True
    )
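    # If this app is hosted as a Hugging Face Space, the Space already exposes a public
    # URL; share=True is mainly useful for local runs where a temporary public link is wanted.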