from flask import Flask, render_template, request, session, redirect, url_for, flash import pandas as pd import plotly.express as px import plotly.graph_objects as go from collections import defaultdict, Counter import json import os import google.generativeai as genai import tempfile from pathlib import Path import time from dotenv import load_dotenv from tenacity import retry, stop_after_attempt, wait_exponential import traceback app = Flask(__name__) app.secret_key = '0fdd675e2c6f513deb04c79bd7ddb7e0' # IMPORTANT: Change this to a strong, random key in production app.config['UPLOAD_FOLDER'] = 'uploads' app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max file size app.config['TEMP_FOLDER'] = Path(tempfile.gettempdir()) / 'placement_analyzer' # Update the Gemini configuration to use environment variable load_dotenv() # Make sure API key is available api_key ="AIzaSyBLcWuSj6N1bkhQsTF4kt3_hFh4ibH11pQ" if not api_key: print("WARNING: GEMINI_API_KEY not found in environment variables. AI insights will not be available.") model = None else: try: genai.configure(api_key=api_key) model = genai.GenerativeModel('gemini-2.0-flash') print("Gemini model configured successfully.") except Exception as e: print(f"Error configuring Gemini model: {e}. AI insights will not be available.") model = None def validate_data_columns(data, required_columns): """ Validate that the DataFrame contains all required columns. """ # Create a mapping of lowercase column names to actual column names column_mapping = {col.lower(): col for col in data.columns} # Check if required columns exist (case-insensitive) actual_required_columns = [] missing_columns = [] for req_col in required_columns: if req_col.lower() in column_mapping: actual_required_columns.append(column_mapping[req_col.lower()]) else: missing_columns.append(req_col) if missing_columns: raise ValueError( f"Missing required columns: {', '.join(missing_columns)}. Please ensure your CSV has the correct column names.") # Validate data types and non-empty values if 'Package' in actual_required_columns: data['Package'] = pd.to_numeric(data['Package'], errors='coerce') if data['Package'].isna().all(): raise ValueError("Package column contains no valid numeric values or is entirely empty.") if 'Year of Placement' in actual_required_columns: # This might be 'Year' after rename, but check original # Use the actual column name for 'Year of Placement' if it exists year_col_name = column_mapping.get('year of placement', None) if year_col_name and not data[year_col_name].isna().all(): data[year_col_name] = pd.to_numeric(data[year_col_name], errors='coerce') if data[year_col_name].isna().all(): raise ValueError("Year of Placement column contains no valid numeric values or is entirely empty.") elif year_col_name: # If column exists but is all NA raise ValueError("Year of Placement column is entirely empty.") return True def generate_graphs(data): """ Generate comprehensive graphs based on the provided placement data. """ graphs = [] try: print(f"Generating graphs for {len(data)} records...") # Ensure column names are standardized for graph generation data.columns = [col.replace('Year of Placement', 'Year').replace('Post', 'Role') for col in data.columns] # 1. Department-wise Placement Distribution if 'Department' in data.columns and not data['Department'].isna().all(): print("Generating Department-wise Placement Distribution...") department_counts = data['Department'].value_counts().reset_index() department_counts.columns = ['Department', 'Count'] fig1 = px.bar(department_counts, x='Department', y='Count', title="Department-wise Placement Distribution", color_discrete_sequence=['#2563eb']) fig1.update_layout(height=500, xaxis_title="Department", yaxis_title="Number of Placements") graphs.append({"graph": fig1.to_html(full_html=False), "title": "Department-wise Placement Distribution"}) # 2. Package Distribution if 'Package' in data.columns and not data['Package'].isna().all(): print("Generating Package Distribution...") package_data = data.dropna(subset=['Package']) if not package_data.empty: fig2 = px.histogram(package_data, x='Package', title="Distribution of Package Values", color_discrete_sequence=['#10b981']) fig2.update_layout(height=500, xaxis_title="Package (e.g., in LPA)", yaxis_title="Number of Students") graphs.append({"graph": fig2.to_html(full_html=False), "title": "Distribution of Package Values"}) # 3. Average Package by Department if all(col in data.columns for col in ['Department', 'Package']): print("Generating Average Package by Department...") clean_data = data.dropna(subset=['Department', 'Package']) if not clean_data.empty: avg_package = clean_data.groupby('Department')['Package'].mean().reset_index() fig3 = px.bar(avg_package, x='Department', y='Package', title="Average Package by Department", color_discrete_sequence=['#3b82f6']) fig3.update_layout(height=500, xaxis_title="Department", yaxis_title="Average Package (e.g., in LPA)") graphs.append({"graph": fig3.to_html(full_html=False), "title": "Average Package by Department"}) # 4. Year-wise Placement Trends (Using 'Year' column after rename) if 'Year' in data.columns and not data['Year'].isna().all(): print("Generating Year-wise Placement Trends...") year_counts = data['Year'].value_counts().sort_index().reset_index() year_counts.columns = ['Year', 'Count'] fig4 = px.line(year_counts, x='Year', y='Count', title="Placement Trends Over Years", markers=True, color_discrete_sequence=['#f59e0b']) fig4.update_layout(height=500, xaxis_title="Year", yaxis_title="Number of Placements") graphs.append({"graph": fig4.to_html(full_html=False), "title": "Placement Trends Over Years"}) # 5. Company-wise Placements if 'Company' in data.columns and not data['Company'].isna().all(): print("Generating Company-wise Placements...") top_companies = data['Company'].value_counts().head(10).reset_index() top_companies.columns = ['Company', 'Count'] fig5 = px.pie(top_companies, values='Count', names='Company', title="Top 10 Recruiting Companies", hole=0.4) fig5.update_layout(height=500, margin=dict(t=50, b=50, l=50, r=50)) graphs.append({"graph": fig5.to_html(full_html=False), "title": "Top 10 Recruiting Companies"}) # 6. Top 10 Job Roles if 'Role' in data.columns and not data['Role'].isna().all(): print("Generating Top 10 Job Roles...") top_roles = data['Role'].value_counts().head(10).reset_index() top_roles.columns = ['Role', 'Count'] fig6 = px.bar(top_roles, x='Count', y='Role', orientation='h', title='Top 10 Job Roles Placed', color_discrete_sequence=px.colors.qualitative.Pastel) fig6.update_layout(height=500, yaxis={'categoryorder': 'total ascending'}, xaxis_title="Number of Placements", yaxis_title="Job Role") graphs.append({"graph": fig6.to_html(full_html=False), "title": "Top 10 Job Roles Placed"}) print(f"Generated {len(graphs)} graphs successfully") return graphs except Exception as e: print(f"Error generating graphs: {str(e)}") traceback.print_exc() return [] @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) def generate_single_insight(prompt, model): try: response = model.generate_content( prompt + "\n\nProvide a brief analysis in 2-3 concise bullet points, formatted as HTML