|
from flask import Flask, render_template, request, session, redirect, url_for, flash
|
|
import pandas as pd
|
|
import plotly.express as px
|
|
import plotly.graph_objects as go
|
|
from collections import defaultdict, Counter
|
|
import json
|
|
import os
|
|
import google.generativeai as genai
|
|
import tempfile
|
|
from pathlib import Path
|
|
import time
|
|
from dotenv import load_dotenv
|
|
from tenacity import retry, stop_after_attempt, wait_exponential
|
|
import traceback
|
|
|
|
app = Flask(__name__)

# Load variables from a local .env file BEFORE any os.environ reads below
# (secret key, Gemini API key). The original code called this after reading
# the environment, which made .env values invisible to the config.
load_dotenv()

# SECURITY: secrets must come from the environment, never from source control.
# The secret-key fallback keeps local development working when no env is set.
app.secret_key = os.environ.get('SECRET_KEY', '0fdd675e2c6f513deb04c79bd7ddb7e0')
app.config['UPLOAD_FOLDER'] = 'uploads'
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16 MB cap; Flask answers 413 beyond this
app.config['TEMP_FOLDER'] = Path(tempfile.gettempdir()) / 'placement_analyzer'

# Configure the Gemini client if an API key is available. `model` stays None
# when AI insights cannot be generated; generate_insights() checks for that.
# (Previously a literal API key was committed here — it must be revoked and
# supplied via the GEMINI_API_KEY environment variable instead.)
api_key = os.environ.get("GEMINI_API_KEY")

if not api_key:
    print("WARNING: GEMINI_API_KEY not found in environment variables. AI insights will not be available.")
    model = None
else:
    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-2.0-flash')
        print("Gemini model configured successfully.")
    except Exception as e:
        # Any configuration failure degrades gracefully to "no AI insights".
        print(f"Error configuring Gemini model: {e}. AI insights will not be available.")
        model = None
|
|
|
|
|
|
def validate_data_columns(data, required_columns):
    """
    Validate that the DataFrame contains all required columns (case-insensitive)
    and that the numeric columns hold at least one usable value.

    Args:
        data: pandas DataFrame parsed from the uploaded CSV. 'Package' and
            'Year of Placement' columns are coerced to numeric in place.
        required_columns: iterable of expected column names (matched
            case-insensitively against data.columns).

    Returns:
        True when validation passes.

    Raises:
        ValueError: when a required column is missing, or when 'Package' /
            'Year of Placement' contain no valid numeric values.
    """
    # Map lower-cased names to their actual spelling so every check below is
    # case-insensitive. (The original code built this mapping but then tested
    # 'Package' against the *actual* spellings, so a CSV with a lowercase
    # 'package' header silently skipped numeric validation.)
    column_mapping = {col.lower(): col for col in data.columns}

    missing_columns = [req for req in required_columns
                       if req.lower() not in column_mapping]
    if missing_columns:
        raise ValueError(
            f"Missing required columns: {', '.join(missing_columns)}. Please ensure your CSV has the correct column names.")

    required_lower = {req.lower() for req in required_columns}

    # Coerce the package column to numeric; reject it if nothing survives.
    package_col = column_mapping.get('package')
    if package_col and 'package' in required_lower:
        data[package_col] = pd.to_numeric(data[package_col], errors='coerce')
        if data[package_col].isna().all():
            raise ValueError("Package column contains no valid numeric values or is entirely empty.")

    # Same treatment for the placement year, with a dedicated message for a
    # column that is empty before coercion.
    year_col = column_mapping.get('year of placement')
    if year_col and 'year of placement' in required_lower:
        if data[year_col].isna().all():
            raise ValueError("Year of Placement column is entirely empty.")
        data[year_col] = pd.to_numeric(data[year_col], errors='coerce')
        if data[year_col].isna().all():
            raise ValueError("Year of Placement column contains no valid numeric values or is entirely empty.")

    return True
|
|
|
|
|
|
def generate_graphs(data):
    """
    Generate comprehensive graphs based on the provided placement data.

    Each chart is produced only when its column(s) exist and contain at least
    one non-NaN value, so partial CSVs still yield whatever charts are possible.

    Args:
        data: pandas DataFrame; may contain Department, Package,
            'Year of Placement' (or 'Year'), Company, 'Post' (or 'Role').

    Returns:
        list of dicts {"graph": <plotly HTML fragment>, "title": str};
        an empty list on any unexpected error (logged with a traceback).
    """
    graphs = []

    try:
        print(f"Generating graphs for {len(data)} records...")

        # Normalise column names with an exact-match rename. The previous
        # substring str.replace() corrupted unrelated columns (e.g. a
        # 'Postal Code' column became 'Roleal Code').
        data = data.rename(columns={'Year of Placement': 'Year', 'Post': 'Role'})

        # 1. Placements per department (bar chart).
        if 'Department' in data.columns and not data['Department'].isna().all():
            print("Generating Department-wise Placement Distribution...")
            department_counts = data['Department'].value_counts().reset_index()
            department_counts.columns = ['Department', 'Count']
            fig1 = px.bar(department_counts,
                          x='Department', y='Count',
                          title="Department-wise Placement Distribution",
                          color_discrete_sequence=['#2563eb'])
            fig1.update_layout(height=500, xaxis_title="Department", yaxis_title="Number of Placements")
            graphs.append({"graph": fig1.to_html(full_html=False), "title": "Department-wise Placement Distribution"})

        # 2. Histogram of package values.
        if 'Package' in data.columns and not data['Package'].isna().all():
            print("Generating Package Distribution...")
            package_data = data.dropna(subset=['Package'])
            if not package_data.empty:
                fig2 = px.histogram(package_data,
                                    x='Package',
                                    title="Distribution of Package Values",
                                    color_discrete_sequence=['#10b981'])
                fig2.update_layout(height=500, xaxis_title="Package (e.g., in LPA)", yaxis_title="Number of Students")
                graphs.append({"graph": fig2.to_html(full_html=False), "title": "Distribution of Package Values"})

        # 3. Mean package per department (bar chart).
        if all(col in data.columns for col in ['Department', 'Package']):
            print("Generating Average Package by Department...")
            clean_data = data.dropna(subset=['Department', 'Package'])
            if not clean_data.empty:
                avg_package = clean_data.groupby('Department')['Package'].mean().reset_index()
                fig3 = px.bar(avg_package,
                              x='Department', y='Package',
                              title="Average Package by Department",
                              color_discrete_sequence=['#3b82f6'])
                fig3.update_layout(height=500, xaxis_title="Department", yaxis_title="Average Package (e.g., in LPA)")
                graphs.append({"graph": fig3.to_html(full_html=False), "title": "Average Package by Department"})

        # 4. Placements over time (line chart, sorted chronologically).
        if 'Year' in data.columns and not data['Year'].isna().all():
            print("Generating Year-wise Placement Trends...")
            year_counts = data['Year'].value_counts().sort_index().reset_index()
            year_counts.columns = ['Year', 'Count']
            fig4 = px.line(year_counts,
                           x='Year', y='Count',
                           title="Placement Trends Over Years",
                           markers=True,
                           color_discrete_sequence=['#f59e0b'])
            fig4.update_layout(height=500, xaxis_title="Year", yaxis_title="Number of Placements")
            graphs.append({"graph": fig4.to_html(full_html=False), "title": "Placement Trends Over Years"})

        # 5. Top 10 recruiters (donut chart).
        if 'Company' in data.columns and not data['Company'].isna().all():
            print("Generating Company-wise Placements...")
            top_companies = data['Company'].value_counts().head(10).reset_index()
            top_companies.columns = ['Company', 'Count']
            fig5 = px.pie(top_companies,
                          values='Count', names='Company',
                          title="Top 10 Recruiting Companies",
                          hole=0.4)
            fig5.update_layout(height=500, margin=dict(t=50, b=50, l=50, r=50))
            graphs.append({"graph": fig5.to_html(full_html=False), "title": "Top 10 Recruiting Companies"})

        # 6. Top 10 job roles (horizontal bar, largest at top).
        if 'Role' in data.columns and not data['Role'].isna().all():
            print("Generating Top 10 Job Roles...")
            top_roles = data['Role'].value_counts().head(10).reset_index()
            top_roles.columns = ['Role', 'Count']
            fig6 = px.bar(top_roles, x='Count', y='Role', orientation='h',
                          title='Top 10 Job Roles Placed',
                          color_discrete_sequence=px.colors.qualitative.Pastel)
            fig6.update_layout(height=500, yaxis={'categoryorder': 'total ascending'},
                               xaxis_title="Number of Placements", yaxis_title="Job Role")
            graphs.append({"graph": fig6.to_html(full_html=False), "title": "Top 10 Job Roles Placed"})

        print(f"Generated {len(graphs)} graphs successfully")
        return graphs

    except Exception as e:
        # Chart generation must never crash the request handler; callers treat
        # an empty list as "nothing could be generated".
        print(f"Error generating graphs: {str(e)}")
        traceback.print_exc()
        return []
|
|
|
|
|
|
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def generate_single_insight(prompt, model):
    """Ask the Gemini model for one insight and return it as an HTML <ul> list.

    The call is retried up to 3 times with exponential backoff (tenacity);
    any exception is logged and re-raised so the decorator can retry it.
    """
    try:
        full_prompt = (
            prompt
            + "\n\nProvide a brief analysis in 2-3 concise bullet points, formatted as HTML <ul><li> tags. Ensure the output is only the HTML."
        )
        response = model.generate_content(full_prompt)

        if not response or not response.text:
            print("Empty response received from Gemini")
            return "<ul><li>No insight generated - empty response from AI.</li></ul>"

        # Strip markdown code fences that the model sometimes wraps around HTML.
        text = response.text.replace('```html', '').replace('```', '').strip()

        # Already a well-formed list? Return it untouched.
        if text.startswith('<ul>') and text.endswith('</ul>'):
            return text

        # Otherwise wrap loose bullet lines into <ul><li> ourselves.
        bullet_lines = [ln.strip() for ln in text.split('\n') if ln.strip()]
        if not bullet_lines:
            return "<ul><li>AI insight could not be properly formatted.</li></ul>"
        items = "".join(f"<li>{ln.lstrip('- ').lstrip('* ')}</li>" for ln in bullet_lines)
        return "<ul>" + items + "</ul>"
    except Exception as e:
        print(f"Error in generate_single_insight: {type(e).__name__}: {str(e)}")
        raise
|
|
|
|
|
|
def generate_insights(data, graph_titles):
    """
    Generate one AI insight (HTML <ul> fragment) per graph title using Gemini.

    Args:
        data: pandas DataFrame of placement records (columns may include
            Department, Package, Company, 'Year of Placement'/'Year',
            'Post'/'Role').
        graph_titles: list of graph titles, in display order.

    Returns:
        list of HTML strings, one per title, in the same order. When the AI
        model is unavailable or a call fails, placeholder HTML is returned so
        the list length always matches graph_titles.
    """
    if not model:
        print("No AI model available, returning default insights")
        return ["<ul><li>AI insights not available - missing API key or configuration error.</li></ul>"] * len(
            graph_titles)

    insights = []

    # Normalise column names with an exact-match rename; the previous
    # substring str.replace() corrupted unrelated columns containing
    # 'Post' or 'Year of Placement' as a substring.
    data = data.rename(columns={'Year of Placement': 'Year', 'Post': 'Role'})

    try:
        def _usable(col):
            # A column is usable when present and not entirely NaN.
            return col in data.columns and not data[col].isna().all()

        # Summary statistics shared by every prompt; 'N/A' where a column is
        # missing or empty so the prompt text stays well-formed.
        stats = {
            'total_placements': len(data),
            'avg_package': round(data['Package'].mean(), 2) if _usable('Package') else 'N/A',
            'departments': data['Department'].nunique() if _usable('Department') else 'N/A',
            'companies': data['Company'].nunique() if _usable('Company') else 'N/A',
            'max_package': round(data['Package'].max(), 2) if _usable('Package') else 'N/A',
            'min_package': round(data['Package'].min(), 2) if _usable('Package') else 'N/A',
            'median_package': round(data['Package'].median(), 2) if _usable('Package') else 'N/A',
            # Range "min-max" when multiple years, single year otherwise.
            'years_covered': (
                f"{data['Year'].min()}-{data['Year'].max()}"
                if _usable('Year') and len(data['Year'].dropna().unique()) > 1
                else str(data['Year'].min()) if _usable('Year') else 'N/A'
            ),
            'top_department': data['Department'].value_counts().idxmax() if _usable('Department') else 'N/A',
            'top_company': data['Company'].value_counts().idxmax() if _usable('Company') else 'N/A',
            'top_role': data['Role'].value_counts().idxmax() if _usable('Role') else 'N/A'
        }

        overall_context = f"""
Here is a summary of the placement data:
- Total Placements: {stats['total_placements']}
- Departments involved: {stats['departments']}
- Unique Companies: {stats['companies']}
- Average Package: {stats['avg_package']}
- Maximum Package: {stats['max_package']}
- Minimum Package: {stats['min_package']}
- Median Package: {stats['median_package']}
- Years Covered: {stats['years_covered']}
- Most Placements by Department: {stats['top_department']}
- Most Placements by Company: {stats['top_company']}
- Most Placements by Role: {stats['top_role']}
"""

        # Per-graph prompt templates; unknown titles fall back to a generic
        # prompt in the loop below.
        prompt_map = {
            "Department-wise Placement Distribution": f"""{overall_context}
The graph shows the distribution of placements across different departments. What are the key observations regarding which departments have the most/least placements, and any significant disparities?""",

            "Distribution of Package Values": f"""{overall_context}
The graph displays the frequency distribution of package values. What does this reveal about typical salary ranges, outliers, and the overall earning potential?""",

            "Average Package by Department": f"""{overall_context}
This graph presents the average package offered per department. What insights can be drawn about the earning potential differences between departments?""",

            "Placement Trends Over Years": f"""{overall_context}
This graph illustrates the number of placements over the years. What trends (growth, decline, stability) can be identified in placement activity over time?""",

            "Top 10 Recruiting Companies": f"""{overall_context}
This graph shows the top 10 companies by the number of placements. What does this indicate about the primary recruiters and their impact on placements?""",

            "Top 10 Job Roles Placed": f"""{overall_context}
This graph displays the top 10 job roles students were placed in. What are the predominant job types or career paths for these students?"""
        }

        for title in graph_titles:
            prompt = prompt_map.get(title, f"{overall_context}\n\nProvide key insights for a graph titled '{title}'.")
            try:
                print(f"Generating insight for graph: '{title}'...")
                insight = generate_single_insight(prompt, model)
                insights.append(insight)
            except Exception as e:
                # A single failed insight must not abort the rest; keep the
                # list aligned with graph_titles.
                print(f"Failed to generate insight for '{title}' after retries: {type(e).__name__}: {str(e)}")
                insights.append("<ul><li>Unable to generate insight for this graph at this time.</li></ul>")

        return insights

    except Exception as e:
        print(f"Error in generate_insights overall: {type(e).__name__}: {str(e)}")
        traceback.print_exc()
        return ["<ul><li>Error generating insights. Please try again.</li></ul>"] * len(graph_titles)
|
|
|
|
|
|
@app.route('/', methods=['GET', 'POST'])
def home():
    """
    Handle placement data upload and analysis.

    POST: validates the uploaded CSV, generates graphs + AI insights, caches
    the results as JSON in TEMP_FOLDER keyed by a session 'analysis_id', and
    renders them. On any validation/processing error, flashes a message and
    redirects back here.

    GET: re-renders the cached analysis for this session if one exists,
    otherwise shows the empty upload page.
    """
    # Debug tracing of every request; useful while diagnosing form issues.
    print(f"Request method: {request.method}")
    print(f"Request form keys: {list(request.form.keys())}")
    print(f"Request files keys: {list(request.files.keys())}")

    if request.method == 'POST':
        print("POST request received")

        # The submit button must be named 'upload_csv'; its absence means the
        # form was not submitted through the expected template.
        if 'upload_csv' not in request.form:
            print("upload_csv not in form (this means the button's name/value wasn't sent)")
            flash("Invalid form submission or button not recognized. Please try again.", "error")
            return redirect(url_for('home'))

        # Guard clauses: file part present, non-empty filename, .csv extension.
        if 'file' not in request.files:
            print("No file part in request")
            flash("No file selected", "error")
            return redirect(url_for('home'))

        file = request.files['file']
        print(f"File received: {file.filename}")

        if file.filename == '':
            print("No file selected (empty filename)")
            flash("No file selected", "error")
            return redirect(url_for('home'))

        if not file.filename.lower().endswith('.csv'):
            print("Invalid file type (not .csv)")
            flash("Please upload a CSV file", "error")
            return redirect(url_for('home'))

        try:
            print("Processing CSV file...")

            # Parse directly from the upload stream; no file is written to disk.
            data = pd.read_csv(file.stream)
            print(f"CSV loaded successfully with {len(data)} rows and {len(data.columns)} columns")
            print(f"Columns before cleaning: {list(data.columns)}")

            if data.empty:
                flash("Uploaded file is empty or invalid. Please upload a valid CSV.", "error")
                return redirect(url_for('home'))

            # Strip stray whitespace from headers before any column lookups.
            data.columns = data.columns.str.strip()
            print(f"Cleaned columns: {list(data.columns)}")

            required_csv_columns = ['Name', 'Department', 'Company', 'Post', 'Package', 'Year of Placement',
                                    'Graduation Year']

            # Validate on a copy: the numeric coercion inside the validator is
            # deliberately discarded; only the pass/fail result matters here.
            try:
                validate_data_columns(data.copy(),
                                      required_csv_columns)
            except ValueError as ve:
                print(f"Validation error: {str(ve)}")
                flash(f"Invalid data or missing columns: {str(ve)}", "error")
                return redirect(url_for('home'))

            # Case-insensitively rename 'Year of Placement' -> 'Year' and
            # 'Post' -> 'Role' to the internal names the generators expect.
            rename_map = {
                col: new_name for col_check, new_name in [('Year of Placement', 'Year'), ('Post', 'Role')]
                for col in data.columns if col.lower() == col_check.lower()
            }
            data = data.rename(columns=rename_map)
            print("Columns potentially renamed for internal processing.")
            print(f"Columns after renaming for processing: {list(data.columns)}")

            # Generators receive copies so neither can mutate the other's view.
            print("Generating graphs...")
            graphs_info = generate_graphs(data.copy())
            graph_html_list = [item["graph"] for item in graphs_info]
            graph_titles_list = [item["title"] for item in graphs_info]
            print(f"Generated {len(graph_html_list)} graphs")

            print("Generating insights...")
            insights_list = generate_insights(data.copy(), graph_titles_list)
            print(f"Generated {len(insights_list)} insights")

            # Pair graphs with insights; truncate to the shorter list so the
            # template always receives aligned pairs.
            min_length = min(len(graph_html_list), len(insights_list))
            if min_length == 0:
                flash("No graphs or insights could be generated from the data. Please check file format and content.",
                      "error")
                return redirect(url_for('home'))

            final_graphs_and_insights = [{"graph": g, "insight": i}
                                         for g, i in zip(graph_html_list[:min_length], insights_list[:min_length])]

            # Timestamp-based id links this session to its cached results.
            session['analysis_id'] = f"analysis_{int(time.time())}"

            # Best-effort cache to a temp file so a later GET can re-render the
            # analysis; failure here only downgrades to a warning.
            try:
                analysis_path_dir = app.config['TEMP_FOLDER'] / session['analysis_id']
                os.makedirs(analysis_path_dir, exist_ok=True)
                analysis_file_path = analysis_path_dir / 'data.json'

                with open(analysis_file_path, 'w') as f:
                    json.dump({
                        'graphs': graph_html_list[:min_length],
                        'insights': insights_list[:min_length]
                    }, f)
                print(f"Analysis results saved successfully to {analysis_file_path}")
            except Exception as e:
                print(f"Error saving analysis to temporary file: {str(e)}")
                traceback.print_exc()
                flash("Analysis completed, but there was an issue saving the results temporarily.", "warning")

            flash("Analysis completed successfully! Scroll down to see the results.", "success")
            return render_template('index.html', graphs_and_insights=final_graphs_and_insights)

        # Specific pandas parse failures get friendlier messages than the
        # generic catch-all below.
        except pd.errors.EmptyDataError:
            flash("The uploaded CSV file is empty. Please upload a file with data.", "error")
            return redirect(url_for('home'))
        except pd.errors.ParserError:
            flash("Could not parse the CSV file. Please ensure it's a valid CSV format.", "error")
            return redirect(url_for('home'))
        except Exception as e:
            error_msg = f"An unexpected error occurred while processing your file: {str(e)}. Please check the file's content and try again."
            print(error_msg)
            traceback.print_exc()
            flash(error_msg, "error")
            return redirect(url_for('home'))

    # --- GET: try to restore a previously cached analysis for this session ---
    print("GET request - checking for saved analysis...")
    analysis_id = session.get('analysis_id')
    graphs_and_insights = []
    if analysis_id:
        analysis_file_path = app.config['TEMP_FOLDER'] / analysis_id / 'data.json'
        if analysis_file_path.exists():
            try:
                with open(analysis_file_path) as f:
                    data = json.load(f)
                print(f"Loaded saved analysis from {analysis_file_path}")
                graphs_and_insights = [{"graph": g, "insight": i}
                                       for g, i in zip(data['graphs'], data['insights'])]
            except Exception as e:
                # Corrupt/unreadable cache: drop the stale session id and ask
                # the user to re-upload rather than erroring out.
                print(f"Error loading saved analysis: {str(e)}")
                traceback.print_exc()

                session.pop('analysis_id', None)
                flash("Could not load previous analysis. Please upload your file again.", "warning")

    print("Rendering template.")
    return render_template('index.html', graphs_and_insights=graphs_and_insights)
|
|
|
|
|
|
@app.errorhandler(413)
def too_large(e):
    """Flash a friendly message and return home when an upload exceeds the 16MB cap (HTTP 413)."""
    message = "File is too large. Maximum file size is 16MB."
    flash(message, "error")
    return redirect(url_for('home'))
|
|
|
|
|
|
@app.errorhandler(400)
def bad_request(e):
    """Flash a generic message and return home on a malformed request (HTTP 400)."""
    message = "Bad request. Please check your input and try again."
    flash(message, "error")
    return redirect(url_for('home'))
|
|
|
|
|
|
if __name__ == '__main__':

    # Ensure the working directories exist before the first request arrives.
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

    os.makedirs(app.config['TEMP_FOLDER'], exist_ok=True)

    print("Flask app starting...")

    print(f"Upload folder: {app.config['UPLOAD_FOLDER']}")

    print(f"Temp folder: {app.config['TEMP_FOLDER']}")

    # NOTE(review): debug=True enables the Werkzeug debugger (arbitrary code
    # execution via the console) — development only; disable before deploying.
    app.run(debug=True, port=2541)