import time
import os
from flask import Flask, render_template, request
import pandas as pd
import plotly.express as px
import io
import base64
import google.generativeai as genai
from collections import defaultdict
import numpy as np
app = Flask(__name__)
# Configure Gemini API
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
model = genai.GenerativeModel("gemini-1.5-flash")
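# Note: the GEMINI_API_KEY environment variable must be set before the app starts;
# errors from a missing or invalid key typically only surface later, when
# generate_content() is called.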
WEIGHTS = {
'experience': 20,
'degree': 20,
'research': 20,
'publication': 20,
'skills': 20
}
# Ideal student-to-faculty ratio
IDEAL_RATIO = 20
def calculate_grades(row, weights):
"""Function to calculate grades."""
experience_grade = normalize(row['Years_of_Experience'], 0, 35) * weights['experience']
degree_grade = (1 if row['Degree_Held'] in ['PhD', 'MPhil'] else 0.75) * weights['degree']
research_grade = normalize(row['Research_Count'], 0, 20) * weights['research']
publication_grade = normalize(row['Publications_Count'], 0, 50) * weights['publication']
skills_grade = normalize(len(str(row['Skills']).split(',')), 0, 10) * weights['skills']
total_grade = (experience_grade + degree_grade + research_grade + publication_grade + skills_grade) / sum(weights.values())
if row['Publications_Count'] > 30:
total_grade += 0.05
if row['Years_of_Experience'] < 2:
total_grade -= 0.05
return min(1.0, max(0.0, total_grade))
def normalize(value, min_value, max_value):
"""Function to normalize values."""
return (value - min_value) / (max_value - min_value) if max_value - min_value != 0 else 0
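# Worked example (hypothetical faculty row): 10 years of experience, a PhD,
# 5 research projects, 15 publications and 4 listed skills give
# (10/35*20 + 1*20 + 5/20*20 + 15/50*20 + 4/10*20) / 100 ≈ 0.45,
# with no bonus (publications <= 30) and no penalty (experience >= 2).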
def generate_with_retry(query, retries=3, delay=2):
    """Call the Gemini model, retrying with exponential backoff on 429 rate-limit errors."""
for attempt in range(retries):
try:
gemini_response = model.generate_content(query)
return gemini_response.text
except Exception as e:
if "429" in str(e) and attempt < retries - 1:
                time.sleep(delay * (2 ** attempt))  # exponential backoff before retrying
else:
raise e
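# With the defaults (retries=3, delay=2), a rate-limited call is retried after
# waits of 2s and 4s; the error from the third and final attempt is re-raised.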
def perform_swot_analysis(faculty_df, teaching_responses):
"""Enhanced SWOT analysis based on faculty data and teaching responses."""
strengths = []
weaknesses = []
opportunities = []
threats = []
# Analyze faculty data
avg_experience = faculty_df['Years_of_Experience'].mean()
avg_publications = faculty_df['Publications_Count'].mean()
avg_research = faculty_df['Research_Count'].mean()
phd_count = len(faculty_df[faculty_df['Degree_Held'] == 'PhD'])
phd_percentage = (phd_count / len(faculty_df)) * 100
# Faculty Qualifications Analysis
if phd_percentage > 60:
strengths.append("High percentage of PhD holders ({}%)".format(round(phd_percentage)))
elif phd_percentage < 30:
weaknesses.append("Low percentage of PhD holders ({}%)".format(round(phd_percentage)))
opportunities.append("Encourage faculty to pursue higher education")
if avg_experience > 10:
strengths.append("Strong experienced faculty base (avg. {} years)".format(round(avg_experience)))
elif avg_experience < 5:
weaknesses.append("Relatively inexperienced faculty (avg. {} years)".format(round(avg_experience)))
opportunities.append("Implement mentorship programs")
# Research Output Analysis
if avg_publications > 20:
strengths.append("High research output through publications")
elif avg_publications < 10:
weaknesses.append("Low publication count")
opportunities.append("Create research incentives")
# Teaching Methodology Analysis
if teaching_responses:
# Course Design Analysis
course_revision = teaching_responses.get('course_revision', '')
if course_revision == 'Annually':
strengths.append("Regular curriculum updates")
elif course_revision == 'Rarely':
weaknesses.append("Infrequent curriculum revision")
threats.append("Risk of outdated curriculum")
# Technology Integration
tech_usage = teaching_responses.get('tech_usage', '')
if tech_usage == 'Yes':
strengths.append("Strong technology integration in teaching")
else:
weaknesses.append("Limited use of technology in teaching")
opportunities.append("Implement modern teaching technologies")
# Assessment Methods
assessment_methods = teaching_responses.get('assessment_methods', [])
if len(assessment_methods) >= 3:
strengths.append("Diverse assessment methods")
elif len(assessment_methods) < 2:
weaknesses.append("Limited assessment variety")
opportunities.append("Diversify assessment methods")
# Practical Learning
practical_percentage = int(teaching_responses.get('practical_percentage', 0))
if practical_percentage > 50:
strengths.append("Strong practical learning focus")
elif practical_percentage < 30:
weaknesses.append("Limited practical exposure")
opportunities.append("Increase hands-on learning activities")
# Student Engagement
student_participation = int(teaching_responses.get('student_participation', 0))
if student_participation > 75:
strengths.append("High student engagement")
elif student_participation < 50:
weaknesses.append("Low student participation")
opportunities.append("Implement engagement strategies")
# Teaching Methods
teaching_methods = teaching_responses.get('teaching_methods', [])
if len(teaching_methods) >= 3:
strengths.append("Diverse teaching methodologies")
elif len(teaching_methods) < 2:
weaknesses.append("Limited teaching methods")
opportunities.append("Expand teaching methodology")
# Professional Development
prof_dev = int(teaching_responses.get('professional_development', 0))
if prof_dev > 3:
strengths.append("Strong commitment to professional development")
elif prof_dev < 2:
weaknesses.append("Limited professional development")
opportunities.append("Increase faculty development programs")
# Industry Relevance
curriculum_relevance = int(teaching_responses.get('curriculum_relevance', 0))
if curriculum_relevance >= 8:
strengths.append("High industry relevance")
elif curriculum_relevance <= 5:
weaknesses.append("Low industry alignment")
threats.append("Risk of skill-industry mismatch")
# Add general threats
threats.extend([
"Rapid technological changes in education",
"Increasing competition from online education",
"Changing student learning preferences"
])
# Add general opportunities
opportunities.extend([
"Integration of emerging technologies",
"Industry collaboration potential",
"International academic partnerships"
])
return {
'strengths': strengths,
'weaknesses': weaknesses,
'opportunities': opportunities,
'threats': threats
}
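# Illustrative call (hypothetical form data):
#   perform_swot_analysis(faculty_df, {'course_revision': 'Annually', 'tech_usage': 'Yes',
#       'assessment_methods': ['Quizzes', 'Projects', 'Presentations'],
#       'practical_percentage': 55, 'student_participation': 80, ...})
# returns a dict with 'strengths', 'weaknesses', 'opportunities' and 'threats' lists.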
@app.route('/', methods=['GET', 'POST'])
def index():
plots = {}
graded_csv = None
department_tables = {}
deficiency_table = None
departments = []
    swot_results = None
    teaching_responses = None
gemini_insights = {}
if request.method == 'POST':
if 'faculty_file' not in request.files:
return render_template('index.html', error="Error: Faculty file must be uploaded.")
faculty_file = request.files['faculty_file']
if faculty_file.filename == '':
return render_template('index.html', error="Error: Faculty file must be selected for upload.")
try:
# Load faculty data
faculty_df = pd.read_csv(faculty_file)
# Validate columns in faculty data
required_faculty_columns = {'Name', 'Department', 'Post', 'Years_of_Experience', 'Degree_Held',
'Research_Count', 'Publications_Count', 'Skills'}
missing_faculty_columns = required_faculty_columns - set(faculty_df.columns)
if missing_faculty_columns:
return render_template('index.html',
error=f"Error: The faculty CSV is missing the following columns: {', '.join(missing_faculty_columns)}")
# Calculate grades
faculty_df['Grade'] = faculty_df.apply(lambda row: calculate_grades(row, WEIGHTS), axis=1)
# Get student counts from the form
student_counts = {department: int(request.form.get(f'students_{department}', 0) or 0) for department in faculty_df['Department'].unique()}
# Separate tables for each department
for department in faculty_df['Department'].unique():
department_data = faculty_df[faculty_df['Department'] == department]
department_tables[department] = {
'columns': department_data.columns.tolist(),
'rows': department_data.values.tolist()
}
departments.append(department)
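            # Each entry below bundles a chart title, the DataFrame slice to plot,
            # a Plotly figure factory, and the prompt sent to Gemini for a short insight.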
graph_data = [
{
"title": "Count of Faculty by Department",
"data": faculty_df['Department'].value_counts().reset_index(name='count'),
"graph": lambda df: px.bar(df, x='Department', y='count', title="Count of Faculty by Department",
labels={'Department': 'Department', 'count': 'Count'}),
"query": "Provide insights into the distribution of faculty across departments based on this data."
},
{
"title": "Students vs Faculty by Department",
"data": pd.DataFrame({
"Department": faculty_df['Department'].unique(),
"Number_of_Students": [int(request.form.get(f'students_{dep}', 0)) for dep in
faculty_df['Department'].unique()],
"Number_of_Faculty": faculty_df['Department'].value_counts().values
}),
"graph": lambda df: px.bar(df, x='Department', y=['Number_of_Students', 'Number_of_Faculty'],
barmode='group',
title="Students vs Faculty by Department"),
"query": "Analyze the relationship between the number of students and faculty by department based on this data."
},
{
"title": "Post vs Skills",
"data": faculty_df[['Post', 'Skills']].assign(
Skills_Count=lambda x: x['Skills'].apply(lambda y: len(str(y).split(',')))),
"graph": lambda df: px.scatter(df, x='Post', y='Skills_Count', title="Post vs Skills"),
"query": "Explain the relationship between Post and Skills based on this data."
},
{
"title": "Degree vs Publications",
"data": faculty_df[['Degree_Held', 'Publications_Count']],
"graph": lambda df: px.box(df, x='Degree_Held', y='Publications_Count',
title="Degree vs Publications", color='Degree_Held'),
"query": "Describe the distribution of publications by degree based on this data."
},
{
"title": "Department-wise Faculty Count by Degree",
"data": faculty_df.groupby(['Department', 'Degree_Held']).size().reset_index(name='Count'),
"graph": lambda df: px.bar(df, x='Department', y='Count', color='Degree_Held', barmode='group',
title="Department-wise Faculty Count by Degree"),
"query": "What can we infer about the qualifications of faculty across departments from this data?"
},
{
"title": "Experience Distribution by Degree",
"data": faculty_df[['Degree_Held', 'Years_of_Experience']],
"graph": lambda df: px.violin(df, x='Degree_Held', y='Years_of_Experience',
title="Experience Distribution by Degree",
color='Degree_Held'),
"query": "Analyze the distribution of experience across different degree levels using this data."
},
{
"title": "Research Count by Department",
"data": faculty_df.groupby('Department')['Research_Count'].sum().reset_index(),
"graph": lambda df: px.bar(df, x='Department', y='Research_Count',
title="Research Count by Department"),
"query": "What insights can be drawn about the research output of each department based on this data?"
},
{
"title": "Publications Count by Department",
"data": faculty_df.groupby('Department')['Publications_Count'].sum().reset_index(),
"graph": lambda df: px.bar(df, x='Department', y='Publications_Count',
title="Publications Count by Department"),
"query": "Describe the publication trends across departments using this data."
},
{
"title": "Skills Count by Department",
"data": faculty_df.groupby('Department').apply(
lambda x: x['Skills'].apply(lambda y: len(str(y).split(','))).sum()
).reset_index(name='Skills_Count'),
"graph": lambda df: px.bar(df, x='Department', y='Skills_Count',
title="Skills Count by Department"),
"query": "Explain the distribution of skills among faculty across different departments based on this data."
},
{
"title": "Grades Distribution",
"data": faculty_df[['Department', 'Grade']],
"graph": lambda df: px.box(df, x='Department', y='Grade', title="Grades Distribution by Department",
color='Department'),
"query": "What insights can we infer from the grades distribution of faculty across departments?"
},
{
"title": "Experience vs Publications",
"data": faculty_df[['Years_of_Experience', 'Publications_Count']],
"graph": lambda df: px.scatter(df, x='Years_of_Experience', y='Publications_Count',
title="Experience vs Publications",
labels={'Years_of_Experience': 'Years of Experience',
'Publications_Count': 'Publications Count'}),
"query": "Analyze the relationship between years of experience and the number of publications based on this data."
},
{
"title": "Top Departments by Research",
"data": faculty_df.groupby('Department')['Research_Count'].sum().reset_index().sort_values(
by='Research_Count', ascending=False).head(5),
"graph": lambda df: px.bar(df, x='Department', y='Research_Count',
title="Top 5 Departments by Research Output"),
"query": "Identify the top departments by research output and analyze their characteristics."
}
]
for graph in graph_data:
# Generate the graph
graph_df = graph["data"]
fig = graph["graph"](graph_df)
plot_html = fig.to_html(full_html=False)
plots[graph["title"]] = plot_html
# Prepare the query for Gemini
query = (
f"{graph['query']}\n\n"
"Data:\n"
f"{graph_df.to_csv(index=False)}\n\n"
"Provide a concise summary in 100 words, formatted without special characters like '*'. "
"Use proper sentences and highlight key points using **bold text**."
)
# Use retry logic for Gemini API
try:
gemini_response = generate_with_retry(query)
raw_text = gemini_response.replace('*', '').strip()
                    # Truncate long responses so the displayed insight stays short
if len(raw_text) > 150:
raw_text = raw_text[:147].rsplit(' ', 1)[0] + "..."
gemini_insights[graph["title"]] = raw_text
except Exception as e:
gemini_insights[graph["title"]] = f"Error generating insight: {str(e)}"
            # Build the per-department faculty/student summary used for the deficiency table
faculty_counts = faculty_df['Department'].value_counts().reset_index()
faculty_counts.columns = ['Department', 'Number_of_Faculty']
# Map number of students
faculty_counts['Number_of_Students'] = faculty_counts['Department'].map(student_counts)
            # Ideal faculty strength: total = ceil(students / IDEAL_RATIO); the teaching
            # cadre is split in a 1:2:6 ratio (Professor : Associate Professor :
            # Assistant Professor), i.e. each share is taken out of 9 parts.
            faculty_counts['Total_Ideal_Faculty'] = np.ceil(
                faculty_counts['Number_of_Students'] / IDEAL_RATIO).astype(int)
            # Role-specific ideal faculty counts
            faculty_counts['Ideal_Principal'] = 1  # Always exactly one Principal per department
            faculty_counts['Ideal_Professor'] = np.ceil(
                faculty_counts['Number_of_Students'] * 1 / (IDEAL_RATIO * 9)).astype(int)
            faculty_counts['Ideal_Associate_Professor'] = np.ceil(
                faculty_counts['Number_of_Students'] * 2 / (IDEAL_RATIO * 9)).astype(int)
            faculty_counts['Ideal_Assistant_Professor'] = np.ceil(
                faculty_counts['Number_of_Students'] * 6 / (IDEAL_RATIO * 9)).astype(int)
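            # Worked example (hypothetical): a department with 180 students gets
            # Total_Ideal_Faculty = ceil(180/20) = 9, Ideal_Principal = 1,
            # Ideal_Professor = ceil(180*1/180) = 1, Ideal_Associate_Professor = 2
            # and Ideal_Assistant_Professor = 6.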
            # Deficiency per role = ideal count minus the actual headcount holding that post.
            # .values is needed so the subtraction aligns positionally with faculty_counts
            # rows instead of on the Department-labelled index produced by groupby/reindex.
            def actual_post_count(post):
                return (faculty_df[faculty_df['Post'] == post]
                        .groupby('Department')['Post'].count()
                        .reindex(faculty_counts['Department'])
                        .fillna(0).astype(int).values)
            faculty_counts['Deficiency_Principal'] = faculty_counts['Ideal_Principal'] - actual_post_count('Principal')
            faculty_counts['Deficiency_Professor'] = faculty_counts['Ideal_Professor'] - actual_post_count('Professor')
            faculty_counts['Deficiency_Associate_Professor'] = faculty_counts['Ideal_Associate_Professor'] - actual_post_count('Associate Professor')
            faculty_counts['Deficiency_Assistant_Professor'] = faculty_counts['Ideal_Assistant_Professor'] - actual_post_count('Assistant Professor')
# Overall deficiency
faculty_counts['Meets_Ratio'] = (faculty_counts['Deficiency_Principal'] <= 0) & \
(faculty_counts['Deficiency_Professor'] <= 0) & \
(faculty_counts['Deficiency_Associate_Professor'] <= 0) & \
(faculty_counts['Deficiency_Assistant_Professor'] <= 0)
faculty_counts['Meets_Ratio'] = faculty_counts['Meets_Ratio'].apply(lambda x: "✔️" if x else "❌")
# Prepare the final deficiency table
deficiency_table = faculty_counts[[
'Department', 'Number_of_Students', 'Number_of_Faculty',
'Ideal_Principal', 'Ideal_Professor', 'Ideal_Associate_Professor', 'Ideal_Assistant_Professor',
'Deficiency_Principal', 'Deficiency_Professor', 'Deficiency_Associate_Professor', 'Deficiency_Assistant_Professor',
'Meets_Ratio']].to_html(classes="table table-bordered table-hover", index=False, escape=False)
# Encode graded CSV
csv_output = io.BytesIO()
faculty_df.to_csv(csv_output, index=False)
csv_output.seek(0)
graded_csv = base64.b64encode(csv_output.getvalue()).decode()
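            # The base64 string is passed to the template so it can be embedded directly
            # in the page, e.g. as a data: URI download link for the graded CSV.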
# Collect teaching evaluation responses
teaching_responses = {
'course_revision': request.form.get('course_revision', ''),
'case_studies': request.form.get('case_studies', ''),
'assessment_methods': request.form.getlist('assessment_methods') or [],
'practical_percentage': int(request.form.get('practical_percentage', 0) or 0),
'curriculum_relevance': int(request.form.get('curriculum_relevance', 0) or 0),
'interactive_sessions': request.form.get('interactive_sessions', ''),
'student_participation': int(request.form.get('student_participation', 0) or 0),
'personalized_feedback': request.form.get('personalized_feedback', ''),
'student_interest': request.form.get('student_interest', ''),
'tech_usage': request.form.get('tech_usage', ''),
'teaching_methods': request.form.getlist('teaching_methods') or [],
'critical_thinking': request.form.get('critical_thinking', ''),
'student_feedback': request.form.get('student_feedback', ''),
'feedback_actions': request.form.get('feedback_actions', ''),
'professional_development': int(request.form.get('professional_development', 0) or 0)
}
# Perform SWOT analysis
swot_results = perform_swot_analysis(faculty_df, teaching_responses)
except Exception as e:
return render_template('index.html', error=f"An unexpected error occurred: {str(e)}")
return render_template('index.html',
plots=plots,
graded_csv=graded_csv,
department_tables=department_tables,
departments=departments,
deficiency_table=deficiency_table,
gemini_insights=gemini_insights,
swot_results=swot_results,
teaching_responses=teaching_responses)
if __name__ == '__main__':
app.run(debug=True, port=5632)