Spaces:

sikeaditya
/

Faculty_Data_Analysis

Runtime error

File size: 23,340 Bytes

f94dafd

from datetime import time
import os

from flask import Flask, render_template, request
import pandas as pd
import plotly.express as px
import io
import base64
import google.generativeai as genai
from collections import defaultdict
import numpy as np
import google.generativeai as genai

# Flask application object; the route below is registered on it.
app = Flask(__name__)

# Configure Gemini API
# The key is read from the GEMINI_API_KEY environment variable; os.getenv
# returns None when the variable is unset.
# NOTE(review): presumably Gemini calls fail at request time without a key - confirm.
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
# Shared model handle used by generate_with_retry().
model = genai.GenerativeModel("gemini-1.5-flash")

# Relative weight of each grading component. calculate_grades() multiplies each
# component by its weight and divides the total by the sum of these values, so
# only the proportions between the weights matter.
WEIGHTS = {
    'experience': 20,
    'degree': 20,
    'research': 20,
    'publication': 20,
    'skills': 20
}

# Ideal student-to-faculty ratio
# (number of students per faculty member; used to derive ideal staffing counts)
IDEAL_RATIO = 20


def calculate_grades(row, weights):
    """Compute a faculty member's overall grade as a value in [0.0, 1.0].

    Five components are scored, each multiplied by its entry in ``weights``,
    and the weighted total is divided by the sum of all weights:

    * experience   - ``Years_of_Experience`` scaled against 0..35
    * degree       - 1 for 'PhD' or 'MPhil', 0.75 for anything else
    * research     - ``Research_Count`` scaled against 0..20
    * publication  - ``Publications_Count`` scaled against 0..50
    * skills       - number of comma-separated entries in ``Skills``,
                     scaled against 0..10

    A bonus of 0.05 is added for more than 30 publications and a penalty of
    0.05 is applied for less than 2 years of experience. The result is then
    clamped to the range 0.0..1.0.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) holding the columns
            named above.
        weights: Mapping with the keys 'experience', 'degree', 'research',
            'publication' and 'skills'.

    Returns:
        The clamped grade.
    """
    experience_part = weights['experience'] * normalize(row['Years_of_Experience'], 0, 35)

    if row['Degree_Held'] in ['PhD', 'MPhil']:
        degree_factor = 1
    else:
        degree_factor = 0.75
    degree_part = degree_factor * weights['degree']

    research_part = weights['research'] * normalize(row['Research_Count'], 0, 20)
    publication_part = weights['publication'] * normalize(row['Publications_Count'], 0, 50)

    # The skills cell is stringified first, so every cell yields at least one entry.
    skill_entries = str(row['Skills']).split(',')
    skills_part = weights['skills'] * normalize(len(skill_entries), 0, 10)

    weighted_total = experience_part + degree_part + research_part + publication_part + skills_part
    score = weighted_total / sum(weights.values())

    # Flat adjustments applied on top of the weighted average.
    if row['Publications_Count'] > 30:
        score += 0.05
    if row['Years_of_Experience'] < 2:
        score -= 0.05

    return min(1.0, max(0.0, score))


def normalize(value, min_value, max_value):
    """Linearly rescale ``value`` so that ``min_value`` maps to 0 and ``max_value`` to 1.

    The result is not clamped: inputs outside the range produce values below 0
    or above 1. When the range is empty (``max_value == min_value``) the
    function returns 0 instead of dividing by zero.
    """
    span = max_value - min_value
    if span == 0:
        return 0
    return (value - min_value) / span

def generate_with_retry(query, retries=3, delay=2):
    """Ask the Gemini model for a completion, retrying on rate-limit errors.

    Args:
        query: Prompt text passed to ``model.generate_content``.
        retries: Maximum number of attempts.
        delay: Base wait in seconds; the wait doubles after every failed
            attempt (delay, 2*delay, 4*delay, ...).

    Returns:
        The response text, or None when ``retries`` is less than 1 (no attempt
        is made in that case).

    Raises:
        Exception: Whatever the model call raised, when the error message does
            not contain "429" or the last attempt has been used up.
    """
    # Imported locally on purpose: at module level the name `time` is bound to
    # `datetime.time`, which has no sleep(), so `time.sleep(...)` would raise
    # AttributeError instead of waiting.
    from time import sleep

    for attempt in range(retries):
        try:
            return model.generate_content(query).text
        except Exception as e:
            out_of_attempts = attempt >= retries - 1
            if "429" not in str(e) or out_of_attempts:
                # Bare raise keeps the original traceback intact.
                raise
            sleep(delay * (2 ** attempt))
    return None


def perform_swot_analysis(faculty_df, teaching_responses):
    """Build a SWOT summary from faculty statistics and teaching-survey answers.

    Args:
        faculty_df: DataFrame with the columns 'Years_of_Experience',
            'Publications_Count' and 'Degree_Held'. An empty frame is
            accepted; the faculty-based checks are skipped for it.
        teaching_responses: Mapping of survey answers. A falsy value (None or
            an empty mapping) skips the teaching-methodology checks. Numeric
            answers may be ints or numeric strings; missing or blank values
            count as 0, and missing list answers count as empty.

    Returns:
        dict with the keys 'strengths', 'weaknesses', 'opportunities' and
        'threats', each a list of human-readable strings. Three generic
        threats and three generic opportunities are always appended last.
    """
    strengths = []
    weaknesses = []
    opportunities = []
    threats = []

    # Faculty data analysis. Skipped for an empty frame: the PhD percentage
    # would divide by zero and the averages would be NaN.
    faculty_total = len(faculty_df)
    if faculty_total:
        avg_experience = faculty_df['Years_of_Experience'].mean()
        avg_publications = faculty_df['Publications_Count'].mean()
        phd_count = int((faculty_df['Degree_Held'] == 'PhD').sum())
        phd_percentage = (phd_count / faculty_total) * 100

        # Faculty Qualifications Analysis
        if phd_percentage > 60:
            strengths.append("High percentage of PhD holders ({}%)".format(round(phd_percentage)))
        elif phd_percentage < 30:
            weaknesses.append("Low percentage of PhD holders ({}%)".format(round(phd_percentage)))
            opportunities.append("Encourage faculty to pursue higher education")

        if avg_experience > 10:
            strengths.append("Strong experienced faculty base (avg. {} years)".format(round(avg_experience)))
        elif avg_experience < 5:
            weaknesses.append("Relatively inexperienced faculty (avg. {} years)".format(round(avg_experience)))
            opportunities.append("Implement mentorship programs")

        # Research Output Analysis
        if avg_publications > 20:
            strengths.append("High research output through publications")
        elif avg_publications < 10:
            weaknesses.append("Low publication count")
            opportunities.append("Create research incentives")

    # Teaching Methodology Analysis
    if teaching_responses:
        def answer_int(key):
            # Blank strings and None are treated as 0 instead of raising.
            return int(teaching_responses.get(key) or 0)

        def answer_list(key):
            return teaching_responses.get(key) or []

        # Course Design Analysis
        course_revision = teaching_responses.get('course_revision', '')
        if course_revision == 'Annually':
            strengths.append("Regular curriculum updates")
        elif course_revision == 'Rarely':
            weaknesses.append("Infrequent curriculum revision")
            threats.append("Risk of outdated curriculum")

        # Technology Integration (any answer other than 'Yes' counts as limited use)
        if teaching_responses.get('tech_usage', '') == 'Yes':
            strengths.append("Strong technology integration in teaching")
        else:
            weaknesses.append("Limited use of technology in teaching")
            opportunities.append("Implement modern teaching technologies")

        # Assessment Methods
        assessment_count = len(answer_list('assessment_methods'))
        if assessment_count >= 3:
            strengths.append("Diverse assessment methods")
        elif assessment_count < 2:
            weaknesses.append("Limited assessment variety")
            opportunities.append("Diversify assessment methods")

        # Practical Learning
        practical_percentage = answer_int('practical_percentage')
        if practical_percentage > 50:
            strengths.append("Strong practical learning focus")
        elif practical_percentage < 30:
            weaknesses.append("Limited practical exposure")
            opportunities.append("Increase hands-on learning activities")

        # Student Engagement
        student_participation = answer_int('student_participation')
        if student_participation > 75:
            strengths.append("High student engagement")
        elif student_participation < 50:
            weaknesses.append("Low student participation")
            opportunities.append("Implement engagement strategies")

        # Teaching Methods
        method_count = len(answer_list('teaching_methods'))
        if method_count >= 3:
            strengths.append("Diverse teaching methodologies")
        elif method_count < 2:
            weaknesses.append("Limited teaching methods")
            opportunities.append("Expand teaching methodology")

        # Professional Development
        prof_dev = answer_int('professional_development')
        if prof_dev > 3:
            strengths.append("Strong commitment to professional development")
        elif prof_dev < 2:
            weaknesses.append("Limited professional development")
            opportunities.append("Increase faculty development programs")

        # Industry Relevance
        curriculum_relevance = answer_int('curriculum_relevance')
        if curriculum_relevance >= 8:
            strengths.append("High industry relevance")
        elif curriculum_relevance <= 5:
            weaknesses.append("Low industry alignment")
            threats.append("Risk of skill-industry mismatch")

    # Add general threats
    threats.extend([
        "Rapid technological changes in education",
        "Increasing competition from online education",
        "Changing student learning preferences"
    ])

    # Add general opportunities
    opportunities.extend([
        "Integration of emerging technologies",
        "Industry collaboration potential",
        "International academic partnerships"
    ])

    return {
        'strengths': strengths,
        'weaknesses': weaknesses,
        'opportunities': opportunities,
        'threats': threats
    }

# Roles shown in the staffing table, as (value in the 'Post' column,
# column-name suffix, share of the 1:2:6 cadre split). The Principal has no
# share because exactly one is expected per department.
_STAFFING_ROLES = (
    ('Principal', 'Principal', None),
    ('Professor', 'Professor', 1),
    ('Associate Professor', 'Associate_Professor', 2),
    ('Assistant Professor', 'Assistant_Professor', 6),
)
# Sum of the cadre shares above (1 + 2 + 6).
_CADRE_TOTAL = 9


def _form_int(form, key):
    """Read an integer form field, treating a missing or blank value as 0."""
    return int(form.get(key, 0) or 0)


def _round_up(value):
    """Round a non-negative number up to the next whole number."""
    value = float(value)
    return int(value) if value.is_integer() else int(value) + 1


def _count_skills(skills):
    """Count the comma-separated entries of a 'Skills' cell."""
    return len(str(skills).split(','))


def _build_graph_specs(faculty_df, student_counts):
    """Describe every chart: its title, data frame, figure builder and Gemini question."""
    faculty_per_department = faculty_df['Department'].value_counts()
    # Order of first appearance; rows without a department are left out.
    department_names = faculty_df['Department'].dropna().unique()
    skills_count = faculty_df['Skills'].apply(_count_skills)
    research_by_department = faculty_df.groupby('Department')['Research_Count'].sum().reset_index()

    return [
        {
            "title": "Count of Faculty by Department",
            # rename_axis makes the first column 'Department' regardless of
            # how the installed pandas names the value_counts() index.
            "data": faculty_per_department.rename_axis('Department').reset_index(name='count'),
            "graph": lambda df: px.bar(df, x='Department', y='count', title="Count of Faculty by Department",
                                       labels={'Department': 'Department', 'count': 'Count'}),
            "query": "Provide insights into the distribution of faculty across departments based on this data."
        },
        {
            "title": "Students vs Faculty by Department",
            # Both value columns are looked up per department name so each
            # count stays attached to its own department.
            "data": pd.DataFrame({
                "Department": department_names,
                "Number_of_Students": [student_counts.get(dep, 0) for dep in department_names],
                "Number_of_Faculty": faculty_per_department.reindex(department_names).values
            }),
            "graph": lambda df: px.bar(df, x='Department', y=['Number_of_Students', 'Number_of_Faculty'],
                                       barmode='group',
                                       title="Students vs Faculty by Department"),
            "query": "Analyze the relationship between the number of students and faculty by department based on this data."
        },
        {
            "title": "Post vs Skills",
            "data": faculty_df[['Post', 'Skills']].assign(Skills_Count=skills_count),
            "graph": lambda df: px.scatter(df, x='Post', y='Skills_Count', title="Post vs Skills"),
            "query": "Explain the relationship between Post and Skills based on this data."
        },
        {
            "title": "Degree vs Publications",
            "data": faculty_df[['Degree_Held', 'Publications_Count']],
            "graph": lambda df: px.box(df, x='Degree_Held', y='Publications_Count',
                                       title="Degree vs Publications", color='Degree_Held'),
            "query": "Describe the distribution of publications by degree based on this data."
        },
        {
            "title": "Department-wise Faculty Count by Degree",
            "data": faculty_df.groupby(['Department', 'Degree_Held']).size().reset_index(name='Count'),
            "graph": lambda df: px.bar(df, x='Department', y='Count', color='Degree_Held', barmode='group',
                                       title="Department-wise Faculty Count by Degree"),
            "query": "What can we infer about the qualifications of faculty across departments from this data?"
        },
        {
            "title": "Experience Distribution by Degree",
            "data": faculty_df[['Degree_Held', 'Years_of_Experience']],
            "graph": lambda df: px.violin(df, x='Degree_Held', y='Years_of_Experience',
                                          title="Experience Distribution by Degree",
                                          color='Degree_Held'),
            "query": "Analyze the distribution of experience across different degree levels using this data."
        },
        {
            "title": "Research Count by Department",
            "data": research_by_department,
            "graph": lambda df: px.bar(df, x='Department', y='Research_Count',
                                       title="Research Count by Department"),
            "query": "What insights can be drawn about the research output of each department based on this data?"
        },
        {
            "title": "Publications Count by Department",
            "data": faculty_df.groupby('Department')['Publications_Count'].sum().reset_index(),
            "graph": lambda df: px.bar(df, x='Department', y='Publications_Count',
                                       title="Publications Count by Department"),
            "query": "Describe the publication trends across departments using this data."
        },
        {
            "title": "Skills Count by Department",
            "data": skills_count.groupby(faculty_df['Department']).sum().reset_index(name='Skills_Count'),
            "graph": lambda df: px.bar(df, x='Department', y='Skills_Count',
                                       title="Skills Count by Department"),
            "query": "Explain the distribution of skills among faculty across different departments based on this data."
        },
        {
            "title": "Grades Distribution",
            "data": faculty_df[['Department', 'Grade']],
            "graph": lambda df: px.box(df, x='Department', y='Grade', title="Grades Distribution by Department",
                                       color='Department'),
            "query": "What insights can we infer from the grades distribution of faculty across departments?"
        },
        {
            "title": "Experience vs Publications",
            "data": faculty_df[['Years_of_Experience', 'Publications_Count']],
            "graph": lambda df: px.scatter(df, x='Years_of_Experience', y='Publications_Count',
                                           title="Experience vs Publications",
                                           labels={'Years_of_Experience': 'Years of Experience',
                                                   'Publications_Count': 'Publications Count'}),
            "query": "Analyze the relationship between years of experience and the number of publications based on this data."
        },
        {
            "title": "Top Departments by Research",
            "data": research_by_department.sort_values(by='Research_Count', ascending=False).head(5),
            "graph": lambda df: px.bar(df, x='Department', y='Research_Count',
                                       title="Top 5 Departments by Research Output"),
            "query": "Identify the top departments by research output and analyze their characteristics."
        }
    ]


def _generate_insight(question, graph_df):
    """Ask Gemini to comment on one chart's data and return a short plain-text insight.

    Asterisks are stripped from the reply and anything longer than 150
    characters is cut at a word boundary and suffixed with "...". A failed
    call yields an error string instead of raising, so one failing insight
    does not abort the page.
    """
    prompt = (
        f"{question}\n\n"
        "Data:\n"
        f"{graph_df.to_csv(index=False)}\n\n"
        "Provide a concise summary in 100 words, formatted without special characters like '*'. "
        "Use proper sentences and highlight key points using **bold text**."
    )
    try:
        insight = generate_with_retry(prompt).replace('*', '').strip()
    except Exception as e:
        return f"Error generating insight: {str(e)}"

    if len(insight) > 150:
        insight = insight[:147].rsplit(' ', 1)[0] + "..."
    return insight


def _build_deficiency_table(faculty_df, student_counts):
    """Render the per-department staffing table (ideal vs. actual head counts) as HTML."""
    counts = faculty_df['Department'].value_counts().reset_index()
    counts.columns = ['Department', 'Number_of_Faculty']
    counts['Number_of_Students'] = counts['Department'].map(student_counts)
    students = counts['Number_of_Students']

    # Overall ideal head count for the department (not shown in the table).
    counts['Total_Ideal_Faculty'] = (students / IDEAL_RATIO).apply(_round_up)

    meets_ratio = pd.Series(True, index=counts.index)
    for post, suffix, share in _STAFFING_ROLES:
        if share is None:
            counts[f'Ideal_{suffix}'] = 1  # Always 1
        else:
            counts[f'Ideal_{suffix}'] = (students * share / (IDEAL_RATIO * _CADRE_TOTAL)).apply(_round_up)

        # Look the current head count up by department NAME. Subtracting a
        # Series indexed by department name from a column indexed 0..n-1
        # would align on the index labels and produce only NaN.
        holders = faculty_df.loc[faculty_df['Post'] == post, 'Department'].value_counts()
        current = counts['Department'].map(holders).fillna(0).astype(int)
        counts[f'Deficiency_{suffix}'] = counts[f'Ideal_{suffix}'] - current
        meets_ratio &= counts[f'Deficiency_{suffix}'] <= 0

    counts['Meets_Ratio'] = meets_ratio.apply(lambda ok: "✔️" if ok else "❌")

    suffixes = [suffix for _, suffix, _ in _STAFFING_ROLES]
    table_columns = (
        ['Department', 'Number_of_Students', 'Number_of_Faculty']
        + [f'Ideal_{suffix}' for suffix in suffixes]
        + [f'Deficiency_{suffix}' for suffix in suffixes]
        + ['Meets_Ratio']
    )
    # Cell contents are HTML-escaped (the to_html default): department names
    # come from the uploaded CSV and must not be rendered as markup.
    return counts[table_columns].to_html(classes="table table-bordered table-hover", index=False)


def _collect_teaching_responses(form):
    """Gather the teaching-evaluation answers from the submitted form."""
    return {
        'course_revision': form.get('course_revision', ''),
        'case_studies': form.get('case_studies', ''),
        'assessment_methods': form.getlist('assessment_methods') or [],
        'practical_percentage': _form_int(form, 'practical_percentage'),
        'curriculum_relevance': _form_int(form, 'curriculum_relevance'),
        'interactive_sessions': form.get('interactive_sessions', ''),
        'student_participation': _form_int(form, 'student_participation'),
        'personalized_feedback': form.get('personalized_feedback', ''),
        'student_interest': form.get('student_interest', ''),
        'tech_usage': form.get('tech_usage', ''),
        'teaching_methods': form.getlist('teaching_methods') or [],
        'critical_thinking': form.get('critical_thinking', ''),
        'student_feedback': form.get('student_feedback', ''),
        'feedback_actions': form.get('feedback_actions', ''),
        'professional_development': _form_int(form, 'professional_development')
    }


@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the dashboard; on POST, analyse the uploaded faculty CSV first.

    A POST must carry a 'faculty_file' CSV upload with the columns Name,
    Department, Post, Years_of_Experience, Degree_Held, Research_Count,
    Publications_Count and Skills. It may also carry one 'students_<department>'
    field per department and the teaching-evaluation answers. The view grades
    every faculty member, builds the charts with one Gemini insight each, the
    per-department tables, the staffing deficiency table, a base64-encoded
    graded CSV and the SWOT analysis.

    Any failure while processing the upload is logged and reported on the
    page through the template's ``error`` variable.
    """
    plots = {}
    graded_csv = None
    department_tables = {}
    deficiency_table = None
    departments = []
    swot_results = None
    teaching_responses = None
    gemini_insights = {}

    if request.method == 'POST':
        if 'faculty_file' not in request.files:
            return render_template('index.html', error="Error: Faculty file must be uploaded.")

        faculty_file = request.files['faculty_file']

        if faculty_file.filename == '':
            return render_template('index.html', error="Error: Faculty file must be selected for upload.")

        try:
            # Load faculty data
            faculty_df = pd.read_csv(faculty_file)

            # Validate columns in faculty data
            required_faculty_columns = {'Name', 'Department', 'Post', 'Years_of_Experience', 'Degree_Held',
                                        'Research_Count', 'Publications_Count', 'Skills'}
            missing_faculty_columns = required_faculty_columns - set(faculty_df.columns)
            if missing_faculty_columns:
                # Sorted so the message is stable between requests.
                return render_template('index.html',
                                       error=f"Error: The faculty CSV is missing the following columns: {', '.join(sorted(missing_faculty_columns))}")

            # Calculate grades
            faculty_df['Grade'] = faculty_df.apply(lambda row: calculate_grades(row, WEIGHTS), axis=1)

            # Student counts from the form, parsed once; blank fields count as 0.
            student_counts = {
                department: _form_int(request.form, f'students_{department}')
                for department in faculty_df['Department'].unique()
            }

            # Separate tables for each department
            for department in faculty_df['Department'].unique():
                department_data = faculty_df[faculty_df['Department'] == department]
                department_tables[department] = {
                    'columns': department_data.columns.tolist(),
                    'rows': department_data.values.tolist()
                }
                departments.append(department)

            # Charts and their Gemini insights
            for spec in _build_graph_specs(faculty_df, student_counts):
                graph_df = spec["data"]
                plots[spec["title"]] = spec["graph"](graph_df).to_html(full_html=False)
                gemini_insights[spec["title"]] = _generate_insight(spec["query"], graph_df)

            # Staffing deficiency table
            deficiency_table = _build_deficiency_table(faculty_df, student_counts)

            # Encode graded CSV
            csv_output = io.BytesIO()
            faculty_df.to_csv(csv_output, index=False)
            graded_csv = base64.b64encode(csv_output.getvalue()).decode()

            # Collect teaching evaluation responses
            teaching_responses = _collect_teaching_responses(request.form)

            # Perform SWOT analysis
            swot_results = perform_swot_analysis(faculty_df, teaching_responses)

        except Exception as e:
            # Top-level boundary: keep the traceback in the server log and show
            # the message on the page instead of returning a 500.
            app.logger.exception("Failed to process the uploaded faculty data")
            return render_template('index.html', error=f"An unexpected error occurred: {str(e)}")

    return render_template('index.html',
                           plots=plots,
                           graded_csv=graded_csv,
                           department_tables=department_tables,
                           departments=departments,
                           deficiency_table=deficiency_table,
                           gemini_insights=gemini_insights,
                           swot_results=swot_results,
                           teaching_responses=teaching_responses)


if __name__ == '__main__':
    # Start the app with Flask's built-in server on port 5632 when this file
    # is executed directly.
    # NOTE(review): debug=True turns on Flask's debug mode; confirm this entry
    # point is never used for a publicly reachable deployment.
    app.run(debug=True, port=5632)