Spaces:

pranit144
/

Institute_activity_anaylisis

Sleeping

File size: 17,944 Bytes

3cc6b13

from flask import Flask, render_template, request, flash, redirect, url_for
import matplotlib

matplotlib.use('Agg')
import matplotlib.pyplot as plt
import pandas as pd
import google.generativeai as genai
import os
import logging
from docx import Document
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from werkzeug.utils import secure_filename
import re
import ast
import json
from datetime import datetime

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
# Flask signs the session cookie (which carries flash messages) with this key.
# A hard-coded placeholder would let anyone forge sessions, so the key is read
# from the SECRET_KEY environment variable. The random fallback is safe but
# changes on every start and differs between worker processes, so set
# SECRET_KEY explicitly for any multi-worker or long-lived deployment.
app.secret_key = os.environ.get('SECRET_KEY') or os.urandom(32).hex()
if not os.environ.get('SECRET_KEY'):
    logger.warning("SECRET_KEY not set; using a random per-process secret key")
app.config['UPLOAD_FOLDER'] = 'uploads'
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max file size

# Configure Gemini API.
# The key is read only from the GOOGLE_API_KEY environment variable; a real
# key must never be committed as a default value. NOTE(review): a key was
# previously hard-coded here and should be treated as leaked and revoked.
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY')
# `model` stays None whenever the API cannot be used; callers check for that.
if GOOGLE_API_KEY and GOOGLE_API_KEY != 'your-api-key-here':
    try:
        genai.configure(api_key=GOOGLE_API_KEY)
        model = genai.GenerativeModel('gemini-2.0-flash-exp')
        logger.info("Gemini API configured successfully")
    except Exception as e:
        logger.error(f"Failed to configure Gemini API: {e}")
        model = None
else:
    logger.warning("Gemini API key not configured")
    model = None


def ensure_upload_folder():
    """Create the configured upload folder if it doesn't exist.

    Raises:
        Exception: any failure while resolving or creating the folder is
            logged and re-raised to the caller.
    """
    try:
        upload_dir = app.config['UPLOAD_FOLDER']
        # Checked only so the "created" log line stays accurate.
        already_present = os.path.isdir(upload_dir)
        # exist_ok=True avoids a FileExistsError when another request creates
        # the folder between the check above and this call.
        os.makedirs(upload_dir, exist_ok=True)
        if not already_present:
            logger.info(f"Created upload folder: {upload_dir}")
    except Exception as e:
        logger.error(f"Failed to create upload folder: {e}")
        raise


def extract_text_from_docx(file_path):
    """Return the non-blank text of a DOCX file as one newline-joined string.

    Text from ``doc.paragraphs`` comes first, followed by the text of every
    table cell (tables, then rows, then cells, in iteration order). Entries
    that are empty or whitespace-only are skipped. Any exception is logged
    and re-raised.
    """
    try:
        document = Document(file_path)

        paragraph_texts = [
            para.text for para in document.paragraphs if para.text.strip()
        ]
        cell_texts = [
            cell.text
            for table in document.tables
            for row in table.rows
            for cell in row.cells
            if cell.text.strip()
        ]

        text = '\n'.join(paragraph_texts + cell_texts)
        logger.info(f"Extracted {len(text)} characters from document")
        return text
    except Exception as e:
        logger.error(f"Error extracting text from DOCX: {e}")
        raise


def _parse_gemini_reply(response_text):
    """Turn Gemini's raw reply text into a Python object.

    Takes the first fenced code block if there is one, strips ``#`` comments,
    then keeps only the span from the first ``{`` to the last ``}``. That last
    step drops a leftover language tag (e.g. ``json``) or surrounding prose
    that would otherwise make parsing fail. The result is parsed as a Python
    literal, falling back to JSON.

    Raises:
        ValueError / SyntaxError-derived errors when neither parser accepts
        the cleaned text (json.JSONDecodeError is a ValueError).
    """
    if '```' in response_text:
        # Extract code block
        code_blocks = re.findall(r'```(?:python)?\s*(.*?)\s*```', response_text, re.DOTALL)
        if code_blocks:
            response_text = code_blocks[0].strip()

    # Remove any extra whitespace and comments
    response_text = re.sub(r'#.*$', '', response_text, flags=re.MULTILINE).strip()

    # Keep only the outermost {...} so tags or prose around it are ignored
    start = response_text.find('{')
    end = response_text.rfind('}')
    if start != -1 and end > start:
        response_text = response_text[start:end + 1]

    logger.info(f"Cleaned response: {response_text[:200]}...")

    try:
        return ast.literal_eval(response_text)
    except (ValueError, SyntaxError):
        # Fallback to JSON parsing
        return json.loads(response_text.replace("'", '"'))


def _normalize_event_data(data):
    """Validate the parsed reply and fill gaps in place; return the same dict.

    Missing expected years are added with all-zero counts, missing categories
    are set to 0, and every required category value is coerced to ``int``
    (0 when it cannot be converted).

    Raises:
        ValueError: if ``data`` or any per-year entry is not a dictionary.
    """
    if not isinstance(data, dict):
        raise ValueError("Response is not a dictionary")

    expected_years = ['2022-2023', '2021-2022', '2020-2021', '2019-2020', '2018-2019']
    required_categories = ['Cultural', 'Sports', 'Technical', 'Social', 'Other']

    for year in expected_years:
        if year not in data:
            logger.warning(f"Missing year {year}, adding with zeros")
            data[year] = {'Cultural': 0, 'Sports': 0, 'Technical': 0, 'Social': 0, 'Other': 0}

    for year, counts in data.items():
        if not isinstance(counts, dict):
            # Fail with a clear message instead of an incidental TypeError
            raise ValueError(f"Entry for year {year} is not a dictionary")
        for cat in required_categories:
            if cat not in counts:
                logger.warning(f"Missing category {cat} in year {year}, setting to 0")
                counts[cat] = 0
            # Ensure values are integers
            try:
                counts[cat] = int(counts[cat])
            except (ValueError, TypeError, OverflowError):
                counts[cat] = 0

    return data


def extract_data_using_gemini(text):
    """Extract per-year event counts from document text using Gemini AI.

    Args:
        text: Plain text of the uploaded document.

    Returns:
        A dict mapping academic-year strings to
        ``{'Cultural', 'Sports', 'Technical', 'Social', 'Other'}`` integer
        counts, or ``None`` when the model is not configured or the reply
        cannot be obtained, parsed or validated (the error is logged).
    """
    if not model:
        logger.error("Gemini model not configured")
        return None

    prompt = """

    Extract the event counts from the following text. Look for data organized by academic years from 2018-2019 to 2022-2023.



    Find numbers for these categories:

    - Cultural competitions/events

    - Sports competitions/events  

    - Technical fest/Academic fest

    - Social activities/events

    - Any other events through Active clubs and forums



    Return ONLY a Python dictionary in this exact format:

    {

        '2022-2023': {'Cultural': X, 'Sports': Y, 'Technical': Z, 'Social': A, 'Other': B},

        '2021-2022': {'Cultural': X, 'Sports': Y, 'Technical': Z, 'Social': A, 'Other': B},

        '2020-2021': {'Cultural': X, 'Sports': Y, 'Technical': Z, 'Social': A, 'Other': B},

        '2019-2020': {'Cultural': X, 'Sports': Y, 'Technical': Z, 'Social': A, 'Other': B},

        '2018-2019': {'Cultural': X, 'Sports': Y, 'Technical': Z, 'Social': A, 'Other': B}

    }



    Replace X, Y, Z, A, B with the actual numbers from the text. If a number is not found, use 0.

    """

    try:
        # Debug: Look for patterns in text
        years = re.findall(r'(20\d{2}-20\d{2})', text)
        logger.info(f"Found years in text: {years}")

        # Generate response using Gemini
        response = model.generate_content(f"{text}\n\n{prompt}")
        response_text = response.text.strip()

        logger.info(f"Gemini response length: {len(response_text)}")

        data = _normalize_event_data(_parse_gemini_reply(response_text))

        logger.info(f"Successfully extracted data: {data}")
        return data

    except Exception as e:
        # Boundary handler: any API, parsing or validation failure means "no data"
        logger.error(f"Error processing with Gemini: {e}")
        return None


def get_graph_insights(data, plot_type):
    """Generate a headline insight and SWOT analysis for one plot type.

    Args:
        data: Dict mapping academic-year labels (e.g. ``'2018-2019'``) to
            dicts of category -> event count.
        plot_type: ``'bar'``, ``'pie'`` or ``'line'``. Any other value yields
            the empty insight structure.

    Returns:
        A dict with keys ``'main_insight'`` (str), ``'swot'`` (dict of four
        lists) and ``'recommendations'`` (list). Only ``'bar'`` fills the SWOT
        and recommendation lists. On any error a fallback structure with a
        generic message is returned and the error is logged.
    """
    try:
        # Rows are years, columns are categories
        df = pd.DataFrame(data).T

        insights = {
            'main_insight': "",
            'swot': {
                'strengths': [],
                'weaknesses': [],
                'opportunities': [],
                'threats': []
            },
            'recommendations': []
        }

        if plot_type == 'bar':
            total_by_category = df.sum()
            max_category = total_by_category.idxmax()
            min_category = total_by_category.idxmin()
            avg_events = total_by_category.mean()

            insights[
                'main_insight'] = f"The most active category is {max_category} with {int(total_by_category[max_category])} total events, while {min_category} has the least with {int(total_by_category[min_category])} events."

            insights['swot']['strengths'] = [
                f"Strong performance in {max_category} events ({int(total_by_category[max_category])} total)",
                f"Diverse event portfolio across {len(total_by_category)} categories",
                f"Average of {avg_events:.1f} events per category shows balanced approach"
            ]

            insights['swot']['weaknesses'] = [
                f"Underperformance in {min_category} category",
                "Significant gap between highest and lowest performing categories",
                "Potential resource allocation imbalances"
            ]

            insights['swot']['opportunities'] = [
                f"Growth potential in {min_category} category",
                "Cross-category collaboration possibilities",
                "Opportunity to standardize event quality"
            ]

            insights['swot']['threats'] = [
                "Over-reliance on dominant categories",
                "Resource competition between categories",
                "Sustainability challenges for high-volume categories"
            ]

            insights['recommendations'] = [
                f"Increase focus on {min_category} events",
                "Implement balanced resource allocation strategy",
                "Develop cross-category event initiatives"
            ]

        elif plot_type == 'pie':
            latest_year = '2022-2023'
            # A missing latest year raises KeyError, handled by the fallback below
            year_data = data[latest_year]
            total = sum(year_data.values())
            max_cat = max(year_data.items(), key=lambda x: x[1])

            if total > 0:
                percentage = (max_cat[1] / total) * 100
                insights[
                    'main_insight'] = f"In {latest_year}, {max_cat[0]} events dominated with {max_cat[1]} events ({percentage:.1f}% of total)."
            else:
                insights['main_insight'] = f"No events recorded for {latest_year}."

        elif plot_type == 'line':
            if len(df) > 1:
                # The dict may arrive newest-year-first, so order the rows
                # chronologically before comparing first and last. Labels like
                # '2018-2019' sort correctly as strings.
                ordered = df.reindex(sorted(df.index, key=str))
                first_avg = ordered.iloc[0].mean()
                last_avg = ordered.iloc[-1].mean()

                trend_direction = "increasing" if last_avg > first_avg else "decreasing"
                # Avoid dividing by zero when the earliest year has no events
                growth_rate = ((last_avg - first_avg) / first_avg * 100) if first_avg > 0 else 0
                insights[
                    'main_insight'] = f"Overall trend shows {trend_direction} pattern with {growth_rate:.1f}% change in average events."

        return insights

    except Exception as e:
        logger.error(f"Error generating insights: {e}")
        return {
            'main_insight': "Unable to generate insights for this visualization.",
            'swot': {'strengths': [], 'weaknesses': [], 'opportunities': [], 'threats': []},
            'recommendations': []
        }


def create_plots(data):
    """Create the Plotly charts, their insights and summary stats from the data.

    Args:
        data: Dict mapping academic-year labels to dicts of
            category -> event count.

    Returns:
        A dict with keys ``'bar'``, ``'line'`` and ``'area'`` (each
        ``{'plot': <html fragment>, 'insight': <insight dict>}``), ``'pie'``
        (same shape, only when ``'2022-2023'`` is present in ``data``) and
        ``'stats'`` (summary numbers). Returns ``None`` if anything fails;
        the error is logged.
    """
    plots = {}

    try:
        df = pd.DataFrame(data).T
        # The dict may arrive newest-year-first. Sort the rows so every chart's
        # year axis reads oldest to newest ('2018-2019' style labels sort
        # chronologically as strings).
        df = df.reindex(sorted(df.index, key=str))

        # Color scheme for consistency
        colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

        # Shared by the wide-form bar, line and area charts
        wide_labels = {'index': 'Year', 'value': 'Number of Events', 'variable': 'Category'}
        axis_layout = {
            'xaxis_title': "Academic Year",
            'yaxis_title': "Number of Events",
            'legend_title': "Event Category",
            'template': "plotly_white",
        }

        # 1. Bar Chart - Events by Category and Year
        fig1 = px.bar(
            df,
            barmode='group',
            title='Event Distribution Across Years and Categories',
            labels=wide_labels,
            color_discrete_sequence=colors
        )
        fig1.update_layout(**axis_layout)
        plots['bar'] = {
            'plot': pio.to_html(fig1, full_html=False, div_id="bar-chart"),
            'insight': get_graph_insights(data, 'bar')
        }

        # 2. Pie Chart - Latest Year Distribution
        latest_year = '2022-2023'
        if latest_year in data:
            fig2 = px.pie(
                names=list(data[latest_year].keys()),
                values=list(data[latest_year].values()),
                title=f'Event Distribution for {latest_year}',
                color_discrete_sequence=colors
            )
            fig2.update_traces(textposition='inside', textinfo='percent+label')
            plots['pie'] = {
                'plot': pio.to_html(fig2, full_html=False, div_id="pie-chart"),
                'insight': get_graph_insights(data, 'pie')
            }

        # 3. Line Chart - Trends Over Time
        fig3 = px.line(
            df,
            markers=True,
            title='Event Trends Over Years',
            labels=wide_labels,
            color_discrete_sequence=colors
        )
        fig3.update_layout(**axis_layout)
        plots['line'] = {
            'plot': pio.to_html(fig3, full_html=False, div_id="line-chart"),
            'insight': get_graph_insights(data, 'line')
        }

        # 4. Stacked Area Chart
        fig4 = px.area(
            df,
            title='Cumulative Event Distribution Over Years',
            labels=wide_labels,
            color_discrete_sequence=colors
        )
        fig4.update_layout(**axis_layout)
        plots['area'] = {
            'plot': pio.to_html(fig4, full_html=False, div_id="area-chart"),
            'insight': get_graph_insights(data, 'area')
        }

        # 5. Statistical Summary
        category_totals = df.sum()       # one total per category
        yearly_totals = df.sum(axis=1)   # one total per year

        plots['stats'] = {
            'total_events': int(category_totals.sum()),
            'avg_events_per_year': round(yearly_totals.mean(), 1),
            'most_active_year': yearly_totals.idxmax(),
            'most_common_category': category_totals.idxmax(),
            'category_totals': category_totals.to_dict(),
            'yearly_totals': yearly_totals.to_dict()
        }

        logger.info("Successfully created all plots")
        return plots

    except Exception as e:
        logger.error(f"Error creating plots: {e}")
        return None


def allowed_file(filename):
    """Return True when the text after the last dot is 'docx' (any case)."""
    _, dot, extension = filename.rpartition('.')
    # An empty separator means the name contains no dot at all
    return bool(dot) and extension.lower() in ('docx',)


@app.route('/', methods=['GET', 'POST'])
def index():
    """Main route: show the upload page and process an uploaded DOCX file.

    GET renders ``index.html`` with no plots. POST validates the upload,
    saves it temporarily, extracts its text, asks Gemini for event data and
    builds the plots. Validation failures (and an empty document) flash an
    error and redirect back to the request URL. Other processing errors are
    logged and flashed, and the page is rendered without plots. The
    temporary file is removed on every path once it may have been written.
    """
    plots = None

    if request.method == 'POST':
        # Check if file is uploaded
        if 'document' not in request.files:
            flash('No file uploaded. Please select a DOCX file.', 'error')
            return redirect(request.url)

        file = request.files['document']

        if file.filename == '':
            flash('No file selected. Please choose a DOCX file.', 'error')
            return redirect(request.url)

        if not allowed_file(file.filename):
            flash('Invalid file type. Please upload a DOCX file.', 'error')
            return redirect(request.url)

        file_path = None
        try:
            ensure_upload_folder()

            # Secure the filename
            filename = secure_filename(file.filename)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"{timestamp}_{filename}"
            file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

            # Save the file
            file.save(file_path)
            logger.info(f"File saved: {file_path}")

            # Extract text
            text = extract_text_from_docx(file_path)

            if not text.strip():
                flash('The uploaded document appears to be empty. Please check the file.', 'error')
                return redirect(request.url)

            # Extract data using Gemini
            data = extract_data_using_gemini(text)

            if data:
                # Create plots
                plots = create_plots(data)
                if plots:
                    flash('Document processed successfully! 🎉', 'success')
                else:
                    flash('Error creating visualizations. Please try again.', 'error')
            else:
                flash(
                    'Could not extract event data from the document. Please ensure the document contains event statistics in the expected format.',
                    'error')

        except Exception as e:
            logger.error(f"Error processing document: {e}")
            flash(f'Error processing document: {str(e)}', 'error')

        finally:
            # Clean up the uploaded file on every exit path, including the
            # early return for an empty document and any exception above, so
            # uploads do not accumulate on disk.
            if file_path is not None and os.path.exists(file_path):
                try:
                    os.remove(file_path)
                    logger.info(f"Cleaned up file: {file_path}")
                except OSError as e:
                    logger.warning(f"Could not remove file {file_path}: {e}")

    return render_template('index.html', plots=plots)


@app.errorhandler(413)
def too_large(e):
    """Flash a size-limit message and redirect back to the requested URL."""
    message = "File too large. Please upload a file smaller than 16MB."
    flash(message, 'error')
    return_to = request.url
    return redirect(return_to)


@app.errorhandler(404)
def not_found(e):
    """Render the 404 template and return it with a 404 status code."""
    page = render_template('404.html')
    return page, 404


@app.errorhandler(500)
def internal_error(e):
    """Log the server error, flash a generic message and redirect to the index page."""
    logger.error(f"Internal server error: {e}")
    flash('An internal error occurred. Please try again.', category='error')
    home_url = url_for('index')
    return redirect(home_url)


if __name__ == '__main__':
    print("🚀 Starting Event Analytics Application...")
    print("📊 Upload a DOCX file to analyze event data")
    print("🔗 Access the application at: http://localhost:5001")

    if not model:
        print("⚠️  Warning: Gemini API not configured. Please set GOOGLE_API_KEY environment variable.")

    # The server listens on all interfaces, and the Werkzeug debugger lets
    # anyone who can reach it run arbitrary code. Debug mode is therefore off
    # unless FLASK_DEBUG is explicitly set to a truthy value.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')

    app.run(debug=debug_enabled, port=5001, host='0.0.0.0')