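"""Event Analytics Flask application.

Accepts a DOCX upload, extracts per-year event counts with the Gemini API,
and renders interactive Plotly charts with insights and summary statistics.
"""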
from flask import Flask, render_template, request, flash, redirect, url_for

import matplotlib

# Select the non-interactive Agg backend before pyplot is imported (no display on the server).
matplotlib.use('Agg')

import matplotlib.pyplot as plt
import pandas as pd
import google.generativeai as genai
import os
import logging
from docx import Document
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from werkzeug.utils import secure_filename
import re
import ast
import json
from datetime import datetime

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
app.secret_key = 'your-secret-key-here'  # replace with a strong, random value in production
app.config['UPLOAD_FOLDER'] = 'uploads'
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # cap uploads at 16 MB

# Read the Gemini API key from the environment; never hard-code real credentials.
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY', 'your-api-key-here')
if GOOGLE_API_KEY and GOOGLE_API_KEY != 'your-api-key-here':
    try:
        genai.configure(api_key=GOOGLE_API_KEY)
        model = genai.GenerativeModel('gemini-2.0-flash-exp')
        logger.info("Gemini API configured successfully")
    except Exception as e:
        logger.error(f"Failed to configure Gemini API: {e}")
        model = None
else:
    logger.warning("Gemini API key not configured")
    model = None
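# Without a key the app still starts, but document analysis is disabled. For local
# development you would typically export the key before launching, for example:
#   export GOOGLE_API_KEY="<your key>"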


def ensure_upload_folder():
    """Create upload folder if it doesn't exist."""
    try:
        if not os.path.exists(app.config['UPLOAD_FOLDER']):
            os.makedirs(app.config['UPLOAD_FOLDER'])
            logger.info(f"Created upload folder: {app.config['UPLOAD_FOLDER']}")
    except Exception as e:
        logger.error(f"Failed to create upload folder: {e}")
        raise


def extract_text_from_docx(file_path):
    """Extract text from a DOCX file."""
    try:
        doc = Document(file_path)
        full_text = []
        for paragraph in doc.paragraphs:
            if paragraph.text.strip():
                full_text.append(paragraph.text)

        # Also capture text stored inside table cells.
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    if cell.text.strip():
                        full_text.append(cell.text)

        text = '\n'.join(full_text)
        logger.info(f"Extracted {len(text)} characters from document")
        return text
    except Exception as e:
        logger.error(f"Error extracting text from DOCX: {e}")
        raise


def extract_data_using_gemini(text):
    """Extract event data using Gemini AI."""
    if not model:
        logger.error("Gemini model not configured")
        return None

    prompt = """
Extract the event counts from the following text. Look for data organized by academic years from 2018-2019 to 2022-2023.

Find numbers for these categories:
- Cultural competitions/events
- Sports competitions/events
- Technical fest/Academic fest
- Social activities/events
- Any other events through Active clubs and forums

Return ONLY a Python dictionary in this exact format:
{
    '2022-2023': {'Cultural': X, 'Sports': Y, 'Technical': Z, 'Social': A, 'Other': B},
    '2021-2022': {'Cultural': X, 'Sports': Y, 'Technical': Z, 'Social': A, 'Other': B},
    '2020-2021': {'Cultural': X, 'Sports': Y, 'Technical': Z, 'Social': A, 'Other': B},
    '2019-2020': {'Cultural': X, 'Sports': Y, 'Technical': Z, 'Social': A, 'Other': B},
    '2018-2019': {'Cultural': X, 'Sports': Y, 'Technical': Z, 'Social': A, 'Other': B}
}

Replace X, Y, Z, A, B with the actual numbers from the text. If a number is not found, use 0.
"""

    try:
        # Log which academic years appear in the document (useful for debugging).
        years = re.findall(r'(20\d{2}-20\d{2})', text)
        logger.info(f"Found years in text: {years}")

        response = model.generate_content(f"{text}\n\n{prompt}")
        response_text = response.text.strip()

        logger.info(f"Gemini response length: {len(response_text)}")

        # If the model wrapped its answer in a Markdown code fence, unwrap it.
        if '```' in response_text:
            code_blocks = re.findall(r'```(?:python)?\s*(.*?)\s*```', response_text, re.DOTALL)
            if code_blocks:
                response_text = code_blocks[0].strip()

        # Drop any trailing comments the model may have added.
        response_text = re.sub(r'#.*$', '', response_text, flags=re.MULTILINE)
        response_text = response_text.strip()

        logger.info(f"Cleaned response: {response_text[:200]}...")

        # Parse as a Python literal first; fall back to JSON with double quotes.
        try:
            data = ast.literal_eval(response_text)
        except (ValueError, SyntaxError):
            response_text = response_text.replace("'", '"')
            data = json.loads(response_text)

        if not isinstance(data, dict):
            raise ValueError("Response is not a dictionary")

        # Fill in any years the model omitted.
        expected_years = ['2022-2023', '2021-2022', '2020-2021', '2019-2020', '2018-2019']
        for year in expected_years:
            if year not in data:
                logger.warning(f"Missing year {year}, adding with zeros")
                data[year] = {'Cultural': 0, 'Sports': 0, 'Technical': 0, 'Social': 0, 'Other': 0}

        # Ensure every category exists and holds an integer count.
        required_categories = ['Cultural', 'Sports', 'Technical', 'Social', 'Other']
        for year in data:
            for cat in required_categories:
                if cat not in data[year]:
                    logger.warning(f"Missing category {cat} in year {year}, setting to 0")
                    data[year][cat] = 0

                try:
                    data[year][cat] = int(data[year][cat])
                except (ValueError, TypeError):
                    data[year][cat] = 0

        logger.info(f"Successfully extracted data: {data}")
        return data

    except Exception as e:
        logger.error(f"Error processing with Gemini: {e}")
        return None
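# Illustrative shape of the dictionary returned above and consumed by the plotting
# helpers below (values are placeholders, not real data):
# {
#     '2022-2023': {'Cultural': 0, 'Sports': 0, 'Technical': 0, 'Social': 0, 'Other': 0},
#     ...
#     '2018-2019': {'Cultural': 0, 'Sports': 0, 'Technical': 0, 'Social': 0, 'Other': 0},
# }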


def get_graph_insights(data, plot_type):
    """Generate insights and SWOT analysis for different plot types."""
    try:
        # Sort years chronologically so positional indexing (iloc) runs oldest to newest.
        df = pd.DataFrame(data).T.sort_index()

        insights = {
            'main_insight': "",
            'swot': {
                'strengths': [],
                'weaknesses': [],
                'opportunities': [],
                'threats': []
            },
            'recommendations': []
        }

        if plot_type == 'bar':
            total_by_category = df.sum()
            max_category = total_by_category.idxmax()
            min_category = total_by_category.idxmin()
            avg_events = total_by_category.mean()

            insights['main_insight'] = (
                f"The most active category is {max_category} with "
                f"{int(total_by_category[max_category])} total events, while {min_category} "
                f"has the least with {int(total_by_category[min_category])} events."
            )

            insights['swot']['strengths'] = [
                f"Strong performance in {max_category} events ({int(total_by_category[max_category])} total)",
                f"Diverse event portfolio across {len(total_by_category)} categories",
                f"Average of {avg_events:.1f} events per category shows balanced approach"
            ]

            insights['swot']['weaknesses'] = [
                f"Underperformance in {min_category} category",
                "Significant gap between highest and lowest performing categories",
                "Potential resource allocation imbalances"
            ]

            insights['swot']['opportunities'] = [
                f"Growth potential in {min_category} category",
                "Cross-category collaboration possibilities",
                "Opportunity to standardize event quality"
            ]

            insights['swot']['threats'] = [
                "Over-reliance on dominant categories",
                "Resource competition between categories",
                "Sustainability challenges for high-volume categories"
            ]

            insights['recommendations'] = [
                f"Increase focus on {min_category} events",
                "Implement balanced resource allocation strategy",
                "Develop cross-category event initiatives"
            ]

        elif plot_type == 'pie':
            latest_year = '2022-2023'
            year_data = data[latest_year]
            total = sum(year_data.values())
            max_cat = max(year_data.items(), key=lambda x: x[1])
            min_cat = min(year_data.items(), key=lambda x: x[1])

            if total > 0:
                percentage = (max_cat[1] / total) * 100
                insights['main_insight'] = (
                    f"In {latest_year}, {max_cat[0]} events dominated with "
                    f"{max_cat[1]} events ({percentage:.1f}% of total)."
                )
            else:
                insights['main_insight'] = f"No events recorded for {latest_year}."

        elif plot_type == 'line':
            if len(df) > 1:
                trend_direction = "increasing" if df.iloc[-1].mean() > df.iloc[0].mean() else "decreasing"
                growth_rate = (
                    (df.iloc[-1].mean() - df.iloc[0].mean()) / df.iloc[0].mean() * 100
                    if df.iloc[0].mean() > 0 else 0
                )
                insights['main_insight'] = (
                    f"Overall trend shows {trend_direction} pattern with "
                    f"{growth_rate:.1f}% change in average events."
                )

        return insights

    except Exception as e:
        logger.error(f"Error generating insights: {e}")
        return {
            'main_insight': "Unable to generate insights for this visualization.",
            'swot': {'strengths': [], 'weaknesses': [], 'opportunities': [], 'threats': []},
            'recommendations': []
        }


def create_plots(data):
    """Create various plots and analyses from the data."""
    plots = {}

    try:
        # Sort years chronologically so the x-axis and trends read oldest to newest.
        df = pd.DataFrame(data).T.sort_index()

        colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

        # Grouped bar chart: events per category for each year.
        fig1 = px.bar(
            df,
            barmode='group',
            title='Event Distribution Across Years and Categories',
            labels={'index': 'Year', 'value': 'Number of Events', 'variable': 'Category'},
            color_discrete_sequence=colors
        )
        fig1.update_layout(
            xaxis_title="Academic Year",
            yaxis_title="Number of Events",
            legend_title="Event Category",
            template="plotly_white"
        )
        plots['bar'] = {
            'plot': pio.to_html(fig1, full_html=False, div_id="bar-chart"),
            'insight': get_graph_insights(data, 'bar')
        }

        # Pie chart: category share for the latest academic year.
        latest_year = '2022-2023'
        if latest_year in data:
            fig2 = px.pie(
                names=list(data[latest_year].keys()),
                values=list(data[latest_year].values()),
                title=f'Event Distribution for {latest_year}',
                color_discrete_sequence=colors
            )
            fig2.update_traces(textposition='inside', textinfo='percent+label')
            plots['pie'] = {
                'plot': pio.to_html(fig2, full_html=False, div_id="pie-chart"),
                'insight': get_graph_insights(data, 'pie')
            }

        # Line chart: per-category trends across years.
        fig3 = px.line(
            df,
            markers=True,
            title='Event Trends Over Years',
            labels={'index': 'Year', 'value': 'Number of Events', 'variable': 'Category'},
            color_discrete_sequence=colors
        )
        fig3.update_layout(
            xaxis_title="Academic Year",
            yaxis_title="Number of Events",
            legend_title="Event Category",
            template="plotly_white"
        )
        plots['line'] = {
            'plot': pio.to_html(fig3, full_html=False, div_id="line-chart"),
            'insight': get_graph_insights(data, 'line')
        }

        # Area chart: cumulative view of event volume over years.
        fig4 = px.area(
            df,
            title='Cumulative Event Distribution Over Years',
            labels={'index': 'Year', 'value': 'Number of Events', 'variable': 'Category'},
            color_discrete_sequence=colors
        )
        fig4.update_layout(
            xaxis_title="Academic Year",
            yaxis_title="Number of Events",
            legend_title="Event Category",
            template="plotly_white"
        )
        plots['area'] = {
            'plot': pio.to_html(fig4, full_html=False, div_id="area-chart"),
            'insight': get_graph_insights(data, 'area')
        }

        # Summary statistics across all years and categories.
        total_events = df.sum().sum()
        avg_events_per_year = df.sum(axis=1).mean()
        most_active_year = df.sum(axis=1).idxmax()
        most_common_category = df.sum().idxmax()

        stats = {
            'total_events': int(total_events),
            'avg_events_per_year': round(avg_events_per_year, 1),
            'most_active_year': most_active_year,
            'most_common_category': most_common_category,
            'category_totals': df.sum().to_dict(),
            'yearly_totals': df.sum(axis=1).to_dict()
        }

        plots['stats'] = stats

        logger.info("Successfully created all plots")
        return plots

    except Exception as e:
        logger.error(f"Error creating plots: {e}")
        return None


def allowed_file(filename):
    """Check if the uploaded file is allowed."""
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ['docx']


@app.route('/', methods=['GET', 'POST'])
def index():
    """Main route for the application."""
    plots = None

    if request.method == 'POST':
        # Validate the upload before doing any work.
        if 'document' not in request.files:
            flash('No file uploaded. Please select a DOCX file.', 'error')
            return redirect(request.url)

        file = request.files['document']

        if file.filename == '':
            flash('No file selected. Please choose a DOCX file.', 'error')
            return redirect(request.url)

        if not allowed_file(file.filename):
            flash('Invalid file type. Please upload a DOCX file.', 'error')
            return redirect(request.url)

        if file:
            try:
                ensure_upload_folder()

                # Save the upload under a sanitized, timestamped filename.
                filename = secure_filename(file.filename)
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                filename = f"{timestamp}_{filename}"
                file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

                file.save(file_path)
                logger.info(f"File saved: {file_path}")

                text = extract_text_from_docx(file_path)

                if not text.strip():
                    flash('The uploaded document appears to be empty. Please check the file.', 'error')
                    return redirect(request.url)

                data = extract_data_using_gemini(text)

                if data:
                    plots = create_plots(data)
                    if plots:
                        flash('Document processed successfully!', 'success')
                    else:
                        flash('Error creating visualizations. Please try again.', 'error')
                else:
                    flash(
                        'Could not extract event data from the document. Please ensure the document '
                        'contains event statistics in the expected format.',
                        'error'
                    )

                # Remove the uploaded file once processing is finished.
                try:
                    os.remove(file_path)
                    logger.info(f"Cleaned up file: {file_path}")
                except Exception as e:
                    logger.warning(f"Could not remove file {file_path}: {e}")

            except Exception as e:
                logger.error(f"Error processing document: {e}")
                flash(f'Error processing document: {str(e)}', 'error')

    return render_template('index.html', plots=plots)


@app.errorhandler(413)
def too_large(e):
    """Handle file too large error."""
    flash("File too large. Please upload a file smaller than 16MB.", 'error')
    return redirect(request.url)


@app.errorhandler(404)
def not_found(e):
    """Handle 404 errors."""
    return render_template('404.html'), 404


@app.errorhandler(500)
def internal_error(e):
    """Handle internal server errors."""
    logger.error(f"Internal server error: {e}")
    flash('An internal error occurred. Please try again.', 'error')
    return redirect(url_for('index'))


if __name__ == '__main__':
    print("Starting Event Analytics Application...")
    print("Upload a DOCX file to analyze event data")
    print("Access the application at: http://localhost:5001")

    if not model:
        print("Warning: Gemini API not configured. Please set GOOGLE_API_KEY environment variable.")

    app.run(debug=True, port=5001, host='0.0.0.0')