Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ import os
|
|
7 |
import logging
|
8 |
from reportlab.lib.pagesizes import letter
|
9 |
from reportlab.pdfgen import canvas
|
|
|
10 |
import tempfile
|
11 |
|
12 |
# Configure logging to match the log format
|
@@ -31,46 +32,51 @@ def validate_csv(df):
|
|
31 |
|
32 |
def generate_summary(combined_df, anomaly_df, amc_df, plot_path, pdf_path):
|
33 |
"""
|
34 |
-
Generate a detailed summary of the processing results.
|
35 |
Returns a markdown string for display in the Gradio interface.
|
36 |
"""
|
37 |
-
summary = [
|
38 |
|
39 |
-
#
|
|
|
40 |
total_records = len(combined_df)
|
41 |
unique_devices = combined_df['equipment'].unique()
|
42 |
-
summary.append(f"
|
43 |
-
summary.append(
|
44 |
|
45 |
-
# Anomalies
|
|
|
46 |
if anomaly_df is not None:
|
47 |
num_anomalies = sum(anomaly_df['anomaly'] == -1)
|
48 |
-
summary.append(f"- **Anomalies Detected**: {num_anomalies}")
|
49 |
if num_anomalies > 0:
|
|
|
50 |
anomaly_records = anomaly_df[anomaly_df['anomaly'] == -1][['equipment', 'usage_count', 'status']]
|
51 |
-
summary.append(" **Anomalous Devices**:")
|
52 |
for _, row in anomaly_records.iterrows():
|
53 |
-
summary.append(f"
|
54 |
else:
|
55 |
-
summary.append("
|
56 |
else:
|
57 |
-
summary.append("
|
58 |
summary.append("\n")
|
59 |
|
60 |
-
# AMC Expiries
|
|
|
61 |
if amc_df is not None and not amc_df.empty:
|
62 |
unique_devices_amc = amc_df['equipment'].unique()
|
63 |
-
summary.append(f"
|
64 |
-
summary.append(" **Details**:")
|
65 |
for _, row in amc_df.iterrows():
|
66 |
-
|
|
|
|
|
|
|
67 |
else:
|
68 |
-
summary.append("
|
69 |
summary.append("\n")
|
70 |
|
71 |
-
#
|
72 |
-
summary.append("
|
73 |
-
summary.append("- **
|
|
|
74 |
|
75 |
return "\n".join(summary)
|
76 |
|
@@ -84,14 +90,14 @@ def process_files(uploaded_files):
|
|
84 |
|
85 |
if not uploaded_files:
|
86 |
logging.warning("No files uploaded.")
|
87 |
-
return None, None, None, "Please upload at least one valid CSV file.", "
|
88 |
|
89 |
valid_files = [f for f in uploaded_files if f.name.endswith('.csv')]
|
90 |
logging.info(f"Processing {len(valid_files)} valid files: {valid_files}")
|
91 |
|
92 |
if not valid_files:
|
93 |
logging.warning("No valid CSV files uploaded.")
|
94 |
-
return None, None, None, "Please upload at least one valid CSV file.", "
|
95 |
|
96 |
logging.info("Loading logs from uploaded files...")
|
97 |
all_data = []
|
@@ -105,15 +111,15 @@ def process_files(uploaded_files):
|
|
105 |
is_valid, error_msg = validate_csv(df)
|
106 |
if not is_valid:
|
107 |
logging.error(f"Failed to load {file.name}: {error_msg}")
|
108 |
-
return None, None, None, f"Error loading {file.name}: {error_msg}", f"
|
109 |
all_data.append(df)
|
110 |
except Exception as e:
|
111 |
logging.error(f"Failed to load {file.name}: {str(e)}")
|
112 |
-
return None, None, None, f"Error loading {file.name}: {str(e)}", f"
|
113 |
|
114 |
if not all_data:
|
115 |
logging.warning("No data loaded from uploaded files.")
|
116 |
-
return None, None, None, "No valid data found in uploaded files.", "
|
117 |
|
118 |
combined_df = pd.concat(all_data, ignore_index=True)
|
119 |
logging.info(f"Combined {len(combined_df)} total records.")
|
@@ -126,7 +132,7 @@ def process_files(uploaded_files):
|
|
126 |
logging.info("Usage plot generated successfully.")
|
127 |
else:
|
128 |
logging.error("Failed to generate usage plot.")
|
129 |
-
return combined_df, None, None, "Failed to generate usage plot.", "
|
130 |
|
131 |
# Detect anomalies using Local Outlier Factor
|
132 |
logging.info("Detecting anomalies using Local Outlier Factor...")
|
@@ -141,7 +147,12 @@ def process_files(uploaded_files):
|
|
141 |
amc_message, amc_df = process_amc_expiries(combined_df)
|
142 |
|
143 |
# Generate PDF report
|
|
|
144 |
pdf_path = generate_pdf_report(combined_df, anomaly_df, amc_df)
|
|
|
|
|
|
|
|
|
145 |
|
146 |
# Generate summary
|
147 |
logging.info("Generating summary of results...")
|
@@ -151,7 +162,7 @@ def process_files(uploaded_files):
|
|
151 |
# Prepare output dataframe (combine original data with anomalies)
|
152 |
output_df = combined_df.copy()
|
153 |
if anomaly_df is not None:
|
154 |
-
output_df['anomaly'] = anomaly_df['anomaly'].map({1: "Normal", -1: "
|
155 |
|
156 |
return output_df, plot_path, pdf_path, amc_message, summary
|
157 |
|
@@ -226,7 +237,7 @@ def process_amc_expiries(df):
|
|
226 |
|
227 |
def generate_pdf_report(original_df, anomaly_df, amc_df):
|
228 |
"""
|
229 |
-
Generate a PDF report with
|
230 |
Returns the path to the saved PDF.
|
231 |
"""
|
232 |
try:
|
@@ -236,63 +247,134 @@ def generate_pdf_report(original_df, anomaly_df, amc_df):
|
|
236 |
|
237 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
|
238 |
c = canvas.Canvas(tmp.name, pagesize=letter)
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
|
249 |
# Summary
|
250 |
-
|
251 |
-
|
252 |
-
c.drawString(
|
253 |
y -= 20
|
254 |
-
c.drawString(
|
255 |
y -= 40
|
256 |
|
257 |
-
#
|
258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
y -= 20
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
if anomaly_df is not None:
|
261 |
num_anomalies = sum(anomaly_df['anomaly'] == -1)
|
262 |
-
c.drawString(
|
263 |
y -= 20
|
264 |
if num_anomalies > 0:
|
265 |
anomaly_records = anomaly_df[anomaly_df['anomaly'] == -1][['equipment', 'usage_count', 'status']]
|
266 |
-
c.drawString(
|
267 |
y -= 20
|
|
|
268 |
for _, row in anomaly_records.iterrows():
|
269 |
-
c.drawString(
|
|
|
|
|
270 |
y -= 20
|
271 |
if y < 50:
|
272 |
c.showPage()
|
273 |
-
y =
|
274 |
-
|
|
|
275 |
else:
|
276 |
-
c.drawString(
|
277 |
y -= 20
|
278 |
y -= 20
|
279 |
|
280 |
# AMC Expiries
|
281 |
-
|
282 |
-
|
283 |
if amc_df is not None and not amc_df.empty:
|
284 |
-
c.drawString(
|
285 |
y -= 20
|
|
|
286 |
for _, row in amc_df.iterrows():
|
287 |
-
|
|
|
|
|
288 |
y -= 20
|
289 |
if y < 50:
|
290 |
c.showPage()
|
291 |
-
y =
|
292 |
-
|
|
|
|
|
|
|
|
|
293 |
else:
|
294 |
-
c.drawString(
|
295 |
y -= 20
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
|
297 |
c.showPage()
|
298 |
c.save()
|
@@ -308,13 +390,13 @@ with gr.Blocks() as demo:
|
|
308 |
file_input = gr.File(file_count="multiple", label="Upload CSV Files")
|
309 |
process_button = gr.Button("Process Files")
|
310 |
with gr.Row():
|
311 |
-
|
312 |
-
output_plot = gr.Image(label="Usage Plot")
|
313 |
with gr.Row():
|
314 |
-
|
315 |
-
|
316 |
with gr.Row():
|
317 |
-
|
|
|
318 |
|
319 |
process_button.click(
|
320 |
fn=process_files,
|
|
|
7 |
import logging
|
8 |
from reportlab.lib.pagesizes import letter
|
9 |
from reportlab.pdfgen import canvas
|
10 |
+
from reportlab.lib import colors
|
11 |
import tempfile
|
12 |
|
13 |
# Configure logging to match the log format
|
|
|
32 |
|
33 |
def generate_summary(combined_df, anomaly_df, amc_df, plot_path, pdf_path, reference_date=None):
    """
    Generate a detailed and easy-to-understand summary of the processing results.

    Args:
        combined_df: DataFrame of all loaded log records; the 'equipment'
            column is read here.
        anomaly_df: DataFrame with an 'anomaly' column (-1 marks an unusual
            record) plus 'equipment', 'usage_count' and 'status', or None if
            anomaly detection did not produce a result.
        amc_df: DataFrame of devices with upcoming AMC expiries ('equipment'
            and a datetime-like 'amc_expiry' column), or None / empty.
        plot_path: Unused by this function; kept for interface compatibility.
        pdf_path: Unused by this function; kept for interface compatibility.
        reference_date: Date from which "days left" is counted. Defaults to
            2025-06-05, the fixed date this summary has always reported.
            NOTE(review): presumably this should match the cutoff used when
            amc_df was built - confirm against process_amc_expiries.

    Returns a markdown string for display in the Gradio interface.
    """
    if reference_date is None:
        reference_date = datetime(2025, 6, 5)

    summary = []

    # Overview
    summary.append("## Overview")
    total_records = len(combined_df)
    unique_devices = combined_df['equipment'].unique()
    # Stringify each device id: str.join raises TypeError on non-string
    # values (e.g. numeric equipment ids).
    device_names = ', '.join(str(device) for device in unique_devices)
    summary.append(f"We processed **{total_records} log entries** for **{len(unique_devices)} devices** ({device_names}).")
    summary.append("This report helps you understand device usage, identify unusual activity, and plan maintenance.\n")

    # Unusual Activity (Anomalies)
    summary.append("## Unusual Activity")
    if anomaly_df is not None:
        # Build the mask once and reuse it for both the count and the rows.
        is_anomaly = anomaly_df['anomaly'] == -1
        num_anomalies = int(is_anomaly.sum())
        if num_anomalies > 0:
            summary.append(f"We found **{num_anomalies} unusual activities** that might need your attention:")
            anomaly_records = anomaly_df[is_anomaly][['equipment', 'usage_count', 'status']]
            for _, row in anomaly_records.iterrows():
                summary.append(f"- **{row['equipment']}** (Usage: {row['usage_count']}, Status: {row['status']}) - High or low usage compared to others might indicate overuse or underuse.")
        else:
            summary.append("No unusual activity detected. All devices are operating within expected usage patterns.")
    else:
        summary.append("We couldn’t check for unusual activity due to an error.")
    summary.append("\n")

    # Maintenance Alerts (AMC Expiries)
    summary.append("## Maintenance Alerts")
    if amc_df is not None and not amc_df.empty:
        unique_devices_amc = amc_df['equipment'].unique()
        summary.append(f"**{len(unique_devices_amc)} devices** need maintenance soon (within 7 days from {reference_date.strftime('%Y-%m-%d')}):")
        for _, row in amc_df.iterrows():
            days_until_expiry = (row['amc_expiry'] - reference_date).days
            # Three days or fewer is flagged as urgent.
            urgency = "Urgent" if days_until_expiry <= 3 else "Upcoming"
            summary.append(f"- **{row['equipment']}**: Due on {row['amc_expiry'].strftime('%Y-%m-%d')} ({urgency}, {days_until_expiry} days left)")
        summary.append("Please schedule maintenance to avoid downtime.")
    else:
        summary.append("No devices need maintenance within the next 7 days.")
    summary.append("\n")

    # Generated Reports
    summary.append("## Generated Reports")
    summary.append("- **Usage Chart**: A bar chart showing how much each device was used, grouped by status (e.g., Active, Inactive).")
    summary.append("- **PDF Report**: Download the detailed report below for a full analysis, including a table of all records and a flowchart of our process.")

    return "\n".join(summary)
|
82 |
|
|
|
90 |
|
91 |
if not uploaded_files:
|
92 |
logging.warning("No files uploaded.")
|
93 |
+
return None, None, None, "Please upload at least one valid CSV file.", "## Summary\nNo files uploaded."
|
94 |
|
95 |
valid_files = [f for f in uploaded_files if f.name.endswith('.csv')]
|
96 |
logging.info(f"Processing {len(valid_files)} valid files: {valid_files}")
|
97 |
|
98 |
if not valid_files:
|
99 |
logging.warning("No valid CSV files uploaded.")
|
100 |
+
return None, None, None, "Please upload at least one valid CSV file.", "## Summary\nNo valid CSV files uploaded."
|
101 |
|
102 |
logging.info("Loading logs from uploaded files...")
|
103 |
all_data = []
|
|
|
111 |
is_valid, error_msg = validate_csv(df)
|
112 |
if not is_valid:
|
113 |
logging.error(f"Failed to load {file.name}: {error_msg}")
|
114 |
+
return None, None, None, f"Error loading {file.name}: {error_msg}", f"## Summary\nError: {error_msg}"
|
115 |
all_data.append(df)
|
116 |
except Exception as e:
|
117 |
logging.error(f"Failed to load {file.name}: {str(e)}")
|
118 |
+
return None, None, None, f"Error loading {file.name}: {str(e)}", f"## Summary\nError: {str(e)}"
|
119 |
|
120 |
if not all_data:
|
121 |
logging.warning("No data loaded from uploaded files.")
|
122 |
+
return None, None, None, "No valid data found in uploaded files.", "## Summary\nNo data loaded."
|
123 |
|
124 |
combined_df = pd.concat(all_data, ignore_index=True)
|
125 |
logging.info(f"Combined {len(combined_df)} total records.")
|
|
|
132 |
logging.info("Usage plot generated successfully.")
|
133 |
else:
|
134 |
logging.error("Failed to generate usage plot.")
|
135 |
+
return combined_df, None, None, "Failed to generate usage plot.", "## Summary\nUsage plot generation failed."
|
136 |
|
137 |
# Detect anomalies using Local Outlier Factor
|
138 |
logging.info("Detecting anomalies using Local Outlier Factor...")
|
|
|
147 |
amc_message, amc_df = process_amc_expiries(combined_df)
|
148 |
|
149 |
# Generate PDF report
|
150 |
+
logging.info("Generating PDF report...")
|
151 |
pdf_path = generate_pdf_report(combined_df, anomaly_df, amc_df)
|
152 |
+
if pdf_path:
|
153 |
+
logging.info("PDF report generated successfully.")
|
154 |
+
else:
|
155 |
+
logging.error("Failed to generate PDF report.")
|
156 |
|
157 |
# Generate summary
|
158 |
logging.info("Generating summary of results...")
|
|
|
162 |
# Prepare output dataframe (combine original data with anomalies)
|
163 |
output_df = combined_df.copy()
|
164 |
if anomaly_df is not None:
|
165 |
+
output_df['anomaly'] = anomaly_df['anomaly'].map({1: "Normal", -1: "Unusual"})
|
166 |
|
167 |
return output_df, plot_path, pdf_path, amc_message, summary
|
168 |
|
|
|
237 |
|
238 |
def generate_pdf_report(original_df, anomaly_df, amc_df):
|
239 |
"""
|
240 |
+
Generate a professionally formatted PDF report with necessary fields and a flowchart.
|
241 |
Returns the path to the saved PDF.
|
242 |
"""
|
243 |
try:
|
|
|
247 |
|
248 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
|
249 |
c = canvas.Canvas(tmp.name, pagesize=letter)
|
250 |
+
width, height = letter
|
251 |
+
|
252 |
+
def draw_header():
|
253 |
+
c.setFont("Helvetica-Bold", 16)
|
254 |
+
c.setFillColor(colors.darkblue)
|
255 |
+
c.drawString(50, height - 50, "Equipment Log Analysis Report")
|
256 |
+
c.setFont("Helvetica", 10)
|
257 |
+
c.setFillColor(colors.black)
|
258 |
+
c.drawString(50, height - 70, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
259 |
+
c.line(50, height - 80, width - 50, height - 80)
|
260 |
+
|
261 |
+
def draw_section_title(title, y):
|
262 |
+
c.setFont("Helvetica-Bold", 14)
|
263 |
+
c.setFillColor(colors.darkblue)
|
264 |
+
c.drawString(50, y, title)
|
265 |
+
c.setFillColor(colors.black)
|
266 |
+
c.line(50, y - 5, width - 50, y - 5)
|
267 |
+
return y - 30
|
268 |
+
|
269 |
+
y = height - 100
|
270 |
+
draw_header()
|
271 |
|
272 |
# Summary
|
273 |
+
y = draw_section_title("Summary", y)
|
274 |
+
c.setFont("Helvetica", 12)
|
275 |
+
c.drawString(50, y, f"Total Records: {len(original_df)}")
|
276 |
y -= 20
|
277 |
+
c.drawString(50, y, f"Unique Devices: {', '.join(original_df['equipment'].unique())}")
|
278 |
y -= 40
|
279 |
|
280 |
+
# Data Table
|
281 |
+
y = draw_section_title("Device Log Details", y)
|
282 |
+
c.setFont("Helvetica-Bold", 10)
|
283 |
+
headers = ["Equipment", "Usage Count", "Status", "AMC Expiry", "Activity"]
|
284 |
+
x_positions = [50, 150, 250, 350, 450]
|
285 |
+
for i, header in enumerate(headers):
|
286 |
+
c.drawString(x_positions[i], y, header)
|
287 |
+
c.line(50, y - 5, width - 50, y - 5)
|
288 |
y -= 20
|
289 |
+
|
290 |
+
c.setFont("Helvetica", 10)
|
291 |
+
output_df = original_df.copy()
|
292 |
+
if anomaly_df is not None:
|
293 |
+
output_df['anomaly'] = anomaly_df['anomaly'].map({1: "Normal", -1: "Unusual"})
|
294 |
+
for _, row in output_df.iterrows():
|
295 |
+
c.drawString(50, y, str(row['equipment']))
|
296 |
+
c.drawString(150, y, str(row['usage_count']))
|
297 |
+
c.drawString(250, y, str(row['status']))
|
298 |
+
c.drawString(350, y, str(row['amc_expiry'].strftime('%Y-%m-%d')))
|
299 |
+
c.drawString(450, y, str(row['anomaly']))
|
300 |
+
y -= 20
|
301 |
+
if y < 50:
|
302 |
+
c.showPage()
|
303 |
+
y = height - 100
|
304 |
+
draw_header()
|
305 |
+
c.setFont("Helvetica", 10)
|
306 |
+
|
307 |
+
# Anomalies
|
308 |
+
y = draw_section_title("Unusual Activity (Using Local Outlier Factor)", y)
|
309 |
+
c.setFont("Helvetica", 12)
|
310 |
if anomaly_df is not None:
|
311 |
num_anomalies = sum(anomaly_df['anomaly'] == -1)
|
312 |
+
c.drawString(50, y, f"Unusual Activities Detected: {num_anomalies}")
|
313 |
y -= 20
|
314 |
if num_anomalies > 0:
|
315 |
anomaly_records = anomaly_df[anomaly_df['anomaly'] == -1][['equipment', 'usage_count', 'status']]
|
316 |
+
c.drawString(50, y, "Details:")
|
317 |
y -= 20
|
318 |
+
c.setFont("Helvetica-Oblique", 10)
|
319 |
for _, row in anomaly_records.iterrows():
|
320 |
+
c.drawString(50, y, f"{row['equipment']}: Usage Count = {row['usage_count']}, Status = {row['status']}")
|
321 |
+
y -= 20
|
322 |
+
c.drawString(70, y, "Note: This device’s usage is significantly higher or lower than others, which may indicate overuse or underuse.")
|
323 |
y -= 20
|
324 |
if y < 50:
|
325 |
c.showPage()
|
326 |
+
y = height - 100
|
327 |
+
draw_header()
|
328 |
+
c.setFont("Helvetica-Oblique", 10)
|
329 |
else:
|
330 |
+
c.drawString(50, y, "Unable to detect unusual activity due to an error.")
|
331 |
y -= 20
|
332 |
y -= 20
|
333 |
|
334 |
# AMC Expiries
|
335 |
+
y = draw_section_title("Maintenance Alerts (as of 2025-06-05)", y)
|
336 |
+
c.setFont("Helvetica", 12)
|
337 |
if amc_df is not None and not amc_df.empty:
|
338 |
+
c.drawString(50, y, f"Devices Needing Maintenance Soon: {len(amc_df['equipment'].unique())}")
|
339 |
y -= 20
|
340 |
+
c.setFont("Helvetica", 10)
|
341 |
for _, row in amc_df.iterrows():
|
342 |
+
days_until_expiry = (row['amc_expiry'] - datetime(2025, 6, 5)).days
|
343 |
+
urgency = "Urgent" if days_until_expiry <= 3 else "Upcoming"
|
344 |
+
c.drawString(50, y, f"{row['equipment']}: {row['amc_expiry'].strftime('%Y-%m-%d')} ({urgency}, {days_until_expiry} days left)")
|
345 |
y -= 20
|
346 |
if y < 50:
|
347 |
c.showPage()
|
348 |
+
y = height - 100
|
349 |
+
draw_header()
|
350 |
+
c.setFont("Helvetica", 10)
|
351 |
+
c.setFont("Helvetica-Oblique", 10)
|
352 |
+
c.drawString(50, y, "Recommendation: Schedule maintenance to prevent downtime.")
|
353 |
+
y -= 20
|
354 |
else:
|
355 |
+
c.drawString(50, y, "No devices need maintenance within the next 7 days.")
|
356 |
y -= 20
|
357 |
+
y -= 20
|
358 |
+
|
359 |
+
# Flowchart
|
360 |
+
y = draw_section_title("Processing Pipeline Flowchart", y)
|
361 |
+
c.setFont("Helvetica", 10)
|
362 |
+
flowchart = [
|
363 |
+
"1. Upload CSV File(s)",
|
364 |
+
"2. Validate Data (Check for required columns and data types)",
|
365 |
+
"3. Generate Usage Chart (Bar chart of usage by device and status)",
|
366 |
+
"4. Detect Unusual Activity (Using Local Outlier Factor)",
|
367 |
+
"5. Check Maintenance Dates (Identify AMC expiries within 7 days)",
|
368 |
+
"6. Create PDF Report (Detailed analysis with tables and insights)"
|
369 |
+
]
|
370 |
+
for step in flowchart:
|
371 |
+
c.drawString(50, y, step)
|
372 |
+
y -= 20
|
373 |
+
if y < 50:
|
374 |
+
c.showPage()
|
375 |
+
y = height - 100
|
376 |
+
draw_header()
|
377 |
+
c.setFont("Helvetica", 10)
|
378 |
|
379 |
c.showPage()
|
380 |
c.save()
|
|
|
390 |
file_input = gr.File(file_count="multiple", label="Upload CSV Files")
|
391 |
process_button = gr.Button("Process Files")
|
392 |
with gr.Row():
|
393 |
+
output_summary = gr.Markdown(label="Summary of Results")
|
|
|
394 |
with gr.Row():
|
395 |
+
output_df = gr.Dataframe(label="Processed Data")
|
396 |
+
output_plot = gr.Image(label="Usage Chart")
|
397 |
with gr.Row():
|
398 |
+
output_message = gr.Textbox(label="Maintenance Alerts")
|
399 |
+
output_pdf = gr.File(label="Download Detailed PDF Report")
|
400 |
|
401 |
process_button.click(
|
402 |
fn=process_files,
|