MaheshP98 committed on
Commit
c528749
·
verified ·
1 Parent(s): 83ce7a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +142 -60
app.py CHANGED
@@ -7,6 +7,7 @@ import os
7
  import logging
8
  from reportlab.lib.pagesizes import letter
9
  from reportlab.pdfgen import canvas
 
10
  import tempfile
11
 
12
  # Configure logging to match the log format
@@ -31,46 +32,51 @@ def validate_csv(df):
31
 
32
  def generate_summary(combined_df, anomaly_df, amc_df, plot_path, pdf_path):
33
  """
34
- Generate a detailed summary of the processing results.
35
  Returns a markdown string for display in the Gradio interface.
36
  """
37
- summary = ["## Processing Summary\n"]
38
 
39
- # Total records and devices
 
40
  total_records = len(combined_df)
41
  unique_devices = combined_df['equipment'].unique()
42
- summary.append(f"- **Total Records Processed**: {total_records}")
43
- summary.append(f"- **Unique Devices**: {len(unique_devices)} ({', '.join(unique_devices)})\n")
44
 
45
- # Anomalies
 
46
  if anomaly_df is not None:
47
  num_anomalies = sum(anomaly_df['anomaly'] == -1)
48
- summary.append(f"- **Anomalies Detected**: {num_anomalies}")
49
  if num_anomalies > 0:
 
50
  anomaly_records = anomaly_df[anomaly_df['anomaly'] == -1][['equipment', 'usage_count', 'status']]
51
- summary.append(" **Anomalous Devices**:")
52
  for _, row in anomaly_records.iterrows():
53
- summary.append(f" - {row['equipment']} (Usage: {row['usage_count']}, Status: {row['status']})")
54
  else:
55
- summary.append(" No anomalies detected.")
56
  else:
57
- summary.append("- **Anomalies Detected**: Failed to detect anomalies.")
58
  summary.append("\n")
59
 
60
- # AMC Expiries
 
61
  if amc_df is not None and not amc_df.empty:
62
  unique_devices_amc = amc_df['equipment'].unique()
63
- summary.append(f"- **Devices with Upcoming AMC Expiries (within 7 days)**: {len(unique_devices_amc)}")
64
- summary.append(" **Details**:")
65
  for _, row in amc_df.iterrows():
66
- summary.append(f" - {row['equipment']}: {row['amc_expiry'].strftime('%Y-%m-%d')}")
 
 
 
67
  else:
68
- summary.append("- **Devices with Upcoming AMC Expiries**: None")
69
  summary.append("\n")
70
 
71
- # Plot and PDF
72
- summary.append("- **Usage Plot**: " + ("Generated successfully." if plot_path else "Failed to generate."))
73
- summary.append("- **PDF Report**: " + ("Available for download." if pdf_path else "Not generated."))
 
74
 
75
  return "\n".join(summary)
76
 
@@ -84,14 +90,14 @@ def process_files(uploaded_files):
84
 
85
  if not uploaded_files:
86
  logging.warning("No files uploaded.")
87
- return None, None, None, "Please upload at least one valid CSV file.", "No files uploaded."
88
 
89
  valid_files = [f for f in uploaded_files if f.name.endswith('.csv')]
90
  logging.info(f"Processing {len(valid_files)} valid files: {valid_files}")
91
 
92
  if not valid_files:
93
  logging.warning("No valid CSV files uploaded.")
94
- return None, None, None, "Please upload at least one valid CSV file.", "No valid CSV files uploaded."
95
 
96
  logging.info("Loading logs from uploaded files...")
97
  all_data = []
@@ -105,15 +111,15 @@ def process_files(uploaded_files):
105
  is_valid, error_msg = validate_csv(df)
106
  if not is_valid:
107
  logging.error(f"Failed to load {file.name}: {error_msg}")
108
- return None, None, None, f"Error loading {file.name}: {error_msg}", f"Error: {error_msg}"
109
  all_data.append(df)
110
  except Exception as e:
111
  logging.error(f"Failed to load {file.name}: {str(e)}")
112
- return None, None, None, f"Error loading {file.name}: {str(e)}", f"Error: {str(e)}"
113
 
114
  if not all_data:
115
  logging.warning("No data loaded from uploaded files.")
116
- return None, None, None, "No valid data found in uploaded files.", "No data loaded."
117
 
118
  combined_df = pd.concat(all_data, ignore_index=True)
119
  logging.info(f"Combined {len(combined_df)} total records.")
@@ -126,7 +132,7 @@ def process_files(uploaded_files):
126
  logging.info("Usage plot generated successfully.")
127
  else:
128
  logging.error("Failed to generate usage plot.")
129
- return combined_df, None, None, "Failed to generate usage plot.", "Usage plot generation failed."
130
 
131
  # Detect anomalies using Local Outlier Factor
132
  logging.info("Detecting anomalies using Local Outlier Factor...")
@@ -141,7 +147,12 @@ def process_files(uploaded_files):
141
  amc_message, amc_df = process_amc_expiries(combined_df)
142
 
143
  # Generate PDF report
 
144
  pdf_path = generate_pdf_report(combined_df, anomaly_df, amc_df)
 
 
 
 
145
 
146
  # Generate summary
147
  logging.info("Generating summary of results...")
@@ -151,7 +162,7 @@ def process_files(uploaded_files):
151
  # Prepare output dataframe (combine original data with anomalies)
152
  output_df = combined_df.copy()
153
  if anomaly_df is not None:
154
- output_df['anomaly'] = anomaly_df['anomaly'].map({1: "Normal", -1: "Anomaly"})
155
 
156
  return output_df, plot_path, pdf_path, amc_message, summary
157
 
@@ -226,7 +237,7 @@ def process_amc_expiries(df):
226
 
227
  def generate_pdf_report(original_df, anomaly_df, amc_df):
228
  """
229
- Generate a PDF report with data summary, anomalies, and AMC expiries.
230
  Returns the path to the saved PDF.
231
  """
232
  try:
@@ -236,63 +247,134 @@ def generate_pdf_report(original_df, anomaly_df, amc_df):
236
 
237
  with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
238
  c = canvas.Canvas(tmp.name, pagesize=letter)
239
- c.setFont("Helvetica-Bold", 16)
240
- c.drawString(100, 750, "Equipment Log Analysis Report")
241
- c.setFont("Helvetica", 12)
242
- y = 720
243
-
244
- # Report generated timestamp
245
- current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
246
- c.drawString(100, y, f"Generated on: {current_time}")
247
- y -= 30
 
 
 
 
 
 
 
 
 
 
 
 
248
 
249
  # Summary
250
- c.drawString(100, y, "Summary")
251
- y -= 20
252
- c.drawString(100, y, f"Total Records: {len(original_df)}")
253
  y -= 20
254
- c.drawString(100, y, f"Unique Devices: {', '.join(original_df['equipment'].unique())}")
255
  y -= 40
256
 
257
- # Anomalies
258
- c.drawString(100, y, "Anomaly Detection Results (Using Local Outlier Factor)")
 
 
 
 
 
 
259
  y -= 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  if anomaly_df is not None:
261
  num_anomalies = sum(anomaly_df['anomaly'] == -1)
262
- c.drawString(100, y, f"Anomalies Detected: {num_anomalies}")
263
  y -= 20
264
  if num_anomalies > 0:
265
  anomaly_records = anomaly_df[anomaly_df['anomaly'] == -1][['equipment', 'usage_count', 'status']]
266
- c.drawString(100, y, "Anomalous Records:")
267
  y -= 20
 
268
  for _, row in anomaly_records.iterrows():
269
- c.drawString(100, y, f"{row['equipment']}: Usage Count = {row['usage_count']}, Status = {row['status']}")
 
 
270
  y -= 20
271
  if y < 50:
272
  c.showPage()
273
- y = 750
274
- c.setFont("Helvetica", 12)
 
275
  else:
276
- c.drawString(100, y, "Anomaly detection failed.")
277
  y -= 20
278
  y -= 20
279
 
280
  # AMC Expiries
281
- c.drawString(100, y, "AMC Expiries Within 7 Days (as of 2025-06-05)")
282
- y -= 20
283
  if amc_df is not None and not amc_df.empty:
284
- c.drawString(100, y, f"Devices with Upcoming AMC Expiries: {len(amc_df['equipment'].unique())}")
285
  y -= 20
 
286
  for _, row in amc_df.iterrows():
287
- c.drawString(100, y, f"{row['equipment']}: {row['amc_expiry'].strftime('%Y-%m-%d')}")
 
 
288
  y -= 20
289
  if y < 50:
290
  c.showPage()
291
- y = 750
292
- c.setFont("Helvetica", 12)
 
 
 
 
293
  else:
294
- c.drawString(100, y, "No AMC expiry data available.")
295
  y -= 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
  c.showPage()
298
  c.save()
@@ -308,13 +390,13 @@ with gr.Blocks() as demo:
308
  file_input = gr.File(file_count="multiple", label="Upload CSV Files")
309
  process_button = gr.Button("Process Files")
310
  with gr.Row():
311
- output_df = gr.Dataframe(label="Processed Data")
312
- output_plot = gr.Image(label="Usage Plot")
313
  with gr.Row():
314
- output_message = gr.Textbox(label="AMC Expiry Status")
315
- output_pdf = gr.File(label="Download PDF Report")
316
  with gr.Row():
317
- output_summary = gr.Markdown(label="Summary of Results")
 
318
 
319
  process_button.click(
320
  fn=process_files,
 
7
  import logging
8
  from reportlab.lib.pagesizes import letter
9
  from reportlab.pdfgen import canvas
10
+ from reportlab.lib import colors
11
  import tempfile
12
 
13
  # Configure logging to match the log format
 
32
 
33
def generate_summary(combined_df, anomaly_df, amc_df, plot_path, pdf_path,
                     reference_date=None):
    """
    Generate a detailed and easy-to-understand summary of the processing results.

    Args:
        combined_df: DataFrame of all loaded log rows; the 'equipment'
            column is read here.
        anomaly_df: DataFrame with an 'anomaly' column (-1 marks an unusual
            row) plus 'equipment', 'usage_count' and 'status', or None when
            anomaly detection failed.
        amc_df: DataFrame of rows with upcoming AMC expiries ('equipment'
            and a datetime-like 'amc_expiry'), or None / empty when there
            are none.
        plot_path: Path of the usage chart, or a falsy value if it was not
            generated.
        pdf_path: Path of the PDF report, or a falsy value if it was not
            generated.
        reference_date: The "as of" datetime used to compute days left
            until AMC expiry. Defaults to 2025-06-05, the date this
            function previously had hard-coded.

    Returns a markdown string for display in the Gradio interface.
    """
    if reference_date is None:
        # NOTE(review): fixed "as of" date kept for backward compatibility;
        # presumably it must match the date used to build amc_df - confirm.
        reference_date = datetime(2025, 6, 5)
    reference_label = reference_date.strftime('%Y-%m-%d')

    summary = []

    # Overview
    total_records = len(combined_df)
    # str() each label so numeric or otherwise non-string IDs do not break join().
    device_names = [str(name) for name in combined_df['equipment'].unique()]
    summary.append("## Overview")
    summary.append(
        f"We processed **{total_records} log entries** for "
        f"**{len(device_names)} devices** ({', '.join(device_names)})."
    )
    summary.append("This report helps you understand device usage, identify unusual activity, and plan maintenance.")
    summary.append("")

    # Unusual Activity (Anomalies)
    summary.append("## Unusual Activity")
    if anomaly_df is None:
        summary.append("We couldn’t check for unusual activity due to an error.")
    else:
        is_unusual = anomaly_df['anomaly'] == -1
        num_anomalies = int(is_unusual.sum())
        if num_anomalies > 0:
            summary.append(f"We found **{num_anomalies} unusual activities** that might need your attention:")
            summary.append("")
            flagged = anomaly_df[is_unusual]
            for equipment, usage, status in zip(
                flagged['equipment'], flagged['usage_count'], flagged['status']
            ):
                summary.append(
                    f"- **{equipment}** (Usage: {usage}, Status: {status}) - "
                    "High or low usage compared to others might indicate overuse or underuse."
                )
        else:
            summary.append("No unusual activity detected. All devices are operating within expected usage patterns.")
    summary.append("")

    # Maintenance Alerts (AMC Expiries)
    summary.append("## Maintenance Alerts")
    if amc_df is not None and not amc_df.empty:
        device_count = len(amc_df['equipment'].unique())
        summary.append(f"**{device_count} devices** need maintenance soon (within 7 days from {reference_label}):")
        summary.append("")
        for equipment, expiry in zip(amc_df['equipment'], amc_df['amc_expiry']):
            days_until_expiry = (expiry - reference_date).days
            urgency = "Urgent" if days_until_expiry <= 3 else "Upcoming"
            summary.append(
                f"- **{equipment}**: Due on {expiry.strftime('%Y-%m-%d')} "
                f"({urgency}, {days_until_expiry} days left)"
            )
        # Blank line ends the list; otherwise Markdown folds the next
        # sentence into the last bullet as a lazy continuation line.
        summary.append("")
        summary.append("Please schedule maintenance to avoid downtime.")
    else:
        summary.append("No devices need maintenance within the next 7 days.")
    summary.append("")

    # Generated Reports: only advertise artifacts that were actually produced.
    summary.append("## Generated Reports")
    if plot_path:
        summary.append("- **Usage Chart**: A bar chart showing how much each device was used, grouped by status (e.g., Active, Inactive).")
    else:
        summary.append("- **Usage Chart**: Could not be generated.")
    if pdf_path:
        summary.append("- **PDF Report**: Download the detailed report below for a full analysis, including a table of all records and a flowchart of our process.")
    else:
        summary.append("- **PDF Report**: Could not be generated.")

    return "\n".join(summary)
82
 
 
90
 
91
  if not uploaded_files:
92
  logging.warning("No files uploaded.")
93
+ return None, None, None, "Please upload at least one valid CSV file.", "## Summary\nNo files uploaded."
94
 
95
  valid_files = [f for f in uploaded_files if f.name.endswith('.csv')]
96
  logging.info(f"Processing {len(valid_files)} valid files: {valid_files}")
97
 
98
  if not valid_files:
99
  logging.warning("No valid CSV files uploaded.")
100
+ return None, None, None, "Please upload at least one valid CSV file.", "## Summary\nNo valid CSV files uploaded."
101
 
102
  logging.info("Loading logs from uploaded files...")
103
  all_data = []
 
111
  is_valid, error_msg = validate_csv(df)
112
  if not is_valid:
113
  logging.error(f"Failed to load {file.name}: {error_msg}")
114
+ return None, None, None, f"Error loading {file.name}: {error_msg}", f"## Summary\nError: {error_msg}"
115
  all_data.append(df)
116
  except Exception as e:
117
  logging.error(f"Failed to load {file.name}: {str(e)}")
118
+ return None, None, None, f"Error loading {file.name}: {str(e)}", f"## Summary\nError: {str(e)}"
119
 
120
  if not all_data:
121
  logging.warning("No data loaded from uploaded files.")
122
+ return None, None, None, "No valid data found in uploaded files.", "## Summary\nNo data loaded."
123
 
124
  combined_df = pd.concat(all_data, ignore_index=True)
125
  logging.info(f"Combined {len(combined_df)} total records.")
 
132
  logging.info("Usage plot generated successfully.")
133
  else:
134
  logging.error("Failed to generate usage plot.")
135
+ return combined_df, None, None, "Failed to generate usage plot.", "## Summary\nUsage plot generation failed."
136
 
137
  # Detect anomalies using Local Outlier Factor
138
  logging.info("Detecting anomalies using Local Outlier Factor...")
 
147
  amc_message, amc_df = process_amc_expiries(combined_df)
148
 
149
  # Generate PDF report
150
+ logging.info("Generating PDF report...")
151
  pdf_path = generate_pdf_report(combined_df, anomaly_df, amc_df)
152
+ if pdf_path:
153
+ logging.info("PDF report generated successfully.")
154
+ else:
155
+ logging.error("Failed to generate PDF report.")
156
 
157
  # Generate summary
158
  logging.info("Generating summary of results...")
 
162
  # Prepare output dataframe (combine original data with anomalies)
163
  output_df = combined_df.copy()
164
  if anomaly_df is not None:
165
+ output_df['anomaly'] = anomaly_df['anomaly'].map({1: "Normal", -1: "Unusual"})
166
 
167
  return output_df, plot_path, pdf_path, amc_message, summary
168
 
 
237
 
238
  def generate_pdf_report(original_df, anomaly_df, amc_df):
239
  """
240
+ Generate a professionally formatted PDF report with necessary fields and a flowchart.
241
  Returns the path to the saved PDF.
242
  """
243
  try:
 
247
 
248
  with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp:
249
  c = canvas.Canvas(tmp.name, pagesize=letter)
250
+ width, height = letter
251
+
252
+ def draw_header():
253
+ c.setFont("Helvetica-Bold", 16)
254
+ c.setFillColor(colors.darkblue)
255
+ c.drawString(50, height - 50, "Equipment Log Analysis Report")
256
+ c.setFont("Helvetica", 10)
257
+ c.setFillColor(colors.black)
258
+ c.drawString(50, height - 70, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
259
+ c.line(50, height - 80, width - 50, height - 80)
260
+
261
+ def draw_section_title(title, y):
262
+ c.setFont("Helvetica-Bold", 14)
263
+ c.setFillColor(colors.darkblue)
264
+ c.drawString(50, y, title)
265
+ c.setFillColor(colors.black)
266
+ c.line(50, y - 5, width - 50, y - 5)
267
+ return y - 30
268
+
269
+ y = height - 100
270
+ draw_header()
271
 
272
  # Summary
273
+ y = draw_section_title("Summary", y)
274
+ c.setFont("Helvetica", 12)
275
+ c.drawString(50, y, f"Total Records: {len(original_df)}")
276
  y -= 20
277
+ c.drawString(50, y, f"Unique Devices: {', '.join(original_df['equipment'].unique())}")
278
  y -= 40
279
 
280
+ # Data Table
281
+ y = draw_section_title("Device Log Details", y)
282
+ c.setFont("Helvetica-Bold", 10)
283
+ headers = ["Equipment", "Usage Count", "Status", "AMC Expiry", "Activity"]
284
+ x_positions = [50, 150, 250, 350, 450]
285
+ for i, header in enumerate(headers):
286
+ c.drawString(x_positions[i], y, header)
287
+ c.line(50, y - 5, width - 50, y - 5)
288
  y -= 20
289
+
290
+ c.setFont("Helvetica", 10)
291
+ output_df = original_df.copy()
292
+ if anomaly_df is not None:
293
+ output_df['anomaly'] = anomaly_df['anomaly'].map({1: "Normal", -1: "Unusual"})
294
+ for _, row in output_df.iterrows():
295
+ c.drawString(50, y, str(row['equipment']))
296
+ c.drawString(150, y, str(row['usage_count']))
297
+ c.drawString(250, y, str(row['status']))
298
+ c.drawString(350, y, str(row['amc_expiry'].strftime('%Y-%m-%d')))
299
+ c.drawString(450, y, str(row['anomaly']))
300
+ y -= 20
301
+ if y < 50:
302
+ c.showPage()
303
+ y = height - 100
304
+ draw_header()
305
+ c.setFont("Helvetica", 10)
306
+
307
+ # Anomalies
308
+ y = draw_section_title("Unusual Activity (Using Local Outlier Factor)", y)
309
+ c.setFont("Helvetica", 12)
310
  if anomaly_df is not None:
311
  num_anomalies = sum(anomaly_df['anomaly'] == -1)
312
+ c.drawString(50, y, f"Unusual Activities Detected: {num_anomalies}")
313
  y -= 20
314
  if num_anomalies > 0:
315
  anomaly_records = anomaly_df[anomaly_df['anomaly'] == -1][['equipment', 'usage_count', 'status']]
316
+ c.drawString(50, y, "Details:")
317
  y -= 20
318
+ c.setFont("Helvetica-Oblique", 10)
319
  for _, row in anomaly_records.iterrows():
320
+ c.drawString(50, y, f"{row['equipment']}: Usage Count = {row['usage_count']}, Status = {row['status']}")
321
+ y -= 20
322
+ c.drawString(70, y, "Note: This device’s usage is significantly higher or lower than others, which may indicate overuse or underuse.")
323
  y -= 20
324
  if y < 50:
325
  c.showPage()
326
+ y = height - 100
327
+ draw_header()
328
+ c.setFont("Helvetica-Oblique", 10)
329
  else:
330
+ c.drawString(50, y, "Unable to detect unusual activity due to an error.")
331
  y -= 20
332
  y -= 20
333
 
334
  # AMC Expiries
335
+ y = draw_section_title("Maintenance Alerts (as of 2025-06-05)", y)
336
+ c.setFont("Helvetica", 12)
337
  if amc_df is not None and not amc_df.empty:
338
+ c.drawString(50, y, f"Devices Needing Maintenance Soon: {len(amc_df['equipment'].unique())}")
339
  y -= 20
340
+ c.setFont("Helvetica", 10)
341
  for _, row in amc_df.iterrows():
342
+ days_until_expiry = (row['amc_expiry'] - datetime(2025, 6, 5)).days
343
+ urgency = "Urgent" if days_until_expiry <= 3 else "Upcoming"
344
+ c.drawString(50, y, f"{row['equipment']}: {row['amc_expiry'].strftime('%Y-%m-%d')} ({urgency}, {days_until_expiry} days left)")
345
  y -= 20
346
  if y < 50:
347
  c.showPage()
348
+ y = height - 100
349
+ draw_header()
350
+ c.setFont("Helvetica", 10)
351
+ c.setFont("Helvetica-Oblique", 10)
352
+ c.drawString(50, y, "Recommendation: Schedule maintenance to prevent downtime.")
353
+ y -= 20
354
  else:
355
+ c.drawString(50, y, "No devices need maintenance within the next 7 days.")
356
  y -= 20
357
+ y -= 20
358
+
359
+ # Flowchart
360
+ y = draw_section_title("Processing Pipeline Flowchart", y)
361
+ c.setFont("Helvetica", 10)
362
+ flowchart = [
363
+ "1. Upload CSV File(s)",
364
+ "2. Validate Data (Check for required columns and data types)",
365
+ "3. Generate Usage Chart (Bar chart of usage by device and status)",
366
+ "4. Detect Unusual Activity (Using Local Outlier Factor)",
367
+ "5. Check Maintenance Dates (Identify AMC expiries within 7 days)",
368
+ "6. Create PDF Report (Detailed analysis with tables and insights)"
369
+ ]
370
+ for step in flowchart:
371
+ c.drawString(50, y, step)
372
+ y -= 20
373
+ if y < 50:
374
+ c.showPage()
375
+ y = height - 100
376
+ draw_header()
377
+ c.setFont("Helvetica", 10)
378
 
379
  c.showPage()
380
  c.save()
 
390
  file_input = gr.File(file_count="multiple", label="Upload CSV Files")
391
  process_button = gr.Button("Process Files")
392
  with gr.Row():
393
+ output_summary = gr.Markdown(label="Summary of Results")
 
394
  with gr.Row():
395
+ output_df = gr.Dataframe(label="Processed Data")
396
+ output_plot = gr.Image(label="Usage Chart")
397
  with gr.Row():
398
+ output_message = gr.Textbox(label="Maintenance Alerts")
399
+ output_pdf = gr.File(label="Download Detailed PDF Report")
400
 
401
  process_button.click(
402
  fn=process_files,