Spaces:

MaheshP98
/

Multi_Device_LabOps_Dashboard

Running

MaheshP98 committed 6 days ago

Commit

1f542f4

verified ·

1 Parent(s): 0d752e6

Update models/anomaly.py

Files changed (1) hide show

models/anomaly.py CHANGED Viewed

@@ -1,24 +1,34 @@
-import pandas as pd
 from transformers import pipeline
 import logging
 logger = logging.getLogger(__name__)
 def detect_anomalies(df):
-    """Detect anomalies in log data using a Hugging Face model."""
     logger.info("Detecting anomalies...")
     try:
-        detector = pipeline(
             "text-classification",
             model="prajjwal1/bert-tiny",
             tokenizer="prajjwal1/bert-tiny",
-            clean_up_tokenization_spaces=True
         )
-        df["text"] = df["status"] + " Usage:" + df["usage_count"].astype(str)
-        results = detector(df["text"].tolist())
-        df["anomaly"] = [r["label"] for r in results]
-        anomalies = df[df["anomaly"] == "POSITIVE"]
-        logger.info(f"Detected {len(anomalies)} anomalies.")
         return anomalies
     except Exception as e:
         logger.error(f"Failed to detect anomalies: {e}")

 from transformers import pipeline
+import pandas as pd
 import logging
 logger = logging.getLogger(__name__)
 def detect_anomalies(df):
+    """Detect anomalies in device logs using BERT-based text classification."""
     logger.info("Detecting anomalies...")
     try:
+        # Prepare text for anomaly detection
+        df['text'] = df.apply(lambda x: f"{x['status']} Usage:{x['usage_count']}", axis=1)
+        # Load BERT model for classification with explicit tokenizer parameter
+        classifier = pipeline(
             "text-classification",
             model="prajjwal1/bert-tiny",
             tokenizer="prajjwal1/bert-tiny",
+            clean_up_tokenization_spaces=False  # Suppress the warning and avoid the error
         )
+        # Detect anomalies
+        results = classifier(df['text'].tolist())
+        # Add anomaly labels to dataframe
+        df['anomaly'] = [result['label'] for result in results]
+        # Filter for anomalies labeled as "POSITIVE"
+        anomalies = df[df['anomaly'] == "POSITIVE"]
+        logger.info(f"Detected {len(anomalies)} anomalies...")
         return anomalies
     except Exception as e:
         logger.error(f"Failed to detect anomalies: {e}")