MaheshP98 committed on
Commit
1f542f4
·
verified ·
1 Parent(s): 0d752e6

Update models/anomaly.py

Browse files
Files changed (1) hide show
  1. models/anomaly.py +19 -9
models/anomaly.py CHANGED
@@ -1,24 +1,34 @@
1
- import pandas as pd
2
  from transformers import pipeline
 
3
  import logging
4
 
5
  logger = logging.getLogger(__name__)
6
 
7
  def detect_anomalies(df):
8
- """Detect anomalies in log data using a Hugging Face model."""
9
  logger.info("Detecting anomalies...")
10
  try:
11
- detector = pipeline(
 
 
 
 
12
  "text-classification",
13
  model="prajjwal1/bert-tiny",
14
  tokenizer="prajjwal1/bert-tiny",
15
- clean_up_tokenization_spaces=True
16
  )
17
- df["text"] = df["status"] + " Usage:" + df["usage_count"].astype(str)
18
- results = detector(df["text"].tolist())
19
- df["anomaly"] = [r["label"] for r in results]
20
- anomalies = df[df["anomaly"] == "POSITIVE"]
21
- logger.info(f"Detected {len(anomalies)} anomalies.")
 
 
 
 
 
 
22
  return anomalies
23
  except Exception as e:
24
  logger.error(f"Failed to detect anomalies: {e}")
 
 
1
  from transformers import pipeline
2
+ import pandas as pd
3
  import logging
4
 
5
  logger = logging.getLogger(__name__)
6
 
7
  def detect_anomalies(df):
8
+ """Detect anomalies in device logs using BERT-based text classification."""
9
  logger.info("Detecting anomalies...")
10
  try:
11
+ # Prepare text for anomaly detection
12
+ df['text'] = df.apply(lambda x: f"{x['status']} Usage:{x['usage_count']}", axis=1)
13
+
14
+ # Load BERT model for classification with explicit tokenizer parameter
15
+ classifier = pipeline(
16
  "text-classification",
17
  model="prajjwal1/bert-tiny",
18
  tokenizer="prajjwal1/bert-tiny",
19
+ clean_up_tokenization_spaces=False # Suppress the warning and avoid the error
20
  )
21
+
22
+ # Detect anomalies
23
+ results = classifier(df['text'].tolist())
24
+
25
+ # Add anomaly labels to dataframe
26
+ df['anomaly'] = [result['label'] for result in results]
27
+
28
+ # Filter for anomalies labeled as "POSITIVE"
29
+ anomalies = df[df['anomaly'] == "POSITIVE"]
30
+
31
+ logger.info(f"Detected {len(anomalies)} anomalies...")
32
  return anomalies
33
  except Exception as e:
34
  logger.error(f"Failed to detect anomalies: {e}")