kokluch committed on
Commit
6129880
·
1 Parent(s): c5e53a5

Look for commercial STOP to lower score.

Browse files
Files changed (1) hide show
  1. app.py +50 -19
app.py CHANGED
@@ -8,6 +8,7 @@ from phishing_datasets import submit_entry
8
  from url_tools import extract_urls, resolve_short_url, extract_domain_from_url
9
  from urlscan_client import UrlscanClient
10
  import requests
 
11
 
12
  app = FastAPI()
13
  urlscan = UrlscanClient()
@@ -68,14 +69,39 @@ def get_robots_txt():
68
 
69
  @app.post("/predict")
70
  def predict(model: InputModel) -> OutputModel:
 
71
  text = model.query.message.text
72
 
73
- print(f"Predict: {text}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  urls = extract_urls(text)
76
 
77
  if urls:
78
- print("Searching for past scans")
 
79
  search_results = [urlscan.search(f"domain:{extract_domain_from_url(url)}") for url in urls]
80
 
81
  scan_results = []
@@ -87,32 +113,37 @@ def predict(model: InputModel) -> OutputModel:
87
  scan_results.append(scan_result)
88
 
89
  if not scan_results:
90
- print("Scanning...")
91
  scan_results = [urlscan.scan(url) for url in urls]
92
 
93
  for result in scan_results:
94
  overall = result.get('verdicts', {}).get('overall', {})
95
- print(f"Checking overall verdict: {overall}")
96
  if overall.get('hasVerdicts'):
97
  score = overall.get('score')
98
- print(f"Has verdicts score {score}")
99
 
100
  if 0 < overall.get('score'):
101
- print("Submitting entry and returning JUNK.")
102
- submit_entry(model.query.sender, model.query.message.text)
103
- return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
104
- # elif overall.get('score') < 0:
105
- # print("Returning ALLOW.")
106
- # return OutputModel(action=ActionModel.ALLOW, sub_action=SubActionModel.NONE)
107
-
108
- label = pipe(text)
109
- if label[0]['label'] == 'LABEL_1':
110
- print("Classify LABEL_1. Submitting entry and returning JUNK.")
111
- submit_entry(model.query.sender, model.query.message.text)
112
- return OutputModel(action=ActionModel.JUNK, sub_action=SubActionModel.NONE)
113
  else:
114
- print("Classify LABEL_0. Submitting entry and returning NONE.")
115
- return OutputModel(action=ActionModel.NONE, sub_action=SubActionModel.NONE)
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  class ReportModel(BaseModel):
118
  sender: str
 
8
  from url_tools import extract_urls, resolve_short_url, extract_domain_from_url
9
  from urlscan_client import UrlscanClient
10
  import requests
11
+ import re
12
 
13
  app = FastAPI()
14
  urlscan = UrlscanClient()
 
69
 
70
  @app.post("/predict")
71
  def predict(model: InputModel) -> OutputModel:
72
+ sender = model.query.sender
73
  text = model.query.message.text
74
 
75
+ print(f"[{sender}] {text}")
76
+
77
+ result = pipe(text)
78
+ label = result[0]['label']
79
+ score = result[0]['score']
80
+
81
+ print(f"classification {label} score {score}")
82
+
83
+ if label == 'LABEL_0':
84
+ score = 1 - score
85
+
86
+ commercial_sender_pattern = r'\b[2-8]\d{4}\b'
87
+ commercial_stop_pattern = r'\bSTOP(?:\s+SMS)?(?:\s+au)?\s+([2-8]\d{4})\b'
88
+ commercial_stop = False
89
+
90
+ if re.search(commercial_sender_pattern, sender):
91
+ print("commercial sender")
92
+ score = score - 0.1
93
+ if re.search(commercial_stop_pattern, text):
94
+ print("STOP founded")
95
+ score = score - 0.2
96
+ commercial_stop = True
97
+ else:
98
+ print("STOP missing")
99
 
100
  urls = extract_urls(text)
101
 
102
  if urls:
103
+ print(f"found URLs: {urls}")
104
+ print("searching for past scans")
105
  search_results = [urlscan.search(f"domain:{extract_domain_from_url(url)}") for url in urls]
106
 
107
  scan_results = []
 
113
  scan_results.append(scan_result)
114
 
115
  if not scan_results:
116
+ print("scanning...")
117
  scan_results = [urlscan.scan(url) for url in urls]
118
 
119
  for result in scan_results:
120
  overall = result.get('verdicts', {}).get('overall', {})
121
+ print(f"overall verdict: {overall}")
122
  if overall.get('hasVerdicts'):
123
  score = overall.get('score')
124
+ print(f"verdict score {score}")
125
 
126
  if 0 < overall.get('score'):
127
+ score = 1.0
128
+ break
129
+ elif overall.get('score') < 0:
130
+ score = score - 0.1
 
 
 
 
 
 
 
 
131
  else:
132
+ print(f"no URL found")
133
+ score = score - 0.1
134
+
135
+ print(f"final score {score}")
136
+ action = ActionModel.NONE
137
+ if score > 0.7:
138
+ action=ActionModel.JUNK
139
+ elif score > 0.5:
140
+ if commercial_stop:
141
+ action=ActionModel.PROMOTION
142
+ else:
143
+ action=ActionModel.JUNK
144
+
145
+ print(f"final action {action}")
146
+ return OutputModel(action=action, sub_action=SubActionModel.NONE)
147
 
148
  class ReportModel(BaseModel):
149
  sender: str