Create new_templates/website_monitor_function_v2
Browse files
new_templates/website_monitor_function_v2
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def website_monitor_function_v2():
|
2 |
+
import requests
|
3 |
+
import threading
|
4 |
+
import time
|
5 |
+
from datetime import datetime
|
6 |
+
from difflib import SequenceMatcher
|
7 |
+
import re
|
8 |
+
|
9 |
+
# Configuration
|
10 |
+
URL = "<add your target url here>" # Replace with your target URL
|
11 |
+
CHECK_INTERVAL = 4 # in seconds
|
12 |
+
CHANGE_THRESHOLD = 0.01 # 1% difference threshold
|
13 |
+
|
14 |
+
# Shared state variables
|
15 |
+
current_state = "Initializing..."
|
16 |
+
previous_html = None
|
17 |
+
last_check_time = None
|
18 |
+
monitor_thread = None
|
19 |
+
|
20 |
+
def extract_text_content(html):
    """Reduce an HTML document to its normalized, visible text.

    Only text a reader would see is kept, so that purely technical churn
    (inline JavaScript, CSS, HTML comments) is not counted as a content
    change by the monitor.

    Args:
        html: Raw HTML markup. ``None`` or an empty string yields ``""``.

    Returns:
        Lower-cased text with entities decoded and every run of whitespace
        collapsed to a single space.
    """
    # Imported by name because the ``html`` parameter shadows the module.
    from html import unescape

    if not html:
        return ""

    # Drop whole <script>/<style> elements first: their bodies are code, not
    # page text, and a plain tag-stripping regex would leave them behind.
    visible = re.sub(
        r'<(script|style)\b[^>]*>.*?</\1\s*>',
        ' ',
        html,
        flags=re.DOTALL | re.IGNORECASE,
    )
    # HTML comments never render either.
    visible = re.sub(r'<!--.*?-->', ' ', visible, flags=re.DOTALL)
    # Remove the remaining tags.
    visible = re.sub(r'<[^>]+>', ' ', visible)
    # Decode entities only after tag removal so that an escaped "&lt;b&gt;"
    # is kept as literal text instead of being stripped as a tag.
    visible = unescape(visible)
    # split()/join collapses all whitespace (including non-breaking spaces)
    # and leaves no leading or trailing blanks.
    return ' '.join(visible.split()).lower()
|
27 |
+
|
28 |
+
def calculate_text_difference(text1, text2):
    """Return how different two strings are as a ratio in [0.0, 1.0].

    The result is ``1 - SequenceMatcher.ratio()``: ``0.0`` means the texts
    are identical and ``1.0`` means they share nothing.

    Args:
        text1: First text (may be empty or ``None``).
        text2: Second text (may be empty or ``None``).

    Returns:
        ``0.0`` when both inputs are empty, ``1.0`` when exactly one is
        empty, otherwise the character-level dissimilarity ratio.
    """
    if not text1 and not text2:
        return 0.0
    if not text1 or not text2:
        return 1.0

    # An unchanged page is the common case for a monitor; a plain equality
    # check answers it without running the much costlier diff.
    if text1 == text2:
        return 0.0

    # autojunk must be disabled: with the default heuristic, any character
    # occurring in more than ~1% of a 200+ character text is dropped from the
    # match index, which makes long, nearly identical texts look almost
    # completely different and defeats any small change threshold.
    matcher = SequenceMatcher(None, text1, text2, autojunk=False)
    return 1.0 - matcher.ratio()
|
38 |
+
|
39 |
+
def fetch_and_compare():
|
40 |
+
"""Fetch URL content and compare with previous version"""
|
41 |
+
nonlocal current_state, previous_html, last_check_time
|
42 |
+
|
43 |
+
try:
|
44 |
+
response = requests.get(URL, timeout=30)
|
45 |
+
response.raise_for_status()
|
46 |
+
current_html = response.text
|
47 |
+
current_time = datetime.now()
|
48 |
+
|
49 |
+
if previous_html is None:
|
50 |
+
# First run
|
51 |
+
previous_html = current_html
|
52 |
+
last_check_time = current_time
|
53 |
+
current_state = f"Initial check completed at {current_time.strftime('%Y-%m-%d %H:%M:%S')}"
|
54 |
+
return
|
55 |
+
|
56 |
+
# Extract and compare text content
|
57 |
+
previous_text = extract_text_content(previous_html)
|
58 |
+
current_text = extract_text_content(current_html)
|
59 |
+
|
60 |
+
difference_ratio = calculate_text_difference(previous_text, current_text)
|
61 |
+
|
62 |
+
if difference_ratio >= CHANGE_THRESHOLD:
|
63 |
+
current_state = f"Changes occurred between {last_check_time.strftime('%Y-%m-%d %H:%M:%S')} - {current_time.strftime('%Y-%m-%d %H:%M:%S')}"
|
64 |
+
previous_html = current_html
|
65 |
+
else:
|
66 |
+
current_state = f"No changes occurred since {last_check_time.strftime('%Y-%m-%d %H:%M:%S')}"
|
67 |
+
|
68 |
+
last_check_time = current_time
|
69 |
+
|
70 |
+
except requests.RequestException as e:
|
71 |
+
current_state = f"Error fetching URL at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: {str(e)}"
|
72 |
+
except Exception as e:
|
73 |
+
current_state = f"Unexpected error at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: {str(e)}"
|
74 |
+
|
75 |
+
def monitor_loop():
    """Poll the target forever, pausing CHECK_INTERVAL seconds per round."""
    # Never returns on its own; one round is a check followed by a pause.
    while True:
        fetch_and_compare()
        time.sleep(CHECK_INTERVAL)
|
80 |
+
|
81 |
+
def start_monitoring():
|
82 |
+
"""Start the monitoring thread if not already running"""
|
83 |
+
nonlocal monitor_thread
|
84 |
+
if monitor_thread is None or not monitor_thread.is_alive():
|
85 |
+
monitor_thread = threading.Thread(target=monitor_loop, daemon=True)
|
86 |
+
monitor_thread.start()
|
87 |
+
|
88 |
+
# Start monitoring when function is deployed
|
89 |
+
start_monitoring()
|
90 |
+
|
91 |
+
def score(input_data):
    """Report the monitor's latest status in the predictions payload shape.

    Args:
        input_data: Request payload; currently not inspected.

    Returns:
        A dict with a single ``predictions`` entry holding the state message,
        the polling interval, the target URL and the time of this call. If
        building that entry fails, an entry with a single ``error`` field is
        returned instead.
    """
    try:
        field_names = ['monitoring_state', 'check_interval_seconds', 'target_url', 'last_updated']
        row = [
            current_state,
            CHECK_INTERVAL,
            URL,
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        ]
        return {'predictions': [{'fields': field_names, 'values': [row]}]}
    except Exception as failure:
        # Report the problem in-band rather than raising to the caller.
        message = f"Error in score function: {failure!s}"
        return {'predictions': [{'fields': ['error'], 'values': [[message]]}]}
|
119 |
+
|
120 |
+
return score
|
121 |
+
|
122 |
+
# Create the deployable score function
|
123 |
+
# Calling the factory binds the inner `score` callable to this name. The
# factory body also invokes start_monitoring(), so the background polling
# thread is launched here as a side effect.
score = website_monitor_function_v2()
|