MilanM committed on
Commit
7a20245
·
verified ·
1 Parent(s): 3733ea8

Create new_templates/website_monitor_function_v2

Browse files
new_templates/website_monitor_function_v2 ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def website_monitor_function_v2():
    """Build a ``score`` callable that reports whether a web page has changed.

    Calling this factory immediately starts a daemon thread that fetches
    ``URL`` every ``CHECK_INTERVAL`` seconds, compares the page's text with
    the stored baseline, and records a human-readable status string. The
    returned ``score`` function simply reports the latest status.

    Returns:
        The nested ``score(input_data)`` function, which returns a dict of
        the form ``{'predictions': [{'fields': [...], 'values': [[...]]}]}``.
    """
    # Imports are local so the factory is self-contained
    # (presumably required by the deployment target -- TODO confirm).
    import requests
    import threading
    import time
    from datetime import datetime
    from difflib import SequenceMatcher
    import re

    # Configuration
    URL = "<add your target url here>"  # Replace with your target URL
    CHECK_INTERVAL = 4  # in seconds
    CHANGE_THRESHOLD = 0.01  # 1% difference threshold

    # Patterns are compiled once here rather than on every polling cycle.
    # Script/style bodies and HTML comments are not page text; they often
    # carry per-request values that would otherwise register as "changes".
    NON_TEXT_RE = re.compile(
        r'<(script|style)\b[^>]*>.*?</\1\s*>|<!--.*?-->',
        re.IGNORECASE | re.DOTALL,
    )
    TAG_RE = re.compile(r'<[^>]+>')

    # Shared state variables (written by the monitor thread, read by score)
    current_state = "Initializing..."
    previous_html = None  # baseline page; replaced only when a change is detected
    last_check_time = None  # time of the most recent successful check
    monitor_thread = None

    def extract_text_content(html):
        """Extract text content from HTML, removing tags and normalizing whitespace.

        Script/style elements and HTML comments are removed together with
        their contents before the remaining tags are stripped. The result is
        lower-cased with runs of whitespace collapsed to single spaces.
        """
        # Remove non-text elements together with their bodies
        text = NON_TEXT_RE.sub(' ', html)
        # Remove remaining HTML tags
        text = TAG_RE.sub(' ', text)
        # Normalize whitespace
        text = ' '.join(text.split())
        return text.lower().strip()

    def calculate_text_difference(text1, text2):
        """Calculate the difference between two text strings as a 0.0-1.0 ratio.

        Returns 0.0 when both strings are empty or identical, 1.0 when exactly
        one of them is empty, and ``1 - SequenceMatcher.ratio()`` otherwise.
        """
        if not text1 and not text2:
            return 0.0
        if not text1 or not text2:
            return 1.0
        # Fast path: an unchanged page is the common case, and identical
        # strings always have ratio 1.0, so skip the expensive matcher.
        if text1 == text2:
            return 0.0

        # NOTE(review): SequenceMatcher's default autojunk heuristic applies
        # to sequences of 200+ items and can skew character-level ratios on
        # long pages; left unchanged to keep the existing threshold meaning.
        similarity = SequenceMatcher(None, text1, text2).ratio()
        return 1.0 - similarity

    def fetch_and_compare():
        """Fetch URL content, compare with the baseline and update the status.

        All request and unexpected errors are caught and written to
        ``current_state`` so the monitor loop keeps running.
        """
        nonlocal current_state, previous_html, last_check_time

        try:
            response = requests.get(URL, timeout=30)
            response.raise_for_status()
            current_html = response.text
            current_time = datetime.now()

            if previous_html is None:
                # First run: nothing to compare against yet, store the baseline
                previous_html = current_html
                last_check_time = current_time
                current_state = f"Initial check completed at {current_time.strftime('%Y-%m-%d %H:%M:%S')}"
                return

            # Extract and compare text content
            previous_text = extract_text_content(previous_html)
            current_text = extract_text_content(current_html)

            difference_ratio = calculate_text_difference(previous_text, current_text)

            if difference_ratio >= CHANGE_THRESHOLD:
                current_state = f"Changes occurred between {last_check_time.strftime('%Y-%m-%d %H:%M:%S')} - {current_time.strftime('%Y-%m-%d %H:%M:%S')}"
                # Only a detected change moves the baseline forward
                previous_html = current_html
            else:
                current_state = f"No changes occurred since {last_check_time.strftime('%Y-%m-%d %H:%M:%S')}"

            last_check_time = current_time

        except requests.RequestException as e:
            current_state = f"Error fetching URL at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: {str(e)}"
        except Exception as e:
            # Boundary handler: keep the background thread alive and surface
            # the problem through the status string instead of crashing.
            current_state = f"Unexpected error at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: {str(e)}"

    def monitor_loop():
        """Continuous monitoring loop: check, then sleep CHECK_INTERVAL seconds."""
        while True:
            fetch_and_compare()
            time.sleep(CHECK_INTERVAL)

    def start_monitoring():
        """Start the monitoring thread if not already running."""
        nonlocal monitor_thread
        if monitor_thread is None or not monitor_thread.is_alive():
            # Daemon thread so it never blocks interpreter shutdown
            monitor_thread = threading.Thread(target=monitor_loop, daemon=True)
            monitor_thread.start()

    # Start monitoring when function is deployed
    start_monitoring()

    def score(input_data):
        """Score function that returns the current monitoring state.

        Args:
            input_data: Request payload; currently unused.

        Returns:
            A dict with one prediction whose fields are ``monitoring_state``,
            ``check_interval_seconds``, ``target_url`` and ``last_updated``
            (the time of this call), or a single ``error`` field if building
            the response fails.
        """
        try:
            # Extract any parameters from input if needed (optional)
            # For now, just return current state

            score_response = {
                'predictions': [{
                    'fields': ['monitoring_state', 'check_interval_seconds', 'target_url', 'last_updated'],
                    'values': [[
                        current_state,
                        CHECK_INTERVAL,
                        URL,
                        datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                    ]]
                }]
            }

            return score_response

        except Exception as e:
            error_response = {
                'predictions': [{
                    'fields': ['error'],
                    'values': [[f"Error in score function: {str(e)}"]]
                }]
            }
            return error_response

    return score
121
+
122
+ # Create the deployable score function: calling the factory starts the background monitor thread and returns the score(input_data) callable
123
+ score = website_monitor_function_v2()