File size: 12,094 Bytes
b613c3c
 
 
 
 
 
 
 
 
 
0e1d4ae
b613c3c
 
 
 
 
 
0d34ea8
 
b613c3c
d2662cc
b613c3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d34ea8
 
 
b613c3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d34ea8
 
 
b613c3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d34ea8
b613c3c
0d34ea8
 
 
 
b613c3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e1d4ae
 
b613c3c
 
0e1d4ae
b613c3c
 
0e1d4ae
 
 
 
b613c3c
0e1d4ae
 
 
 
b613c3c
0e1d4ae
b613c3c
0e1d4ae
 
 
 
 
 
 
b613c3c
0e1d4ae
b613c3c
0e1d4ae
 
b613c3c
 
0e1d4ae
b613c3c
 
0e1d4ae
 
 
 
 
 
b613c3c
0e1d4ae
 
 
 
 
 
 
 
 
b613c3c
0e1d4ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b613c3c
0e1d4ae
 
 
 
 
b613c3c
0e1d4ae
 
 
 
 
 
 
 
b613c3c
0e1d4ae
 
 
 
b613c3c
0e1d4ae
 
 
b613c3c
 
0e1d4ae
b613c3c
0e1d4ae
 
 
b613c3c
0e1d4ae
 
 
b613c3c
0e1d4ae
 
b613c3c
0e1d4ae
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
"""
System monitoring service for Video Model Studio.
Tracks system resources like CPU, memory, and other metrics.
"""

import os
import time
import logging
import platform
import threading
import pandas as pd
from datetime import datetime, timedelta
from collections import deque
from typing import Dict, List, Optional, Tuple, Any

import psutil

from vms.ui.monitoring.services.gpu import GPUMonitoringService

# Module-level logger named after this module; its level is explicitly set to INFO here.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

class MonitoringService:
    """Service for monitoring system resources and performance"""
    
    def __init__(self, history_minutes: int = 10, sample_interval: int = 5):
        """Initialize the monitoring service

        Creates the rolling history buffers, the GPU monitoring sub-service and
        records one initial sample so the history is not empty before the
        background thread is started.

        Args:
            history_minutes: How many minutes of history to keep
            sample_interval: How many seconds between samples

        Raises:
            ValueError: If sample_interval is not positive or history_minutes
                is negative
        """
        if sample_interval <= 0:
            raise ValueError("sample_interval must be a positive number of seconds")
        if history_minutes < 0:
            raise ValueError("history_minutes must not be negative")

        self.history_minutes = history_minutes
        self.sample_interval = sample_interval
        # Keep room for at least one sample even when the history window is
        # shorter than a single sampling interval; a zero-length deque would
        # silently drop every sample.
        self.max_samples = max(1, (history_minutes * 60) // sample_interval)

        # Initialize data structures for metrics
        self.timestamps = deque(maxlen=self.max_samples)
        self.cpu_percent = deque(maxlen=self.max_samples)
        self.memory_percent = deque(maxlen=self.max_samples)
        self.memory_used = deque(maxlen=self.max_samples)
        self.memory_available = deque(maxlen=self.max_samples)

        # CPU temperature history (might not be available on all systems)
        self.cpu_temp = deque(maxlen=self.max_samples)

        # Per-core CPU history, keyed by core index
        self.cpu_cores_percent = {}

        # Initialize GPU monitoring service
        self.gpu = GPUMonitoringService(history_minutes=history_minutes, sample_interval=sample_interval)

        # Track if the monitoring thread is running
        self.is_running = False
        self.thread = None

        # Initialize with current values: store the first sample in the history
        # instead of discarding it.
        self.update_history(self.collect_metrics())
        
    def collect_metrics(self) -> Dict[str, Any]:
        """Collect current system metrics

        Note that each of the two ``psutil.cpu_percent`` calls blocks for its
        0.1 second measurement interval.

        Returns:
            Dictionary of current metrics with the keys 'timestamp',
            'cpu_percent', 'memory_percent', 'memory_used' (GB),
            'memory_available' (GB), 'cpu_temp' (None when no reading was
            obtained) and 'per_cpu_percent'
        """
        # Take a single memory snapshot so that percent, used and available
        # all describe the same instant.
        memory = psutil.virtual_memory()
        bytes_per_gb = 1024**3

        metrics = {
            'timestamp': datetime.now(),
            'cpu_percent': psutil.cpu_percent(interval=0.1),
            'memory_percent': memory.percent,
            'memory_used': memory.used / bytes_per_gb,  # GB
            'memory_available': memory.available / bytes_per_gb,  # GB
            'cpu_temp': None,
            'per_cpu_percent': psutil.cpu_percent(interval=0.1, percpu=True)
        }

        # Try to get CPU temperature (platform specific). Only Linux is read
        # here; on macOS and Windows the value is left as None because reading
        # it would need additional dependencies.
        try:
            if platform.system() == 'Linux':
                temps = psutil.sensors_temperatures()
                for name, entries in temps.items():
                    # Skip matching sensors that report no entries so that a
                    # later matching sensor can still provide a reading.
                    if entries and name.startswith(('coretemp', 'k10temp', 'cpu_thermal')):
                        metrics['cpu_temp'] = entries[0].current
                        break
        except (AttributeError, KeyError, IndexError, NotImplementedError):
            # Sensors not available
            pass

        return metrics
    
    def update_history(self, metrics: Dict[str, Any]) -> None:
        """Update metric history with new values
        
        Args:
            metrics: New metrics to add to history
        """
        self.timestamps.append(metrics['timestamp'])
        self.cpu_percent.append(metrics['cpu_percent'])
        self.memory_percent.append(metrics['memory_percent'])
        self.memory_used.append(metrics['memory_used'])
        self.memory_available.append(metrics['memory_available'])
        
        if metrics['cpu_temp'] is not None:
            self.cpu_temp.append(metrics['cpu_temp'])
        
        # Update per-core CPU metrics
        for i, percent in enumerate(metrics['per_cpu_percent']):
            if i not in self.cpu_cores_percent:
                self.cpu_cores_percent[i] = deque(maxlen=self.max_samples)
            self.cpu_cores_percent[i].append(percent)
    
    def start_monitoring(self) -> None:
        """Start background thread for collecting metrics

        Does nothing (apart from logging a warning) when monitoring is already
        running. Otherwise starts GPU monitoring and a daemon thread that
        collects a sample and stores it in the history every
        ``sample_interval`` seconds until monitoring is stopped.
        """
        if self.is_running:
            logger.warning("Monitoring thread already running")
            return

        self.is_running = True

        # Start GPU monitoring if available
        self.gpu.start_monitoring()

        # Marker identifying this particular run. A loop left over from an
        # earlier run (still sleeping when monitoring was stopped and started
        # again) sees a different marker and exits instead of sampling in
        # parallel with the new thread.
        run_token = object()
        self._run_token = run_token

        # Upper bound on how long the loop sleeps before re-checking whether
        # it should stop, so that stopping does not have to wait for a full
        # sample interval.
        stop_poll_seconds = 0.25

        def _still_active() -> bool:
            return self.is_running and self._run_token is run_token

        def _wait_for_next_sample() -> None:
            deadline = time.monotonic() + self.sample_interval
            while _still_active():
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break
                time.sleep(min(remaining, stop_poll_seconds))

        def _monitor_loop():
            while _still_active():
                try:
                    metrics = self.collect_metrics()
                    self.update_history(metrics)
                except Exception as e:
                    # Keep the thread alive: log the failure and try again
                    # after the usual interval.
                    logger.error(f"Error in monitoring thread: {str(e)}", exc_info=True)
                _wait_for_next_sample()

        self.thread = threading.Thread(target=_monitor_loop, daemon=True)
        self.thread.start()
        logger.info("System monitoring thread started")
    
    def stop_monitoring(self) -> None:
        """Stop the monitoring thread"""
        if not self.is_running:
            return

        self.is_running = False

        # Stop GPU monitoring
        self.gpu.stop_monitoring()

        if self.thread:
            self.thread.join(timeout=1.0)
            logger.info("System monitoring thread stopped")
    
    def get_current_metrics(self) -> Dict[str, Any]:
        """Get current system metrics
        
        Returns:
            Dictionary with current system metrics
        """
        return self.collect_metrics()
    
    def get_system_info(self) -> Dict[str, Any]:
        """Get general system information

        Returns:
            Dictionary with system details under the keys 'cpu', 'memory',
            'disk' (one entry per readable mount point) and 'system'. Memory
            and disk sizes are expressed in GB; 'uptime' is in seconds.
        """
        bytes_per_gb = 1024**3

        cpu_info = {
            'cores_physical': psutil.cpu_count(logical=False),
            'cores_logical': psutil.cpu_count(logical=True),
            'current_frequency': None,
            'architecture': platform.machine(),
        }

        # Try to get CPU frequency; it stays None when it cannot be read
        try:
            cpu_freq = psutil.cpu_freq()
        except Exception as exc:
            logger.debug("CPU frequency not available: %s", exc)
        else:
            if cpu_freq:
                cpu_info['current_frequency'] = cpu_freq.current

        # Single snapshot so that all memory figures describe the same instant
        memory = psutil.virtual_memory()
        memory_info = {
            'total': memory.total / bytes_per_gb,  # GB
            'available': memory.available / bytes_per_gb,  # GB
            'used': memory.used / bytes_per_gb,  # GB
            'percent': memory.percent
        }

        disk_info = {}
        for part in psutil.disk_partitions(all=False):
            if os.name == 'nt' and ('cdrom' in part.opts or part.fstype == ''):
                # Skip CD-ROM drives on Windows
                continue
            try:
                usage = psutil.disk_usage(part.mountpoint)
            except OSError as exc:
                # Covers PermissionError as well as mount points that cannot
                # be read for other reasons; one unreadable partition should
                # not abort the whole report.
                logger.debug("Skipping partition %s: %s", part.mountpoint, exc)
                continue
            disk_info[part.mountpoint] = {
                'total': usage.total / bytes_per_gb,  # GB
                'used': usage.used / bytes_per_gb,  # GB
                'free': usage.free / bytes_per_gb,  # GB
                'percent': usage.percent
            }

        sys_info = {
            'system': platform.system(),
            'version': platform.version(),
            'platform': platform.platform(),
            'processor': platform.processor(),
            'hostname': platform.node(),
            'python_version': platform.python_version(),
            'uptime': time.time() - psutil.boot_time()
        }

        return {
            'cpu': cpu_info,
            'memory': memory_info,
            'disk': disk_info,
            'system': sys_info,
        }
    
    def get_cpu_data(self) -> pd.DataFrame:
        """Get CPU usage data as a DataFrame
        
        Returns:
            DataFrame with CPU usage data
        """
        if not self.timestamps:
            return pd.DataFrame({
            'time': list(),
            'CPU Usage (%)': list()
        })
            
        data = {
            'time': list(self.timestamps),
            'CPU Usage (%)': list(self.cpu_percent)
        }
        
        # Add temperature if available
        if self.cpu_temp and len(self.cpu_temp) > 0:
            # Ensure temperature data aligns with timestamps
            # If fewer temperature readings than timestamps, pad with None
            temp_data = list(self.cpu_temp)
            if len(temp_data) < len(self.timestamps):
                padding = [None] * (len(self.timestamps) - len(temp_data))
                temp_data = padding + temp_data
            data['CPU Temperature (°C)'] = temp_data
            
        return pd.DataFrame(data)
    
    def get_memory_data(self) -> pd.DataFrame:
        """Get memory usage data as a DataFrame
        
        Returns:
            DataFrame with memory usage data
        """
        if not self.timestamps:
            return pd.DataFrame({
            'time': list(),
            'Memory Usage (%)': list(),
            'Memory Used (GB)': list(),
            'Memory Available (GB)': list()
        })
            
        return pd.DataFrame({
            'time': list(self.timestamps),
            'Memory Usage (%)': list(self.memory_percent),
            'Memory Used (GB)': list(self.memory_used),
            'Memory Available (GB)': list(self.memory_available)
        })
    
    def get_per_core_data(self) -> Dict[int, pd.DataFrame]:
        """Get per-core CPU usage data as DataFrames
        
        Returns:
            Dictionary of DataFrames with per-core CPU usage data
        """
        if not self.timestamps or not self.cpu_cores_percent:
            return {}
            
        core_data = {}
        for core_id, percentages in self.cpu_cores_percent.items():
            # Ensure we don't have more data points than timestamps
            data_length = min(len(percentages), len(self.timestamps))
            core_data[core_id] = pd.DataFrame({
                'time': list(self.timestamps)[-data_length:],
                f'Core {core_id} Usage (%)': list(percentages)[-data_length:]
            })
            
        return core_data
        
    # The generate_*_plot methods below replace earlier matplotlib-based methods and return DataFrames
    
    # This method is kept for backward compatibility but returns a DataFrame
    def generate_cpu_plot(self) -> pd.DataFrame:
        """Get CPU usage data for plotting
        
        Returns:
            DataFrame with CPU usage data
        """
        return self.get_cpu_data()
    
    # This method is kept for backward compatibility but returns a DataFrame
    def generate_memory_plot(self) -> pd.DataFrame:
        """Get memory usage data for plotting
        
        Returns:
            DataFrame with memory usage data
        """
        return self.get_memory_data()
    
    # This method is kept for backward compatibility but returns a DataFrame of all cores
    def generate_per_core_plot(self) -> pd.DataFrame:
        """Get per-core CPU usage data for plotting
        
        Returns:
            Combined DataFrame with all cores' usage data
        """
        core_data = self.get_per_core_data()
        if not core_data:
            return pd.DataFrame()
            
        # Combine all core data into a single DataFrame using the first core's timestamps
        first_core_id = list(core_data.keys())[0]
        combined_df = core_data[first_core_id][['time']].copy()
        
        for core_id, df in core_data.items():
            combined_df[f'Core {core_id} Usage (%)'] = df[f'Core {core_id} Usage (%)']
            
        return combined_df