mini-omni-s2s

Runtime error

App Files Files Community

mini-omni-s2s / slam_llm /utils /memory_utils.py

xcczach

Upload 73 files

35c1cfd verified about 1 year ago

raw

history blame contribute delete

2.42 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.

	import gc
	import psutil
	import threading

	import torch

	def byte2gb(x):
	    """Convert a byte count to whole gibibytes.

	    The value is divided by 2**30 and the fractional part is discarded by
	    ``int`` (truncation toward zero), so anything below 1 GiB maps to 0.
	    """
	    bytes_per_gib = 2**30
	    gib = x / bytes_per_gib
	    return int(gib)
	# This context manager is used to track the peak memory usage of the process
	class MemoryTrace:
	    """Context manager that records CUDA and CPU memory usage around a block.

	    On entry it snapshots the currently allocated CUDA memory and the
	    process resident set size, and starts a background thread that keeps
	    sampling the resident set size to find its peak. On exit it stops the
	    thread and stores the results as attributes on the instance.

	    Attributes set on entry:
	        begin: CUDA memory allocated at entry, in GiB (via ``byte2gb``).
	        process: the ``psutil.Process`` handle used for CPU sampling.
	        cpu_begin: resident set size at entry, in GiB.

	    Attributes set on exit:
	        end: CUDA memory allocated at exit, in GiB.
	        peak: ``torch.cuda.max_memory_allocated()`` at exit, in GiB.
	        used: CUDA memory delta (exit - entry), in GiB.
	        peaked: CUDA peak delta (peak - entry), in GiB.
	        max_reserved: ``torch.cuda.max_memory_reserved()``, in GiB.
	        peak_active_gb: ``"active_bytes.all.peak"`` CUDA stat, in GiB.
	        cuda_malloc_retires: ``"num_alloc_retries"`` CUDA stat.
	        m_cuda_ooms: ``"num_ooms"`` CUDA stat.
	        cpu_end: resident set size at exit, in bytes (kept unconverted for
	            backward compatibility).
	        cpu_peak: highest resident set size sampled, in bytes.
	        cpu_used: resident set size delta (exit - entry), in GiB.
	        cpu_peaked: resident set size peak delta (peak - entry), in GiB.

	    All deltas are computed on raw byte counts and converted once, so they
	    are not distorted by subtracting already-truncated GiB values.
	    """

	    def __enter__(self):
	        """Reset the CUDA peak counters, take baselines and start sampling."""
	        gc.collect()
	        torch.cuda.empty_cache()
	        # Reset the peak gauges so the values read in __exit__ refer to
	        # this block only.
	        torch.cuda.reset_peak_memory_stats()

	        # Keep the raw byte counts so deltas can be computed precisely.
	        self._begin_bytes = torch.cuda.memory_allocated()
	        self.begin = byte2gb(self._begin_bytes)

	        self.process = psutil.Process()
	        self._cpu_begin_bytes = self.cpu_mem_used()
	        self.cpu_begin = byte2gb(self._cpu_begin_bytes)

	        self.peak_monitoring = True
	        # Keep a handle on the thread so __exit__ can wait for it.
	        self._peak_monitor_thread = threading.Thread(
	            target=self.peak_monitor_func, daemon=True
	        )
	        self._peak_monitor_thread.start()
	        return self

	    def cpu_mem_used(self):
	        """Return the resident set size of the current process, in bytes."""
	        return self.process.memory_info().rss

	    def peak_monitor_func(self):
	        """Sample the resident set size until ``peak_monitoring`` is cleared.

	        Runs in the background thread started by ``__enter__`` and keeps the
	        largest sample seen in ``self.cpu_peak`` (bytes). At least one sample
	        is always taken before the stop flag is checked.
	        """
	        self.cpu_peak = -1

	        while True:
	            self.cpu_peak = max(self.cpu_mem_used(), self.cpu_peak)

	            # Deliberately no sleep here: sleeping between samples would
	            # risk missing the peak.

	            if not self.peak_monitoring:
	                break

	    def __exit__(self, *exc):
	        """Stop sampling and store the CUDA and CPU memory statistics.

	        Returns None, so an exception raised inside the ``with`` block is
	        not suppressed.
	        """
	        self.peak_monitoring = False
	        # Wait for the sampler to finish so cpu_peak exists and is final
	        # before it is read below.
	        self._peak_monitor_thread.join()

	        gc.collect()
	        torch.cuda.empty_cache()
	        end_bytes = torch.cuda.memory_allocated()
	        peak_bytes = torch.cuda.max_memory_allocated()
	        self.end = byte2gb(end_bytes)
	        self.peak = byte2gb(peak_bytes)

	        cuda_info = torch.cuda.memory_stats()
	        # Use .get() for every stat so a missing key yields 0, matching
	        # how the retry and OOM counters are read.
	        self.peak_active_gb = byte2gb(cuda_info.get("active_bytes.all.peak", 0))
	        self.cuda_malloc_retires = cuda_info.get("num_alloc_retries", 0)
	        self.m_cuda_ooms = cuda_info.get("num_ooms", 0)

	        # Subtract in bytes and convert once; converting a difference of
	        # GiB values again would always truncate to 0.
	        self.used = byte2gb(end_bytes - self._begin_bytes)
	        self.peaked = byte2gb(peak_bytes - self._begin_bytes)
	        self.max_reserved = byte2gb(torch.cuda.max_memory_reserved())

	        # cpu_end stays in bytes, as before; the deltas use the byte
	        # baseline rather than the GiB-valued cpu_begin.
	        self.cpu_end = self.cpu_mem_used()
	        self.cpu_used = byte2gb(self.cpu_end - self._cpu_begin_bytes)
	        self.cpu_peaked = byte2gb(self.cpu_peak - self._cpu_begin_bytes)