启动显存监控 Returns: bool: 是否成功启动
(self)
| 98 | pass |
| 99 | |
def start(self):
    """
    Start GPU memory monitoring on a background daemon thread.

    Returns:
        bool: True if the monitor thread is still alive shortly after
            being launched. False if monitoring was already flagged as
            running, if the thread had already exited by the time it was
            checked, or if an exception was raised during startup.
    """
    # Refuse to launch a second monitor while one is flagged as running.
    if self._running:
        return False

    try:
        # Clear the recorded peak while holding self._lock.
        with self._lock:
            self.peak_memory_usage = 0

        # Remember when this monitoring session began.
        self.start_time = datetime.now()

        # Mark as running, then launch the daemon worker thread.
        self._running = True
        self.monitor_thread = worker = threading.Thread(
            target=self._monitor_loop,
            daemon=True,
        )
        worker.start()

        # Wait briefly, then confirm the worker has not already exited.
        time.sleep(0.1)
        still_alive = worker.is_alive()
        if not still_alive:
            self._running = False
        return still_alive

    except Exception as e:
        print(f"启动GPU {self.gpu_index} 监控失败: {e}")
        self._running = False
        return False
| 139 | |
| 140 | def stop(self, verbose=False): |
| 141 | """ |
no outgoing calls
no test coverage detected