MCPcopy Index your code
hub / github.com/Turing-Project/WriteGPT / ErrorHandler

Class ErrorHandler

LanguageNetwork/BERT/train.py:79–112  ·  view source on GitHub ↗

A class that listens for exceptions in child processes and propagates their tracebacks to the parent process.

Source from the content-addressed store, hash-verified

77
78
class ErrorHandler(object):
    """Listen for exceptions in child processes and re-raise them in the parent.

    A background thread blocks on ``error_queue``. When an item arrives it
    puts the item back and sends ``SIGUSR1`` to the current process; the
    ``SIGUSR1`` handler then interrupts every registered child and raises an
    ``Exception`` carrying the child's traceback text.
    """

    def __init__(self, error_queue):
        """Install the SIGUSR1 handler and start the listener thread.

        Args:
            error_queue: queue-like object with blocking ``get()`` and
                ``put()``; items are ``(rank, traceback_text)`` tuples.
        """
        import signal
        import threading
        self.error_queue = error_queue
        self.children_pids = []
        # Install the handler *before* the listener thread starts: if an
        # error is already waiting in the queue, the thread may send SIGUSR1
        # immediately, and the default action for SIGUSR1 is to terminate
        # the process without ever reporting the child's traceback.
        signal.signal(signal.SIGUSR1, self.signal_handler)
        self.error_thread = threading.Thread(
            target=self.error_listener, daemon=True)
        self.error_thread.start()

    def add_child(self, pid):
        """Register a child process id to be interrupted when an error occurs.

        Args:
            pid: process id of the child.
        """
        self.children_pids.append(pid)

    def error_listener(self):
        """Wait for the first reported error, then signal this process.

        The item is put back on the queue so that ``signal_handler`` can read
        it after ``SIGUSR1`` is delivered.
        """
        (rank, original_trace) = self.error_queue.get()
        self.error_queue.put((rank, original_trace))
        os.kill(os.getpid(), signal.SIGUSR1)

    def signal_handler(self, signalnum, stackframe):
        """Interrupt all registered children and raise the reported traceback.

        Args:
            signalnum: number of the received signal (unused).
            stackframe: interrupted stack frame (unused).

        Raises:
            Exception: always; the message ends with the traceback text taken
                from the error queue.
        """
        for pid in self.children_pids:
            try:
                os.kill(pid, signal.SIGINT)  # kill children processes
            except ProcessLookupError:
                # The child has already exited; nothing to interrupt. Keep
                # going so the remaining children are still signalled and
                # the original traceback is still reported below.
                continue
        (_, original_trace) = self.error_queue.get()
        msg = """\n\n-- Tracebacks above this line can probably
                 be ignored --\n\n"""
        msg += original_trace
        raise Exception(msg)
113
114
115class Running(object):

Callers 1

multi_card_runMethod · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected