| 1655 | os.kill(os.getpid(), signal.SIGKILL) |
| 1656 | |
| 1657 | def _watchdog(): |
| 1658 | time.sleep(grace) |
| 1659 | # First: write to a file we can recover after the kill. Each |
| 1660 | # of the steps below is wrapped because any of them may hang |
| 1661 | # or raise on a corrupted interpreter state. |
| 1662 | try: |
| 1663 | # pylint: disable=resource-leakage |
| 1664 | # See note in ``_hard_kill``: stdlib ``open`` is |
| 1665 | # intentional here. |
| 1666 | with open(dump_path, "w", encoding="utf-8") as fp: |
| 1667 | fp.write( |
| 1668 | "========== SALT EXIT WATCHDOG ==========\n" |
| 1669 | f"pytest session finished {grace}s ago but the process\n" |
| 1670 | "has not exited. Dumping all-thread tracebacks before\n" |
| 1671 | "forcing termination.\n" |
| 1672 | "========================================\n\n" |
| 1673 | ) |
| 1674 | try: |
| 1675 | faulthandler.dump_traceback(file=fp, all_threads=True) |
| 1676 | except Exception as exc: # pylint: disable=broad-except |
| 1677 | fp.write(f"\nfaulthandler.dump_traceback failed: {exc!r}\n") |
| 1678 | # Best-effort child enumeration to the same file. |
| 1679 | try: |
| 1680 | import psutil # pylint: disable=import-outside-toplevel |
| 1681 | |
| 1682 | me = psutil.Process() |
| 1683 | children = me.children(recursive=True) |
| 1684 | fp.write(f"\n{len(children)} child process(es) still alive:\n") |
| 1685 | for child in children: |
| 1686 | try: |
| 1687 | fp.write( |
| 1688 | f" pid={child.pid} ppid={child.ppid()} " |
| 1689 | f"status={child.status()} " |
| 1690 | f"cmdline={' '.join(child.cmdline()[:6])!r}\n" |
| 1691 | ) |
| 1692 | except (psutil.NoSuchProcess, psutil.AccessDenied): |
| 1693 | continue |
| 1694 | except Exception as exc: # pylint: disable=broad-except |
| 1695 | fp.write(f"\npsutil enumeration failed: {exc!r}\n") |
| 1696 | except OSError: |
| 1697 | pass |
| 1698 | |
| 1699 | # Best-effort stderr banner so the job log shows that the |
| 1700 | # watchdog fired (even if the actual dump is in the artifact |
| 1701 | # file). Wrapped in try because stderr may be captured and |
| 1702 | # writing to it may block. |
| 1703 | try: |
| 1704 | sys.stderr.write( |
| 1705 | "\n\n========== SALT EXIT WATCHDOG ==========\n" |
| 1706 | f"pytest session finished {grace}s ago and the process did\n" |
| 1707 | "not exit cleanly. See artifacts/logs/" |
| 1708 | "exit-watchdog-traceback.log for the all-thread traceback\n" |
| 1709 | f"and child-process inventory. Forcing os._exit({exitstatus}).\n" |
| 1710 | "If THIS message is the last thing in the job log, the\n" |
| 1711 | f"SIGKILL backstop will fire in {hard_kill_extra}s.\n" |
| 1712 | "========================================\n\n" |
| 1713 | ) |
| 1714 | sys.stderr.flush() |