| 13 | |
| 14 | |
| 15 | class HPU_Accelerator(DeepSpeedAccelerator): |
| 16 | |
| 17 | def __init__(self): |
| 18 | self._name = 'hpu' |
| 19 | self._communication_backend_name = 'hccl' |
| 20 | self._compile_backend = "hpu_backend" |
| 21 | self.apply_hpu_workarounds() |
| 22 | try: |
| 23 | import habana_frameworks.torch.hpu as hpu |
| 24 | self.hpu = hpu |
| 25 | torch.use_deterministic_algorithms(True) |
| 26 | # TODO: remove this WA when memory mapping break is resolved. |
| 27 | torch.utils.deterministic.fill_uninitialized_memory = False |
| 28 | except ImportError as e: |
| 29 | raise ValueError( |
| 30 | "HPU_Accelerator requires habana_frameworks.torch.hpu, which is not installed on this system.") |
| 31 | |
| 32 | self.fp16_supported = None |
| 33 | |
def apply_hpu_workarounds(self):
    """Default the HPU workaround environment variables to "0".

    Each variable is written only when it is absent from ``os.environ``;
    a value already present in the environment is left untouched.
    """
    workaround_defaults = (
        ("PT_HPU_LAZY_ACC_PAR_MODE", "0"),
        ("PT_HPU_ENABLE_REFINE_DYNAMIC_SHAPES", "0"),
    )
    for env_key, env_value in workaround_defaults:
        # setdefault keeps any value the user has already exported.
        os.environ.setdefault(env_key, env_value)
| 42 | |
| 43 | # Device APIs |
def is_synchronized_device(self):
    """Return ``False``: this accelerator does not report itself as a synchronized device."""
    return False
| 46 | |
def use_host_timers(self):
    """Return ``False``: this accelerator does not ask for host timers."""
    return False
| 49 | |
def resolves_data_dependency(self):
    """Return ``True``: this accelerator reports that it resolves data dependencies."""
    return True
| 52 | |
def handles_memory_backpressure(self):
    """Return ``True``: this accelerator reports that it handles memory backpressure."""
    return True
| 55 | |
def device_name(self, device_index=None):
    """Return the device type string ``'hpu'``.

    ``device_index`` is accepted but ignored; the result never carries an
    index suffix.
    """
    return 'hpu'
| 59 | |
def device(self, device_index=None):
    """Build the ``torch.device`` for this accelerator.

    The device string is whatever ``self.device_name(device_index)`` returns.
    """
    name = self.device_name(device_index)
    return torch.device(name)
| 62 | |
def set_device(self, device_index):
    """Forward ``device_index`` to ``self.hpu.set_device``; returns ``None``."""
    hpu_runtime = self.hpu
    hpu_runtime.set_device(device_index)
| 65 | |
def current_device(self):
    """Return whatever ``self.hpu.current_device()`` reports."""
    active = self.hpu.current_device()
    return active
| 68 | |
def current_device_name(self):
    """Return ``'hpu:<n>'`` where ``<n>`` is the value of ``self.current_device()``."""
    return f'hpu:{self.current_device()}'
| 71 | |
| 72 | def device_count(self): |
no outgoing calls
no test coverage detected
searching dependent graphs…