MCPcopy
hub / github.com/tinygrad/tinygrad / alloc

Method alloc

tinygrad/runtime/support/system.py:258–272  ·  view source on GitHub ↗
(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, force_devmem=False, **kwargs)

Source from the content-addressed store, hash-verified

256 self.dev, self.vram_bar, self.count = dev, vram_bar, len(hcq_filter_visible_devices(System.list_devices(vendor, devices, base_class), dn))
257
258 def alloc(self, size:int, host=False, uncached=False, cpu_access=False, contiguous=False, force_devmem=False, **kwargs) -> HCQBuffer:
# Allocate a buffer of at least `size` bytes and return it wrapped in an HCQBuffer owned by self.dev.
#
# Two backing paths exist:
#   * system memory: obtained from self.pci_dev.alloc_sysmem and mapped with aspace=AddrSpace.SYS;
#   * device memory: obtained from self.dev_impl.mm.valloc, with an optional BAR view for CPU access.
#
# Parameters as used by this body:
#   size         - requested size in bytes; rounded up before allocation (see below).
#   host         - when truthy, the system-memory path is always taken.
#   uncached     - forwarded to valloc on the device-memory path; on the system-memory path it only
#                  takes part in the path selection (the mapping there always passes uncached=True).
#   cpu_access   - takes part in the path selection; on the device-memory path it also requests a
#                  contiguous valloc and a BAR view, and is passed to PCIAllocationMeta.
#   contiguous   - forwarded to alloc_sysmem only; not consulted on the device-memory path.
#   force_devmem - when truthy, only `host` can still select system memory.
#   **kwargs     - accepted and not referenced anywhere in this body.
#
# Path selection: system memory is used when `host` is set, or when force_devmem is not set and
# either (a) self.is_bar_small() is true and cpu_access is requested, or (b) is_bar_small() is false
# and both uncached and cpu_access are requested. Because of short-circuiting, is_bar_small() is not
# called at all when `host` is truthy.
259 should_use_sysmem = host or ((cpu_access if self.is_bar_small() else (uncached and cpu_access)) and not force_devmem)
260
261 # Align size to huge pages for large allocations, otherwise the unaligned tail falls back to 4KB pages, increasing TLB pressure.
# Rounding granule: mmap.PAGESIZE for system memory; for device memory 2 MiB (2 << 20) once the
# request is at least 8 MiB (8 << 20), otherwise 4 KiB (4 << 10).
262 size = round_up(size, mmap.PAGESIZE if should_use_sysmem else ((2 << 20) if size >= (8 << 20) else (4 << 10)))
263
264 if should_use_sysmem:
# `size` is rounded to mmap.PAGESIZE a second time here and rebound via the walrus operator;
# presumably a no-op after the rounding above (assumes round_up is idempotent - TODO confirm).
265 vaddr = self.dev_impl.mm.alloc_vaddr(size:=round_up(size, mmap.PAGESIZE), align=mmap.PAGESIZE)
266 memview, paddrs = self.pci_dev.alloc_sysmem(size, vaddr=vaddr, contiguous=contiguous)
# Every returned physical address is mapped as a 0x1000-byte entry.
# NOTE(review): presumably alloc_sysmem returns one paddr per 4 KiB page - confirm this still holds
# on hosts where mmap.PAGESIZE is not 0x1000.
267 mapping = self.dev_impl.mm.map_range(vaddr, size, [(paddr, 0x1000) for paddr in paddrs], aspace=AddrSpace.SYS, snooped=True, uncached=True)
# The view returned by alloc_sysmem becomes the buffer's view; hMemory is the first physical address.
268 return HCQBuffer(vaddr, size, meta=PCIAllocationMeta(mapping, has_cpu_mapping=True, hMemory=paddrs[0]), view=memview, owner=self.dev)
269
# Device-memory path. `size` is rounded to 0x1000 again and rebound (presumably already aligned by
# the rounding above). Contiguity is requested exactly when cpu_access is set.
270 mapping = self.dev_impl.mm.valloc(size:=round_up(size, 0x1000), uncached=uncached, contiguous=cpu_access)
# With cpu_access, a view of BAR self.vram_bar is created starting at the first physical address of
# the mapping and spanning mapping.size bytes; without it the buffer has no view (None).
# NOTE(review): presumably the contiguous valloc is what lets a single BAR window cover the whole
# allocation - verify against valloc/map_bar.
271 barview = self.pci_dev.map_bar(bar=self.vram_bar, off=mapping.paddrs[0][0], size=mapping.size) if cpu_access else None
# The returned buffer reports the rounded `size`; hMemory is the first physical address of the mapping.
272 return HCQBuffer(mapping.va_addr, size, view=barview, meta=PCIAllocationMeta(mapping, cpu_access, hMemory=mapping.paddrs[0][0]), owner=self.dev)
273
274 def free(self, b:HCQBuffer):
275 if b.owner != self.dev: self.dev.iface.dev_impl.mm.unmap_range(b.va_addr, b.size)

Callers 8

__init__ · Method · 0.45
__init__ · Method · 0.45
fill_kernargs · Method · 0.45
__init__ · Method · 0.45
new_signal · Method · 0.45
_realloc · Method · 0.45
alloc_sysmem · Method · 0.45
__init__ · Method · 0.45

Calls 9

is_bar_small · Method · 0.95
round_up · Function · 0.90
HCQBuffer · Class · 0.90
PCIAllocationMeta · Class · 0.85
alloc_vaddr · Method · 0.80
valloc · Method · 0.80
alloc_sysmem · Method · 0.45
map_range · Method · 0.45
map_bar · Method · 0.45

Tested by

no test coverage detected