Any code surrounded by `with ENABLE_CUDA_KERNEL():` will invoke ppq's kernel functions for a speed boost. This is a helper class for invoking highly efficient customized CUDA kernels. The PPQ developer team has implemented a series of quantization-related CUDA kernels; they are 5-100x faster than torch kernels, with less GPU memory cost.
| 913 | |
| 914 | |
class ENABLE_CUDA_KERNEL:
    """Context manager that switches PPQ to its custom CUDA kernels.

    Any code surrounded by

        with ENABLE_CUDA_KERNEL():
            ...

    runs with ``PPQ_CONFIG.USING_CUDA_KERNEL`` set to ``True``. The value
    the flag had on entry is restored when the block exits, including when
    the block exits through an exception.

    This is a helper class for invoking PPQ's highly efficient customized
    CUDA kernels. The PPQ developer team has implemented a series of
    quantization-related CUDA kernels; they are described as 5-100x faster
    than the torch kernels, with less GPU memory cost.
    """

    def __init__(self) -> None:
        # Imported lazily so that the ffi module is only touched when a
        # caller actually asks for the CUDA kernels.
        from ppq.core.ffi import CUDA_COMPLIER

        # NOTE(review): presumably builds/loads the CUDA extension; the
        # behaviour of complie() is defined in ppq.core.ffi -- confirm there.
        CUDA_COMPLIER.complie()
        # Holds the flag value captured by __enter__ so __exit__ can restore it.
        self._state = False

    def __enter__(self):
        """Remember the current flag value, enable the kernels, return self."""
        self._state = PPQ_CONFIG.USING_CUDA_KERNEL
        PPQ_CONFIG.USING_CUDA_KERNEL = True
        # Returning the manager makes `with ENABLE_CUDA_KERNEL() as ctx:` bind
        # a usable object instead of None.
        return self

    def __exit__(self, *args):
        """Restore the flag value captured on entry; exceptions propagate."""
        PPQ_CONFIG.USING_CUDA_KERNEL = self._state
| 936 | |
| 937 | |
| 938 | class DISABLE_CUDA_KERNEL: |
no outgoing calls
no test coverage detected