| 1 | # Make training more memory-efficient by monkey-patching the LLaMA model with FlashAttention. |
| 2 | |
| 3 | # The monkey patch must be applied before transformers is imported. |
| 4 | from fastchat.train.llama2_flash_attn_monkey_patch import ( |
nothing calls this directly
no test coverage detected