Disable using speculative decoding for subsequent generations.
(self)
| 367 | self.use_spec_dec = True |
| 368 | |
| 369 | def disable_spec_dec(self) -> None: |
| 370 | """Disable speculative decoding for subsequent generations and reset its related flags.""" |
| 371 | self.request_handler.unset_spec_dec_mode() |
| 372 | # Restore the speculation length to the configured maximum (inference_config.max_n_spec_tokens). |
| 373 | self.n_spec_tokens = self.inference_config.max_n_spec_tokens |
| 374 | self.use_glide = False |
| 375 | self.use_spec_dec = False |
| 376 | |
| 377 | def clear_spec_dec(self) -> None: |
| 378 | """Clear the structures related to speculative decoding, if they exist.""" |