tgi params format -> lightllm server params format pub(crate) struct GenerateParameters { pub best_of: Option<usize>, pub temperature: Option<f32>, pub repetition_penalty: Option<f32>, pub frequency_penalty: Option<f32>, pub presence_penalty: Option<f32>, ... }
(params, num_beam: int = 1)
| 13 | |
| 14 | |
def format_tgi_params(params, num_beam: int = 1):
    """
    Translate TGI-style generation parameters into the lightllm server layout.

    The dict is rewritten in place and the very same object is returned.

    Reference: the TGI request parameters this function expects

        pub(crate) struct GenerateParameters {
            pub best_of: Option<usize>,
            pub temperature: Option<f32>,
            pub repetition_penalty: Option<f32>,
            pub frequency_penalty: Option<f32>,
            pub presence_penalty: Option<f32>,
            pub top_k: Option<i32>,
            pub top_p: Option<f32>,
            pub typical_p: Option<f32>,
            pub do_sample: bool,
            pub max_new_tokens: u32,
            pub return_full_text: Option<bool>,
            pub stop: Vec<String>,
            pub truncate: Option<usize>,
            pub watermark: bool,
            pub details: bool,
            pub decoder_input_details: bool,
            pub seed: Option<u64>,
        }

    Args:
        params: Mutable mapping of TGI parameters; modified in place.
        num_beam: Value written to ``params["best_of"]``, replacing any
            ``best_of`` already present.

    Returns:
        The same ``params`` object after conversion.
    """
    # Keys passed through untouched: temperature, repetition_penalty,
    # frequency_penalty, presence_penalty, top_k, top_p, do_sample,
    # max_new_tokens.

    # Renamed keys: (TGI name, lightllm name, value used when the TGI key is
    # missing). A lightllm-named key that is already present is left alone.
    renames = (
        ("details", "return_details", False),
        ("stop", "stop_sequences", None),
    )
    for tgi_key, lightllm_key, fallback in renames:
        if lightllm_key not in params:
            params[lightllm_key] = params.pop(tgi_key, fallback)

    # The beam count supplied by the caller always overrides best_of.
    params["best_of"] = num_beam

    # Keys lightllm does not use. "stop" and "details" are listed again so
    # they are also dropped when the renamed key was already supplied.
    unused_keys = (
        "typical_p",
        "return_full_text",
        "stop",
        "truncate",
        "watermark",
        "details",
        "decoder_input_details",
        "seed",
        "token_healing_top_k",
        "token_healing_unmerge_last_token",
    )
    for unused_key in unused_keys:
        params.pop(unused_key, None)

    return params
| 58 | |
| 59 | |
| 60 | async def tgi_generate_impl(request: Request, httpserver_manager: HttpServerManager) -> Response: |
no test coverage detected