(self, data: dict)
| 243 | return json.dumps(completion) |
| 244 | |
def __call__(self, data: dict):
    """Run a completion request and return the formatted response.

    Calls ``self.generate(data)`` and hands the resulting completion and
    choices to ``self.streamed_response`` when ``data['stream']`` is truthy,
    otherwise to ``self.non_streamed_response``.

    If ``generate`` raises ``InferenceServerException`` the error is printed
    and an empty completion (``{}``) with no choices (``[]``) is formatted
    instead, so the caller still receives a response.

    Args:
        data: Request payload. This method reads the optional ``'stream'``
            and ``'model'`` keys; the full payload is forwarded to
            ``self.generate``.

    Returns:
        Whatever ``self.streamed_response`` or ``self.non_streamed_response``
        returns for the (possibly empty) completion.
    """
    # perf_counter is monotonic, so the reported latency cannot be skewed by
    # wall-clock adjustments the way time.time() can.
    started = time.perf_counter()
    try:
        completion, choices = self.generate(data)
    except InferenceServerException as exc:
        print(exc)
        # status: unavailable -- this happens if the `model` string is invalid
        if exc.status() == 'StatusCode.UNAVAILABLE':
            # .get() keeps a missing 'model' key from raising KeyError inside
            # this handler, which would bypass the empty-completion fallback.
            print(
                f"WARNING: Model '{data.get('model')}' is not available. Please ensure that "
                "`model` is set to either 'fastertransformer' or 'py-model' depending on "
                "your installation"
            )
        completion = {}
        choices = []
    elapsed_ms = (time.perf_counter() - started) * 1000
    print(f"Returned completion in {elapsed_ms} ms")
    if data.get('stream', False):
        return self.streamed_response(completion, choices)
    return self.non_streamed_response(completion, choices)
nothing calls this directly
no test coverage detected