MCP · copy
hub / github.com/mudler/LocalAI / _predict

Method _predict

backend/python/vllm/backend.py:511–852  ·  view source on GitHub ↗
(self, request, context, streaming=False)

Source from the content-addressed store, hash-verified

509 return backend_pb2.ScoreResponse()
510
511 async def _predict(self, request, context, streaming=False):
512 # Build the sampling parameters
513 # NOTE: this must stay in sync with the vllm backend
514 request_to_sampling_params = {
515 "N": "n",
516 "PresencePenalty": "presence_penalty",
517 "FrequencyPenalty": "frequency_penalty",
518 "RepetitionPenalty": "repetition_penalty",
519 "Temperature": "temperature",
520 "TopP": "top_p",
521 "TopK": "top_k",
522 "MinP": "min_p",
523 "Seed": "seed",
524 "StopPrompts": "stop",
525 "StopTokenIds": "stop_token_ids",
526 "BadWords": "bad_words",
527 "IncludeStopStrInOutput": "include_stop_str_in_output",
528 "IgnoreEOS": "ignore_eos",
529 "Tokens": "max_tokens",
530 "MinTokens": "min_tokens",
531 "Logprobs": "logprobs",
532 "PromptLogprobs": "prompt_logprobs",
533 "SkipSpecialTokens": "skip_special_tokens",
534 "SpacesBetweenSpecialTokens": "spaces_between_special_tokens",
535 "TruncatePromptTokens": "truncate_prompt_tokens",
536 }
537
538 sampling_params = SamplingParams(top_p=0.9, max_tokens=200)
539
540 for request_field, param_field in request_to_sampling_params.items():
541 if hasattr(request, request_field):
542 value = getattr(request, request_field)
543 if value not in (None, 0, [], False):
544 setattr(sampling_params, param_field, value)
545
546 # Structured-output decoding: use Grammar field to pass JSON schema or BNF
547 if HAS_GUIDED_DECODING and request.Grammar:
548 try:
549 json.loads(request.Grammar) # valid JSON = JSON schema
550 sampling_params.structured_outputs = StructuredOutputsParams(json=request.Grammar)
551 except json.JSONDecodeError:
552 sampling_params.structured_outputs = StructuredOutputsParams(grammar=request.Grammar)
553
554 # Extract image paths and process images
555 prompt = request.Prompt
556
557 image_paths = request.Images
558 image_data = [self.load_image(img_path) for img_path in image_paths]
559
560 videos_path = request.Videos
561 video_data = [self.load_video(video_path) for video_path in videos_path]
562
563 # If tokenizer template is enabled and messages are provided instead of prompt, apply the tokenizer template
564 if not request.Prompt and request.UseTokenizerTemplate and request.Messages:
565 messages_dicts = self._messages_to_dicts(request.Messages)
566 template_kwargs = {"tokenize": False, "add_generation_prompt": True}
567
568 # Pass tools for tool calling

Callers 3

Predict (Method) · 0.95
PredictStream (Method) · 0.95
run (Method) · 0.45

Calls 11

load_image (Method) · 0.95
load_video (Method) · 0.95
_messages_to_dicts (Method) · 0.95
generate (Method) · 0.80
append (Method) · 0.80
next (Function) · 0.50
items (Method) · 0.45
get (Method) · 0.45
remove (Method) · 0.45
extract_tool_calls (Method) · 0.45

Tested by 1

run (Method) · 0.36