MCPcopy
hub / github.com/allenai/open-instruct / async_call

Method async_call

open_instruct/ground_truth_utils.py:750–820  ·  view source on GitHub ↗

Asynchronous version of __call__ that properly handles the async OpenAI client.

(
        self,
        tokenized_prediction: list[int],
        prediction: str,
        label: str,
        query: str,
        rollout_state: dict | None = None,
    )

Source from the content-addressed store, hash-verified

748 ) / 1_000_000
749
750 async def async_call(
751 self,
752 tokenized_prediction: list[int],
753 prediction: str,
754 label: str,
755 query: str,
756 rollout_state: dict | None = None,
757 ) -> VerificationResult:
758 """
759 Asynchronous version of __call__ that properly handles the async OpenAI client.
760 """
761 final_answer = extract_final_answer(prediction)
762 prompt = self.prompt_template.format(input=query, output=final_answer, label=label)
763
764 max_retries = 3 # for rate limits
765 retry_delay = 1.0
766
767 for attempt in range(max_retries):
768 # judges the quality of a response
769 try:
770 messages = build_messages(prompt)
771
772 # Check if the request would exceed context window
773 if not context_window_checker.check_context_window_limit(
774 messages=messages,
775 max_completion_tokens=self.verifier_config.llm_judge_max_tokens,
776 model_name=self.verifier_config.llm_judge_model,
777 max_context_length=self.verifier_config.llm_judge_max_context_length, # Adjust based on your model
778 safety_margin=150,
779 ):
780 # Try to truncate messages to fit
781 messages = context_window_checker.truncate_messages_to_fit_context(
782 messages=messages,
783 max_completion_tokens=self.verifier_config.llm_judge_max_tokens,
784 model_name=self.verifier_config.llm_judge_model,
785 max_context_length=self.verifier_config.llm_judge_max_context_length,
786 safety_margin=200,
787 )
788
789 # Check again after truncation
790 if not context_window_checker.check_context_window_limit(
791 messages=messages,
792 max_completion_tokens=self.verifier_config.llm_judge_max_tokens,
793 model_name=self.verifier_config.llm_judge_model,
794 max_context_length=self.verifier_config.llm_judge_max_context_length,
795 safety_margin=150,
796 ):
797 logger.error("Cannot fit request within context window even after truncation.")
798 return VerificationResult(score=0.0, cost=0.0, reasoning="Error: Context window exceeded")
799 # end of Faeze's context window check
800 response = await run_litellm_async_raw(
801 model_name=self.verifier_config.llm_judge_model,
802 messages=messages,
803 temperature=self.verifier_config.llm_judge_temperature,
804 max_completion_tokens=self.verifier_config.llm_judge_max_tokens,
805 seed=self.verifier_config.seed,
806 timeout=self.verifier_config.llm_judge_timeout,
807 )

Callers 1

__call__ · Method · 0.95

Calls 7

parse_completion · Method · 0.95
get_cost · Method · 0.95
extract_final_answer · Function · 0.90
build_messages · Function · 0.90
run_litellm_async_raw · Function · 0.90
VerificationResult · Class · 0.85
sleep · Method · 0.80

Tested by

no test coverage detected