Generates content using the vLLM API and executes any `<Code>` blocks found. Returns a dictionary containing the full reasoning process.
(
self,
prompt: str,
workspace: str,
temperature: float = 0.5,
max_tokens: int = 32768,
top_p: float = None,
top_k: int = None,
)
| 66 | return f"[Error]:\n{error_message.strip()}" |
| 67 | |
| 68 | def generate( |
| 69 | self, |
| 70 | prompt: str, |
| 71 | workspace: str, |
| 72 | temperature: float = 0.5, |
| 73 | max_tokens: int = 32768, |
| 74 | top_p: float = None, |
| 75 | top_k: int = None, |
| 76 | ) -> dict: |
| 77 | """ |
| 78 | Generates content using vLLM API and executes any <Code> blocks found. |
| 79 | Returns a dictionary containing the full reasoning process. |
| 80 | """ |
| 81 | original_cwd = os.getcwd() |
| 82 | os.chdir(workspace) |
| 83 | reasoning = "" |
| 84 | messages = [{"role": "user", "content": prompt}] |
| 85 | response_message = [] |
| 86 | |
| 87 | try: |
| 88 | for round_idx in range(self.max_rounds): |
| 89 | payload = { |
| 90 | "model": self.model_name, |
| 91 | "messages": messages, |
| 92 | "temperature": temperature, |
| 93 | "max_tokens": max_tokens, |
| 94 | "add_generation_prompt": False, |
| 95 | "stop": ["</Code>"], |
| 96 | } |
| 97 | if top_p is not None: |
| 98 | payload["top_p"] = top_p |
| 99 | if top_k is not None: |
| 100 | payload["top_k"] = top_k |
| 101 | |
| 102 | # Call vLLM API |
| 103 | response = requests.post( |
| 104 | self.api_url, |
| 105 | headers={"Content-Type": "application/json"}, |
| 106 | json=payload, |
| 107 | ) |
| 108 | response.raise_for_status() |
| 109 | response_data = response.json() |
| 110 | |
| 111 | ans = response_data["choices"][0]["message"]["content"] |
| 112 | if response_data["choices"][0].get("stop_reason") == "</Code>": |
| 113 | ans += "</Code>" |
| 114 | |
| 115 | response_message.append(ans) |
| 116 | |
| 117 | # Check for termination: only stop when <Answer> is present |
| 118 | if "<Answer>" in ans: |
| 119 | break |
| 120 | |
| 121 | # Check for <Code> block to execute |
| 122 | code_match = re.search(r"<Code>(.*?)</Code>", ans, re.DOTALL) |
| 123 | if not code_match: |
| 124 | # No <Code> and no <Answer>: intermediate step (e.g. <Analyze>). |
| 125 | # Append and continue so the model can produce <Code> next. |