Clip & bin actions to *the last `n_bins` tokens* of the vocabulary (e.g., tokenizer.vocab[-256:]).
(self, action: np.ndarray)
| 36 | self.action_token_begin_idx: int = int(self.tokenizer.vocab_size - (self.n_bins + 1)) |
| 37 | |
def __call__(self, action: np.ndarray) -> Union[str, List[str]]:
    """Convert continuous actions into text built from the vocabulary's trailing tokens.

    Each value is clamped to ``[min_action, max_action]``, assigned a bin index
    with ``np.digitize`` against ``self.bins``, and mapped to the token id
    ``tokenizer.vocab_size - bin_index`` so that bins count backwards from the
    end of the vocabulary (e.g., tokenizer.vocab[-256:]).

    Args:
        action: Continuous action values. A 1-D array is treated as a single
            action; any other rank goes through the batch path.

    Returns:
        The result of ``tokenizer.decode`` for a 1-D input, otherwise the
        result of ``tokenizer.batch_decode`` (per the annotation: a string or
        a list of strings).
    """
    lower, upper = float(self.min_action), float(self.max_action)
    clipped = np.clip(action, a_min=lower, a_max=upper)
    bin_ids = np.digitize(clipped, self.bins)

    # Larger bin indices land on smaller token ids, counted from the vocab end.
    token_ids = self.tokenizer.vocab_size - bin_ids

    # Anything that is not exactly 1-D is decoded as a batch.
    if bin_ids.ndim != 1:
        return self.tokenizer.batch_decode(token_ids.tolist())
    return self.tokenizer.decode(list(token_ids))
| 48 | |
| 49 | def encode_actions_to_token_ids(self, action: np.ndarray) -> np.ndarray: |
| 50 | """Encode continuous actions to discrete action token IDs.""" |
No direct callers of `__call__` were found.
No test coverage was detected for `__call__`.