(self, handler_kwargs: dict = None, **kwargs)
| 162 | super().__init__(handler, segments, **kwargs) |
| 163 | |
def setup_data(self, handler_kwargs: dict = None, **kwargs):
    """Fetch the handler data and build the arrays, slices and memory used for sampling.

    After the parent `setup_data` runs, the handler frame is copied, re-indexed
    to <code, date>, sorted, and converted to float32 numpy arrays. The method
    then populates `_data`, `_label`, `_index`, `_batch_slices`,
    `_daily_slices`, `_daily_index`, `_memory` and `_zeros` on the instance.

    Args:
        handler_kwargs (dict): when not None, forwarded as keyword arguments
            to `self.handler.setup_data` before the data is fetched.
        **kwargs: forwarded unchanged to the parent class `setup_data`.

    Raises:
        AssertionError: if `input_size` is set, differs from the feature
            width, and the feature width is not a multiple of it.
        ValueError: if `memory_mode` is neither "sample" nor "daily".
    """
    super().setup_data(**kwargs)

    if handler_kwargs is not None:
        self.handler.setup_data(**handler_kwargs)

    # pre-fetch data and change index to <code, date>
    # NOTE: we will use inplace sort to reduce memory use
    try:
        df = self.handler._learn.copy()  # use copy otherwise recorder will fail
        # FIXME: currently we cannot support switching from `_learn` to `_infer` for inference
    except Exception:
        # best-effort fallback: any failure reading `_learn` falls back to the raw data
        warnings.warn("cannot access `_learn`, will load raw data")
        df = self.handler._data.copy()
    df.index = df.index.swaplevel()
    df.sort_index(inplace=True)

    # convert to numpy
    self._data = df["feature"].values.astype("float32")
    np.nan_to_num(self._data, copy=False)  # NOTE: fillna in case users forget using the fillna processor
    self._label = df["label"].squeeze().values.astype("float32")
    self._index = df.index

    feature_dim = self._data.shape[1]
    if self.input_size is not None and self.input_size != feature_dim:
        warnings.warn("the data has different shape from input_size and the data will be reshaped")
        # Raise explicitly instead of using `assert`: assert statements are
        # removed under `python -O`, which would let mismatched data through.
        # AssertionError is kept so existing callers see the same exception.
        if feature_dim % self.input_size != 0:
            raise AssertionError("data mismatch, please check `input_size`")

    # create batch slices
    self._batch_slices = _create_ts_slices(self._index, self.seq_len)

    # create daily slices: one list of batch slices per date, keyed in sorted date order
    daily_slices = {date: [] for date in sorted(self._index.unique(level=1))}  # sorted by date
    for i, (_, date) in enumerate(self._index):
        daily_slices[date].append(self._batch_slices[i])
    self._daily_slices = np.array(list(daily_slices.values()), dtype="object")
    self._daily_index = pd.Series(list(daily_slices.keys()))  # index is the original date index

    # add memory (sample wise and daily)
    if self.memory_mode == "sample":
        memory_rows = len(self._data)  # one memory row per sample
    elif self.memory_mode == "daily":
        memory_rows = len(self._daily_index)  # one memory row per date
    else:
        raise ValueError(f"invalid memory_mode `{self.memory_mode}`")
    self._memory = np.zeros((memory_rows, self.num_states), dtype=np.float32)

    # padding tensor, wide enough for either a state row or a feature row
    self._zeros = np.zeros((self.seq_len, max(self.num_states, feature_dim)), dtype=np.float32)
| 211 | |
| 212 | def _prepare_seg(self, slc, **kwargs): |
| 213 | fn = _get_date_parse_fn(self._index[0][1]) |
no test coverage detected