| 286 | ) |
| 287 | |
def _executor(self, file_path: Path):
    """Normalize a single source CSV file and save it to the target directory.

    The file is read with explicit NA handling for the symbol column, passed
    through ``self._normalize_obj.normalize``, optionally restricted to rows
    whose date is on or before ``self._end_date``, and written to
    ``self._target_dir`` under the same file name (without the index).
    Nothing is written when the normalizer returns ``None`` or an empty frame.

    Parameters
    ----------
    file_path : Path
        Path (or path-like string) of the CSV file to normalize.
    """
    file_path = Path(file_path)

    # pandas' default CSV parsing decodes symbol values such as "TRUE" or "NA" as
    # True (bool) / NaN (float). Force the symbol column to str and give it its own
    # NA set that does not contain "NA".
    default_na = pd._libs.parsers.STR_NA_VALUES  # pylint: disable=I1101
    # Set difference builds a new set: the pandas-owned default set is never mutated,
    # and no KeyError is raised if "NA" is not among the defaults.
    symbol_na = default_na - {"NA"}
    # Read only the header so a per-column na_values mapping can be built.
    columns = pd.read_csv(file_path, nrows=0).columns
    df = pd.read_csv(
        file_path,
        dtype={self._symbol_field_name: str},
        keep_default_na=False,
        na_values={col: symbol_na if col == self._symbol_field_name else default_na for col in columns},
    )

    # NOTE: It has been reported that there may be some problems here; the specific
    # issues will be dealt with when they are identified.
    df = self._normalize_obj.normalize(df)
    if df is None or df.empty:
        # Nothing to save for this file.
        return

    if self._end_date is not None:
        # Keep only rows dated on or before the configured end date.
        _mask = pd.to_datetime(df[self._date_field_name]) <= pd.Timestamp(self._end_date)
        df = df[_mask]
    df.to_csv(self._target_dir.joinpath(file_path.name), index=False)
| 311 | |
| 312 | def normalize(self): |
| 313 | logger.info("normalize data......") |