MCPcopy
hub / github.com/microsoft/qlib / process_data

Method process_data

qlib/data/dataset/handler.py:553–613  ·  view source on GitHub ↗

Process the data. Run `processor.fit` if necessary. Notation: (data) [processor] # data processing flow of self.process_type == DataHandlerLP.PTYPE_I .. code-block:: text (self._data)-[shared_processors]-(_shared_df)-[learn_processors]-(_learn_df)

(self, with_fit: bool = False)

Source from the content-addressed store, hash-verified

551 return True
552
553 def process_data(self, with_fit: bool = False):
554 """
555 process_data data. Fun `processor.fit` if necessary
556
557 Notation: (data) [processor]
558
559 # data processing flow of self.process_type == DataHandlerLP.PTYPE_I
560
561 .. code-block:: text
562
563 (self._data)-[shared_processors]-(_shared_df)-[learn_processors]-(_learn_df)
564 \\
565 -[infer_processors]-(_infer_df)
566
567 # data processing flow of self.process_type == DataHandlerLP.PTYPE_A
568
569 .. code-block:: text
570
571 (self._data)-[shared_processors]-(_shared_df)-[infer_processors]-(_infer_df)-[learn_processors]-(_learn_df)
572
573 Parameters
574 ----------
575 with_fit : bool
576 The input of the `fit` will be the output of the previous processor
577 """
578 # shared data processors
579 # 1) assign
580 _shared_df = self._data
581 if not self._is_proc_readonly(self.shared_processors): # avoid modifying the original data
582 _shared_df = _shared_df.copy()
583 # 2) process
584 _shared_df = self._run_proc_l(_shared_df, self.shared_processors, with_fit=with_fit, check_for_infer=True)
585
586 # data for inference
587 # 1) assign
588 _infer_df = _shared_df
589 if not self._is_proc_readonly(self.infer_processors): # avoid modifying the original data
590 _infer_df = _infer_df.copy()
591 # 2) process
592 _infer_df = self._run_proc_l(_infer_df, self.infer_processors, with_fit=with_fit, check_for_infer=True)
593
594 self._infer = _infer_df
595
596 # data for learning
597 # 1) assign
598 if self.process_type == DataHandlerLP.PTYPE_I:
599 _learn_df = _shared_df
600 elif self.process_type == DataHandlerLP.PTYPE_A:
601 # based on `infer_df` and append the processor
602 _learn_df = _infer_df
603 else:
604 raise NotImplementedError(f"This type of input is not supported")
605 if not self._is_proc_readonly(self.learn_processors): # avoid modifying the original data
606 _learn_df = _learn_df.copy()
607 # 2) process
608 _learn_df = self._run_proc_l(_learn_df, self.learn_processors, with_fit=with_fit, check_for_infer=False)
609
610 self._learn = _learn_df

Callers 2

fit_process_dataMethod · 0.95
setup_dataMethod · 0.95

Calls 3

_is_proc_readonlyMethod · 0.95
_run_proc_lMethod · 0.95
copyMethod · 0.80

Tested by

no test coverage detected