Reformats historical data, standardizes y and extra regressors, sets seasonalities and changepoints. Saves the preprocessed data to the instantiated object, and also returns the relevant components as a ModelInputData object.
(self, df: pd.DataFrame, **kwargs)
| 1119 | return k, m |
| 1120 | |
def preprocess(self, df: pd.DataFrame, **kwargs) -> ModelInputData:
    """
    Prepare the training data and assemble the inputs handed to the model.

    Reformats the historical data, standardizes y and extra regressors, and sets
    seasonalities and changepoints. The intermediate results are stored on the
    instance (``history_dates``, ``history``, ``train_component_cols``,
    ``component_modes``, ``fit_kwargs``) and the relevant components are also
    returned as a ModelInputData object.

    Parameters
    ----------
    df: pd.DataFrame holding the dates in column "ds" and the values in column "y".
    **kwargs: Extra keyword arguments; a deep copy is kept in ``self.fit_kwargs``.

    Returns
    -------
    ModelInputData built from the preprocessed history.

    Raises
    ------
    ValueError if "ds" or "y" is missing from ``df``, or if fewer than two rows
    have a non-null "y".
    """
    if not ('ds' in df and 'y' in df):
        raise ValueError(
            'Dataframe must have columns "ds" and "y" with the dates and '
            'values respectively.'
        )

    # Only rows with an observed y take part in fitting.
    has_observation = df['y'].notnull()
    observed = df[has_observation].copy()
    if len(observed) < 2:
        raise ValueError('Dataframe has less than 2 non-NaN rows.')

    # The date index is built from the full input frame, so dates whose y is
    # missing are still recorded here.
    distinct_dates = pd.Series(df['ds'].unique(), name='ds')
    self.history_dates = pd.to_datetime(distinct_dates).sort_values()

    self.history = self.setup_dataframe(observed, initialize_scales=True)
    self.set_auto_seasonalities()
    feature_bundle = self.make_all_seasonality_features(self.history)
    seasonal_matrix, sigma_values, column_groups, mode_map = feature_bundle
    self.train_component_cols = column_groups
    self.component_modes = mode_map
    self.fit_kwargs = deepcopy(kwargs)

    self.set_changepoints()

    # Linear and flat growth get an all-zero cap of matching length; any other
    # growth setting uses the scaled cap column of the history.
    cap = (
        np.zeros(self.history.shape[0])
        if self.growth in ('linear', 'flat')
        else self.history['cap_scaled']
    )

    model_inputs = dict(
        T=self.history.shape[0],
        S=len(self.changepoints_t),
        K=seasonal_matrix.shape[1],
        tau=self.changepoint_prior_scale,
        trend_indicator=TrendIndicator[self.growth.upper()].value,
        y=self.history['y_scaled'],
        t=self.history['t'],
        t_change=self.changepoints_t,
        X=seasonal_matrix,
        sigmas=sigma_values,
        s_a=column_groups['additive_terms'],
        s_m=column_groups['multiplicative_terms'],
        cap=cap,
    )
    return ModelInputData(**model_inputs)
| 1168 | |
| 1169 | def calculate_initial_params(self, num_total_regressors: int) -> ModelParams: |
| 1170 | """ |
no test coverage detected