Prepare dataframe for fitting or predicting. Adds a time index and scales y. Creates auxiliary columns 't', 't_ix', 'y_scaled', and 'cap_scaled'. These columns are used during both fitting and predicting. Parameters ---------- df: pd.DataFrame with columns ds, y, and cap if logistic growth. Any specified additional regressors must also be present.
(self, df, initialize_scales=False)
| 265 | ) |
| 266 | |
| 267 | def setup_dataframe(self, df, initialize_scales=False): |
| 268 | """Prepare dataframe for fitting or predicting. |
| 269 | |
| 270 | Adds a time index and scales y. Creates auxiliary columns 't', 't_ix', |
| 271 | 'y_scaled', and 'cap_scaled'. These columns are used during both |
| 272 | fitting and predicting. |
| 273 | |
| 274 | Parameters |
| 275 | ---------- |
| 276 | df: pd.DataFrame with columns ds, y, and cap if logistic growth. Any |
| 277 | specified additional regressors must also be present. |
| 278 | initialize_scales: Boolean set scaling factors in self from df. |
| 279 | |
| 280 | Returns |
| 281 | ------- |
| 282 | pd.DataFrame prepared for fitting or predicting. |
| 283 | """ |
| 284 | if 'y' in df: # 'y' will be in training data |
| 285 | df['y'] = pd.to_numeric(df['y']) |
| 286 | if np.isinf(df['y'].values).any(): |
| 287 | raise ValueError('Found infinity in column y.') |
| 288 | if df['ds'].dtype == np.int64: |
| 289 | df['ds'] = df['ds'].astype(str) |
| 290 | df['ds'] = pd.to_datetime(df['ds']) |
| 291 | if df['ds'].dt.tz is not None: |
| 292 | raise ValueError( |
| 293 | 'Column ds has timezone specified, which is not supported. ' |
| 294 | 'Remove timezone.' |
| 295 | ) |
| 296 | if df['ds'].isnull().any(): |
| 297 | raise ValueError('Found NaN in column ds.') |
| 298 | for name in self.extra_regressors: |
| 299 | if name not in df: |
| 300 | raise ValueError( |
| 301 | 'Regressor {name!r} missing from dataframe' |
| 302 | .format(name=name) |
| 303 | ) |
| 304 | df[name] = pd.to_numeric(df[name]) |
| 305 | if df[name].isnull().any(): |
| 306 | raise ValueError( |
| 307 | 'Found NaN in column {name!r}'.format(name=name) |
| 308 | ) |
| 309 | for props in self.seasonalities.values(): |
| 310 | condition_name = props['condition_name'] |
| 311 | if condition_name is not None: |
| 312 | if condition_name not in df: |
| 313 | raise ValueError( |
| 314 | 'Condition {condition_name!r} missing from dataframe' |
| 315 | .format(condition_name=condition_name) |
| 316 | ) |
| 317 | if not df[condition_name].isin([True, False]).all(): |
| 318 | raise ValueError( |
| 319 | 'Found non-boolean in column {condition_name!r}' |
| 320 | .format(condition_name=condition_name) |
| 321 | ) |
| 322 | df[condition_name] = df[condition_name].astype('bool') |
| 323 | |
| 324 | if df.index.name == 'ds': |