Reads the keystats.csv file and prepares it for scikit-learn. Returns X_train (a numpy array of feature values) and y_train (a list of labels).
()
| 8 | |
| 9 | |
def build_data_set():
    """
    Load keystats.csv and turn it into training inputs for scikit-learn.

    The file is read with "Date" as the index, rows containing any missing
    value are discarded, and every column from position 6 onward (not
    counting the index) is treated as a feature.

    :return: (X_train, y_train) -- X_train is the numpy array of feature
        values; y_train is a list holding one label per remaining row.
    """
    frame = pd.read_csv("keystats.csv", index_col="Date").dropna(
        axis=0, how="any"
    )

    # The leading six columns are not used as model inputs.
    feature_columns = frame.columns[6:]
    X_train = frame[feature_columns].values

    # Labels come from status_calc, which is given the stock's change, the
    # S&P500's change and the OUTPERFORMANCE threshold.
    # NOTE(review): presumably a label is truthy when the stock beats the
    # index by the threshold -- confirm against status_calc.
    labels = status_calc(
        frame["stock_p_change"],
        frame["SP500_p_change"],
        OUTPERFORMANCE,
    )
    y_train = list(labels)

    return X_train, y_train
| 30 | |
| 31 | |
| 32 | def predict_stocks(): |
no test coverage detected