()
| 30 | |
| 31 | |
def predict_stocks():
    """Train a random forest and print the tickers it predicts will outperform.

    The classifier is fitted on the output of ``build_data_set()`` and then
    applied to the rows of ``forward_sample.csv``. Rows containing any missing
    value are dropped before prediction.

    Returns:
        A list of the ticker values whose rows were classified positively, or
        ``None`` (after printing a notice) when no row was classified
        positively.
    """
    X_train, y_train = build_data_set()
    # random_state=0 pins the forest's seed, so repeated runs give the same
    # result. Original author's note: remove the random_state parameter to
    # generate actual predictions.
    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    clf.fit(X_train, y_train)

    # Now we get the actual data from which we want to generate predictions.
    data = pd.read_csv("forward_sample.csv", index_col="Date")
    data.dropna(axis=0, how="any", inplace=True)
    # Every column from position 6 onward is used as a feature.
    # NOTE(review): presumably the first six columns are non-feature metadata
    # (they include "Ticker") -- confirm against the CSV layout.
    features = data.columns[6:]
    X_test = data[features].values
    z = data["Ticker"].values

    # Get the predicted tickers.
    y_pred = clf.predict(X_test)
    # Coerce to an explicit boolean mask. Indexing ``z`` with an integer 0/1
    # array would select positions 0 and 1 instead of filtering rows, so the
    # result must not depend on the dtype of the training labels.
    mask = y_pred.astype(bool)
    if not mask.any():
        print("No stocks predicted!")
        return None

    invest_list = z[mask].tolist()
    print(
        f"{len(invest_list)} stocks predicted to outperform the S&P500 by more than {OUTPERFORMANCE}%:"
    )
    print(" ".join(invest_list))
    return invest_list
| 56 | |
| 57 | |
| 58 | if __name__ == "__main__": |
no test coverage detected