Calculate different stats for future data to show whether the anomalous stocks found were actually better than non-anomalous ones
(self, predictions_with_output_data)
| 291 | print("Results stored successfully in", file_name) |
| 292 | |
def calculate_future_stats(self, predictions_with_output_data):
    """
    Compare the future performance of anomalous and non-anomalous stocks.

    Every item of ``predictions_with_output_data`` is unpacked as
    ``(prediction, symbol, historical_price, future_price)``. A stock is
    treated as anomalous when its prediction (anomaly score) is below 0.

    For the items that survive filtering, the method prints the correlation
    between anomaly score and future change, plus the per-group means of
    future change, future volatility and historical volatility. It then
    shows a scatter plot of anomaly score against future change.

    Groups with no members (and a correlation over fewer than two samples)
    are reported as ``nan`` instead of triggering NumPy runtime warnings.

    Returns:
        None. Results are printed and plotted.
    """
    # Changes larger than this (in absolute value) are treated as data errors.
    MAX_ABS_CHANGE = 250
    FONT_SIZE = 14
    ANOMALOUS_COLOR = '#c91414'
    NORMAL_COLOR = '#035AA6'

    future_change = []
    anomalous_score = []
    historical_volatilities = []
    future_volatilities = []

    for prediction, _symbol, historical_price, future_price in predictions_with_output_data:
        future_sum_percentage_change, future_volatility = self.calculate_future_performance(future_price)
        _, _, historical_volatility = self.calculate_recent_volatility(historical_price)

        # Skip for when there is a reverse split, the yfinance package does not
        # handle that well so percentages get weirdly large. Also skip items
        # whose change or prediction is NaN.
        if (abs(future_sum_percentage_change) > MAX_ABS_CHANGE
                or self.is_nan(future_sum_percentage_change)
                or self.is_nan(prediction)):
            continue

        future_change.append(future_sum_percentage_change)
        anomalous_score.append(prediction)
        future_volatilities.append(future_volatility)
        historical_volatilities.append(historical_volatility)

    def to_vector(values):
        # Flat float array so one boolean mask can index every series.
        return np.asarray(values, dtype=float).ravel()

    def group_mean(values, mask):
        # Mean of the selected entries; nan (without a warning) for an empty group.
        selected = values[mask]
        return float(np.mean(selected)) if selected.size else float("nan")

    scores = to_vector(anomalous_score)
    changes = to_vector(future_change)
    future_vols = to_vector(future_volatilities)
    historical_vols = to_vector(historical_volatilities)

    # Anything less than 0 is considered anomalous.
    is_anomalous = scores < 0
    is_normal = ~is_anomalous

    # Calculate correlation and stats. Pearson correlation is undefined for
    # fewer than two samples, so report nan directly in that case.
    if scores.size >= 2:
        correlation = np.corrcoef(scores, changes)[0, 1]
    else:
        correlation = float("nan")

    anomalous_future_changes = group_mean(changes, is_anomalous)
    normal_future_changes = group_mean(changes, is_normal)
    anomalous_future_volatilities = group_mean(future_vols, is_anomalous)
    normal_future_volatilities = group_mean(future_vols, is_normal)
    anomalous_historical_volatilities = group_mean(historical_vols, is_anomalous)
    normal_historical_volatilities = group_mean(historical_vols, is_normal)

    # NOTE(review): the labels say "Total absolute change" but the values are
    # per-group means of the signed change -- confirm the intended wording.
    print("\n*************** Future Performance ***************")
    print("Correlation between future absolute change vs anomalous score (lower is better, range = (-1, 1)): **%.2f**\n"
          "Total absolute change in future for Anomalous Stocks: **%.3f**\n"
          "Total absolute change in future for Normal Stocks: **%.3f**\n"
          "Average future volatility of Anomalous Stocks: **%.3f**\n"
          "Average future volatility of Normal Stocks: **%.3f**\n"
          "Historical volatility for Anomalous Stocks: **%.3f**\n"
          "Historical volatility for Normal Stocks: **%.3f**\n" % (
              correlation,
              anomalous_future_changes, normal_future_changes,
              anomalous_future_volatilities, normal_future_volatilities,
              anomalous_historical_volatilities, normal_historical_volatilities))

    # Plot: anomalous and normal stocks get separate markers and colours,
    # with a dashed vertical line at the anomaly threshold (score 0).
    plt.scatter(scores[is_anomalous], changes[is_anomalous], marker='v', color=ANOMALOUS_COLOR)
    plt.scatter(scores[is_normal], changes[is_normal], marker='P', color=NORMAL_COLOR)
    plt.axvline(x=0, linestyle='--', color='#848484')
    plt.xlabel("Anomaly Score", fontsize=FONT_SIZE)
    plt.ylabel("Absolute Future Change", fontsize=FONT_SIZE)
    plt.xticks(fontsize=FONT_SIZE)
    plt.yticks(fontsize=FONT_SIZE)
    plt.legend(["Anomalous", "Normal"], fontsize=FONT_SIZE)
    plt.title("Absolute Future Change", fontsize=FONT_SIZE)
    plt.tight_layout()
    plt.grid()
    plt.show()
| 348 | |
| 349 | |
| 350 | # Check arguments |
no test coverage detected