MCPcopy Index your code
hub / github.com/robertmartin8/MachineLearningStocks / forward

Function forward

current_data.py:92–150  ·  view source on GitHub ↗

Creates the forward sample by parsing the current-data HTML files that were downloaded in check_yahoo(). Returns a pandas DataFrame containing all of the current data for each ticker.

()

Source from the content-addressed store, hash-verified

90
91
def forward():
    """
    Creates the forward sample by parsing the current data html files that we downloaded in check_yahoo().

    Every file in ``forward/`` contributes one row: the ticker (the upper-cased file name with
    ``.html`` stripped), zero placeholders for the date/price columns, and one parsed value per
    entry in ``features`` ("N/A" when the regex finds nothing for that feature).

    :return: a pandas dataframe containing all of the current data for each ticker.
    """
    # In addition to the features, we need some index variables (date, unix timestamp, ticker),
    # and of course the dependent variables (prices).
    df_columns = [
        "Date",
        "Unix",
        "Ticker",
        "Price",
        "stock_p_change",
        "SP500",
        "SP500_p_change",
    ] + features

    tickerfile_list = os.listdir("forward/")

    # Required in macOS to remove the hidden index file.
    if ".DS_Store" in tickerfile_list:
        tickerfile_list.remove(".DS_Store")

    # The patterns depend only on the feature names, so compile them once rather than once per file.
    # Basically, each one looks for the first number present after an occurrence of the variable.
    # This is the actual parsing logic and needs to be fixed every time yahoo changes their UI.
    feature_patterns = [
        re.compile(
            r">"
            + re.escape(variable)
            + r".*?(\-?\d+\.*\d*K?M?B?|N/A[\\n|\s]*|>0|NaN)%?"
            r"(</td>|</span>)",
            flags=re.DOTALL,
        )
        for variable in features
    ]

    rows = []
    for tickerfile in tqdm(tickerfile_list, desc="Parsing progress:", unit="tickers"):
        ticker = tickerfile.split(".html")[0].upper()
        # A context manager closes the file handle as soon as the contents are read.
        with open(f"forward/{tickerfile}") as html_file:
            source = html_file.read()
        # Remove commas from the html to make parsing easier.
        source = source.replace(",", "")

        # Regex search for the different variables in the html file, then append to value_list
        value_list = []
        for pattern in feature_patterns:
            try:
                value = pattern.search(source).group(1)

                # Dealing with number formatting
                value_list.append(data_string_to_float(value))

            # The data may not be present (search() returned None). Process accordingly.
            except AttributeError:
                value_list.append("N/A")

        # The ticker plus zero placeholders for the index/price columns, followed by the features.
        rows.append([0, 0, ticker, 0, 0, 0, 0] + value_list)

    # Build the dataframe in one step: DataFrame.append was removed in pandas 2.0 and
    # re-copied the whole frame for every row.
    df = pd.DataFrame(rows, columns=df_columns)

    # NOTE(review): this excerpt stops here, while the function is listed as spanning lines 92-150;
    # the documented return of the dataframe is not visible in this view -- confirm against the full file.

Callers 1

current_data.py — File · 0.85

Calls 1

data_string_to_float — Function · 0.90

Tested by

no test coverage detected