A 3-stock trading environment. State: vector of size 7 (n_stock * 2 + 1) - # shares of stock 1 owned - # shares of stock 2 owned - # shares of stock 3 owned - price of stock 1 (using daily close price) - price of stock 2 - price of stock 3 - cash owned (can be used to purchase more stocks)
| 108 | |
| 109 | |
| 110 | class MultiStockEnv: |
| 111 | """ |
| 112 | A 3-stock trading environment. |
| 113 | State: vector of size 7 (n_stock * 2 + 1) |
| 114 | - # shares of stock 1 owned |
| 115 | - # shares of stock 2 owned |
| 116 | - # shares of stock 3 owned |
| 117 | - price of stock 1 (using daily close price) |
| 118 | - price of stock 2 |
| 119 | - price of stock 3 |
| 120 | - cash owned (can be used to purchase more stocks) |
| 121 | Action: categorical variable with 27 (3^3) possibilities |
| 122 | - for each stock, you can: |
| 123 | - 0 = sell |
| 124 | - 1 = hold |
| 125 | - 2 = buy |
| 126 | """ |
def __init__(self, data, initial_investment=20000):
    """Build the environment from a price history and a starting cash amount.

    Args:
        data: price table exposing a two-element ``shape``, unpacked as
            (number of time steps, number of stocks). It is stored as-is
            and indexed by step number in ``reset()``.
        initial_investment: cash balance that ``reset()`` restores.

    The constructor finishes by calling ``reset()``, so a new instance
    starts at step 0.
    """
    # price history and its dimensions
    self.stock_price_history = data
    n_step, n_stock = data.shape
    self.n_step = n_step
    self.n_stock = n_stock

    self.initial_investment = initial_investment

    # per-episode state: placeholders here, real values come from reset()
    for attr in ("cur_step", "stock_owned", "stock_price", "cash_in_hand"):
        setattr(self, attr, None)

    # one integer action id for every combination of per-stock choices
    n_actions = 3 ** n_stock
    self.action_space = np.arange(n_actions)

    # action_list[i] is the per-stock decision vector for action id i,
    # enumerated in this order:
    #   [0,0,0], [0,0,1], [0,0,2], [0,1,0], [0,1,1], ...
    # where 0 = sell, 1 = hold, 2 = buy
    self.action_list = [
        list(combo) for combo in itertools.product([0, 1, 2], repeat=n_stock)
    ]

    # state vector = shares owned per stock + price per stock + cash
    self.state_dim = 2 * n_stock + 1

    self.reset()
| 158 | |
| 159 | |
def reset(self):
    """Rewind the environment to its starting point and return the observation.

    Sets the step counter to 0, zeroes the share count of every stock,
    takes current prices from index 0 of the price history, restores cash
    to ``initial_investment``, and returns the result of ``self._get_obs()``.
    """
    first_step = 0
    self.cur_step = first_step
    # no holdings at the start: one zero per stock
    self.stock_owned = np.zeros(self.n_stock)
    self.stock_price = self.stock_price_history[first_step]
    self.cash_in_hand = self.initial_investment
    return self._get_obs()
| 166 | |
| 167 |