(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,Lambda)
| 131 | |
| 132 | # 梯度 |
def nnGradient(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,Lambda):
    """Return the regularized gradient of a one-hidden-layer network.

    The gradient is computed by backpropagation using the cross-entropy
    error term (``h - y``) at the output layer.

    Args:
        nn_params: Flat parameter vector. The first
            ``hidden_layer_size * (input_layer_size + 1)`` entries are
            Theta1 (row-major); the remaining entries are Theta2.
        input_layer_size: Number of input features (without bias).
        hidden_layer_size: Number of hidden units (without bias).
        num_labels: Number of output classes.
        X: Sample matrix of shape ``(m, input_layer_size)``.
        y: Class labels, one per sample; label ``i`` maps to output
            unit ``i`` for ``i`` in ``range(num_labels)``.
        Lambda: Regularization strength. Bias columns are not
            regularized.

    Returns:
        1-D array with the same layout as ``nn_params``: the flattened
        Theta1 gradient followed by the flattened Theta2 gradient.

    Note:
        Relies on ``sigmoid`` and ``sigmoidGradient`` defined elsewhere
        in this module; both are assumed to operate element-wise on
        arrays of any shape.
    """
    split = hidden_layer_size * (input_layer_size + 1)
    # reshape() returns a view of nn_params; copy so that zeroing the bias
    # columns below does not modify the caller's parameter vector.
    Theta1 = nn_params[:split].reshape(hidden_layer_size, input_layer_size + 1).copy()
    Theta2 = nn_params[split:].reshape(num_labels, hidden_layer_size + 1).copy()
    m = X.shape[0]

    # One-hot encode the labels: class_y[r, i] is 1.0 where y[r] == i.
    class_y = (np.ravel(y).reshape(-1, 1) == np.arange(num_labels)).astype(float)

    # Forward propagation; a column of ones is prepended as the bias unit
    # before each layer.
    bias = np.ones((m, 1))
    a1 = np.hstack((bias, X))
    z2 = np.dot(a1, Theta1.T)
    a2 = np.hstack((bias, sigmoid(z2)))
    h = sigmoid(np.dot(a2, Theta2.T))

    # Backpropagation over all samples at once. Each matrix product below
    # equals the sum of the per-sample outer products.
    delta3 = h - class_y  # cross-entropy output error
    # The bias column of Theta2 is skipped: no error flows back to the bias.
    delta2 = np.dot(delta3, Theta2[:, 1:]) * sigmoidGradient(z2)
    Theta2_grad = np.dot(delta3.T, a2)
    Theta1_grad = np.dot(delta2.T, a1)

    # Zero the bias columns so they contribute nothing to the
    # regularization term.
    Theta1[:, 0] = 0
    Theta2[:, 0] = 0

    unregularized = np.concatenate((Theta1_grad.ravel(), Theta2_grad.ravel()))
    penalty = Lambda * np.concatenate((Theta1.ravel(), Theta2.ravel()))
    return (unregularized + penalty) / m
| 177 | |
| 178 | # S型函数 |
| 179 | def sigmoid(z): |
no test coverage detected