(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,Lambda)
| 94 | |
| 95 | # 代价函数 |
def nnCostFunction(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,Lambda):
    """Return the regularised cross-entropy cost of a one-hidden-layer network.

    Args:
        nn_params: Flat parameter array holding Theta1 followed by Theta2.
            The first ``hidden_layer_size * (input_layer_size + 1)`` entries
            are reshaped into Theta1; the remainder into Theta2.
        input_layer_size: Number of input features (columns of ``X``).
        hidden_layer_size: Number of hidden units.
        num_labels: Number of output classes; labels are compared against
            ``0 .. num_labels - 1``.
        X: Sample matrix with one row per example.
        y: Class labels, one per row of ``X``.
        Lambda: Regularisation strength (presumably a scalar -- confirm
            against callers).

    Returns:
        The cost flattened with ``np.ravel`` (a one-element array when
        ``Lambda`` is a scalar).
    """
    # Unroll the flat parameter vector back into the two weight matrices.
    split = hidden_layer_size * (input_layer_size + 1)
    Theta1 = nn_params[:split].reshape(hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[split:].reshape(num_labels, hidden_layer_size + 1)

    m = X.shape[0]

    # One-hot encode the labels: column k is 1 where y == k, else 0.
    class_y = np.zeros((m, num_labels))
    for label in range(num_labels):
        class_y[:, label] = np.ravel(y == label)

    # Regularisation skips the first (bias) column of each weight matrix.
    # Stack the remaining weights into one column vector and take w^T w.
    weights = np.vstack((Theta1[:, 1:].reshape(-1, 1),
                         Theta2[:, 1:].reshape(-1, 1)))
    term = np.dot(weights.T, weights)

    # Forward propagation; a column of ones is prepended as the bias unit
    # before each layer. `sigmoid` is the activation helper defined
    # elsewhere in this module.
    bias = np.ones((m, 1))
    a1 = np.hstack((bias, X))
    a2 = np.hstack((bias, sigmoid(np.dot(a1, Theta1.T))))
    h = sigmoid(np.dot(a2, Theta2.T))

    # Cross-entropy over every (example, class) pair plus the L2 penalty:
    #   J = -(y . log(h) + (1 - y) . log(1 - h) - Lambda * term / 2) / m
    targets = class_y.reshape(-1, 1)
    outputs = h.reshape(-1, 1)
    fit_pos = np.dot(targets.T, np.log(outputs))
    fit_neg = np.dot((1 - targets).T, np.log(1 - outputs))
    J = -(fit_pos + fit_neg - Lambda * term / 2) / m
    return np.ravel(J)
| 131 | |
| 132 | # 梯度 |
| 133 | def nnGradient(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,Lambda): |
no test coverage detected