建立决策树。featureIndex 的类型是元组,它记录了 X 中的特征在原始数据中对应的下标。
(self,X,y,featureIndex)
| 145 | |
| 146 | |
def _createTree(self, X, y, featureIndex):
    """Recursively build the decision tree for the samples ``(X, y)``.

    Args:
        X: 2-D sample array; it is read with ``X[:, j]``, so it must
            support NumPy-style column slicing.
        y: Iterable of labels. It must be non-empty, because the first
            label is read unconditionally.
        featureIndex: Tuple recording, for each feature (column) of X, the
            index of that feature in the original data.

    Returns:
        A single label when the node is a leaf; otherwise a nested dict
        ``{feature: {value: subtree}}`` whose outer key is the original
        index of the chosen split feature.

    Raises:
        ValueError: If a split is required and ``self._mode`` is neither
            ``'C4.5'`` nor ``'ID3'``.
    """
    labelList = list(y)
    # All labels identical: stop splitting and return that label.
    if labelList.count(labelList[0]) == len(labelList):
        return labelList[0]
    # No feature left to split on: return the most frequent label.
    if len(featureIndex) == 0:
        return self._majorityCnt(labelList)

    # Splitting can continue: pick the best feature (a column index into X).
    if self._mode == 'C4.5':
        bestFeatIndex = self._chooseBestFeatureToSplit_C45(X, y)
    elif self._mode == 'ID3':
        bestFeatIndex = self._chooseBestFeatureToSplit_ID3(X, y)
    else:
        # Without this branch bestFeatIndex would be unbound and the next
        # statement would fail with an opaque UnboundLocalError.
        raise ValueError(
            "unsupported mode %r; expected 'C4.5' or 'ID3'" % (self._mode,)
        )

    # Translate the column position in X into the original feature index.
    bestFeatStr = featureIndex[bestFeatIndex]
    # The chosen feature is not offered to the subtrees.
    # NOTE(review): this presumes _splitDataSet also drops that column from
    # the X it returns, so columns and featureIndex stay aligned - confirm.
    remainingFeatures = list(featureIndex)
    remainingFeatures.remove(bestFeatStr)
    remainingFeatures = tuple(remainingFeatures)

    # The tree is stored as a dict: the split feature is the key and each
    # observed value of that feature maps to a subtree (again a dict) or
    # to a leaf label.
    branches = {}
    for value in set(X[:, bestFeatIndex]):
        # Recursively build one subtree per distinct feature value.
        sub_X, sub_y = self._splitDataSet(X, y, bestFeatIndex, value)
        branches[value] = self._createTree(sub_X, sub_y, remainingFeatures)
    return {bestFeatStr: branches}
| 178 | |
| 179 | def fit(self,X,y): |
| 180 | #类型检查 |
no test coverage detected