| 253 | |
| 254 | |
| 255 | def test_gbdt(N=1): |
| 256 | np.random.seed(12345) |
| 257 | i = 1 |
| 258 | while i <= N: |
| 259 | n_ex = np.random.randint(2, 100) |
| 260 | n_feats = np.random.randint(2, 100) |
| 261 | n_trees = np.random.randint(2, 100) |
| 262 | max_depth = np.random.randint(1, 5) |
| 263 | |
| 264 | classifier = np.random.choice([True, False]) |
| 265 | if classifier: |
| 266 | # create classification problem |
| 267 | n_classes = np.random.randint(2, 10) |
| 268 | X, Y = make_blobs( |
| 269 | n_samples=n_ex, centers=n_classes, n_features=n_feats, random_state=i |
| 270 | ) |
| 271 | X, X_test, Y, Y_test = train_test_split(X, Y, test_size=0.3, random_state=i) |
| 272 | |
| 273 | # evaluation loss: misclassification rate (1 - accuracy) |
| 274 | def loss(yp, y): |
| 275 | return 1 - accuracy_score(yp, y) |
| 276 | |
| 277 | # initialize model |
| 278 | criterion = np.random.choice(["entropy", "gini"]) |
| 279 | mine = GradientBoostedDecisionTree( |
| 280 | n_iter=n_trees, |
| 281 | classifier=classifier, |
| 282 | max_depth=max_depth, |
| 283 | learning_rate=0.1, |
| 284 | loss="crossentropy", |
| 285 | step_size="constant", |
| 286 | ) |
| 287 | gold = RandomForestClassifier( |
| 288 | n_estimators=n_trees, |
| 289 | max_features=n_feats, |
| 290 | criterion=criterion, |
| 291 | max_depth=max_depth, |
| 292 | bootstrap=True, |
| 293 | ) |
| 294 | else: |
| 295 | # create regression problem |
| 296 | X, Y = make_regression(n_samples=n_ex, n_features=n_feats, random_state=i) |
| 297 | X, X_test, Y, Y_test = train_test_split(X, Y, test_size=0.3, random_state=i) |
| 298 | |
| 299 | # initialize model |
| 300 | criterion = "mse" |
| 301 | loss = mean_squared_error |
| 302 | mine = GradientBoostedDecisionTree( |
| 303 | n_iter=n_trees, |
| 304 | max_depth=max_depth, |
| 305 | classifier=classifier, |
| 306 | learning_rate=0.1, |
| 307 | loss="mse", |
| 308 | step_size="constant", |
| 309 | ) |
| 310 | gold = RandomForestRegressor( |
| 311 | n_estimators=n_trees, |
| 312 | max_features=n_feats, |