Долгие годы использование ранней остановки при обучении бустингов было больной темой. С нативными интерфейсами xgboost/lightgbm/catboost проблем, разумеется, нет. Но что делать, если нужно затюнить гиперпараметры более сложной модели с обучаемыми этапами предварительной обработки (такими как импутация пропусков или таргет-энкодинг), используя при этом (кросс-)валидацию? UPD: содержимое поста устарело; по данной теме см. https://mlr3book.mlr-org.com/chapters/chapter15/predsets_valid_inttune.html
Удивительно, но в библиотеке scikit-learn не оказалось “коробочного” решения. Рассматриваемую проблему предлагается решать перебором возможных значений гиперпараметров в цикле, например, так:
# Manual grid search with early stopping through the native LightGBM API.
# Expects X_train, y_train, X_val, y_val, `lgb` (lightgbm) and
# sklearn's ParameterGrid to be available in the session.

# Tunable hyperparameters
param_grid = {
    "max_depth": [14, 15]
}
# Expand the grid into a list of concrete parameter combinations
param_grid = list(ParameterGrid(param_grid))

# Fixed hyperparameters
params = {
    "n_estimators": 100,
    "objective": "regression_l1",
    "learning_rate": 0.05,
    "colsample_bytree": 0.89,
    "colsample_bynode": 0.596,
    "lambda_l1": 3.4895,
    "lambda_l2": 1.489,
    "num_leaves": 490,
    "min_data_in_leaf": 48,
    "max_bin": 500,
    "device": "cpu",
    "verbose": 1,
    "metric": "l1"
}

# Collected results: one (hyperparameters, fitted booster) pair per combination
cv_results = []

# Datasets
lgb_train = lgb.Dataset(X_train, y_train)
lgb_val = lgb.Dataset(X_val, y_val)

# Tuning: fit one booster per combination, stopping early on the validation set
for hpar in param_grid:
    validation_summary = lgb.train(
        params={**params, **hpar},
        train_set=lgb_train,
        valid_sets=[lgb_train, lgb_val],
        valid_names=["train", "val"],
        callbacks=[
            lgb.early_stopping(stopping_rounds=50),
            lgb.log_evaluation(50)
        ]
    )
    cv_results.append((hpar, validation_summary))
# Alternative approach: run GridSearchCV once per fold with a PredefinedSplit,
# passing that fold's validation data to LightGBM as the early-stopping set.
# Expects X, y (pandas objects), `params`, `param_grid`, `lgb`, `pd`, KFold,
# PredefinedSplit and GridSearchCV to be available in the session.
cv_results = []

kf = KFold(n_splits=5, shuffle=False)

for train_index, test_index in kf.split(X):
    X_train, X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]

    # -1 marks a training row, 0 marks a validation row
    split_index = [-1 if x in train_index else 0 for x in X.index]
    pds = PredefinedSplit(test_fold=split_index)

    model_consumption = lgb.LGBMRegressor(**params)

    clf = GridSearchCV(
        estimator=model_consumption,
        cv=pds,
        param_grid=param_grid,
        scoring="neg_mean_absolute_error",
        refit=False
    )
    clf.fit(
        X, y,
        eval_set=[(X_train, y_train), (X_val, y_val)],
        callbacks=[
            lgb.early_stopping(stopping_rounds=50),
            lgb.log_evaluation(50)
        ]
    )
    cv_results.append(pd.DataFrame(clf.cv_results_))
На R для xgboost и фреймворка mlr3 раньше можно было обойтись немного костыльным решением, состоящим в создании отдельного преобразованного набора проверочных данных (он может совпадать с валидационным, который задается далее в схеме валидации). Такое решение не годится при использовании кросс-валидации, т.к. для обучения процедур предварительной обработки придется использовать всю выборку, которая затем разбивается на фолды. Это чревато переобучением, поэтому так лучше не делать. Альтернативой было написание своего цикла для перебора гиперпараметров, подобного коду на python-е выше.
Действительно удобное решение появилось относительно недавно — см. пост «Early Stopping with XGBoost». Теперь достаточно просто указать параметр `early_stopping_set = "test"` при инициализации модели. Но в этом примере используется колбэк `clbk("mlr3tuning.early_stopping")`, предназначенный только для xgboost и только для отдельной модели (не работает для GraphLearner-а). Чтобы добиться большей универсальности, необходимо написать свой колбэк:
library(mlr3verse)
Loading required package: mlr3
library(mlr3extralearners)
library(mlr3tuning)
Loading required package: paradox
library(mlr3misc)
library(data.table)
# Only show errors from the mlr3 and bbotk loggers
lgr::get_logger("mlr3")$set_threshold("error")
lgr::get_logger("bbotk")$set_threshold("error")

# Build a classification task from three columns of the german_credit data
data <- tsk("german_credit")$data()
data <- data[, c("credit_risk", "amount", "age")]
task <- TaskClassif$new("boston", backend = data, target = "credit_risk")
# Tuning callback that records, for every evaluated configuration, the largest
# `best_iteration` found across the resampling iterations, and writes it into
# the tuning result as `nrounds`.
#
# The path `state$model$boosting$model$best_iteration` assumes the boosting
# learner sits inside a graph under the id "boosting".
callback_early_stopping <- callback_tuning(
  "callback_early_stopping",

  # Remember the user's `store_models` setting and force model storage on,
  # because the fitted models are read in `on_eval_after_benchmark`.
  on_optimization_begin = function(callback, context) {
    callback$state$store_models = context$instance$objective$store_models
    context$instance$objective$store_models = TRUE
  },

  # One value per resample result: the maximum best iteration over its
  # learner states.
  on_eval_after_benchmark = function(callback, context) {
    callback$state$max_nrounds = map_dbl(
      context$benchmark_result$resample_results$resample_result,
      function(rr) {
        max(map_dbl(
          get_private(rr)$.data$learner_states(get_private(rr)$.view),
          function(state) {
            state$model$boosting$model$best_iteration
          }
        ))
      }
    )
  },

  # Add the `max_nrounds` column to the aggregated performance and drop the
  # models again unless the user asked to keep them.
  on_eval_before_archive = function(callback, context) {
    set(context$aggregated_performance,
      j = "max_nrounds",
      value = callback$state$max_nrounds)
    if (!callback$state$store_models) context$benchmark_result$discard(models = TRUE)
  },

  # Switch early stopping off in the result, set the number of rounds from the
  # best archive entry, and restore the original `store_models` setting.
  # NOTE(review): the archive columns are prefixed with the learner id
  # (e.g. `boosting.max_depth`), so inside a GraphLearner these unprefixed
  # names presumably do not overwrite `boosting.nrounds` etc. - confirm.
  on_result = function(callback, context) {
    context$result$learner_param_vals[[1]]$early_stopping_rounds = NULL
    context$result$learner_param_vals[[1]]$nrounds =
      context$instance$archive$best()$max_nrounds
    context$result$learner_param_vals[[1]]$early_stopping_set = "none"
    context$instance$objective$store_models = callback$state$store_models
  }
)
# xgboost learner with early stopping on the test set of each resampling fold;
# `max_depth` is the only tuned hyperparameter.
learner <- lrn(
  "classif.xgboost",
  id = "boosting",
  nthread = 8,
  nrounds = 1000,
  early_stopping_rounds = 3,
  early_stopping_set = "test",
  max_depth = to_tune(3, 4)
)

# Preprocessing pipeline followed by the learner, wrapped as a single learner
gr <- po("fixfactors") %>>%
  po("encodeimpact", impute_zero = TRUE) %>>%
  po(learner)
gl <- as_learner(gr)

# Grid search with 5-fold CV, using the custom early-stopping callback
instance <- tune(
  tuner = tnr("grid_search"),
  task = task,
  learner = gl,
  resampling = rsmp("cv", folds = 5),
  measure = msr("classif.ce"),
  term_evals = 100,
  callbacks = callback_early_stopping
)

instance$archive |> as.data.table()
boosting.max_depth classif.ce x_domain_boosting.max_depth runtime_learners
<int> <num> <int> <num>
1: 4 0.302 4 0.58
2: 3 0.299 3 0.53
timestamp batch_nr warnings errors max_nrounds resample_result
<POSc> <int> <int> <int> <num> <list>
1: 2024-02-25 11:13:20 1 0 0 14 <ResampleResult>
2: 2024-02-25 11:13:21 2 0 0 10 <ResampleResult>
`boosting` в `state$model$boosting$model$best_iteration` — это id модели, заданный при ее инициализации: `lrn("classif.xgboost", id = "boosting", ...)`. Удобно всегда указывать один и тот же id, чтобы не править код функции или не думать над ее параметризацией. Этот код легко адаптируется для lightgbm. Достаточно поменять `state$model$boosting$model$best_iteration` на `state$model$boosting$model$best_iter`, а также `context$result$learner_param_vals[[1]]$nrounds` на `context$result$learner_param_vals[[1]]$num_iterations`:
# Build a classification task from four columns of the german_credit data
data <- tsk("german_credit")$data()
data <- data[, c("credit_risk", "amount", "purpose", "age")]
task <- TaskClassif$new("boston", backend = data, target = "credit_risk")
# Tuning callback that records, for every evaluated configuration, the largest
# `best_iter` found across the resampling iterations, and writes it into the
# tuning result as `num_iterations`.
#
# The path `state$model$boosting$model$best_iter` assumes the boosting learner
# sits inside a graph under the id "boosting".
callback_early_stopping <- callback_tuning(
  "callback_early_stopping",

  # Remember the user's `store_models` setting and force model storage on,
  # because the fitted models are read in `on_eval_after_benchmark`.
  on_optimization_begin = function(callback, context) {
    callback$state$store_models = context$instance$objective$store_models
    context$instance$objective$store_models = TRUE
  },

  # One value per resample result: the maximum best iteration over its
  # learner states.
  on_eval_after_benchmark = function(callback, context) {
    callback$state$max_nrounds = map_dbl(
      context$benchmark_result$resample_results$resample_result,
      function(rr) {
        max(map_dbl(
          get_private(rr)$.data$learner_states(get_private(rr)$.view),
          function(state) {
            state$model$boosting$model$best_iter
          }
        ))
      }
    )
  },

  # Add the `max_nrounds` column to the aggregated performance and drop the
  # models again unless the user asked to keep them.
  on_eval_before_archive = function(callback, context) {
    set(context$aggregated_performance,
      j = "max_nrounds",
      value = callback$state$max_nrounds)
    if (!callback$state$store_models) context$benchmark_result$discard(models = TRUE)
  },

  # Switch early stopping off in the result, set the number of iterations from
  # the best archive entry, and restore the original `store_models` setting.
  # NOTE(review): the archive columns are prefixed with the learner id
  # (e.g. `boosting.max_depth`), so inside a GraphLearner these unprefixed
  # names presumably do not overwrite `boosting.num_iterations` etc. - confirm.
  # NOTE(review): the lightgbm learner in this post is configured with
  # `early_stopping = TRUE`, not `early_stopping_set` - confirm that the
  # `early_stopping_set` line is wanted here.
  on_result = function(callback, context) {
    context$result$learner_param_vals[[1]]$early_stopping_rounds = NULL
    context$result$learner_param_vals[[1]]$num_iterations =
      context$instance$archive$best()$max_nrounds
    context$result$learner_param_vals[[1]]$early_stopping_set = "none"
    context$instance$objective$store_models = callback$state$store_models
  }
)
# lightgbm learner with early stopping enabled; `max_depth` is the only tuned
# hyperparameter.
learner <- lrn(
  "classif.lightgbm",
  id = "boosting",
  num_iterations = 1000,
  early_stopping = TRUE,
  early_stopping_rounds = 3,
  max_depth = to_tune(3, 4),
  eval_freq = 1,
  verbose = 1
)

# The bare learner cannot be used on its own because of a glitch with the
# number of features in the test set, so it is wrapped in a pipeline.
gr <- po("fixfactors") %>>%
  po("encodeimpact", impute_zero = TRUE) %>>%
  po(learner)
gl <- as_learner(gr)

# Grid search with 5-fold CV, using the custom early-stopping callback
instance <- tune(
  tuner = tnr("grid_search"),
  task = task,
  learner = gl,
  resampling = rsmp("cv", folds = 5),
  measure = msr("classif.ce"),
  term_evals = 10,
  callbacks = callback_early_stopping
)
[LightGBM] [Info] Number of positive: 558, number of negative: 242
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 328
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.697500 -> initscore=0.835421
[LightGBM] [Info] Start training from score 0.835421
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]: test's binary_logloss:0.598909
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]: test's binary_logloss:0.59604
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]: test's binary_logloss:0.59223
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]: test's binary_logloss:0.589521
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]: test's binary_logloss:0.586935
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]: test's binary_logloss:0.586282
[7]: test's binary_logloss:0.584635
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]: test's binary_logloss:0.583373
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[9]: test's binary_logloss:0.582228
[10]: test's binary_logloss:0.580164
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[11]: test's binary_logloss:0.580446
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[12]: test's binary_logloss:0.580774
[13]: test's binary_logloss:0.581857
Early stopping, best iteration is: [10]: test's binary_logloss:0.580164
[LightGBM] [Info] Number of positive: 560, number of negative: 240
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 326
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.700000 -> initscore=0.847298
[LightGBM] [Info] Start training from score 0.847298
[1]: test's binary_logloss:0.60672
Will train until there is no improvement in 3 rounds.
[2]: test's binary_logloss:0.603212
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]: test's binary_logloss:0.601098
[4]: test's binary_logloss:0.598526
[5]: test's binary_logloss:0.597445
[6]: test's binary_logloss:0.596942
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]: test's binary_logloss:0.596631
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]: test's binary_logloss:0.595948
[9]: test's binary_logloss:0.59484
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[10]: test's binary_logloss:0.594303
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[11]: test's binary_logloss:0.594234
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[12]: test's binary_logloss:0.593922
[13]: test's binary_logloss:0.593382
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[14]: test's binary_logloss:0.593212
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[15]: test's binary_logloss:0.592573
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[16]: test's binary_logloss:0.592517
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[17]: test's binary_logloss:0.591712
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[18]: test's binary_logloss:0.591235
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[19]: test's binary_logloss:0.591183
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[20]: test's binary_logloss:0.591613
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[21]: test's binary_logloss:0.591766
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[22]: test's binary_logloss:0.590327
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[23]: test's binary_logloss:0.590783
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[24]: test's binary_logloss:0.590239
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[25]: test's binary_logloss:0.589348
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[26]: test's binary_logloss:0.590099
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[27]: test's binary_logloss:0.590969
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[28]: test's binary_logloss:0.590471
Early stopping, best iteration is: [25]: test's binary_logloss:0.589348
[LightGBM] [Info] Number of positive: 555, number of negative: 245
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000045 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 325
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.693750 -> initscore=0.817710
[LightGBM] [Info] Start training from score 0.817710
[1]: test's binary_logloss:0.58887
Will train until there is no improvement in 3 rounds.
[2]: test's binary_logloss:0.588492
[3]: test's binary_logloss:0.588975
[4]: test's binary_logloss:0.590194
[5]: test's binary_logloss:0.591667
Early stopping, best iteration is: [2]: test's binary_logloss:0.588492
[LightGBM] [Info] Number of positive: 567, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000044 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 327
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.708750 -> initscore=0.889321
[LightGBM] [Info] Start training from score 0.889321
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]: test's binary_logloss:0.638232
Will train until there is no improvement in 3 rounds.
[2]: test's binary_logloss:0.634382
[3]: test's binary_logloss:0.633384
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]: test's binary_logloss:0.630724
[5]: test's binary_logloss:0.627601
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]: test's binary_logloss:0.628098
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]: test's binary_logloss:0.626592
[8]: test's binary_logloss:0.625043
[9]: test's binary_logloss:0.62328
[10]: test's binary_logloss:0.623558
[11]: test's binary_logloss:0.620961
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[12]: test's binary_logloss:0.620536
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[13]: test's binary_logloss:0.619728
[14]: test's binary_logloss:0.61929
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[15]: test's binary_logloss:0.619325
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[16]: test's binary_logloss:0.618309
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[17]: test's binary_logloss:0.618669
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[18]: test's binary_logloss:0.617463
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[19]: test's binary_logloss:0.616883
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[20]: test's binary_logloss:0.617277
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[21]: test's binary_logloss:0.617728
[22]: test's binary_logloss:0.617161
Early stopping, best iteration is: [19]: test's binary_logloss:0.616883
[LightGBM] [Info] Number of positive: 560, number of negative: 240
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000070 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 325
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.700000 -> initscore=0.847298
[LightGBM] [Info] Start training from score 0.847298
[1]: test's binary_logloss:0.609981
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]: test's binary_logloss:0.608462
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]: test's binary_logloss:0.607229
[4]: test's binary_logloss:0.607689
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]: test's binary_logloss:0.606191
[6]: test's binary_logloss:0.607022
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]: test's binary_logloss:0.607459
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]: test's binary_logloss:0.60763
Early stopping, best iteration is: [5]: test's binary_logloss:0.606191
[LightGBM] [Info] Number of positive: 558, number of negative: 242
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000045 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 328
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.697500 -> initscore=0.835421
[LightGBM] [Info] Start training from score 0.835421
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]: test's binary_logloss:0.599161
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]: test's binary_logloss:0.596906
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]: test's binary_logloss:0.594817
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]: test's binary_logloss:0.593478
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]: test's binary_logloss:0.591668
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]: test's binary_logloss:0.589909
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]: test's binary_logloss:0.588413
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]: test's binary_logloss:0.588181
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[9]: test's binary_logloss:0.585997
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[10]: test's binary_logloss:0.584412
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[11]: test's binary_logloss:0.582727
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[12]: test's binary_logloss:0.582996
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[13]: test's binary_logloss:0.583912
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[14]: test's binary_logloss:0.583633
Early stopping, best iteration is: [11]: test's binary_logloss:0.582727
[LightGBM] [Info] Number of positive: 560, number of negative: 240
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000043 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 326
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.700000 -> initscore=0.847298
[LightGBM] [Info] Start training from score 0.847298
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]: test's binary_logloss:0.604933
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]: test's binary_logloss:0.598988
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]: test's binary_logloss:0.594877
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]: test's binary_logloss:0.594388
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]: test's binary_logloss:0.592711
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]: test's binary_logloss:0.59216
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]: test's binary_logloss:0.592661
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]: test's binary_logloss:0.591881
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[9]: test's binary_logloss:0.590483
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[10]: test's binary_logloss:0.591141
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[11]: test's binary_logloss:0.590416
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[12]: test's binary_logloss:0.589944
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[13]: test's binary_logloss:0.589605
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[14]: test's binary_logloss:0.589017
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[15]: test's binary_logloss:0.588321
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[16]: test's binary_logloss:0.588128
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[17]: test's binary_logloss:0.588951
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[18]: test's binary_logloss:0.588879
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[19]: test's binary_logloss:0.588596
Early stopping, best iteration is: [16]: test's binary_logloss:0.588128
[LightGBM] [Info] Number of positive: 555, number of negative: 245
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000043 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 325
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.693750 -> initscore=0.817710
[LightGBM] [Info] Start training from score 0.817710
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]: test's binary_logloss:0.586341
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]: test's binary_logloss:0.583811
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]: test's binary_logloss:0.582304
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]: test's binary_logloss:0.582482
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]: test's binary_logloss:0.58225
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]: test's binary_logloss:0.580098
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]: test's binary_logloss:0.580683
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]: test's binary_logloss:0.578144
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[9]: test's binary_logloss:0.578972
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[10]: test's binary_logloss:0.58013
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[11]: test's binary_logloss:0.581357
Early stopping, best iteration is: [8]: test's binary_logloss:0.578144
[LightGBM] [Info] Number of positive: 567, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000048 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 327
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.708750 -> initscore=0.889321
[LightGBM] [Info] Start training from score 0.889321
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]: test's binary_logloss:0.637881
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]: test's binary_logloss:0.63333
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]: test's binary_logloss:0.631162
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]: test's binary_logloss:0.629422
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]: test's binary_logloss:0.626704
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]: test's binary_logloss:0.625901
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]: test's binary_logloss:0.624439
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]: test's binary_logloss:0.62314
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[9]: test's binary_logloss:0.621935
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[10]: test's binary_logloss:0.622286
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[11]: test's binary_logloss:0.621409
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[12]: test's binary_logloss:0.619832
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[13]: test's binary_logloss:0.620485
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[14]: test's binary_logloss:0.620966
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[15]: test's binary_logloss:0.621902
Early stopping, best iteration is: [12]: test's binary_logloss:0.619832
[LightGBM] [Info] Number of positive: 560, number of negative: 240
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000048 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 325
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.700000 -> initscore=0.847298
[LightGBM] [Info] Start training from score 0.847298
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]: test's binary_logloss:0.611072
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]: test's binary_logloss:0.608581
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]: test's binary_logloss:0.606915
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]: test's binary_logloss:0.606352
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]: test's binary_logloss:0.60558
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]: test's binary_logloss:0.605502
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]: test's binary_logloss:0.606145
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]: test's binary_logloss:0.605522
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[9]: test's binary_logloss:0.606255
Early stopping, best iteration is: [6]: test's binary_logloss:0.605502
# Show the tuning archive as a data.table
instance$archive |> as.data.table()
boosting.max_depth classif.ce x_domain_boosting.max_depth runtime_learners
<int> <num> <int> <num>
1: 3 0.299 3 1.13
2: 4 0.297 4 1.10
timestamp batch_nr warnings errors max_nrounds resample_result
<POSc> <int> <int> <int> <num> <list>
1: 2024-02-25 11:13:23 1 0 0 25 <ResampleResult>
2: 2024-02-25 11:13:24 2 0 0 16 <ResampleResult>
Комментариев нет:
Отправить комментарий