Wednesday, January 10, 2024

Early stopping when training boosting models with mlr3, or Checkmate, sklearn

For years, using early stopping when training boosting models has been a sore subject. With the native xgboost/lightgbm/catboost interfaces there is, of course, no problem. But what do you do when you need to tune the hyperparameters of a more complex model with trainable preprocessing steps (such as missing-value imputation or target encoding) while using (cross-)validation?

Surprisingly, scikit-learn turns out to have no out-of-the-box solution for this. The usual suggestion is to handle the problem by looping over candidate hyperparameter values yourself, for example like this:

import lightgbm as lgb
from sklearn.model_selection import ParameterGrid

# Hyperparameters to tune
param_grid = {
    "max_depth": [14, 15]
    }
param_grid = list(ParameterGrid(param_grid))

# Fixed hyperparameters
params = {
    "n_estimators": 100,
    "objective": "regression_l1",
    "learning_rate": 0.05,
    "colsample_bytree": 0.89,
    "colsample_bynode": 0.596,
    "lambda_l1": 3.4895,
    "lambda_l2": 1.489,
    "num_leaves": 490,
    "min_data_in_leaf": 48,
    "max_bin": 500,
    "device": "cpu",
    "verbose": 1,
    "metric": "l1"
    }

# List for collecting the results
cv_results = []

# Datasets
lgb_train = lgb.Dataset(X_train, y_train)
lgb_val = lgb.Dataset(X_val, y_val)

# Tuning loop
for hpar in param_grid:
    validation_summary = lgb.train(
        params = {**params, **hpar}, 
        train_set = lgb_train,
        valid_sets = [lgb_train, lgb_val],
        valid_names = ["train", "val"],
        callbacks = [
            lgb.early_stopping(stopping_rounds = 50), 
            lgb.log_evaluation(50)
            ]
    )

    cv_results.append((hpar, validation_summary))
    

# Another way to solve the problem
import pandas as pd
from sklearn.model_selection import KFold, PredefinedSplit, GridSearchCV
cv_results = []

kf = KFold(n_splits = 5, shuffle = False)

for train_index, test_index in kf.split(X):
    
    X_train, X_val = X.iloc[train_index], X.iloc[test_index]
    y_train, y_val = y.iloc[train_index], y.iloc[test_index]

    # -1 marks training rows, 0 marks validation rows
    split_index = [-1 if x in train_index else 0 for x in X.index]
    pds = PredefinedSplit(test_fold = split_index)

    model_consumption = lgb.LGBMRegressor(**params)

    clf = GridSearchCV(
        estimator = model_consumption,
        cv = pds,
        param_grid = param_grid,
        scoring = "neg_mean_absolute_error",
        refit = False  
    )

    clf.fit(X, y, 
        eval_set=[(X_train, y_train), (X_val, y_val)], 
        callbacks = [
            lgb.early_stopping(stopping_rounds = 50), 
            lgb.log_evaluation(50)
            ]
    )
    cv_results.append(pd.DataFrame(clf.cv_results_))

In R, for xgboost and the mlr3 framework, one could previously get by with a somewhat hacky solution: creating a separate, already preprocessed validation dataset (it may coincide with the validation set defined later in the resampling scheme). This approach does not work with cross-validation, because the preprocessing steps would then have to be trained on the entire sample, which is afterwards split into folds. That invites overfitting, so it is better not to do it. The alternative was to write your own hyperparameter search loop, similar to the Python code above.
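For context, such a manual loop with the native xgboost interface in R might look roughly like the sketch below (the data objects X_train/X_val, y_train/y_val, the grid values and early_stopping_rounds = 50 are illustrative assumptions; preprocessing is assumed to have already been applied to both sets):

library(xgboost)

# Illustrative assumption: X_* are numeric matrices, y_* are 0/1 vectors
dtrain <- xgb.DMatrix(X_train, label = y_train)
dval   <- xgb.DMatrix(X_val, label = y_val)

grid <- expand.grid(max_depth = c(3, 4), eta = c(0.05, 0.1))

results <- lapply(seq_len(nrow(grid)), function(i) {
  booster <- xgb.train(
    params = list(
      objective = "binary:logistic",
      max_depth = grid$max_depth[i],
      eta = grid$eta[i]
    ),
    data = dtrain,
    nrounds = 1000,
    watchlist = list(train = dtrain, val = dval),
    early_stopping_rounds = 50,
    verbose = 0
  )
  # Keep the hyperparameters together with the best iteration found
  data.frame(grid[i, ], best_iteration = booster$best_iteration,
             best_score = booster$best_score)
})
do.call(rbind, results)

This works, but it bypasses the resampling, logging and parallelization machinery of mlr3, which is exactly what we would like to keep.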

A genuinely convenient solution appeared relatively recently: see the post Early Stopping with XGBoost. Now it is enough to set the parameter early_stopping_set = "test" when initializing the model. That example, however, relies on the clbk("mlr3tuning.early_stopping") callback, which works only for xgboost and only for a standalone model (it does not work with a GraphLearner). To achieve more generality, you have to write your own callback.
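For a standalone xgboost learner, the built-in route from that post looks roughly like the sketch below (assuming a recent mlr3tuning that ships the mlr3tuning.early_stopping callback; the spam task and the tiny search space are purely illustrative):

library(mlr3verse)

learner <- lrn(
  "classif.xgboost",
  nrounds = 1000,
  early_stopping_rounds = 3,
  early_stopping_set = "test",   # use the test fold for early stopping
  max_depth = to_tune(3, 4)
)

instance <- tune(
  tuner = tnr("grid_search"),
  task = tsk("spam"),            # a task with numeric features only
  learner = learner,
  resampling = rsmp("cv", folds = 5),
  measure = msr("classif.ce"),
  term_evals = 100,
  callbacks = clbk("mlr3tuning.early_stopping")
)

For a GraphLearner with trainable preprocessing steps, however, a custom tuning callback is needed: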

library(mlr3verse)
Loading required package: mlr3
library(mlr3extralearners)
library(mlr3tuning)
Loading required package: paradox
library(mlr3misc)
library(data.table)


lgr::get_logger("mlr3")$set_threshold("error")
lgr::get_logger("bbotk")$set_threshold("error")

data <- tsk("german_credit")$data()
data <- data[, c("credit_risk", "amount", "age")]
task <- TaskClassif$new("boston", backend = data, target = "credit_risk")

callback_early_stopping <- callback_tuning(
  "callback_early_stopping",
  # Remember the original store_models setting and force model storage,
  # otherwise best_iteration cannot be read from the fitted boosters
  on_optimization_begin = function(callback, context) {
    learner = context$instance$objective$learner
    callback$state$store_models = context$instance$objective$store_models
    context$instance$objective$store_models = TRUE
  },
  
  # After each evaluation: for every resample result, take the maximum
  # best_iteration over the folds
  on_eval_after_benchmark = function(callback, context) {
    callback$state$max_nrounds = map_dbl(
      context$benchmark_result$resample_results$resample_result, 
      function(rr) {
        max(map_dbl(get_private(rr)$.data$learner_states(get_private(rr)$.view), 
                    function(state) {state$model$boosting$model$best_iteration})
        )
      }
    )
  },
  
  # Write max_nrounds into the archive and drop the models if they were not requested
  on_eval_before_archive = function(callback, context) {
    set(context$aggregated_performance, 
        j = "max_nrounds", 
        value = callback$state$max_nrounds)
    if (!callback$state$store_models) context$benchmark_result$discard(models = TRUE)
  },
  
  # In the final result: disable early stopping and fix nrounds
  # to the max_nrounds of the best archive entry
  on_result = function(callback, context) {
    context$result$learner_param_vals[[1]]$early_stopping_rounds = NULL
    context$result$learner_param_vals[[1]]$nrounds = 
      context$instance$archive$best()$max_nrounds
    context$result$learner_param_vals[[1]]$early_stopping_set = "none"
    context$instance$objective$store_models = callback$state$store_models
  }
)

learner <- lrn(
  "classif.xgboost",
  id = "boosting",
  nthread = 8,
  nrounds = 1000,
  early_stopping_rounds = 3, 
  early_stopping_set = "test",
  max_depth = to_tune(3, 4)
)

gr <- 
  po("fixfactors") %>>% 
  po("encodeimpact", impute_zero = TRUE) %>>% 
  po(learner)

gl <- as_learner(gr)

instance <- tune(
  tuner = tnr("grid_search"),
  task = task,
  learner = gl,
  resampling = rsmp("cv", folds = 5),
  measure = msr("classif.ce"),
  term_evals = 100,
  callbacks = callback_early_stopping
)
instance$archive |> as.data.table()
   boosting.max_depth classif.ce x_domain_boosting.max_depth runtime_learners
                <int>      <num>                       <int>            <num>
1:                  4      0.302                           4             0.58
2:                  3      0.299                           3             0.53
             timestamp batch_nr warnings errors max_nrounds  resample_result
                <POSc>    <int>    <int>  <int>       <num>           <list>
1: 2024-02-25 11:13:20        1        0      0          14 <ResampleResult>
2: 2024-02-25 11:13:21        2        0      0          10 <ResampleResult>

boosting in state$model$boosting$model$best_iteration is the model id specified at initialization: lrn("classif.xgboost", id = "boosting", ...). It is convenient to always use the same id so that you never have to edit the callback code or bother parameterizing it. The code is easily adapted for lightgbm: it is enough to replace state$model$boosting$model$best_iteration with state$model$boosting$model$best_iter, and context$result$learner_param_vals[[1]]$nrounds with context$result$learner_param_vals[[1]]$num_iterations:

data <- tsk("german_credit")$data()
data <- data[, c("credit_risk", "amount", "purpose", "age")]
task <- TaskClassif$new("boston", backend = data, target = "credit_risk")

callback_early_stopping <- callback_tuning(
  "callback_early_stopping",
  on_optimization_begin = function(callback, context) {
    learner = context$instance$objective$learner
    callback$state$store_models = context$instance$objective$store_models
    context$instance$objective$store_models = TRUE
  },
  
  on_eval_after_benchmark = function(callback, context) {
    callback$state$max_nrounds = map_dbl(
      context$benchmark_result$resample_results$resample_result, 
      function(rr) {
        max(map_dbl(get_private(rr)$.data$learner_states(get_private(rr)$.view), 
                    function(state) {state$model$boosting$model$best_iter})
        )
      }
    )
  },
  
  on_eval_before_archive = function(callback, context) {
    set(context$aggregated_performance, 
        j = "max_nrounds", 
        value = callback$state$max_nrounds)
    if (!callback$state$store_models) context$benchmark_result$discard(models = TRUE)
  },
  
  on_result = function(callback, context) {
    context$result$learner_param_vals[[1]]$early_stopping_rounds = NULL
    context$result$learner_param_vals[[1]]$num_iterations = 
      context$instance$archive$best()$max_nrounds
    context$result$learner_param_vals[[1]]$early_stopping_set = "none"
    context$instance$objective$store_models = callback$state$store_models
  }
)

learner <- lrn(
  "classif.lightgbm",
  id = "boosting",
  num_iterations = 1000,
  early_stopping = TRUE,
  early_stopping_rounds = 3,
  max_depth = to_tune(3, 4), 
  eval_freq = 1, 
  verbose = 1
)


# The learner by itself cannot be used here because of a glitch
# with the number of features on the test set
gr <- 
  po("fixfactors") %>>% 
  po("encodeimpact", impute_zero = TRUE) %>>% 
  po(learner)
gl <- as_learner(gr)

instance <-  tune(
  tuner = tnr("grid_search"),
  task = task,
  learner = gl,
  resampling = rsmp("cv", folds = 5),
  measure = msr("classif.ce"),
  term_evals = 10,
  callbacks = callback_early_stopping
)
[LightGBM] [Info] Number of positive: 558, number of negative: 242
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 328
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.697500 -> initscore=0.835421
[LightGBM] [Info] Start training from score 0.835421
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]:  test's binary_logloss:0.598909 
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]:  test's binary_logloss:0.59604 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]:  test's binary_logloss:0.59223 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]:  test's binary_logloss:0.589521 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]:  test's binary_logloss:0.586935 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]:  test's binary_logloss:0.586282 
[7]:  test's binary_logloss:0.584635 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]:  test's binary_logloss:0.583373 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[9]:  test's binary_logloss:0.582228 
[10]:  test's binary_logloss:0.580164 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[11]:  test's binary_logloss:0.580446 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[12]:  test's binary_logloss:0.580774 
[13]:  test's binary_logloss:0.581857 
Early stopping, best iteration is: [10]:  test's binary_logloss:0.580164
[LightGBM] [Info] Number of positive: 560, number of negative: 240
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 326
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.700000 -> initscore=0.847298
[LightGBM] [Info] Start training from score 0.847298
[1]:  test's binary_logloss:0.60672 
Will train until there is no improvement in 3 rounds.
[2]:  test's binary_logloss:0.603212 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]:  test's binary_logloss:0.601098 
[4]:  test's binary_logloss:0.598526 
[5]:  test's binary_logloss:0.597445 
[6]:  test's binary_logloss:0.596942 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]:  test's binary_logloss:0.596631 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]:  test's binary_logloss:0.595948 
[9]:  test's binary_logloss:0.59484 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[10]:  test's binary_logloss:0.594303 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[11]:  test's binary_logloss:0.594234 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[12]:  test's binary_logloss:0.593922 
[13]:  test's binary_logloss:0.593382 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[14]:  test's binary_logloss:0.593212 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[15]:  test's binary_logloss:0.592573 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[16]:  test's binary_logloss:0.592517 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[17]:  test's binary_logloss:0.591712 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[18]:  test's binary_logloss:0.591235 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[19]:  test's binary_logloss:0.591183 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[20]:  test's binary_logloss:0.591613 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[21]:  test's binary_logloss:0.591766 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[22]:  test's binary_logloss:0.590327 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[23]:  test's binary_logloss:0.590783 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[24]:  test's binary_logloss:0.590239 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[25]:  test's binary_logloss:0.589348 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[26]:  test's binary_logloss:0.590099 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[27]:  test's binary_logloss:0.590969 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[28]:  test's binary_logloss:0.590471 
Early stopping, best iteration is: [25]:  test's binary_logloss:0.589348
[LightGBM] [Info] Number of positive: 555, number of negative: 245
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000045 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 325
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.693750 -> initscore=0.817710
[LightGBM] [Info] Start training from score 0.817710
[1]:  test's binary_logloss:0.58887 
Will train until there is no improvement in 3 rounds.
[2]:  test's binary_logloss:0.588492 
[3]:  test's binary_logloss:0.588975 
[4]:  test's binary_logloss:0.590194 
[5]:  test's binary_logloss:0.591667 
Early stopping, best iteration is: [2]:  test's binary_logloss:0.588492
[LightGBM] [Info] Number of positive: 567, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000044 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 327
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.708750 -> initscore=0.889321
[LightGBM] [Info] Start training from score 0.889321
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]:  test's binary_logloss:0.638232 
Will train until there is no improvement in 3 rounds.
[2]:  test's binary_logloss:0.634382 
[3]:  test's binary_logloss:0.633384 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]:  test's binary_logloss:0.630724 
[5]:  test's binary_logloss:0.627601 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]:  test's binary_logloss:0.628098 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]:  test's binary_logloss:0.626592 
[8]:  test's binary_logloss:0.625043 
[9]:  test's binary_logloss:0.62328 
[10]:  test's binary_logloss:0.623558 
[11]:  test's binary_logloss:0.620961 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[12]:  test's binary_logloss:0.620536 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[13]:  test's binary_logloss:0.619728 
[14]:  test's binary_logloss:0.61929 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[15]:  test's binary_logloss:0.619325 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[16]:  test's binary_logloss:0.618309 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[17]:  test's binary_logloss:0.618669 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[18]:  test's binary_logloss:0.617463 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[19]:  test's binary_logloss:0.616883 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[20]:  test's binary_logloss:0.617277 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[21]:  test's binary_logloss:0.617728 
[22]:  test's binary_logloss:0.617161 
Early stopping, best iteration is: [19]:  test's binary_logloss:0.616883
[LightGBM] [Info] Number of positive: 560, number of negative: 240
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000070 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 325
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.700000 -> initscore=0.847298
[LightGBM] [Info] Start training from score 0.847298
[1]:  test's binary_logloss:0.609981 
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]:  test's binary_logloss:0.608462 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]:  test's binary_logloss:0.607229 
[4]:  test's binary_logloss:0.607689 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]:  test's binary_logloss:0.606191 
[6]:  test's binary_logloss:0.607022 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]:  test's binary_logloss:0.607459 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]:  test's binary_logloss:0.60763 
Early stopping, best iteration is: [5]:  test's binary_logloss:0.606191
[LightGBM] [Info] Number of positive: 558, number of negative: 242
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000045 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 328
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.697500 -> initscore=0.835421
[LightGBM] [Info] Start training from score 0.835421
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]:  test's binary_logloss:0.599161 
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]:  test's binary_logloss:0.596906 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]:  test's binary_logloss:0.594817 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]:  test's binary_logloss:0.593478 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]:  test's binary_logloss:0.591668 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]:  test's binary_logloss:0.589909 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]:  test's binary_logloss:0.588413 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]:  test's binary_logloss:0.588181 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[9]:  test's binary_logloss:0.585997 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[10]:  test's binary_logloss:0.584412 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[11]:  test's binary_logloss:0.582727 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[12]:  test's binary_logloss:0.582996 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[13]:  test's binary_logloss:0.583912 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[14]:  test's binary_logloss:0.583633 
Early stopping, best iteration is: [11]:  test's binary_logloss:0.582727
[LightGBM] [Info] Number of positive: 560, number of negative: 240
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000043 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 326
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.700000 -> initscore=0.847298
[LightGBM] [Info] Start training from score 0.847298
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]:  test's binary_logloss:0.604933 
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]:  test's binary_logloss:0.598988 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]:  test's binary_logloss:0.594877 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]:  test's binary_logloss:0.594388 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]:  test's binary_logloss:0.592711 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]:  test's binary_logloss:0.59216 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]:  test's binary_logloss:0.592661 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]:  test's binary_logloss:0.591881 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[9]:  test's binary_logloss:0.590483 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[10]:  test's binary_logloss:0.591141 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[11]:  test's binary_logloss:0.590416 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[12]:  test's binary_logloss:0.589944 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[13]:  test's binary_logloss:0.589605 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[14]:  test's binary_logloss:0.589017 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[15]:  test's binary_logloss:0.588321 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[16]:  test's binary_logloss:0.588128 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[17]:  test's binary_logloss:0.588951 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[18]:  test's binary_logloss:0.588879 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[19]:  test's binary_logloss:0.588596 
Early stopping, best iteration is: [16]:  test's binary_logloss:0.588128
[LightGBM] [Info] Number of positive: 555, number of negative: 245
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000043 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 325
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.693750 -> initscore=0.817710
[LightGBM] [Info] Start training from score 0.817710
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]:  test's binary_logloss:0.586341 
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]:  test's binary_logloss:0.583811 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]:  test's binary_logloss:0.582304 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]:  test's binary_logloss:0.582482 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]:  test's binary_logloss:0.58225 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]:  test's binary_logloss:0.580098 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]:  test's binary_logloss:0.580683 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]:  test's binary_logloss:0.578144 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[9]:  test's binary_logloss:0.578972 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[10]:  test's binary_logloss:0.58013 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[11]:  test's binary_logloss:0.581357 
Early stopping, best iteration is: [8]:  test's binary_logloss:0.578144
[LightGBM] [Info] Number of positive: 567, number of negative: 233
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000048 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 327
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.708750 -> initscore=0.889321
[LightGBM] [Info] Start training from score 0.889321
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]:  test's binary_logloss:0.637881 
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]:  test's binary_logloss:0.63333 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]:  test's binary_logloss:0.631162 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]:  test's binary_logloss:0.629422 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]:  test's binary_logloss:0.626704 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]:  test's binary_logloss:0.625901 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]:  test's binary_logloss:0.624439 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]:  test's binary_logloss:0.62314 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[9]:  test's binary_logloss:0.621935 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[10]:  test's binary_logloss:0.622286 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[11]:  test's binary_logloss:0.621409 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[12]:  test's binary_logloss:0.619832 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[13]:  test's binary_logloss:0.620485 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[14]:  test's binary_logloss:0.620966 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[15]:  test's binary_logloss:0.621902 
Early stopping, best iteration is: [12]:  test's binary_logloss:0.619832
[LightGBM] [Info] Number of positive: 560, number of negative: 240
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000048 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 325
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 4
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.700000 -> initscore=0.847298
[LightGBM] [Info] Start training from score 0.847298
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[1]:  test's binary_logloss:0.611072 
Will train until there is no improvement in 3 rounds.
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2]:  test's binary_logloss:0.608581 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[3]:  test's binary_logloss:0.606915 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[4]:  test's binary_logloss:0.606352 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[5]:  test's binary_logloss:0.60558 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[6]:  test's binary_logloss:0.605502 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[7]:  test's binary_logloss:0.606145 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[8]:  test's binary_logloss:0.605522 
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[9]:  test's binary_logloss:0.606255 
Early stopping, best iteration is: [6]:  test's binary_logloss:0.605502
instance$archive |> as.data.table()
   boosting.max_depth classif.ce x_domain_boosting.max_depth runtime_learners
                <int>      <num>                       <int>            <num>
1:                  3      0.299                           3             1.13
2:                  4      0.297                           4             1.10
             timestamp batch_nr warnings errors max_nrounds  resample_result
                <POSc>    <int>    <int>  <int>       <num>           <list>
1: 2024-02-25 11:13:23        1        0      0          25 <ResampleResult>
2: 2024-02-25 11:13:24        2        0      0          16 <ResampleResult>
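Thanks to the on_result stage, the tuned parameter values already contain num_iterations taken from the best archive entry and no longer request early stopping, so they can be plugged into the GraphLearner for the final fit. A minimal sketch (depending on the learner version, any remaining early-stopping flags may still need to be cleared by hand):

# Apply the tuned values to the GraphLearner and refit on the full task
gl$param_set$values <- instance$result_learner_param_vals
gl$train(task)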
