mlr-org · awinterstetter · Oct 20, 2025 · Oct 20, 2025 · Oct 20, 2025 · Oct 20, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -133,6 +133,7 @@ Collate:
     'PipeOpChunk.R'
     'PipeOpClassBalancing.R'
     'PipeOpClassWeights.R'
+    'PipeOpClassWeightsEx.R'
     'PipeOpClassifAvg.R'
     'PipeOpColApply.R'
     'PipeOpColRoles.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -105,6 +105,7 @@ export(PipeOpBranch)
 export(PipeOpChunk)
 export(PipeOpClassBalancing)
 export(PipeOpClassWeights)
+export(PipeOpClassWeightsEx)
 export(PipeOpClassifAvg)
 export(PipeOpColApply)
 export(PipeOpColRoles)

diff --git a/R/PipeOpClassWeights.R b/R/PipeOpClassWeights.R
@@ -5,25 +5,26 @@
 #' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`].
 #'
 #' @description
-#' Adds a class weight column to the [`Task`][mlr3::Task] that different [`Learner`][mlr3::Learner]s may be
-#' able to use for sample weighting. Sample weights are added to each sample according to the target class.
+#' Adds a class weight column to the [`Task`][mlr3::Task], influencing how different [`Learner`][mlr3::Learner]s weight samples during training.
+#' It is also possible to add a weight column to the [`Task`][mlr3::Task], which affects how samples are weighted during evaluation.
+#' Sample weights are assigned to each observation according to its target class.
 #'
 #' Only binary [classification tasks][mlr3::TaskClassif] are supported.
 #'
 #' Caution: when constructed naively without parameter, the weights are all set to 1. The `minor_weight` parameter
 #' must be adjusted for this [`PipeOp`] to be useful.
 #'
-#' Note this only sets the `"weights_learner"` column.
-#' It therefore influences the behaviour of subsequent [`Learner`][mlr3::Learner]s, but does not influence resampling or evaluation metric weights.
+#' It is possible to set either one of the `"weights_learner"` and `"weights_measure"` columns, both of them or none of them.
+#' Thus, the behavior of subsequent [`Learner`][mlr3::Learner]s or evaluation metric weights can be determined.
 #'
 #' @section Construction:
 #' ```
 #' PipeOpClassWeights$new(id = "classweights", param_vals = list())
 #' ```
 #'
-#' * `id` :: `character(1)`
+#' * `id` :: `character(1)` \cr
 #'   Identifier of the resulting  object, default `"classweights"`
-#' * `param_vals` :: named `list`\cr
+#' * `param_vals` :: named `list` \cr
 #'   List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`.
 #'
 #' @section Input and Output Channels:
@@ -40,12 +41,14 @@
 #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`]; however, the `affect_columns` parameter is *not* present. Further parameters are:
 #' * `minor_weight` :: `numeric(1)` \cr
 #'   Weight given to samples of the minor class. Major class samples have weight 1. Initialized to 1.
-#' @section Internals:
-#' Introduces, or overwrites, the "weights" column in the [`Task`][mlr3::Task]. However, the [`Learner`][mlr3::Learner] method needs to
-#' respect weights for this to have an effect.
+#' * `weight_type` :: `character` \cr
+#'   Determines whether `"weights_learner"`, `"weights_measure"`, both or none of the columns will be set. Defaults to `"learner"`. An empty
+#'   vector leaves the task unchanged.
 #'
-#' The newly introduced column is named `.WEIGHTS`; there will be a naming conflict if this column already exists and is *not* a
-#' weight column itself.
+#' @section Internals:
+#' Adds a `.WEIGHTS` column to the [`Task`][mlr3::Task], which is removed from the feature role and mapped to the requested weight roles.
+#' The [`Learner`][mlr3::Learner] must support weights for this to have an effect. There will be a naming conflict if this column already
+#' exists and is *not* a weight column itself.
 #'
 #' @section Fields:
 #' Only fields inherited from [`PipeOp`].
@@ -84,20 +87,26 @@ PipeOpClassWeights = R6Class("PipeOpClassWeights",
   public = list(
     initialize = function(id = "classweights", param_vals = list()) {
       ps = ps(
-        minor_weight = p_dbl(lower = 0, upper = Inf, tags = "train")
-      )
-      ps$values = list(minor_weight = 1)
+        minor_weight = p_dbl(init = 1, lower = 0, upper = Inf, tags = "train"),
+        weight_type = p_uty(init = "learner", tags = "train",
+                            custom_check = crate(function(x) check_character(x, max.len = 2) %check&&% check_subset(x, choices = c("learner", "measure"))))
+        )
       super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, task_type = "TaskClassif", tags = "imbalanced data")
     }
   ),
   private = list(
-
     .train_task = function(task) {
 
+      pv = self$param_set$get_values(tags = "train")
+
       if ("twoclass" %nin% task$properties) {
         stop("Only binary classification Tasks are supported.")
       }
 
+      # return task as is, if weight_type is an empty character vector
+      if (length(pv$weight_type) == 0)
+      return(task)
+
       weightcolname = ".WEIGHTS"
       if (weightcolname %in% unlist(task$col_roles)) {
         stopf("Weight column '%s' is already in the Task", weightcolname)
@@ -106,14 +115,17 @@ PipeOpClassWeights = R6Class("PipeOpClassWeights",
       truth = task$truth()
       minor = names(which.min(table(task$truth())))
 
-      wcol = setnames(data.table(ifelse(truth == minor, self$param_set$values$minor_weight, 1)), weightcolname)
+      wcol = setnames(data.table(ifelse(truth == minor, pv$minor_weight, 1)), weightcolname)
 
       task$cbind(wcol)
       task$col_roles$feature = setdiff(task$col_roles$feature, weightcolname)
-      if ("weights_learner" %in% mlr_reflections$task_col_roles$classif) {
-        task$col_roles$weights_learner = weightcolname
-      } else {
-        task$col_roles$weight = weightcolname
+
+      classif_roles = mlr_reflections$task_col_roles$classif
+
+      for (type in pv$weight_type) {
+        preferred_role = paste0("weights_", type)
+        final_role = if (preferred_role %in% classif_roles) preferred_role else "weight"
+        task$col_roles[[final_role]] = weightcolname
       }
       task
     },

diff --git a/R/PipeOpClassWeightsEx.R b/R/PipeOpClassWeightsEx.R
@@ -0,0 +1,171 @@
+#' @title Class Weights for Sample Weighting - Extended
+#'
+#' @usage NULL
+#' @name mlr_pipeops_classweightsex
+#' @format [`R6Class`][R6::R6Class] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`].
+#'
+#' @description
+#' Adds a class weight column to the [`Task`][mlr3::Task], influencing how different [`Learner`][mlr3::Learner]s weight samples during training.
+#' It is also possible to add a weight column to the [`Task`][mlr3::Task], which affects how samples are weighted during evaluation.
+#' Sample weights are assigned to each observation according to its target class.
+#'
+#' Binary as well as multiclass [classification tasks][mlr3::TaskClassif] are supported.
+#'
+#' It is possible to set either one of the `"weights_learner"` and `"weights_measure"` columns, both of them or none of them.
+#' Thus, the behavior of subsequent [`Learner`][mlr3::Learner]s or evaluation metric weights can be determined.
+#'
+#' @section Construction:
+#' ```
+#' PipeOpClassWeightsEx$new(id = "classweightsex", param_vals = list())
+#' ```
+#'
+#' * `id` :: `character(1)` \cr
+#'   Identifier of the resulting  object, default `"classweightsex"`
+#' * `param_vals` :: named `list`\cr
+#'   List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise be set during construction. Default `list()`.
+#'
+#' @section Input and Output Channels:
+#' Input and output channels are inherited from [`PipeOpTaskPreproc`]. Instead of a [`Task`][mlr3::Task], a
+#' [`TaskClassif`][mlr3::TaskClassif] is used as input and output during training and prediction.
+#'
+#' The output during training is the input [`Task`][mlr3::Task] with added weights column according to target class.
+#' The output during prediction is the unchanged input.
+#'
+#' @section State:
+#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`].
+#'
+#' @section Parameters:
+#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`]; however, the `affect_columns` parameter is *not* present. Further parameters are:
+#' * `weight_type` :: `character` \cr
+#'   Determines whether `"weights_learner"`, `"weights_measure"`, both or none of the columns will be set.
+#' * `weight_method` :: `character(1)` \cr
+#'   The method used to determine the sample weights. Supported methods are `"inverse_class_frequency"`, `"inverse_square_root_of_frequency"`, `"median_frequency_balancing"` and `"explicit"`. Initialized to `"explicit"`.
+#' * `mapping` :: named `numeric` \cr
+#'   Only used when `weight_method` is `"explicit"`. Must be a named numeric vector that specifies a finite weight for each target class in the task.
+#'
+#' The newly introduced column is named `.WEIGHTS`; there will be a naming conflict if this column already exists and is *not* a
+#' weight column itself.
+#'
+#' @section Internals:
+#' The `.WEIGHTS` column is removed from the feature role and re-assigned to the requested weight roles. When `weight_method = "explicit"`,
+#' the mapping must cover every class present in the training data and may not contain additional classes.
+#'
+#' @section Fields:
+#' Only fields inherited from [`PipeOp`].
+#'
+#' @section Methods:
+#' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`].
+#'
+#' @family PipeOps
+#' @template seealso_pipeopslist
+#' @include PipeOpTaskPreproc.R
+#' @export
+#' @examples
+#' library("mlr3")
+#'
+#' task = tsk("spam")
+#'
+#' poicf = po("classweightsex", param_vals = list(weight_type = c("learner", "measure"), weight_method = "inverse_class_frequency"))
+#' result = poicf$train(list(task))[[1L]]
+#'
+#' if ("weights_learner" %in% names(result)) {
+#'   result$weights_learner  # recent mlr3-versions
+#' } else {
+#'   result$weights  # old mlr3-versions
+#' }
+#'
+#' result$weights_measure
+#'
+#'
+#' if ("weights_measure" %in% names(result)) {
+#'   result$weights_measure  # recent mlr3-versions
+#' } else {
+#'   result$weights  # old mlr3-versions
+#' }
+
+PipeOpClassWeightsEx = R6Class("PipeOpClassWeightsEx",
+  inherit = PipeOpTaskPreproc,
+
+  public = list(
+    # Declares the `weight_type`, `weight_method` and `mapping` hyperparameters and forwards everything else to the parent class.
+    initialize = function(id = "classweightsex", param_vals = list()) {
+      ps = ps(
+        weight_type = p_uty(init = "learner", tags = "train",
+                            custom_check = crate(function(x) check_character(x, max.len = 2) %check&&% check_subset(x, choices = c("learner", "measure")))),
+        weight_method = p_fct(init = "explicit",
+                              levels = c("inverse_class_frequency", "inverse_square_root_of_frequency", "median_frequency_balancing", "effective_number_of_samples", "explicit"), tags = c("train", "required")),
+        mapping = p_uty(tags = "train",
+                        custom_check = crate(function(x) {
+                          if (is.null(x)) {
+                            return(TRUE)
+                          }
+                          check_numeric(x, any.missing = FALSE, finite = TRUE) %check&&%
+                            check_character(names(x), any.missing = FALSE, unique = TRUE, min.chars = 1)
+                        }),
+                        depends = weight_method == "explicit")
+      )
+      super$initialize(id, param_set = ps, param_vals = param_vals, can_subset_cols = FALSE, task_type = "TaskClassif", tags = "imbalanced data")
+    }
+  ),
+  private = list(
+    # Computes one weight per class, binds it to the task as column `.WEIGHTS` and assigns that column to the requested weight roles.
+    .train_task = function(task) {
+      pv = self$param_set$get_values(tags = "train")
+
+      # Nothing to do: no weight role requested (NULL or empty vector), no method, or "explicit" without a mapping.
+      if (length(pv$weight_type) == 0 || is.null(pv$weight_method) ||
+          (pv$weight_method == "explicit" && is.null(pv$mapping))) {
+        return(task)
+      }
+
+      if (identical(pv$weight_method, "explicit")) {
+        mapping_names = names(pv$mapping)
+        missing_classes = setdiff(task$class_names, mapping_names)
+        extra_classes = setdiff(mapping_names, task$class_names)
+        if (length(missing_classes)) {
+          stopf("Explicit class weights must cover every class in the task; missing: %s", paste(missing_classes, collapse = ", "))
+        }
+        if (length(extra_classes)) {
+          stopf("Explicit class weights contain labels not present in the task: %s", paste(extra_classes, collapse = ", "))
+        }
+      }
+
+      weightcolname = ".WEIGHTS"
+      if (weightcolname %in% unlist(task$col_roles)) {
+        stopf("Weight column '%s' is already in the Task", weightcolname)
+      }
+
+      truth = task$truth()
+      class_table = prop.table(table(truth))
+      # plain named numeric vector, so that classes can be looked up by name below
+      class_frequency = structure(as.numeric(class_table), names = names(class_table))
+
+      weights_by_class = switch(pv$weight_method,
+        "inverse_class_frequency" = 1 / class_frequency,
+        "inverse_square_root_of_frequency" = 1 / sqrt(class_frequency),
+        "median_frequency_balancing" = median(class_frequency) / class_frequency,
+        "explicit" = pv$mapping,
+        # without this fallback switch() yields NULL for levels that have no branch, e.g. "effective_number_of_samples"
+        stopf("Weight method '%s' is not implemented", pv$weight_method)
+      )
+
+      # look up by class *name*: subsetting with the factor itself would use its integer codes and ignore the names of `mapping`
+      wcol = setnames(data.table(as.numeric(weights_by_class[as.character(truth)])), weightcolname)
+      task$cbind(wcol)
+      task$col_roles$feature = setdiff(task$col_roles$feature, weightcolname)
+
+      classif_roles = mlr_reflections$task_col_roles$classif
+      for (type in pv$weight_type) {
+        preferred_role = paste0("weights_", type)
+        final_role = if (preferred_role %in% classif_roles) preferred_role else "weight"
+        task$col_roles[[final_role]] = weightcolname
+      }
+      task
+    },
+
+    # the task is passed through unchanged during prediction
+    .predict_task = identity
+  )
+)
+
+mlr_pipeops$add("classweightsex", PipeOpClassWeightsEx)  # register the class in `mlr_pipeops` under the key "classweightsex"
diff --git a/attic/PipeOpClassWeightsEx - test code.R b/attic/PipeOpClassWeightsEx - test code.R
@@ -0,0 +1,23 @@
+# library("mlr3")
+#
+# task = tsk("spam")
+# opb = po("classweightsex", param_vals = list(weight_method = "inverse_class_frequency"))
+# opb = po("classweightsex", param_vals = list(weight_method = "inverse_square_root_of_frequency"))
+# opb = po("classweightsex", param_vals = list(weight_method = "median_frequency_balancing"))
+# opb = po("classweightsex", param_vals = list(weight_method = "explicit", mapping = c("setosa" = 0.3, "virginica" = 0.5, "versicolor" = 0.4)))
+#
+# task weights
+# if ("weights_learner" %in% names(task)) {
+#   task$weights_learner  # recent mlr3-versions
+# } else {
+#   task$weights  # old mlr3-versions
+# }
+#
+# double the instances in the minority class (spam)
+# opb$param_set$values$minor_weight = 2
+# result = opb$train(list(task))[[1L]]
+# if ("weights_learner" %in% names(result)) {
+#   result$weights_learner  # recent mlr3-versions
+# } else {
+#   result$weights  # old mlr3-versions
+# }
diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd
diff --git a/man/PipeOpEncodePL.Rd b/man/PipeOpEncodePL.Rd
diff --git a/man/PipeOpEnsemble.Rd b/man/PipeOpEnsemble.Rd
diff --git a/man/PipeOpImpute.Rd b/man/PipeOpImpute.Rd
diff --git a/man/PipeOpTargetTrafo.Rd b/man/PipeOpTargetTrafo.Rd
diff --git a/man/PipeOpTaskPreproc.Rd b/man/PipeOpTaskPreproc.Rd
diff --git a/man/PipeOpTaskPreprocSimple.Rd b/man/PipeOpTaskPreprocSimple.Rd
diff --git a/man/mlr_pipeops.Rd b/man/mlr_pipeops.Rd
diff --git a/man/mlr_pipeops_adas.Rd b/man/mlr_pipeops_adas.Rd
diff --git a/man/mlr_pipeops_blsmote.Rd b/man/mlr_pipeops_blsmote.Rd