|
| 1 | +#' Fit a simple, non-informative model |
| 2 | +#' |
| 3 | +#' Fit a single mean or largest class model |
| 4 | +#' |
| 5 | +#' \code{nullmodel} emulates other model building functions, but returns the |
| 6 | +#' simplest model possible given a training set: a single mean for numeric |
| 7 | +#' outcomes and the most prevalent class for factor outcomes. When class |
| 8 | +#' probabilities are requested, the percentage of the training set samples with |
| 9 | +#' the most prevalent class is returned. |
| 10 | +#' |
| 11 | +#' @aliases nullmodel nullmodel.default predict.nullmodel |
| 12 | +#' @param x An optional matrix or data frame of predictors. These values are |
| 13 | +#' not used in the model fit |
| 14 | +#' @param y A numeric vector (for regression) or factor (for classification) of |
| 15 | +#' outcomes |
| 16 | +#' @param \dots Optional arguments (not yet used) |
| 17 | +#' @param object An object of class \code{nullmodel} |
| 18 | +#' @param new_data A matrix or data frame of predictors (only used to determine |
| 19 | +#' the number of predictions to return) |
| 20 | +#' @param type Either "raw" (for regression), "class" or "prob" (for |
| 21 | +#' classification) |
| 22 | +#' @return The output of \code{nullmodel} is a list of class \code{nullmodel} |
| 23 | +#' with elements \item{call }{the function call} \item{value }{the mean of |
| 24 | +#' \code{y} or the most prevalent class} \item{levels }{when \code{y} is a |
| 25 | +#' factor, a vector of levels. \code{NULL} otherwise} \item{pct }{when \code{y} |
| 26 | +#' is a factor, a data frame with a column for each class (\code{NULL} |
| 27 | +#' otherwise). The column for the most prevalent class has the proportion of |
| 28 | +#' the training samples with that class (the other columns are zero). } \item{n |
| 29 | +#' }{the number of elements in \code{y}} |
| 30 | +#' |
| 31 | +#' \code{predict.nullmodel} returns a either a factor or numeric vector |
| 32 | +#' depending on the class of \code{y}. All predictions are always the same. |
| 33 | +#' @keywords models |
| 34 | +#' @examples |
| 35 | +#' |
| 36 | +#' outcome <- factor(sample(letters[1:2], |
| 37 | +#' size = 100, |
| 38 | +#' prob = c(.1, .9), |
| 39 | +#' replace = TRUE)) |
| 40 | +#' useless <- nullmodel(y = outcome) |
| 41 | +#' useless |
| 42 | +#' predict(useless, matrix(NA, nrow = 5)) |
| 43 | +#' |
| 44 | +#' @export |
| 45 | +nullmodel <- function (x, ...) UseMethod("nullmodel") |
| 46 | + |
| 47 | +#' @export |
| 48 | +#' @rdname nullmodel |
| 49 | +nullmodel.default <- function(x = NULL, y, ...) { |
| 50 | + |
| 51 | + |
| 52 | + if(is.factor(y)) { |
| 53 | + lvls <- levels(y) |
| 54 | + tab <- table(y) |
| 55 | + value <- names(tab)[which.max(tab)] |
| 56 | + pct <- tab/sum(tab) |
| 57 | + } else { |
| 58 | + lvls <- NULL |
| 59 | + pct <- NULL |
| 60 | + if(is.null(dim(y))) { |
| 61 | + value <- mean(y, na.rm = TRUE) |
| 62 | + } else { |
| 63 | + value <- colMeans(y, na.rm = TRUE) |
| 64 | + } |
| 65 | + } |
| 66 | + |
| 67 | + structure( |
| 68 | + list(call = match.call(), |
| 69 | + value = value, |
| 70 | + levels = lvls, |
| 71 | + pct = pct, |
| 72 | + n = length(y[[1]])), |
| 73 | + class = "nullmodel") |
| 74 | +} |
| 75 | + |
| 76 | +#' @export |
| 77 | +#' @rdname nullmodel |
| 78 | +print.nullmodel <- function(x, ...) { |
| 79 | + cat("Null", |
| 80 | + ifelse(is.null(x$levels), "Classification", "Regression"), |
| 81 | + "Model\n") |
| 82 | + x$call |
| 83 | + |
| 84 | + if (length(x$value) == 1) { |
| 85 | + cat("Predicted Value:", |
| 86 | + ifelse(is.null(x$levels), format(x$value), x$value), |
| 87 | + "\n") |
| 88 | + } else { |
| 89 | + cat("Predicted Value:\n", |
| 90 | + names(x$value), "\n", |
| 91 | + x$value, |
| 92 | + "\n") |
| 93 | + } |
| 94 | +} |
| 95 | + |
| 96 | +#' @export |
| 97 | +#' @rdname nullmodel |
| 98 | +predict.nullmodel <- function (object, new_data = NULL, type = NULL, ...) { |
| 99 | + if(is.null(type)) { |
| 100 | + type <- if(is.null(object$levels)) "raw" else "class" |
| 101 | + } |
| 102 | + |
| 103 | + n <- if(is.null(new_data)) object$n else nrow(new_data) |
| 104 | + if(!is.null(object$levels)) { |
| 105 | + if(type == "prob") { |
| 106 | + out <- matrix(rep(object$pct, n), nrow = n, byrow = TRUE) |
| 107 | + colnames(out) <- object$levels |
| 108 | + out <- as.data.frame(out) |
| 109 | + } else { |
| 110 | + out <- factor(rep(object$value, n), levels = object$levels) |
| 111 | + } |
| 112 | + } else { |
| 113 | + if(type %in% c("prob", "class")) stop("ony raw predicitons are applicable to regression models") |
| 114 | + if(length(object$value) == 1) { |
| 115 | + out <- rep(object$value, n) |
| 116 | + } else { |
| 117 | + out <- as_tibble(matrix(rep(object$value, n), |
| 118 | + ncol = length(object$value), byrow = TRUE)) |
| 119 | + |
| 120 | + names(out) <- names(object$value) |
| 121 | + } |
| 122 | + } |
| 123 | + out |
| 124 | +} |
| 125 | + |
| 126 | +#' General Interface for null models |
| 127 | +#' |
| 128 | +#' `null_model` is a way to generate a _specification_ of a model before |
| 129 | +#' fitting and allows the model to be created using R. It doens't have any |
| 130 | +#' main arguments. |
| 131 | +#' |
| 132 | +#' @param mode A single character string for the type of model. |
| 133 | +#' Possible values for this model are "unknown", "regression", or |
| 134 | +#' "classification". |
| 135 | +#' @details The model can be created using the `fit()` function using the |
| 136 | +#' following _engines_: |
| 137 | +#' \itemize{ |
| 138 | +#' \item \pkg{R}: `"parsnip"` |
| 139 | +#' } |
| 140 | +#' |
| 141 | +#' @section Engine Details: |
| 142 | +#' |
| 143 | +#' Engines may have pre-set default arguments when executing the |
| 144 | +#' model fit call. For this type of |
| 145 | +#' model, the template of the fit calls are: |
| 146 | +#' |
| 147 | +#' \pkg{parsnip} classification |
| 148 | +#' |
| 149 | +#' \Sexpr[results=rd]{parsnip:::show_fit(parsnip:::null_model(mode = "classification"), "parsnip")} |
| 150 | +#' |
| 151 | +#' \pkg{parsnip} regression |
| 152 | +#' |
| 153 | +#' \Sexpr[results=rd]{parsnip:::show_fit(parsnip:::null_model(mode = "regression"), "parsnip")} |
| 154 | +#' |
| 155 | +#' @importFrom purrr map_lgl |
| 156 | +#' @seealso [varying()], [fit()] |
| 157 | +#' @examples |
| 158 | +#' null_model(mode = "regression") |
| 159 | +#' @export |
| 160 | +null_model <- |
| 161 | + function(mode = "classification") { |
| 162 | + # Check for correct mode |
| 163 | + if (!(mode %in% null_model_modes)) |
| 164 | + stop("`mode` should be one of: ", |
| 165 | + paste0("'", null_model_modes, "'", collapse = ", "), |
| 166 | + call. = FALSE) |
| 167 | + |
| 168 | + # Capture the arguments in quosures |
| 169 | + args <- list() |
| 170 | + |
| 171 | + # Save some empty slots for future parts of the specification |
| 172 | + out <- list(args = args, eng_args = NULL, |
| 173 | + mode = mode, method = NULL, engine = NULL) |
| 174 | + |
| 175 | + # set classes in the correct order |
| 176 | + class(out) <- make_classes("null_model") |
| 177 | + out |
| 178 | + } |
0 commit comments