Skip to content

Commit 70cd3cd

Browse files
authored
Advance returning !=1 row per summarise() group to defunct (#7767)
1 parent d1cecde commit 70cd3cd

File tree

11 files changed

+266
-279
lines changed

11 files changed

+266
-279
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ S3method(summarise,grouped_df)
159159
S3method(summarise,rowwise_df)
160160
S3method(summarise_,data.frame)
161161
S3method(summarise_,tbl_df)
162+
S3method(summarise_bullets,"dplyr:::reframe_incompatible_size")
162163
S3method(summarise_bullets,"dplyr:::summarise_incompatible_size")
163164
S3method(summarise_bullets,"dplyr:::summarise_mixed_null")
164165
S3method(summarise_bullets,"dplyr:::summarise_unsupported_type")

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@
127127

128128
* Using `across()` and data frames in `filter()`. Deprecated in 1.0.8, use `if_any()` or `if_all()` instead.
129129

130+
* Returning more or less than 1 row per group in `summarise()`. Deprecated in 1.1.0, use `reframe()` instead.
131+
130132
* `multiple = NULL` in joins. Deprecated in 1.1.1, use `multiple = "all"` instead.
131133

132134
* `multiple = "error" / "warning"` in joins. Deprecated in 1.1.1, use `relationship = "many-to-one"` instead.

R/conditions.R

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,8 +234,9 @@ dplyr_error_handler <- function(
234234
}
235235

236236
# FIXME: Must be after calling `bullets()` because the
237-
# `dplyr:::summarise_incompatible_size` method sets the correct
238-
# group by side effect
237+
# `dplyr:::summarise_incompatible_size` and
238+
# `dplyr:::reframe_incompatible_size` methods set the correct group by side
239+
# effect
239240
message <- c(
240241
cnd_bullet_header(action),
241242
"i" = if (has_active_group_context(mask)) cnd_bullet_cur_group_label()

R/summarise.R

Lines changed: 86 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -31,40 +31,32 @@
3131
#' results, consider using new names for your summary variables, especially when
3232
#' creating multiple summaries.
3333
#'
34-
#' @export
3534
#' @inheritParams arrange
3635
#' @inheritParams args_by
36+
#'
3737
#' @param ... <[`data-masking`][rlang::args_data_masking]> Name-value pairs of
3838
#' summary functions. The name will be the name of the variable in the result.
3939
#'
4040
#' The value can be:
4141
#' * A vector of length 1, e.g. `min(x)`, `n()`, or `sum(is.na(y))`.
42-
#' * A data frame, to add multiple columns from a single expression.
42+
#' * A data frame with 1 row, to add multiple columns from a single expression.
4343
#'
44-
#' `r lifecycle::badge("deprecated")` Returning values with size 0 or >1 was
45-
#' deprecated as of 1.1.0. Please use [reframe()] for this instead.
4644
#' @param .groups `r lifecycle::badge("experimental")` Grouping structure of the
4745
#' result.
4846
#'
49-
#' * "drop_last": dropping the last level of grouping. This was the
47+
#' * `"drop_last"`: Drops the last level of grouping. This was the
5048
#' only supported option before version 1.0.0.
51-
#' * "drop": All levels of grouping are dropped.
52-
#' * "keep": Same grouping structure as `.data`.
53-
#' * "rowwise": Each row is its own group.
49+
#' * `"drop"`: All levels of grouping are dropped.
50+
#' * `"keep"`: Same grouping structure as `.data`.
51+
#' * `"rowwise"`: Each row is its own group.
5452
#'
55-
#' When `.groups` is not specified, it is chosen
56-
#' based on the number of rows of the results:
57-
#' * If all the results have 1 row, you get "drop_last".
58-
#' * If the number of rows varies, you get "keep" (note that returning a
59-
#' variable number of rows was deprecated in favor of [reframe()], which
60-
#' also unconditionally drops all levels of grouping).
53+
#' When `.groups` is not specified, it is set to `"drop_last"` for a grouped
54+
#' data frame, and `"keep"` for a rowwise data frame. In addition, a message
55+
#' informs you of how the result will be grouped unless the result is
56+
#' ungrouped, the option `"dplyr.summarise.inform"` is set to `FALSE`, or
57+
#' `summarise()` is called from a function in a package.
6158
#'
62-
#' In addition, a message informs you of that choice, unless the result is ungrouped,
63-
#' the option "dplyr.summarise.inform" is set to `FALSE`,
64-
#' or when `summarise()` is called from a function in a package.
65-
#'
66-
#' @family single table verbs
67-
#' @return
59+
#' @returns
6860
#' An object _usually_ of the same type as `.data`.
6961
#'
7062
#' * The rows come from the underlying [group_keys()].
@@ -74,13 +66,17 @@
7466
#' output may be another [grouped_df], a [tibble] or a [rowwise] data frame.
7567
#' * Data frame attributes are **not** preserved, because `summarise()`
7668
#' fundamentally creates a new data frame.
69+
#'
7770
#' @section Methods:
7871
#' This function is a **generic**, which means that packages can provide
7972
#' implementations (methods) for other classes. See the documentation of
8073
#' individual methods for extra arguments and differences in behaviour.
8174
#'
8275
#' The following methods are currently available in loaded packages:
8376
#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("summarise")}.
77+
#'
78+
#' @family single table verbs
79+
#' @export
8480
#' @examples
8581
#' # A summary applied to ungrouped tbl returns a single row
8682
#' mtcars |>
@@ -107,17 +103,6 @@
107103
#' var <- "mass"
108104
#' summarise(starwars, avg = mean(.data[[var]], na.rm = TRUE))
109105
#' # Learn more in ?rlang::args_data_masking
110-
#'
111-
#' # In dplyr 1.1.0, returning multiple rows per group was deprecated in favor
112-
#' # of `reframe()`, which never messages and always returns an ungrouped
113-
#' # result:
114-
#' mtcars |>
115-
#' group_by(cyl) |>
116-
#' summarise(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75))
117-
#' # ->
118-
#' mtcars |>
119-
#' group_by(cyl) |>
120-
#' reframe(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75))
121106
summarise <- function(.data, ..., .by = NULL, .groups = NULL) {
122107
by <- enquo(.by)
123108

@@ -138,10 +123,6 @@ summarise.data.frame <- function(.data, ..., .by = NULL, .groups = NULL) {
138123
cols <- summarise_cols(.data, dplyr_quosures(...), by, "summarise")
139124
out <- summarise_build(by, cols)
140125

141-
if (!cols$all_one) {
142-
summarise_deprecate_variable_size()
143-
}
144-
145126
if (!is_tibble(.data)) {
146127
# The `by` group data we build from is always a tibble,
147128
# so we have to manually downcast as needed
@@ -164,16 +145,8 @@ summarise.grouped_df <- function(.data, ..., .by = NULL, .groups = NULL) {
164145
out <- summarise_build(by, cols)
165146
verbose <- summarise_verbose(.groups, caller_env())
166147

167-
if (!cols$all_one) {
168-
summarise_deprecate_variable_size()
169-
}
170-
171148
if (is.null(.groups)) {
172-
if (cols$all_one) {
173-
.groups <- "drop_last"
174-
} else {
175-
.groups <- "keep"
176-
}
149+
.groups <- "drop_last"
177150
}
178151

179152
group_vars <- by$names
@@ -217,12 +190,12 @@ summarise.rowwise_df <- function(.data, ..., .by = NULL, .groups = NULL) {
217190
out <- summarise_build(by, cols)
218191
verbose <- summarise_verbose(.groups, caller_env())
219192

220-
if (!cols$all_one) {
221-
summarise_deprecate_variable_size()
193+
if (is.null(.groups)) {
194+
.groups <- "keep"
222195
}
223196

224197
group_vars <- by$names
225-
if (is.null(.groups) || identical(.groups, "keep")) {
198+
if (identical(.groups, "keep")) {
226199
if (verbose && length(group_vars)) {
227200
new_groups <- glue_collapse(paste0("'", group_vars, "'"), sep = ", ")
228201
summarise_inform("has grouped output by {new_groups}")
@@ -252,12 +225,9 @@ summarise_cols <- function(data, dots, by, verb, error_call = caller_env()) {
252225

253226
warnings_state <- env(warnings = list())
254227

255-
cols <- list()
256-
257-
sizes <- 1L
258228
chunks <- list()
259-
results <- list()
260229
types <- list()
230+
results <- list()
261231
out_names <- character()
262232

263233
local_error_context(dots, 0L, mask = mask)
@@ -316,21 +286,35 @@ summarise_cols <- function(data, dots, by, verb, error_call = caller_env()) {
316286
}
317287
}
318288

319-
# Recycle horizontally across sets of chunks.
320-
# Modifies `chunks` and `results` in place for efficiency!
321-
sizes <- .Call(`dplyr_summarise_recycle_chunks_in_place`, chunks, results)
289+
if (verb == "summarise") {
290+
# For `summarise()`, check that all chunks are size 1.
291+
.Call(`dplyr_summarise_check_all_size_one`, chunks)
292+
sizes <- NULL
293+
} else {
294+
# For `reframe()`, recycle horizontally across sets of chunks.
295+
# Modifies `chunks` and `results` in place for efficiency!
296+
sizes <- .Call(
297+
`dplyr_reframe_recycle_horizontally_in_place`,
298+
chunks,
299+
results
300+
)
322301

323-
# Materialize columns, regenerate any `results` that were `NULL`ed
324-
# during the recycling process.
325-
for (i in seq_along(chunks)) {
326-
result <- results[[i]] %||% vec_c(!!!chunks[[i]], .ptype = types[[i]])
327-
cols[[out_names[i]]] <- result
302+
# Regenerate any `results` that were `NULL`ed in place during the
303+
# recycling process due to recycling of `chunks` changing the size
304+
for (i in seq_along(chunks)) {
305+
if (is.null(results[[i]])) {
306+
results[[i]] <- vec_c(!!!chunks[[i]], .ptype = types[[i]])
307+
}
308+
}
328309
}
329310
},
330311
error = function(cnd) {
331-
if (inherits(cnd, "dplyr:::summarise_incompatible_size")) {
312+
if (inherits(cnd, "dplyr:::reframe_incompatible_size")) {
332313
action <- "recycle"
333-
i <- cnd$dplyr_error_data$index
314+
i <- cnd$dplyr_error_data$index_expression
315+
} else if (inherits(cnd, "dplyr:::summarise_incompatible_size")) {
316+
action <- "compute"
317+
i <- cnd$dplyr_error_data$index_expression
334318
} else {
335319
action <- "compute"
336320
i <- i
@@ -351,9 +335,16 @@ summarise_cols <- function(data, dots, by, verb, error_call = caller_env()) {
351335
)
352336
)
353337

338+
# Build output `cols`, assigning by name so `summarise(df, a = expr, a = expr)`
339+
# only retains the 2nd assignment
340+
cols <- list()
341+
for (i in seq_along(results)) {
342+
cols[[out_names[i]]] <- results[[i]]
343+
}
344+
354345
signal_warnings(warnings_state, error_call)
355346

356-
list(new = cols, sizes = sizes, all_one = all(sizes == 1L))
347+
list(new = cols, sizes = sizes)
357348
}
358349

359350
summarise_eval_one <- function(quo, mask) {
@@ -391,7 +382,8 @@ summarise_eval_one <- function(quo, mask) {
391382

392383
summarise_build <- function(by, cols) {
393384
out <- group_keys0(by$data)
394-
if (!cols$all_one) {
385+
if (!is_null(cols$sizes)) {
386+
# Repeat keys for `reframe()`
395387
out <- vec_rep_each(out, cols$sizes)
396388
}
397389
dplyr_col_modify(out, cols$new)
@@ -413,20 +405,46 @@ summarise_bullets <- function(cnd, ...) {
413405

414406
#' @export
415407
`summarise_bullets.dplyr:::summarise_incompatible_size` <- function(cnd, ...) {
408+
index_group <- cnd$dplyr_error_data$index_group
409+
actual_size <- cnd$dplyr_error_data$actual_size
410+
411+
error_context <- peek_error_context()
412+
error_name <- ctxt_error_label(error_context)
413+
414+
# FIXME: So that cnd_bullet_cur_group_label() correctly reports the
415+
# faulty group
416+
peek_mask()$set_current_group(index_group)
417+
418+
c(
419+
cli::format_inline(
420+
"{.code {error_name}} must be size 1, not {actual_size}."
421+
),
422+
i = cli::format_inline(
423+
"To return more or less than 1 row per group, use {.fn reframe}."
424+
)
425+
)
426+
}
427+
428+
#' @export
429+
`summarise_bullets.dplyr:::reframe_incompatible_size` <- function(cnd, ...) {
430+
index_group <- cnd$dplyr_error_data$index_group
431+
actual_size <- cnd$dplyr_error_data$actual_size
416432
expected_size <- cnd$dplyr_error_data$expected_size
417-
size <- cnd$dplyr_error_data$size
418-
group <- cnd$dplyr_error_data$group
419433

420434
error_context <- peek_error_context()
421435
error_name <- ctxt_error_label(error_context)
422436

423437
# FIXME: So that cnd_bullet_cur_group_label() correctly reports the
424438
# faulty group
425-
peek_mask()$set_current_group(group)
439+
peek_mask()$set_current_group(index_group)
426440

427441
c(
428-
glue("`{error_name}` must be size {or_1(expected_size)}, not {size}."),
429-
i = glue("An earlier column had size {expected_size}.")
442+
cli::format_inline(
443+
"{.code {error_name}} must be size {or_1(expected_size)}, not {actual_size}."
444+
),
445+
i = cli::format_inline(
446+
"An earlier column had size {expected_size}."
447+
)
430448
)
431449
}
432450

@@ -464,21 +482,3 @@ summarise_inform <- function(..., .env = parent.frame()) {
464482
'. You can override using the `.groups` argument.'
465483
))
466484
}
467-
468-
summarise_deprecate_variable_size <- function(
469-
env = caller_env(),
470-
user_env = caller_env(2)
471-
) {
472-
lifecycle::deprecate_warn(
473-
when = "1.1.0",
474-
what = I("Returning more (or less) than 1 row per `summarise()` group"),
475-
with = "reframe()",
476-
details = paste0(
477-
"When switching from `summarise()` to `reframe()`, remember that ",
478-
"`reframe()` always returns an ungrouped data frame and adjust accordingly."
479-
),
480-
env = env,
481-
user_env = user_env,
482-
always = TRUE
483-
)
484-
}

man/summarise.Rd

Lines changed: 11 additions & 32 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)