3131# ' results, consider using new names for your summary variables, especially when
3232# ' creating multiple summaries.
3333# '
34- # ' @export
3534# ' @inheritParams arrange
3635# ' @inheritParams args_by
36+ # '
3737# ' @param ... <[`data-masking`][rlang::args_data_masking]> Name-value pairs of
3838# ' summary functions. The name will be the name of the variable in the result.
3939# '
4040# ' The value can be:
4141# ' * A vector of length 1, e.g. `min(x)`, `n()`, or `sum(is.na(y))`.
42- # ' * A data frame, to add multiple columns from a single expression.
#'   * A data frame with 1 row, to add multiple columns from a single expression.
4343# '
44- # ' `r lifecycle::badge("deprecated")` Returning values with size 0 or >1 was
45- # ' deprecated as of 1.1.0. Please use [reframe()] for this instead.
4644# ' @param .groups `r lifecycle::badge("experimental")` Grouping structure of the
4745# ' result.
4846# '
49- # ' * "drop_last": dropping the last level of grouping. This was the
#'   * `"drop_last"`: drops the last level of grouping. This was the
5048# ' only supported option before version 1.0.0.
51- # ' * "drop": All levels of grouping are dropped.
52- # ' * "keep": Same grouping structure as `.data`.
53- # ' * "rowwise": Each row is its own group.
#'   * `"drop"`: All levels of grouping are dropped.
#'   * `"keep"`: Same grouping structure as `.data`.
#'   * `"rowwise"`: Each row is its own group.
5452# '
55- # ' When `.groups` is not specified, it is chosen
56- # ' based on the number of rows of the results:
57- # ' * If all the results have 1 row, you get "drop_last".
58- # ' * If the number of rows varies, you get "keep" (note that returning a
59- # ' variable number of rows was deprecated in favor of [reframe()], which
60- # ' also unconditionally drops all levels of grouping).
#'   When `.groups` is not specified, it is set to `"drop_last"` for a grouped
#'   data frame, and `"keep"` for a rowwise data frame. In addition, a message
#'   informs you of how the result will be grouped, unless the result is
#'   ungrouped, the option `"dplyr.summarise.inform"` is set to `FALSE`, or
#'   `summarise()` is called from a function in a package.
6158# '
62- # ' In addition, a message informs you of that choice, unless the result is ungrouped,
63- # ' the option "dplyr.summarise.inform" is set to `FALSE`,
64- # ' or when `summarise()` is called from a function in a package.
65- # '
66- # ' @family single table verbs
67- # ' @return
59+ # ' @returns
6860# ' An object _usually_ of the same type as `.data`.
6961# '
7062# ' * The rows come from the underlying [group_keys()].
7466# ' output may be another [grouped_df], a [tibble] or a [rowwise] data frame.
7567# ' * Data frame attributes are **not** preserved, because `summarise()`
7668# ' fundamentally creates a new data frame.
69+ # '
7770# ' @section Methods:
7871# ' This function is a **generic**, which means that packages can provide
7972# ' implementations (methods) for other classes. See the documentation of
8073# ' individual methods for extra arguments and differences in behaviour.
8174# '
8275# ' The following methods are currently available in loaded packages:
8376# ' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("summarise")}.
77+ # '
78+ # ' @family single table verbs
79+ # ' @export
8480# ' @examples
8581# ' # A summary applied to ungrouped tbl returns a single row
8682# ' mtcars |>
107103# ' var <- "mass"
108104# ' summarise(starwars, avg = mean(.data[[var]], na.rm = TRUE))
109105# ' # Learn more in ?rlang::args_data_masking
110- # '
111- # ' # In dplyr 1.1.0, returning multiple rows per group was deprecated in favor
112- # ' # of `reframe()`, which never messages and always returns an ungrouped
113- # ' # result:
114- # ' mtcars |>
115- # ' group_by(cyl) |>
116- # ' summarise(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75))
117- # ' # ->
118- # ' mtcars |>
119- # ' group_by(cyl) |>
120- # ' reframe(qs = quantile(disp, c(0.25, 0.75)), prob = c(0.25, 0.75))
121106summarise <- function (.data , ... , .by = NULL , .groups = NULL ) {
122107 by <- enquo(.by )
123108
@@ -138,10 +123,6 @@ summarise.data.frame <- function(.data, ..., .by = NULL, .groups = NULL) {
138123 cols <- summarise_cols(.data , dplyr_quosures(... ), by , " summarise" )
139124 out <- summarise_build(by , cols )
140125
141- if (! cols $ all_one ) {
142- summarise_deprecate_variable_size()
143- }
144-
145126 if (! is_tibble(.data )) {
146127 # The `by` group data we build from is always a tibble,
147128 # so we have to manually downcast as needed
@@ -164,16 +145,8 @@ summarise.grouped_df <- function(.data, ..., .by = NULL, .groups = NULL) {
164145 out <- summarise_build(by , cols )
165146 verbose <- summarise_verbose(.groups , caller_env())
166147
167- if (! cols $ all_one ) {
168- summarise_deprecate_variable_size()
169- }
170-
171148 if (is.null(.groups )) {
172- if (cols $ all_one ) {
173- .groups <- " drop_last"
174- } else {
175- .groups <- " keep"
176- }
149+ .groups <- " drop_last"
177150 }
178151
179152 group_vars <- by $ names
@@ -217,12 +190,12 @@ summarise.rowwise_df <- function(.data, ..., .by = NULL, .groups = NULL) {
217190 out <- summarise_build(by , cols )
218191 verbose <- summarise_verbose(.groups , caller_env())
219192
220- if (! cols $ all_one ) {
221- summarise_deprecate_variable_size()
193+ if (is.null( .groups ) ) {
194+ .groups <- " keep "
222195 }
223196
224197 group_vars <- by $ names
225- if (is.null( .groups ) || identical(.groups , " keep" )) {
198+ if (identical(.groups , " keep" )) {
226199 if (verbose && length(group_vars )) {
227200 new_groups <- glue_collapse(paste0(" '" , group_vars , " '" ), sep = " , " )
228201 summarise_inform(" has grouped output by {new_groups}" )
@@ -252,12 +225,9 @@ summarise_cols <- function(data, dots, by, verb, error_call = caller_env()) {
252225
253226 warnings_state <- env(warnings = list ())
254227
255- cols <- list ()
256-
257- sizes <- 1L
258228 chunks <- list ()
259- results <- list ()
260229 types <- list ()
230+ results <- list ()
261231 out_names <- character ()
262232
263233 local_error_context(dots , 0L , mask = mask )
@@ -316,21 +286,35 @@ summarise_cols <- function(data, dots, by, verb, error_call = caller_env()) {
316286 }
317287 }
318288
319- # Recycle horizontally across sets of chunks.
320- # Modifies `chunks` and `results` in place for efficiency!
321- sizes <- .Call(`dplyr_summarise_recycle_chunks_in_place` , chunks , results )
289+ if (verb == " summarise" ) {
290+ # For `summarise()`, check that all chunks are size 1.
291+ .Call(`dplyr_summarise_check_all_size_one` , chunks )
292+ sizes <- NULL
293+ } else {
294+ # For `reframe()`, recycle horizontally across sets of chunks.
295+ # Modifies `chunks` and `results` in place for efficiency!
296+ sizes <- .Call(
297+ `dplyr_reframe_recycle_horizontally_in_place` ,
298+ chunks ,
299+ results
300+ )
322301
323- # Materialize columns, regenerate any `results` that were `NULL`ed
324- # during the recycling process.
325- for (i in seq_along(chunks )) {
326- result <- results [[i ]] %|| % vec_c(!!! chunks [[i ]], .ptype = types [[i ]])
327- cols [[out_names [i ]]] <- result
302+ # Regenerate any `results` that were `NULL`ed in place during the
303+ # recycling process due to recycling of `chunks` changing the size
304+ for (i in seq_along(chunks )) {
305+ if (is.null(results [[i ]])) {
306+ results [[i ]] <- vec_c(!!! chunks [[i ]], .ptype = types [[i ]])
307+ }
308+ }
328309 }
329310 },
330311 error = function (cnd ) {
331- if (inherits(cnd , " dplyr:::summarise_incompatible_size " )) {
312+ if (inherits(cnd , " dplyr:::reframe_incompatible_size " )) {
332313 action <- " recycle"
333- i <- cnd $ dplyr_error_data $ index
314+ i <- cnd $ dplyr_error_data $ index_expression
315+ } else if (inherits(cnd , " dplyr:::summarise_incompatible_size" )) {
316+ action <- " compute"
317+ i <- cnd $ dplyr_error_data $ index_expression
334318 } else {
335319 action <- " compute"
336320 i <- i
@@ -351,9 +335,16 @@ summarise_cols <- function(data, dots, by, verb, error_call = caller_env()) {
351335 )
352336 )
353337
338+ # Build output `cols`, assigning by name so `summarise(df, a = expr, a = expr)`
339+ # only retains the 2nd assignment
340+ cols <- list ()
341+ for (i in seq_along(results )) {
342+ cols [[out_names [i ]]] <- results [[i ]]
343+ }
344+
354345 signal_warnings(warnings_state , error_call )
355346
356- list (new = cols , sizes = sizes , all_one = all( sizes == 1L ) )
347+ list (new = cols , sizes = sizes )
357348}
358349
359350summarise_eval_one <- function (quo , mask ) {
@@ -391,7 +382,8 @@ summarise_eval_one <- function(quo, mask) {
391382
392383summarise_build <- function (by , cols ) {
393384 out <- group_keys0(by $ data )
394- if (! cols $ all_one ) {
385+ if (! is_null(cols $ sizes )) {
386+ # Repeat keys for `reframe()`
395387 out <- vec_rep_each(out , cols $ sizes )
396388 }
397389 dplyr_col_modify(out , cols $ new )
@@ -413,20 +405,46 @@ summarise_bullets <- function(cnd, ...) {
413405
#' @export
`summarise_bullets.dplyr:::summarise_incompatible_size` <- function(cnd, ...) {
  # Builds the error bullets for a `summarise()` expression whose result was
  # not size 1. Returns a character vector: an unnamed header followed by an
  # `i` bullet pointing at `reframe()`.
  #
  # `cnd` is the caught condition; this method reads `index_group` and
  # `actual_size` from `cnd$dplyr_error_data`. `...` is unused here.
  error_data <- cnd$dplyr_error_data

  # NOTE: `actual_size` and `error_name` are interpolated *by name* in the cli
  # templates below, so these locals must keep exactly these names.
  actual_size <- error_data$actual_size
  error_name <- ctxt_error_label(peek_error_context())

  # FIXME: So that cnd_bullet_cur_group_label() correctly reports the
  # faulty group
  mask <- peek_mask()
  mask$set_current_group(error_data$index_group)

  header <- cli::format_inline(
    "{.code {error_name}} must be size 1, not {actual_size}."
  )
  hint <- cli::format_inline(
    "To return more or less than 1 row per group, use {.fn reframe}."
  )

  c(header, i = hint)
}
427+
#' @export
`summarise_bullets.dplyr:::reframe_incompatible_size` <- function(cnd, ...) {
  # Builds the error bullets for a result whose size is incompatible with the
  # size of an earlier column. Returns a character vector: an unnamed header
  # followed by an `i` bullet stating the earlier column's size.
  #
  # `cnd` is the caught condition; this method reads `index_group`,
  # `actual_size` and `expected_size` from `cnd$dplyr_error_data`.
  # `...` is unused here.
  error_data <- cnd$dplyr_error_data

  # NOTE: `actual_size`, `expected_size` and `error_name` are interpolated
  # *by name* in the cli templates below, so these locals must keep exactly
  # these names.
  actual_size <- error_data$actual_size
  expected_size <- error_data$expected_size
  error_name <- ctxt_error_label(peek_error_context())

  # FIXME: So that cnd_bullet_cur_group_label() correctly reports the
  # faulty group
  mask <- peek_mask()
  mask$set_current_group(error_data$index_group)

  header <- cli::format_inline(
    "{.code {error_name}} must be size {or_1(expected_size)}, not {actual_size}."
  )
  detail <- cli::format_inline(
    "An earlier column had size {expected_size}."
  )

  c(header, i = detail)
}
432450
@@ -464,21 +482,3 @@ summarise_inform <- function(..., .env = parent.frame()) {
464482 ' . You can override using the `.groups` argument.'
465483 ))
466484}
467-
468- summarise_deprecate_variable_size <- function (
469- env = caller_env(),
470- user_env = caller_env(2 )
471- ) {
472- lifecycle :: deprecate_warn(
473- when = " 1.1.0" ,
474- what = I(" Returning more (or less) than 1 row per `summarise()` group" ),
475- with = " reframe()" ,
476- details = paste0(
477- " When switching from `summarise()` to `reframe()`, remember that " ,
478- " `reframe()` always returns an ungrouped data frame and adjust accordingly."
479- ),
480- env = env ,
481- user_env = user_env ,
482- always = TRUE
483- )
484- }
0 commit comments