Skip to content

Commit c22e0f6

Browse files
committed
Make evaluation and expansion cases more consistent
And add a battery of tests to ensure we don't regress on this consistency
1 parent 3aecb7c commit c22e0f6

File tree

6 files changed

+1038
-121
lines changed

6 files changed

+1038
-121
lines changed

NEWS.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
# dplyr (development version)
22

3-
* `if_any()` now correctly returns `FALSE` when called without inputs, matching the behavior of `any()` (#7077).
3+
* `if_any()` and `if_all()` are now more consistent in all use cases (#7059, #7077, #7746, @jrwinget). In particular:
44

5-
* `if_any()` and `if_all()` now properly return logical vectors rather than the column itself when called with a single input (#7746).
5+
* When called with zero inputs, `if_any()` returns `FALSE` and `if_all()` returns `TRUE`.
6+
7+
* When called with one input, both now return logical vectors rather than the original column.
8+
9+
* The result of applying `.fns` now must be a logical vector.
610

711
* Empty `rowwise()` list-column elements now resolve to `logical()` rather than a random logical of length 1 (#7710).
812

@@ -101,10 +105,6 @@
101105
* Fixed an issue where duckplyr's ALTREP data frames were being materialized
102106
early due to internal usage of `ncol()` (#7049).
103107

104-
* `if_any()` and `if_all()` are now fully consistent with `any()` and `all()`.
105-
In particular, when called with empty inputs `if_any()` returns `FALSE` and
106-
`if_all()` returns `TRUE` (#7059, @jrwinget).
107-
108108
# dplyr 1.1.4
109109

110110
* `join_by()` now allows its helper functions to be namespaced with `dplyr::`,

R/across.R

Lines changed: 97 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -349,17 +349,71 @@ across <- function(.cols, .fns, ..., .names = NULL, .unpack = FALSE) {
349349
if_any <- function(.cols, .fns, ..., .names = NULL) {
350350
context_local("across_if_fn", "if_any")
351351
context_local("across_frame", current_env())
352-
if_across("any", across({{ .cols }}, .fns, ..., .names = .names))
352+
df <- across({{ .cols }}, .fns, ..., .names = .names)
353+
x <- dplyr_new_list(df)
354+
size <- vec_size(df)
355+
dplyr_list_pany(x, size = size)
353356
}
357+
354358
#' @rdname across
355359
#' @export
356360
if_all <- function(.cols, .fns, ..., .names = NULL) {
357361
context_local("across_if_fn", "if_all")
358362
context_local("across_frame", current_env())
359-
if_across("all", across({{ .cols }}, .fns, ..., .names = .names))
363+
df <- across({{ .cols }}, .fns, ..., .names = .names)
364+
x <- dplyr_new_list(df)
365+
size <- vec_size(df)
366+
dplyr_list_pall(x, size = size)
360367
}
361368

362-
if_across <- function(variant, df) {
369+
dplyr_list_pall <- function(
370+
x,
371+
...,
372+
size = NULL,
373+
error_call = caller_env()
374+
) {
375+
dplyr_list_pany_pall(x, "all", ..., size = size, error_call = error_call)
376+
}
377+
378+
dplyr_list_pany <- function(
379+
x,
380+
...,
381+
size = NULL,
382+
error_call = caller_env()
383+
) {
384+
dplyr_list_pany_pall(x, "any", ..., size = size, error_call = error_call)
385+
}
386+
387+
dplyr_list_pany_pall <- function(
388+
x,
389+
variant,
390+
...,
391+
size = NULL,
392+
error_call = caller_env()
393+
) {
394+
check_dots_empty0(...)
395+
396+
obj_check_list(x, arg = "", call = error_call)
397+
398+
# Doesn't allow `NULL`, doesn't do type casting
399+
for (i in seq_along(x)) {
400+
check_logical(
401+
x[[i]],
402+
arg = (vec_names(x) %||% paste0("..", seq_len(vec_size(x))))[[i]],
403+
call = error_call
404+
)
405+
}
406+
407+
# Doesn't do recycling, asserts that all inputs are the same size
408+
if (is.null(size)) {
409+
if (length(x) == 0L) {
410+
size <- 0L
411+
} else {
412+
size <- vec_size(x[[1L]])
413+
}
414+
}
415+
list_check_all_size(x, size, arg = "", call = error_call)
416+
363417
switch(
364418
variant,
365419
any = {
@@ -373,9 +427,9 @@ if_across <- function(variant, df) {
373427
abort("Unreachable", .internal = TRUE)
374428
)
375429

376-
init <- vec_rep(init, times = vec_size(df))
430+
init <- vec_rep(init, times = size)
377431

378-
reduce(df, op, .init = init)
432+
reduce(x, op, .init = init)
379433
}
380434

381435
#' Combine values from multiple columns
@@ -610,79 +664,68 @@ dplyr_quosures <- function(...) {
610664
quosures
611665
}
612666

613-
# When mutate() or summarise() have an unnamed call to across() at the top level, e.g.
614-
# summarise(across(<...>)) or mutate(across(<...>))
615-
#
616-
# a call to top_across(<...>) is evaluated instead.
617-
# top_across() returns a flattened list of expressions along with some
618-
# information about the "current column" for each expression
619-
# in the "columns" attribute:
620-
#
621-
# For example with: summarise(across(c(x, y), mean, .names = "mean_{.col}")) top_across() will return
622-
# something like:
623-
#
624-
# structure(
625-
# list(mean_x = expr(mean(x)), mean_y = expr(mean(y)))
626-
# columns = c("x", "y")
627-
# )
628-
629-
# Technically this always returns a single quosure but we wrap it in a
630-
# list to follow the pattern in `expand_across()`
667+
# Always guaranteed to be 1 quosure in, 1 quosure out, unlike `expand_across()`
631668
expand_if_across <- function(quo) {
632-
quo_data <- attr(quo, "dplyr:::data")
633-
if (!quo_is_call(quo, c("if_any", "if_all"), ns = c("", "dplyr"))) {
634-
return(list(quo))
669+
if (quo_is_call(quo, "if_any", ns = c("", "dplyr"))) {
670+
variant <- "any"
671+
} else if (quo_is_call(quo, "if_all", ns = c("", "dplyr"))) {
672+
variant <- "all"
673+
} else {
674+
# Refuse to expand
675+
return(quo)
635676
}
636677

678+
# `definition` is the same between the two for the purposes of `match.call()`
679+
definition <- if_any
680+
637681
call <- match.call(
638-
definition = if_any,
682+
definition = definition,
639683
call = quo_get_expr(quo),
640684
expand.dots = FALSE,
641685
envir = quo_get_env(quo)
642686
)
687+
643688
if (!is_null(call$...)) {
644-
return(list(quo))
689+
# Refuse to expand
690+
return(quo)
645691
}
646692

647-
if (is_call(call, "if_any")) {
648-
op <- "|"
693+
if (variant == "any") {
649694
if_fn <- "if_any"
650-
empty <- FALSE
695+
dplyr_fn <- "dplyr_list_pany"
651696
} else {
652-
op <- "&"
653697
if_fn <- "if_all"
654-
empty <- TRUE
698+
dplyr_fn <- "dplyr_list_pall"
655699
}
656700

701+
# `expand_across()` will always expand at this point given that we bailed on
702+
# `...` usage early on, which is the only case that would stop expansion.
703+
#
704+
# Set frame here for backtrace truncation. But override error call via
705+
# `local_error_call()` so it refers to the function we're expanding, e.g.
706+
# `if_any()` and not `expand_if_across()`.
657707
context_local("across_if_fn", if_fn)
658-
659-
# Set frame here for backtrace truncation. But override error call
660-
# via `local_error_call()` so it refers to the function we're
661-
# expanding, e.g. `if_any()` and not `expand_if_across()`.
662708
context_local("across_frame", current_env())
663709
local_error_call(call(if_fn))
664-
665710
call[[1]] <- quote(across)
666711
quos <- expand_across(quo_set_expr(quo, call))
667712

668-
# Select all rows if there are no inputs for if_all(),
669-
# but select no rows if there are no inputs for if_any().
670-
if (!length(quos)) {
671-
return(list(quo(!!empty)))
672-
}
713+
expr <- expr({
714+
ns <- asNamespace("dplyr")
673715

674-
combine <- function(x, y) {
675-
if (is_null(x)) {
676-
y
677-
} else {
678-
call(op, x, y)
679-
}
680-
}
681-
expr <- reduce(quos, combine, .init = NULL)
716+
x <- list(!!!quos)
717+
718+
# In the evaluation path, `across()` automatically recycles to common size,
719+
# so we must here as well for compatibility. `across()` also returns a 0
720+
# col, 1 row data frame in the case of no inputs so that it will recycle to
721+
# the group size, which we also do here.
722+
size <- ns[["dplyr_list_size_common"]](x, absent = 1L, call = call(!!if_fn))
723+
x <- ns[["dplyr_list_recycle_common"]](x, size = size, call = call(!!if_fn))
724+
725+
ns[[!!dplyr_fn]](x, size = size, error_call = call(!!if_fn))
726+
})
682727

683-
# Use `as_quosure()` instead of `new_quosure()` to avoid rewrapping
684-
# quosure in case of single input
685-
list(as_quosure(expr, env = baseenv()))
728+
new_quosure(expr, env = baseenv())
686729
}
687730

688731
expand_across <- function(quo) {

R/filter.R

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,16 @@ filter_rows <- function(
147147
mask <- DataMask$new(data, by, "filter", error_call = error_call)
148148
on.exit(mask$forget(), add = TRUE)
149149

150-
dots <- filter_expand(dots, mask = mask, error_call = error_call)
151-
filter_eval(dots, mask = mask, error_call = error_call, user_env = user_env)
150+
# 1:1 mapping between `dots` and `dots_expanded`
151+
dots_expanded <- filter_expand(dots, mask = mask, error_call = error_call)
152+
153+
filter_eval(
154+
dots = dots,
155+
dots_expanded = dots_expanded,
156+
mask = mask,
157+
error_call = error_call,
158+
user_env = user_env
159+
)
152160
}
153161

154162
check_filter <- function(dots, error_call = caller_env()) {
@@ -174,6 +182,7 @@ check_filter <- function(dots, error_call = caller_env()) {
174182

175183
filter_expand <- function(dots, mask, error_call = caller_env()) {
176184
env_filter <- env()
185+
177186
filter_expand_one <- function(dot, index) {
178187
env_filter$current_expression <- index
179188
dot <- expand_pick(dot, mask)
@@ -190,13 +199,15 @@ filter_expand <- function(dots, mask, error_call = caller_env()) {
190199
}
191200
)
192201

193-
dots <- list_flatten(dots)
194-
195202
new_quosures(dots)
196203
}
197204

205+
# We evaluate `dots_expanded` but report errors relative to `dots` so that
206+
# we show "In argument: `if_any(c(x, y), is.na)`" rather than its expanded form.
207+
# This works because `dots` and `dots_expanded` have a 1:1 mapping.
198208
filter_eval <- function(
199209
dots,
210+
dots_expanded,
200211
mask,
201212
error_call = caller_env(),
202213
user_env = caller_env(2)
@@ -218,7 +229,7 @@ filter_eval <- function(
218229
)
219230

220231
out <- withCallingHandlers(
221-
mask$eval_all_filter(dots, env_filter),
232+
mask$eval_all_filter(dots_expanded, env_filter),
222233
error = dplyr_error_handler(
223234
dots = dots,
224235
mask = mask,

R/vctrs.R

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,28 @@ dplyr_vec_ptype_common <- function(chunks, name) {
2121
error = common_handler(name)
2222
)
2323
}
24+
25+
# Version of `vec_size_common()` that takes a list.
26+
# Useful for delaying `!!!` when used within an `expr()` call.
27+
dplyr_list_size_common <- function(
28+
x,
29+
...,
30+
size = NULL,
31+
absent = 0L,
32+
call = caller_env()
33+
) {
34+
check_dots_empty0(...)
35+
vec_size_common(!!!x, .size = size, .absent = absent, .call = call)
36+
}
37+
38+
# Version of `vec_recycle_common()` that takes a list.
39+
# Useful for delaying `!!!` when used within an `expr()` call.
40+
dplyr_list_recycle_common <- function(
41+
x,
42+
...,
43+
size = NULL,
44+
call = caller_env()
45+
) {
46+
check_dots_empty0(...)
47+
vec_recycle_common(!!!x, .size = size, .call = call)
48+
}

0 commit comments

Comments
 (0)